forked from Fadi987/StyleTTS2
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun_setup.sh
More file actions
123 lines (101 loc) · 4.16 KB
/
run_setup.sh
File metadata and controls
123 lines (101 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
# This script installs the Anaconda distribution and speech-related tools, clones the repository,
# creates a conda environment from arabicTTS.yml,
# downloads the train and test splits of the Arabic TTS dataset from Hugging Face (using the
# cache/ directory for dataset files), and saves the audio files in wav_data/ and the metadata in a CSV file.
set -euo pipefail # Exit on error, unset vars, or failed pipes
# ---------------------
# System Update & Package Installation
# ---------------------
# Runs before the installer lookup below: the lookup needs curl, and curl is
# one of the packages installed here.
echo "Updating system and installing required packages..."
apt-get update
apt-get install -y vim less espeak-ng wget curl git

# ---------------------
# Fetch the Latest Anaconda Installer
# ---------------------
echo "Fetching the latest Anaconda installer from https://repo.anaconda.com/archive/ ..."
# Scrape the archive index for Linux x86_64 installer names and keep the
# highest one by version sort.
#   * The build suffix is matched as any number rather than only "-1", so a
#     release published with a different build number is not skipped.
#   * The trailing `|| true` stops `set -e` / `pipefail` from aborting the
#     script silently when curl fails or grep matches nothing; the explicit
#     emptiness check below then reports the problem.
ANACONDA_VER=$(
  curl -fsSL https://repo.anaconda.com/archive/ \
    | grep -Eo 'Anaconda3-[0-9]{4}\.[0-9]{2}-[0-9]+-Linux-x86_64\.sh' \
    | sort -V \
    | tail -n 1
) || true
if [ -z "$ANACONDA_VER" ]; then
  echo "Error: Could not determine the latest Anaconda installer version." >&2
  exit 1
fi
echo "Latest Anaconda installer: ${ANACONDA_VER}"
# ---------------------
# Download and Install Latest Anaconda
# ---------------------
# Installs Anaconda into $HOME/anaconda3 unless that directory already exists,
# then loads conda into this shell and updates it.
# Reads ANACONDA_VER (installer file name) set earlier in the script.
if [ -d "$HOME/anaconda3" ]; then
  echo "Anaconda is already installed at $HOME/anaconda3. Skipping installation..."
else
  echo "Downloading Anaconda installer: ${ANACONDA_VER}"
  if [ -f "${ANACONDA_VER}" ]; then
    echo "Installer ${ANACONDA_VER} already exists. Skipping download."
  else
    # Download under a temporary name and rename only on success, so an
    # interrupted download is never mistaken for a complete installer by the
    # "already exists" check on the next run.
    wget -O "${ANACONDA_VER}.part" "https://repo.anaconda.com/archive/${ANACONDA_VER}"
    mv -- "${ANACONDA_VER}.part" "${ANACONDA_VER}"
  fi
  chmod +x "${ANACONDA_VER}"
  echo "Installing Anaconda to $HOME/anaconda3..."
  # -b: batch (non-interactive) install, -p: installation prefix.
  bash "${ANACONDA_VER}" -b -p "$HOME/anaconda3"
  echo "Anaconda installation complete."
fi

# Make the `conda` command available in this shell.
source "$HOME/anaconda3/etc/profile.d/conda.sh"

# Persist the same setup for future interactive shells. The line is written
# with a literal $HOME and is appended only when it is not already present,
# so re-running the script does not pile up duplicates in ~/.bashrc.
conda_init_line="source \$HOME/anaconda3/etc/profile.d/conda.sh"
if ! grep -qxF -- "$conda_init_line" "$HOME/.bashrc" 2>/dev/null; then
  echo "$conda_init_line" >> "$HOME/.bashrc"
fi

echo "Updating conda to the latest version..."
conda update -n base -c defaults conda -y
# ---------------------
# Repository & Environment Setup
# ---------------------
# Where the project lives, the directory it is checked out into, and the
# conda environment specification file inside it.
REPO_URL="https://github.com/MachineLearning-IIUI/StyleTTS2_Arabic.git"
REPO_DIR="StyleTTS2_Arabic"
ENV_YML="arabicTTS.yml"

# Parameters for the Python script (which is inside the repo)

# Dataset identifier on Hugging Face
DATASET_NAME="NeoBoy/arabic-tts-wav-24k"
# Comma-separated list of dataset splits
SPLITS="train,test"
# Directory used for caching dataset files (should be in .gitignore)
CACHE_DIR="cache"
# Directory where audio files will be saved (should be in .gitignore)
OUTPUT_DIR="wav_data"
# CSV file to store metadata
META_CSV="dataset_metadata.csv"
# ---------------------
# Clone or Force Reset Repository
# ---------------------
# Leaves the shell inside $REPO_DIR with a checkout that matches origin/main.
# Reads REPO_URL and REPO_DIR set earlier in the script.
if [ -e "$REPO_DIR/.git" ]; then
  echo "Repository folder exists. Discarding local changes before updating..."
  cd "$REPO_DIR"
  # Reset all local changes
  git reset --hard HEAD
  # Fetch latest updates and force local branch to match remote
  git fetch origin
  git reset --hard origin/main
elif [ -e "$REPO_DIR" ]; then
  # git looks for a repository in parent directories too, so running
  # `git reset --hard` inside a plain folder that happens to sit within
  # another checkout would wipe changes in that enclosing repository.
  echo "Error: $REPO_DIR exists but is not a git repository; refusing to reset it." >&2
  exit 1
else
  echo "Cloning repository from $REPO_URL..."
  # Name the target directory explicitly so it always matches REPO_DIR.
  git clone "$REPO_URL" "$REPO_DIR"
  cd "$REPO_DIR"
fi
# ---------------------
# Conda Environment Setup
# ---------------------
# Creates the environment described by $ENV_YML if it does not exist yet,
# then activates it. Reads ENV_YML set earlier in the script.

# Take the environment name from the first top-level "name:" line of the YAML
# file. `|| true` keeps `set -e` from aborting silently (e.g. file missing),
# so the check below can print a useful message.
ENV_NAME=$(awk '/^name:/ {print $2; exit}' "$ENV_YML") || true
if [ -z "$ENV_NAME" ]; then
  echo "Error: Unable to determine environment name from $ENV_YML" >&2
  exit 1
fi

# Check for an existing environment explicitly rather than ignoring every
# failure of `conda env create`: a real creation error now stops the script
# instead of being reported as "may already exist".
if conda env list | awk -v name="$ENV_NAME" '$1 == name {found = 1} END {exit !found}'; then
  echo "Conda environment $ENV_NAME already exists. Skipping creation."
else
  echo "Creating the conda environment using $ENV_YML..."
  conda env create -f "$ENV_YML"
  echo "Environment created: $ENV_NAME"
fi

echo "Activating conda environment: $ENV_NAME"
conda activate "$ENV_NAME"
# ---------------------
# Upgrade Pip & Reinstall Dependencies
# ---------------------
# First bring pip itself up to date, then force a fresh install of every
# package listed in requirements.txt (read from the current directory).
printf '%s\n' "Upgrading pip and reinstalling all dependencies..."
pip install -U pip
pip install -U --force-reinstall --requirement requirements.txt
# ---------------------
# Execute the Python Script
# ---------------------
# Runs hfData2WavFiles.py from the current directory, passing the dataset
# settings defined earlier in the script as command-line options.
printf '%s\n' "Executing hfData2WavFiles.py with the provided arguments..."
export_args=(
  --dataset_name "$DATASET_NAME"
  --splits "$SPLITS"
  --cache_dir "$CACHE_DIR"
  --output_dir "$OUTPUT_DIR"
  --meta_csv "$META_CSV"
)
python hfData2WavFiles.py "${export_args[@]}"
printf '%s\n' "Processing complete."