StyleTTS2_Arabic/run_setup.sh at main · MachineLearning-IIUI/StyleTTS2_Arabic · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
# This script installs the Anaconda distribution and speech-related tools, clones the
# repository, creates a conda environment from arabicTTS.yml, and downloads both splits
# of the Arabic TTS dataset from Hugging Face, using the cache/ directory for dataset
# files. It then saves the audio files in the wav_data/ folder and the metadata in a CSV file.

set -euo pipefail   # Exit on error, unset vars, or failed pipes

# ---------------------
# System Update & Package Installation
# ---------------------
# This runs before anything else so that curl, wget and git are guaranteed to be
# present by the time the later steps call them (the installer lookup below needs curl).
echo "Updating system and installing required packages..."
apt-get update
apt-get install -y vim less espeak-ng wget curl git

# ---------------------
# Fetch the Latest Anaconda Installer
# ---------------------
# Scrape the archive index for Linux x86_64 installer names and keep the highest
# version according to `sort -V`. The result is stored in ANACONDA_VER, which the
# download/install step reads.
ANACONDA_ARCHIVE_URL="https://repo.anaconda.com/archive/"
echo "Fetching the latest Anaconda installer from ${ANACONDA_ARCHIVE_URL} ..."
# -f makes curl fail on HTTP errors instead of handing an error page to grep.
# The trailing `|| ANACONDA_VER=""` matters: with `set -e -o pipefail`, a grep that
# matches nothing (or a failed curl) would otherwise abort the script right here,
# before the explicit error message below could ever be printed.
# The build number is matched as [0-9]+ so rebuilt installers (e.g. "-2-") are
# considered too, and the dot in ".sh" is escaped so it only matches a literal dot.
ANACONDA_VER=$(
    curl -fsSL "$ANACONDA_ARCHIVE_URL" \
        | grep -Eo 'Anaconda3-[0-9]{4}\.[0-9]{2}-[0-9]+-Linux-x86_64\.sh' \
        | sort -V \
        | tail -n 1
) || ANACONDA_VER=""
if [ -z "$ANACONDA_VER" ]; then
    echo "Error: Could not determine the latest Anaconda installer version." >&2
    exit 1
fi
echo "Latest Anaconda installer: ${ANACONDA_VER}"

# ---------------------
# Download and Install Latest Anaconda
# ---------------------
# Installs Anaconda into $HOME/anaconda3 unless that directory already exists.
# Reads ANACONDA_VER, the installer file name determined earlier in this script.

if [ -d "$HOME/anaconda3" ]; then
    echo "Anaconda is already installed at $HOME/anaconda3. Skipping installation..."
else
    echo "Downloading Anaconda installer: ${ANACONDA_VER}"
    if [ -f "${ANACONDA_VER}" ]; then
        echo "Installer ${ANACONDA_VER} already exists. Skipping download."
    else
        # Download under a temporary name and rename only after wget succeeds.
        # Otherwise an interrupted transfer would leave a truncated file with the
        # final name, and the next run would skip the download and execute it.
        if ! wget -O "${ANACONDA_VER}.part" "https://repo.anaconda.com/archive/${ANACONDA_VER}"; then
            echo "Error: Failed to download ${ANACONDA_VER}." >&2
            rm -f -- "${ANACONDA_VER}.part"
            exit 1
        fi
        mv -- "${ANACONDA_VER}.part" "${ANACONDA_VER}"
    fi
    chmod +x "${ANACONDA_VER}"

    echo "Installing Anaconda to $HOME/anaconda3..."
    # -b: batch (non-interactive) mode, -p: installation prefix.
    bash "${ANACONDA_VER}" -b -p "$HOME/anaconda3"
    echo "Anaconda installation complete."
fi

# Load conda's shell integration into the current shell so that the `conda`
# commands used further down (update, env create, activate) are available.
source "$HOME/anaconda3/etc/profile.d/conda.sh"

# Register the same hook in ~/.bashrc for future shells, but only if that exact
# line is not already there: this script is meant to be re-runnable, and an
# unconditional append would add one more duplicate line on every run.
# grep's stderr is silenced because ~/.bashrc may not exist yet.
CONDA_INIT_LINE="source \$HOME/anaconda3/etc/profile.d/conda.sh"
if ! grep -qxF -- "$CONDA_INIT_LINE" "$HOME/.bashrc" 2>/dev/null; then
    echo "$CONDA_INIT_LINE" >> "$HOME/.bashrc"
fi

echo "Updating conda to the latest version..."
conda update -n base -c defaults conda -y


# ---------------------
# Repository & Environment Setup
# ---------------------
# Location of the project and the conda environment file it ships.
REPO_URL="https://github.com/MachineLearning-IIUI/StyleTTS2_Arabic.git"
# Checkout directory = last path component of the URL without the ".git" suffix,
# i.e. the directory name `git clone` picks by default.
REPO_DIR="${REPO_URL##*/}"
REPO_DIR="${REPO_DIR%.git}"
ENV_YML="arabicTTS.yml"

# Arguments forwarded to the Python script that lives inside the repo.
# Dataset identifier on Hugging Face:
DATASET_NAME="NeoBoy/arabic-tts-wav-24k"
# Comma-separated list of dataset splits:
SPLITS="train,test"
# Directory used for caching dataset files (should be in .gitignore):
CACHE_DIR="cache"
# Directory where audio files will be saved (should be in .gitignore):
OUTPUT_DIR="wav_data"
# CSV file to store metadata:
META_CSV="dataset_metadata.csv"

# ---------------------
# Clone or Force Reset Repository
# ---------------------
# Either way the script ends up inside $REPO_DIR; an existing checkout is
# forced to match origin/main, a missing one is cloned from $REPO_URL.
if [ -d "$REPO_DIR" ]; then
    echo "Repository folder exists. Discarding local changes before updating..."

    # Throw away modifications to tracked files.
    git -C "$REPO_DIR" reset --hard HEAD

    # Pull down the remote state and point the local branch at it.
    git -C "$REPO_DIR" fetch origin
    git -C "$REPO_DIR" reset --hard origin/main
else
    echo "Cloning repository from $REPO_URL..."
    git clone "$REPO_URL"
fi
cd "$REPO_DIR"


# ---------------------
# Conda Environment Setup
# ---------------------
# Creates the conda environment described by $ENV_YML (only if it does not exist
# yet) and activates it. Any real creation failure stops the script instead of
# being reported as "may already exist".
if [ ! -f "$ENV_YML" ]; then
    echo "Error: Environment file $ENV_YML not found in $(pwd)" >&2
    exit 1
fi

# Read the environment name from the top-level "name:" entry of the YAML file.
# A single awk is used instead of `grep | awk`: awk exits 0 when nothing matches,
# so under `set -e -o pipefail` the explicit check below is what reports the error.
ENV_NAME=$(awk '/^name:/ { print $2; exit }' "$ENV_YML")
if [ -z "$ENV_NAME" ]; then
    echo "Error: Unable to determine environment name from $ENV_YML" >&2
    exit 1
fi

# Compare the first column of `conda env list` against the name. awk consumes
# all of its input, so the upstream command is never cut off by a closed pipe.
if conda env list | awk -v name="$ENV_NAME" '$1 == name { found = 1 } END { exit !found }'; then
    echo "Conda environment $ENV_NAME already exists. Skipping creation."
else
    echo "Creating the conda environment using $ENV_YML..."
    if ! conda env create -f "$ENV_YML"; then
        echo "Error: Failed to create conda environment from $ENV_YML" >&2
        exit 1
    fi
    echo "Environment created: $ENV_NAME"
fi

echo "Activating conda environment: $ENV_NAME"
conda activate "$ENV_NAME"

# ---------------------
# Upgrade Pip & Reinstall Dependencies
# ---------------------
# First bring pip itself up to date, then reinstall everything listed in
# requirements.txt, upgrading and overwriting whatever is already installed.
pip_reinstall_flags=(--upgrade --force-reinstall)

echo "Upgrading pip and reinstalling all dependencies..."
pip install --upgrade pip
pip install "${pip_reinstall_flags[@]}" -r requirements.txt


# ---------------------
# Execute the Python Script
# ---------------------
# Collect the command-line arguments in an array, then hand them to
# hfData2WavFiles.py in one go; each value stays a single word.
hf_export_args=(
  --dataset_name "$DATASET_NAME"
  --splits "$SPLITS"
  --cache_dir "$CACHE_DIR"
  --output_dir "$OUTPUT_DIR"
  --meta_csv "$META_CSV"
)

echo "Executing hfData2WavFiles.py with the provided arguments..."
python hfData2WavFiles.py "${hf_export_args[@]}"

echo "Processing complete."