diff --git a/language/llama2-70b/SUT.py b/language/llama2-70b/SUT.py
index 983e803501..dcff013bd9 100644
--- a/language/llama2-70b/SUT.py
+++ b/language/llama2-70b/SUT.py
@@ -275,6 +275,15 @@ def load_model(self):
         )
         print("Loaded model")
 
+        if torch.cuda.is_available():
+            num_gpus = torch.cuda.device_count()
+            if num_gpus > 1:
+                print(f"Using {num_gpus} GPUs via DataParallel")
+                self.model = torch.nn.DataParallel(self.model)
+                self.model.to("cuda")
+            else:
+                self.model.to(self.device)
+
         self.device = torch.device(self.device)
         if self.device == "cpu":
             self.model = self.model.to(
diff --git a/language/llama2-70b/app_launch.sh b/language/llama2-70b/app_launch.sh
new file mode 100644
index 0000000000..ba6df9daeb
--- /dev/null
+++ b/language/llama2-70b/app_launch.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+MLCOMMONS_REPO_PATH="$(dirname "$(dirname "$PWD")")"
+
+# Add any volume mounts here with the following syntax
+# /path/to/src:/path/to/dir/in/container
+MOUNTS=(
+    $MLCOMMONS_REPO_PATH:$MLCOMMONS_REPO_PATH
+    /share:/share
+)
+
+# Set up docker environment file for current user
+CI_BUILD_USER=$(id -u -n)
+CI_BUILD_UID=$(id -u)
+CI_BUILD_GROUP=$(id -g -n)
+CI_BUILD_GID=$(id -g)
+
+
+# Build container
+apptainer build llm_gpubringup.sif llm_gpubringup.def
+
+# Build mount flags
+declare -a MOUNT_FLAGS
+for _mount in ${MOUNTS[@]}; do
+    _split=($(echo $_mount | tr ':' '\n'))
+    MOUNT_FLAGS+=("--bind" "${_split[0]}:${_split[1]}")
+done
+
+set -x
+sudo apptainer exec --nv --ipc --writable-tmpfs \
+    --pwd $PWD \
+    "${MOUNT_FLAGS[@]}" \
+    --env CI_BUILD_USER=$CI_BUILD_USER \
+    --env CI_BUILD_UID=$CI_BUILD_UID \
+    --env CI_BUILD_GROUP=$CI_BUILD_GROUP \
+    --env CI_BUILD_GID=$CI_BUILD_GID \
+    llm_gpubringup.sif \
+    bash ./with_the_same_user
diff --git a/language/llama2-70b/build.sh b/language/llama2-70b/build.sh
index 87afb992fa..06277dc9d2 100644
--- a/language/llama2-70b/build.sh
+++ b/language/llama2-70b/build.sh
@@ -1,7 +1,7 @@
 set -e
-conda install pybind11==2.10.4 -c conda-forge -y
-conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
+conda install pybind11==2.10.4 -y
+conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly
 python -m pip install transformers==4.31.0 nltk==3.8.1 evaluate==0.4.0 absl-py==1.4.0 rouge-score==0.1.2 sentencepiece==0.1.99 accelerate==0.21.0
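
For reference, a minimal standalone sketch of the multi-GPU placement pattern the SUT.py hunk above introduces; the helper name and toy model are illustrative only. One practical caveat with this pattern is that torch.nn.DataParallel only dispatches forward(), so Hugging Face-specific methods such as generate() have to be reached through the wrapper's .module attribute.

import torch
import torch.nn as nn


def place_on_devices(model: nn.Module, device: str = "cpu") -> nn.Module:
    """Sketch of the device-placement logic added to SUT.load_model.

    Assumption: callers that need model-specific methods (e.g. generate)
    unwrap a DataParallel model via `model.module`.
    """
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs via DataParallel")
        model = nn.DataParallel(model)
        model.to("cuda")
    else:
        model.to(device if torch.cuda.is_available() else "cpu")
    return model


if __name__ == "__main__":
    # Tiny stand-in module; the benchmark itself loads LlamaForCausalLM instead.
    toy = place_on_devices(nn.Linear(16, 16), device="cuda:0")
    print(type(toy).__name__)
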
diff --git a/language/llama2-70b/exec_app.sh b/language/llama2-70b/exec_app.sh
new file mode 100644
index 0000000000..32af46bd57
--- /dev/null
+++ b/language/llama2-70b/exec_app.sh
@@ -0,0 +1,35 @@
+MLCOMMONS_REPO_PATH="$(dirname "$(dirname "$PWD")")"
+
+# Add any volume mounts here with the following syntax
+# /path/to/src:/path/to/dir/in/container
+MOUNTS=(
+    $MLCOMMONS_REPO_PATH:$MLCOMMONS_REPO_PATH
+    /share:/share
+    /usr/bin/srun:/usr/bin/srun
+    /usr/bin/sinfo:/usr/bin/sinfo
+    /share/software/spack/opt/spack/linux-rocky8-zen/gcc-8.5.0/slurm-23-11-1-1-yh4vs4sr7xks2nbzffs2hdwe7pqfovsg:/opt/slurm
+    /var/spool/slurm/d/conf-cache:/var/spool/slurm/d/conf-cache
+)
+
+CI_BUILD_USER=$(id -u -n)
+CI_BUILD_UID=$(id -u)
+CI_BUILD_GROUP=$(id -g -n)
+CI_BUILD_GID=$(id -g)
+
+# Build mount flags
+declare -a MOUNT_FLAGS
+for _mount in ${MOUNTS[@]}; do
+    _split=($(echo $_mount | tr ':' '\n'))
+    MOUNT_FLAGS+=("--bind" "${_split[0]}:${_split[1]}")
+done
+
+set -x
+apptainer exec --nv --ipc --writable-tmpfs \
+    --pwd $PWD \
+    "${MOUNT_FLAGS[@]}" \
+    --env CI_BUILD_USER=$CI_BUILD_USER \
+    --env CI_BUILD_UID=$CI_BUILD_UID \
+    --env CI_BUILD_GROUP=$CI_BUILD_GROUP \
+    --env CI_BUILD_GID=$CI_BUILD_GID \
+    llm_gpubringup.sif \
+    bash ./with_the_same_user
diff --git a/language/llama2-70b/llm_gpubringup.def b/language/llama2-70b/llm_gpubringup.def
new file mode 100644
index 0000000000..22307827d1
--- /dev/null
+++ b/language/llama2-70b/llm_gpubringup.def
@@ -0,0 +1,48 @@
+Bootstrap: docker
+From: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+
+%environment
+    export LC_ALL=C.UTF-8
+    export LANG=C.UTF-8
+    export TZ=US/Pacific
+    export DEBIAN_FRONTEND=noninteractive
+    export PATH=$PATH:/opt/miniconda3/bin
+
+%post
+    # Use bash
+    SHELL=/bin/bash
+
+    echo "Setting timezone..."
+    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+    echo "Cleaning apt lists and sources..."
+    rm -rf /var/lib/apt/lists/* && rm -f /etc/apt/sources.list.d/*
+
+    echo "Updating apt and installing base packages..."
+    apt-get update && apt-get install -y --no-install-recommends \
+        build-essential autoconf libtool git ccache curl wget pkg-config \
+        sudo ca-certificates automake libssl-dev bc python3-dev python3-pip \
+        google-perftools gdb libglib2.0-dev clang sshfs libre2-dev libboost-dev \
+        libnuma-dev numactl sysstat sshpass ntpdate less iputils-ping rsync \
+        pkg-config zip g++ zlib1g-dev unzip libarchive-dev
+
+    # Remove unneeded packages
+    apt-get -y autoremove
+    apt-get remove -y cmake
+
+    echo "Upgrading pip and setuptools..."
+    python3 -m pip install --upgrade pip setuptools wheel virtualenv
+
+    echo "Installing Miniconda..."
+    cd /tmp
+    wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86_64.sh
+    bash Miniconda3-py310_23.5.2-0-Linux-x86_64.sh -b -p /opt/miniconda3
+    chmod -R 777 /opt/miniconda3
+
+    echo "Creating conda environment llama2-70b..."
+    /opt/miniconda3/bin/conda create -n llama2-70b python=3.10
+
+%runscript
+    echo "Container built successfully. Use '--nv' for GPU support."
+    exec "$@"
+
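
The definition file above only creates the llama2-70b conda environment; PyTorch is installed into it later by build.sh. Assuming that step has run, a quick hypothetical smoke test such as the following (script name and paths are assumptions, not part of the patch) can confirm the container was launched with --nv and the GPUs are visible:

# gpu_smoke_test.py -- hypothetical check, not part of this patch.
# Example invocation (paths are assumptions):
#   apptainer exec --nv llm_gpubringup.sif \
#       /opt/miniconda3/envs/llama2-70b/bin/python gpu_smoke_test.py
import torch


def main() -> None:
    print("CUDA available:", torch.cuda.is_available())
    for idx in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(idx)
        print(f"GPU {idx}: {props.name}, {props.total_memory / 2**30:.1f} GiB")


if __name__ == "__main__":
    main()
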
diff --git a/language/llama2-70b/performance_benchmark.sh b/language/llama2-70b/performance_benchmark.sh
new file mode 100644
index 0000000000..abc710dedd
--- /dev/null
+++ b/language/llama2-70b/performance_benchmark.sh
@@ -0,0 +1,11 @@
+python3 -u main.py --scenario Offline --vllm \
+    --model-path /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
+    --user-conf user.conf \
+    --num-workers 4 \
+    --total-sample-count 24576 \
+    --dataset-path /share/mlperf_sets/data/validation/llama-2-70b-open-orca-dataset.uri/open_orca_gpt4_tokenized_llama.sampled_24576.pkl \
+    --output-log-dir offline-logs \
+    --dtype float32 \
+    --api-server http://127.0.0.1:8000 \
+    --api-model-name /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
+    --device cuda:0 2>&1 | tee offline_performance_log.log
diff --git a/language/llama2-70b/run_accuracy.sh b/language/llama2-70b/run_accuracy.sh
index b4f7f8ad96..a78e841b10 100644
--- a/language/llama2-70b/run_accuracy.sh
+++ b/language/llama2-70b/run_accuracy.sh
@@ -1,17 +1,19 @@
-CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}"
-DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}"
+CHECKPOINT_PATH=/share/mlperf_sets/model/llama-2-70b-chat-hf.uri
+DATASET_PATH=/share/mlperf_sets/data/validation/llama-2-70b-open-orca-dataset.uri/open_orca_gpt4_tokenized_llama.sampled_24576.pkl
 
 mkdir -p "run_outputs"
 
-python3 -u main.py --scenario Offline \
+python3 -u main.py --scenario Offline --vllm \
         --model-path ${CHECKPOINT_PATH} \
         --accuracy \
-        --mlperf-conf mlperf.conf \
         --user-conf user.conf \
         --total-sample-count 24576 \
         --dataset-path ${DATASET_PATH} \
+        --num-workers 4 \
         --output-log-dir offline_accuracy_loadgen_logs \
         --dtype float32 \
+        --api-server http://127.0.0.1:8000 \
+        --api-model-name ${CHECKPOINT_PATH} \
         --device cuda:0 2>&1 | tee offline_accuracy_log.log
 
 python3 evaluate-accuracy.py --checkpoint-path ${CHECKPOINT_PATH} \
diff --git a/language/llama2-70b/vllm.sh b/language/llama2-70b/vllm.sh
new file mode 100644
index 0000000000..fe8b8f8b35
--- /dev/null
+++ b/language/llama2-70b/vllm.sh
@@ -0,0 +1,3 @@
+CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m vllm.entrypoints.openai.api_server \
+    --model /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
+    --tensor-parallel-size 4
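
The run scripts above point --api-server at the OpenAI-compatible endpoint served by vllm.sh. A minimal sketch of a direct request against that endpoint can be useful for verifying the server is up before launching the benchmark; the script name, prompt, and token limit are placeholders, and the model field must match the --model value passed to vLLM.

# check_vllm_server.py -- hypothetical smoke test, not part of this patch.
import json
import urllib.request

API_URL = "http://127.0.0.1:8000/v1/completions"
MODEL = "/share/mlperf_sets/model/llama-2-70b-chat-hf.uri"  # same as --model in vllm.sh

payload = {
    "model": MODEL,
    "prompt": "Summarize the MLPerf Llama-2-70B benchmark in one sentence.",
    "max_tokens": 32,
    "temperature": 0.0,
}

request = urllib.request.Request(
    API_URL,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    body = json.load(response)
    print(body["choices"][0]["text"])
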