diff --git a/language/llama2-70b/SUT.py b/language/llama2-70b/SUT.py
index 983e803501..dcff013bd9 100644
--- a/language/llama2-70b/SUT.py
+++ b/language/llama2-70b/SUT.py
@@ -275,6 +275,15 @@ def load_model(self):
         )
         print("Loaded model")
 
+        if torch.cuda.is_available():
+            num_gpus = torch.cuda.device_count()
+            if num_gpus > 1:
+                print(f"Using {num_gpus} GPUs via DataParallel")
+                self.model = torch.nn.DataParallel(self.model)
+                self.model.to("cuda")
+            else:
+                self.model.to(self.device)
+
         self.device = torch.device(self.device)
         if self.device == "cpu":
             self.model = self.model.to(
diff --git a/language/llama2-70b/app_launch.sh b/language/llama2-70b/app_launch.sh
new file mode 100644
index 0000000000..ba6df9daeb
--- /dev/null
+++ b/language/llama2-70b/app_launch.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+MLCOMMONS_REPO_PATH="$(dirname "$(dirname "$PWD")")"
+
+# Add any volume mounts here with the following syntax
+# /path/to/src:/path/to/dir/in/container
+MOUNTS=(
+    $MLCOMMONS_REPO_PATH:$MLCOMMONS_REPO_PATH
+    /share:/share
+)
+
+# Set up docker environment file for current user
+CI_BUILD_USER=$(id -u -n)
+CI_BUILD_UID=$(id -u)
+CI_BUILD_GROUP=$(id -g -n)
+CI_BUILD_GID=$(id -g)
+
+
+# Build container
+apptainer build llm_gpubringup.sif llm_gpubringup.def
+
+# Build mount flags
+declare -a MOUNT_FLAGS
+for _mount in ${MOUNTS[@]}; do
+    _split=($(echo $_mount | tr ':' '\n'))
+    MOUNT_FLAGS+=("--bind" "${_split[0]}:${_split[1]}")
+done
+
+set -x
+sudo apptainer exec --nv --ipc --writable-tmpfs \
+    --pwd $PWD \
+    "${MOUNT_FLAGS[@]}" \
+    --env CI_BUILD_USER=$CI_BUILD_USER \
+    --env CI_BUILD_UID=$CI_BUILD_UID \
+    --env CI_BUILD_GROUP=$CI_BUILD_GROUP \
+    --env CI_BUILD_GID=$CI_BUILD_GID \
+    llm_gpubringup.sif \
+    bash ./with_the_same_user
diff --git a/language/llama2-70b/build.sh b/language/llama2-70b/build.sh
index 87afb992fa..06277dc9d2 100644
--- a/language/llama2-70b/build.sh
+++ b/language/llama2-70b/build.sh
@@ -1,7 +1,7 @@
 set -e
-conda install pybind11==2.10.4 -c conda-forge -y
-conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
+conda install pybind11==2.10.4 -y
+conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly
 python -m pip install transformers==4.31.0 nltk==3.8.1 evaluate==0.4.0 absl-py==1.4.0 rouge-score==0.1.2 sentencepiece==0.1.99 accelerate==0.21.0
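
For reference, a minimal standalone sketch of the multi-GPU placement pattern the SUT.py hunk above introduces; the helper name and toy model are illustrative only. One practical caveat with this pattern is that torch.nn.DataParallel only dispatches forward(), so Hugging Face-specific methods such as generate() have to be reached through the wrapper's .module attribute.

import torch
import torch.nn as nn


def place_on_devices(model: nn.Module, device: str = "cpu") -> nn.Module:
    """Sketch of the device-placement logic added to SUT.load_model.

    Assumption: callers that need model-specific methods (e.g. generate)
    unwrap a DataParallel model via `model.module`.
    """
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs via DataParallel")
        model = nn.DataParallel(model)
        model.to("cuda")
    else:
        model.to(device if torch.cuda.is_available() else "cpu")
    return model


if __name__ == "__main__":
    # Tiny stand-in module; the benchmark itself loads LlamaForCausalLM instead.
    toy = place_on_devices(nn.Linear(16, 16), device="cuda:0")
    print(type(toy).__name__)
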
diff --git a/language/llama2-70b/exec_app.sh b/language/llama2-70b/exec_app.sh
new file mode 100644
index 0000000000..32af46bd57
--- /dev/null
+++ b/language/llama2-70b/exec_app.sh
@@ -0,0 +1,35 @@
+MLCOMMONS_REPO_PATH="$(dirname "$(dirname "$PWD")")"
+
+# Add any volume mounts here with the following syntax
+# /path/to/src:/path/to/dir/in/container
+MOUNTS=(
+    $MLCOMMONS_REPO_PATH:$MLCOMMONS_REPO_PATH
+    /share:/share
+    /usr/bin/srun:/usr/bin/srun
+    /usr/bin/sinfo:/usr/bin/sinfo
+    /share/software/spack/opt/spack/linux-rocky8-zen/gcc-8.5.0/slurm-23-11-1-1-yh4vs4sr7xks2nbzffs2hdwe7pqfovsg:/opt/slurm
+    /var/spool/slurm/d/conf-cache:/var/spool/slurm/d/conf-cache
+)
+
+CI_BUILD_USER=$(id -u -n)
+CI_BUILD_UID=$(id -u)
+CI_BUILD_GROUP=$(id -g -n)
+CI_BUILD_GID=$(id -g)
+
+# Build mount flags
+declare -a MOUNT_FLAGS
+for _mount in ${MOUNTS[@]}; do
+    _split=($(echo $_mount | tr ':' '\n'))
+    MOUNT_FLAGS+=("--bind" "${_split[0]}:${_split[1]}")
+done
+
+set -x
+apptainer exec --nv --ipc --writable-tmpfs \
+    --pwd $PWD \
+    "${MOUNT_FLAGS[@]}" \
+    --env CI_BUILD_USER=$CI_BUILD_USER \
+    --env CI_BUILD_UID=$CI_BUILD_UID \
+    --env CI_BUILD_GROUP=$CI_BUILD_GROUP \
+    --env CI_BUILD_GID=$CI_BUILD_GID \
+    llm_gpubringup.sif \
+    bash ./with_the_same_user
diff --git a/language/llama2-70b/llm_gpubringup.def b/language/llama2-70b/llm_gpubringup.def
new file mode 100644
index 0000000000..22307827d1
--- /dev/null
+++ b/language/llama2-70b/llm_gpubringup.def
@@ -0,0 +1,48 @@
+Bootstrap: docker
+From: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+
+%environment
+    export LC_ALL=C.UTF-8
+    export LANG=C.UTF-8
+    export TZ=US/Pacific
+    export DEBIAN_FRONTEND=noninteractive
+    export PATH=$PATH:/opt/miniconda3/bin
+
+%post
+    # Use bash
+    SHELL=/bin/bash
+
+    echo "Setting timezone..."
+    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+    echo "Cleaning apt lists and sources..."
+    rm -rf /var/lib/apt/lists/* && rm -f /etc/apt/sources.list.d/*
+
+    echo "Updating apt and installing base packages..."
+    apt-get update && apt-get install -y --no-install-recommends \
+        build-essential autoconf libtool git ccache curl wget pkg-config \
+        sudo ca-certificates automake libssl-dev bc python3-dev python3-pip \
+        google-perftools gdb libglib2.0-dev clang sshfs libre2-dev libboost-dev \
+        libnuma-dev numactl sysstat sshpass ntpdate less iputils-ping rsync \
+        pkg-config zip g++ zlib1g-dev unzip libarchive-dev
+
+    # Remove unneeded packages
+    apt-get -y autoremove
+    apt-get remove -y cmake
+
+    echo "Upgrading pip and setuptools..."
+    python3 -m pip install --upgrade pip setuptools wheel virtualenv
+
+    echo "Installing Miniconda..."
+    cd /tmp
+    wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86_64.sh
+    bash Miniconda3-py310_23.5.2-0-Linux-x86_64.sh -b -p /opt/miniconda3
+    chmod -R 777 /opt/miniconda3
+
+    echo "Creating conda environment llama2-70b..."
+    /opt/miniconda3/bin/conda create -n llama2-70b python=3.10
+
+%runscript
+    echo "Container built successfully. Use '--nv' for GPU support."
+    exec "$@"
+
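
The definition file above only creates the llama2-70b conda environment; PyTorch is installed into it later by build.sh. Assuming that step has run, a quick hypothetical smoke test such as the following (script name and paths are assumptions, not part of the patch) can confirm the container was launched with --nv and the GPUs are visible:

# gpu_smoke_test.py -- hypothetical check, not part of this patch.
# Example invocation (paths are assumptions):
#   apptainer exec --nv llm_gpubringup.sif \
#       /opt/miniconda3/envs/llama2-70b/bin/python gpu_smoke_test.py
import torch


def main() -> None:
    print("CUDA available:", torch.cuda.is_available())
    for idx in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(idx)
        print(f"GPU {idx}: {props.name}, {props.total_memory / 2**30:.1f} GiB")


if __name__ == "__main__":
    main()
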
diff --git a/language/llama2-70b/performance_benchmark.sh b/language/llama2-70b/performance_benchmark.sh
new file mode 100644
index 0000000000..abc710dedd
--- /dev/null
+++ b/language/llama2-70b/performance_benchmark.sh
@@ -0,0 +1,11 @@
+python3 -u main.py --scenario Offline --vllm \
+    --model-path /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
+    --user-conf user.conf \
+    --num-workers 4 \
+    --total-sample-count 24576 \
+    --dataset-path /share/mlperf_sets/data/validation/llama-2-70b-open-orca-dataset.uri/open_orca_gpt4_tokenized_llama.sampled_24576.pkl \
+    --output-log-dir offline-logs \
+    --dtype float32 \
+    --api-server http://127.0.0.1:8000 \
+    --api-model-name /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
+    --device cuda:0 2>&1 | tee offline_performance_log.log
diff --git a/language/llama2-70b/run_accuracy.sh b/language/llama2-70b/run_accuracy.sh
index b4f7f8ad96..a78e841b10 100644
--- a/language/llama2-70b/run_accuracy.sh
+++ b/language/llama2-70b/run_accuracy.sh
@@ -1,17 +1,19 @@
-CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}"
-DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}"
+CHECKPOINT_PATH=/share/mlperf_sets/model/llama-2-70b-chat-hf.uri
+DATASET_PATH=/share/mlperf_sets/data/validation/llama-2-70b-open-orca-dataset.uri/open_orca_gpt4_tokenized_llama.sampled_24576.pkl
 
 mkdir -p "run_outputs"
 
-python3 -u main.py --scenario Offline \
+python3 -u main.py --scenario Offline --vllm \
         --model-path ${CHECKPOINT_PATH} \
         --accuracy \
-        --mlperf-conf mlperf.conf \
         --user-conf user.conf \
         --total-sample-count 24576 \
         --dataset-path ${DATASET_PATH} \
+        --num-workers 4 \
         --output-log-dir offline_accuracy_loadgen_logs \
         --dtype float32 \
+        --api-server http://127.0.0.1:8000 \
+        --api-model-name ${CHECKPOINT_PATH} \
         --device cuda:0 2>&1 | tee offline_accuracy_log.log
 
 python3 evaluate-accuracy.py --checkpoint-path ${CHECKPOINT_PATH} \
diff --git a/language/llama2-70b/vllm.sh b/language/llama2-70b/vllm.sh
new file mode 100644
index 0000000000..fe8b8f8b35
--- /dev/null
+++ b/language/llama2-70b/vllm.sh
@@ -0,0 +1,3 @@
+CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m vllm.entrypoints.openai.api_server \
+    --model /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
+    --tensor-parallel-size 4
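
The run scripts above point --api-server at the OpenAI-compatible endpoint served by vllm.sh. A minimal sketch of a direct request against that endpoint can be useful for verifying the server is up before launching the benchmark; the script name, prompt, and token limit are placeholders, and the model field must match the --model value passed to vLLM.

# check_vllm_server.py -- hypothetical smoke test, not part of this patch.
import json
import urllib.request

API_URL = "http://127.0.0.1:8000/v1/completions"
MODEL = "/share/mlperf_sets/model/llama-2-70b-chat-hf.uri"  # same as --model in vllm.sh

payload = {
    "model": MODEL,
    "prompt": "Summarize the MLPerf Llama-2-70B benchmark in one sentence.",
    "max_tokens": 32,
    "temperature": 0.0,
}

request = urllib.request.Request(
    API_URL,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    body = json.load(response)
    print(body["choices"][0]["text"])
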