Skip to content

Commit 0a036ed

Browse files
Add support for OpenShift 14.04
Signed-off-by: Karthik Vetrivel <[email protected]>
1 parent ba7e6de commit 0a036ed

File tree

3 files changed

+233
-11
lines changed

3 files changed

+233
-11
lines changed

rhel9/nvidia-driver

100755100644
Lines changed: 159 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ PID_FILE=${RUN_DIR}/${0##*/}.pid
88
DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"}
99
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
1010
NUM_VGPU_DEVICES=0
11+
GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}"
12+
USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1113
NVIDIA_MODULE_PARAMS=()
1214
NVIDIA_UVM_MODULE_PARAMS=()
1315
NVIDIA_MODESET_MODULE_PARAMS=()
1416
NVIDIA_PEERMEM_MODULE_PARAMS=()
1517
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
16-
USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1718
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1819
RHEL_VERSION=${RHEL_VERSION:-""}
1920
RHEL_MAJOR_VERSION=9
@@ -211,7 +212,10 @@ _create_driver_package() (
211212
local nvidia_modeset_sign_args=""
212213
local nvidia_uvm_sign_args=""
213214

214-
trap "make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/build clean > /dev/null" EXIT
215+
# Skip cleanup trap for DTK builds - modules are copied after this function returns
216+
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
217+
trap "make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/build clean > /dev/null" EXIT
218+
fi
215219

216220
echo "Compiling NVIDIA driver kernel modules..."
217221
cd /usr/src/nvidia-${DRIVER_VERSION}/${KERNEL_TYPE}
@@ -566,11 +570,7 @@ _install_driver() {
566570
install_args+=("--skip-module-load")
567571
fi
568572

569-
IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}
570-
# May need to add no-cc-check for Rhel, otherwise it complains about cc missing in path
571-
# /proc/version and lib/modules/KERNEL_VERSION/proc are different, by default installer looks at /proc/ so, added the proc-mount-point
572-
# TODO: remove the -a flag. its not needed. in the new driver version, license-acceptance is implicit
573-
#nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check --no-cc-version-check --proc-mount-point /lib/modules/${KERNEL_VERSION}/proc ${install_args[@]+"${install_args[@]}"}
573+
IGNORE_CC_MISMATCH=1 nvidia-installer --silent --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}
574574
}
575575

576576
# Mount the driver rootfs into the run directory with the exception of sysfs.
@@ -701,6 +701,114 @@ _start_vgpu_topology_daemon() {
701701
nvidia-topologyd
702702
}
703703

704+
# Make sure the NVIDIA persistence daemon is running, starting it if needed.
#
# Arguments:
#   $1 - optional PID file to probe; defaults to
#        /var/run/nvidia-persistenced/nvidia-persistenced.pid
# Outputs:
#   A notice on stdout when the nvidia-persistenced binary is not available.
# Returns:
#   0 on every path: a failed daemon start is deliberately ignored so the
#   caller can continue without persistence mode.
_ensure_persistence() {
    local pid_file="${1:-/var/run/nvidia-persistenced/nvidia-persistenced.pid}"
    local pid=""

    # "$(<file 2>/dev/null)" is NOT the "$(<file)" shortcut: with a second
    # redirection bash runs an empty command, so the substitution is always
    # empty and the daemon looked dead on every call. Keep the substitution
    # to the single-redirection form and silence errors on the enclosing group.
    { pid=$(<"${pid_file}"); } 2>/dev/null || pid=""

    # kill -0 only probes whether the recorded process still exists.
    if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
        return 0
    fi

    if command -v nvidia-persistenced >/dev/null 2>&1; then
        # Best effort: do not abort the caller if the daemon refuses to start.
        nvidia-persistenced --persistence-mode || true
    else
        echo "nvidia-persistenced not found; continuing without persistence"
    fi
}
716+
717+
# Print a textual fingerprint of the current driver configuration.
#
# The output is compared verbatim against the stored state file by
# _should_use_fast_path, so the layout (field order, the trailing space left
# by flattening newlines, one field per line) must not change.
#
# Globals (read):
#   DRIVER_VERSION, GPU_DIRECT_RDMA_ENABLED, USE_HOST_MOFED, KERNEL_MODULE_TYPE
# Arguments:
#   $1 - optional directory holding the nvidia*.conf module parameter files;
#        defaults to /drivers
# Outputs:
#   The configuration fingerprint on stdout.
_build_driver_config() {
    local conf_dir="${1:-/drivers}"
    local nvidia_params="" nvidia_uvm_params="" nvidia_modeset_params="" nvidia_peermem_params=""
    # conf_file is declared local so the loop below does not leak it to callers.
    local conf_file config

    # Read module parameters from conf files, flattening newlines to spaces.
    if [ -f "${conf_dir}/nvidia.conf" ]; then
        nvidia_params=$(tr '\n' ' ' <"${conf_dir}/nvidia.conf")
    fi
    if [ -f "${conf_dir}/nvidia-uvm.conf" ]; then
        nvidia_uvm_params=$(tr '\n' ' ' <"${conf_dir}/nvidia-uvm.conf")
    fi
    if [ -f "${conf_dir}/nvidia-modeset.conf" ]; then
        nvidia_modeset_params=$(tr '\n' ' ' <"${conf_dir}/nvidia-modeset.conf")
    fi
    if [ -f "${conf_dir}/nvidia-peermem.conf" ]; then
        nvidia_peermem_params=$(tr '\n' ' ' <"${conf_dir}/nvidia-peermem.conf")
    fi

    config="DRIVER_VERSION=${DRIVER_VERSION}
KERNEL_VERSION=$(uname -r)
GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED:-false}
USE_HOST_MOFED=${USE_HOST_MOFED:-false}
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
NVIDIA_MODULE_PARAMS=${nvidia_params}
NVIDIA_UVM_MODULE_PARAMS=${nvidia_uvm_params}
NVIDIA_MODESET_MODULE_PARAMS=${nvidia_modeset_params}
NVIDIA_PEERMEM_MODULE_PARAMS=${nvidia_peermem_params}"

    # Append config file contents directly (trailing newlines are dropped by
    # the command substitution, exactly as with the previous cat call).
    for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
        if [ -f "${conf_dir}/${conf_file}" ]; then
            config="${config}
$(<"${conf_dir}/${conf_file}")"
        fi
    done

    printf '%s\n' "${config}"
}
754+
755+
_store_driver_config() {
756+
local config_file="/run/nvidia/driver-config.state"
757+
echo "Storing driver configuration state..."
758+
_build_driver_config > "$config_file"
759+
echo "Driver configuration stored at $config_file"
760+
}
761+
762+
_should_use_fast_path() {
763+
[ -f /sys/module/nvidia/refcnt ] && [ -f /run/nvidia/driver-config.state ] || return 1
764+
local current_config=$(_build_driver_config)
765+
local stored_config=$(cat /run/nvidia/driver-config.state 2>/dev/null || echo "")
766+
[ "${current_config}" = "${stored_config}" ]
767+
}
768+
769+
_userspace_only_install() {
770+
echo "Detected matching loaded driver & config (${DRIVER_VERSION}); performing userspace-only install"
771+
772+
_unmount_rootfs
773+
_update_package_cache
774+
775+
# Skip kernel-related steps for userspace-only install
776+
# KERNEL_VERSION is already set from uname -r, no need to resolve from yum
777+
# Kernel headers/devel/modules are not needed for userspace-only install
778+
779+
cd /drivers
780+
[ ! -d "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}" ] && sh NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run -x
781+
cd NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}
782+
783+
784+
echo "DEBUG: Current directory: $(pwd)"
785+
echo "DEBUG: Checking for ./nvidia-installer:"
786+
ls -la ./nvidia-installer 2>&1 || echo " ./nvidia-installer NOT FOUND"
787+
echo "DEBUG: Checking PATH for nvidia-installer:"
788+
which nvidia-installer 2>&1 || echo " nvidia-installer NOT in PATH"
789+
790+
791+
echo "Installing userspace components (libraries and binaries)..."
792+
local install_args="--silent --no-kernel-module --no-nouveau-check --no-nvidia-modprobe --no-drm --no-peermem --ui=none"
793+
[ "${ACCEPT_LICENSE}" = "yes" ] && install_args="$install_args --accept-license"
794+
IGNORE_CC_MISMATCH=1 ./nvidia-installer $install_args
795+
796+
# Copy kernel module sources if not already present (needed for other containers)
797+
if [ ! -d "/usr/src/nvidia-${DRIVER_VERSION}" ]; then
798+
_resolve_kernel_type || exit 1
799+
mkdir -p /usr/src/nvidia-${DRIVER_VERSION}
800+
cp -r LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION}/
801+
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest
802+
fi
803+
804+
_mount_rootfs
805+
_ensure_persistence
806+
_write_kernel_update_hook
807+
_store_driver_config
808+
809+
echo "Userspace-only install complete"
810+
}
811+
704812
_prepare() {
705813
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
706814
_find_vgpu_driver_version || exit 1
@@ -758,6 +866,7 @@ _load() {
758866
_load_driver
759867
_mount_rootfs
760868
_write_kernel_update_hook
869+
_store_driver_config
761870

762871
echo "Done, now waiting for signal"
763872
sleep infinity &
@@ -768,7 +877,49 @@ _load() {
768877
}
769878

770879
init() {
771-
_prepare_exclusive
880+
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
881+
_find_vgpu_driver_version || exit 1
882+
fi
883+
884+
echo -e "\n========== NVIDIA Software Installer ==========\n"
885+
echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n"
886+
887+
exec 3> ${PID_FILE}
888+
if ! flock -n 3; then
889+
echo "An instance of the NVIDIA driver is already running, aborting"
890+
exit 1
891+
fi
892+
echo $$ >&3
893+
894+
trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
895+
trap "_shutdown" EXIT
896+
897+
if _should_use_fast_path; then
898+
_userspace_only_install
899+
900+
echo "Userspace-only install complete, now waiting for signal"
901+
sleep infinity &
902+
trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
903+
trap - EXIT
904+
while true; do wait $! || continue; done
905+
exit 0
906+
fi
907+
908+
_unload_driver || exit 1
909+
_unmount_rootfs
910+
911+
# Install the userspace components and copy the kernel module sources.
912+
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
913+
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
914+
sh /tmp/install.sh nvinstall
915+
916+
# Determine the kernel module type
917+
_resolve_kernel_type || exit 1
918+
919+
# Copy the kernel module sources
920+
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
921+
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
922+
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest
772923

773924
_build
774925

rhel9/ocp_dtk_entrypoint

100755100644
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,50 @@ echo "Running $*"
1010
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
1111
source $SCRIPT_DIR/common.sh
1212

13+
# Print a textual fingerprint of the current driver configuration.
#
# The output is compared verbatim against the stored state file by
# _should_use_fast_path, so the layout (field order, the trailing space left
# by flattening newlines, one field per line) must not change.
#
# Globals (read):
#   DRIVER_VERSION, GPU_DIRECT_RDMA_ENABLED, USE_HOST_MOFED, KERNEL_MODULE_TYPE
# Arguments:
#   $1 - optional directory holding the nvidia*.conf module parameter files;
#        defaults to /drivers
# Outputs:
#   The configuration fingerprint on stdout.
_build_driver_config() {
    local conf_dir="${1:-/drivers}"
    local nvidia_params="" nvidia_uvm_params="" nvidia_modeset_params="" nvidia_peermem_params=""
    # conf_file is declared local so the loop below does not leak it to callers.
    local conf_file config

    # Read module parameters from conf files, flattening newlines to spaces.
    if [ -f "${conf_dir}/nvidia.conf" ]; then
        nvidia_params=$(tr '\n' ' ' <"${conf_dir}/nvidia.conf")
    fi
    if [ -f "${conf_dir}/nvidia-uvm.conf" ]; then
        nvidia_uvm_params=$(tr '\n' ' ' <"${conf_dir}/nvidia-uvm.conf")
    fi
    if [ -f "${conf_dir}/nvidia-modeset.conf" ]; then
        nvidia_modeset_params=$(tr '\n' ' ' <"${conf_dir}/nvidia-modeset.conf")
    fi
    if [ -f "${conf_dir}/nvidia-peermem.conf" ]; then
        nvidia_peermem_params=$(tr '\n' ' ' <"${conf_dir}/nvidia-peermem.conf")
    fi

    config="DRIVER_VERSION=${DRIVER_VERSION}
KERNEL_VERSION=$(uname -r)
GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED:-false}
USE_HOST_MOFED=${USE_HOST_MOFED:-false}
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
NVIDIA_MODULE_PARAMS=${nvidia_params}
NVIDIA_UVM_MODULE_PARAMS=${nvidia_uvm_params}
NVIDIA_MODESET_MODULE_PARAMS=${nvidia_modeset_params}
NVIDIA_PEERMEM_MODULE_PARAMS=${nvidia_peermem_params}"

    # Append the raw conf file contents (trailing newlines are dropped by the
    # command substitution, exactly as with the previous cat call).
    for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
        if [ -f "${conf_dir}/${conf_file}" ]; then
            config="${config}
$(<"${conf_dir}/${conf_file}")"
        fi
    done

    printf '%s\n' "${config}"
}
49+
50+
_should_use_fast_path() {
51+
[ -f /sys/module/nvidia/refcnt ] && [ -f /run/nvidia/driver-config.state ] || return 1
52+
local current_config=$(_build_driver_config)
53+
local stored_config=$(cat /run/nvidia/driver-config.state 2>/dev/null || echo "")
54+
[ "${current_config}" = "${stored_config}" ]
55+
}
56+
1357
nv-ctr-run-with-dtk() {
1458
set -x
1559

@@ -18,6 +62,13 @@ nv-ctr-run-with-dtk() {
1862
exec bash -x nvidia-driver init
1963
fi
2064

65+
if _should_use_fast_path; then
66+
echo "Fast path detected: skipping DTK build and module copy, proceeding with userspace-only install"
67+
exec bash -x nvidia-driver init
68+
fi
69+
70+
echo "Fast path not detected: building driver and modules"
71+
2172
if [[ ! -f "$DRIVER_TOOLKIT_SHARED_DIR/dir_prepared" ]]; then
2273
cp -r \
2374
/tmp/install.sh \

ubuntu22.04/nvidia-driver

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ _start_vgpu_topology_daemon() {
639639
nvidia-topologyd
640640
}
641641

642-
_ensure_persistence_running() {
642+
_ensure_persistenced() {
643643
local pid_file=/var/run/nvidia-persistenced/nvidia-persistenced.pid pid
644644
if pid=$(<"${pid_file}" 2>/dev/null) && [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
645645
return 0
@@ -653,11 +653,31 @@ _ensure_persistence_running() {
653653
}
654654

655655
_build_driver_config() {
656+
local nvidia_params="" nvidia_uvm_params="" nvidia_modeset_params="" nvidia_peermem_params=""
657+
658+
# Read module parameters from conf files
659+
if [ -f "/drivers/nvidia.conf" ]; then
660+
nvidia_params=$(cat "/drivers/nvidia.conf" | tr '\n' ' ')
661+
fi
662+
if [ -f "/drivers/nvidia-uvm.conf" ]; then
663+
nvidia_uvm_params=$(cat "/drivers/nvidia-uvm.conf" | tr '\n' ' ')
664+
fi
665+
if [ -f "/drivers/nvidia-modeset.conf" ]; then
666+
nvidia_modeset_params=$(cat "/drivers/nvidia-modeset.conf" | tr '\n' ' ')
667+
fi
668+
if [ -f "/drivers/nvidia-peermem.conf" ]; then
669+
nvidia_peermem_params=$(cat "/drivers/nvidia-peermem.conf" | tr '\n' ' ')
670+
fi
671+
656672
local config="DRIVER_VERSION=${DRIVER_VERSION}
657673
KERNEL_VERSION=$(uname -r)
658674
GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED}
659675
USE_HOST_MOFED=${USE_HOST_MOFED}
660-
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE}"
676+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE}
677+
NVIDIA_MODULE_PARAMS=${nvidia_params}
678+
NVIDIA_UVM_MODULE_PARAMS=${nvidia_uvm_params}
679+
NVIDIA_MODESET_MODULE_PARAMS=${nvidia_modeset_params}
680+
NVIDIA_PEERMEM_MODULE_PARAMS=${nvidia_peermem_params}"
661681

662682
# Append config file contents directly
663683
for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
@@ -741,7 +761,7 @@ init() {
741761
_mount_rootfs
742762

743763
# Ensure persistence daemon is running
744-
_ensure_persistence_running
764+
_ensure_persistenced
745765

746766
# Write kernel update hook
747767
_write_kernel_update_hook

0 commit comments

Comments
 (0)