@@ -8,12 +8,13 @@ PID_FILE=${RUN_DIR}/${0##*/}.pid
88DRIVER_VERSION=${DRIVER_VERSION:? " Missing DRIVER_VERSION env" }
99KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
1010NUM_VGPU_DEVICES=0
11+ GPU_DIRECT_RDMA_ENABLED=" ${GPU_DIRECT_RDMA_ENABLED:- false} "
12+ USE_HOST_MOFED=" ${USE_HOST_MOFED:- false} "
1113NVIDIA_MODULE_PARAMS=()
1214NVIDIA_UVM_MODULE_PARAMS=()
1315NVIDIA_MODESET_MODULE_PARAMS=()
1416NVIDIA_PEERMEM_MODULE_PARAMS=()
1517TARGETARCH=${TARGETARCH:? " Missing TARGETARCH env" }
16- USE_HOST_MOFED=" ${USE_HOST_MOFED:- false} "
1718DNF_RELEASEVER=${DNF_RELEASEVER:- " " }
1819RHEL_VERSION=${RHEL_VERSION:- " " }
1920RHEL_MAJOR_VERSION=9
@@ -211,7 +212,10 @@ _create_driver_package() (
211212 local nvidia_modeset_sign_args=" "
212213 local nvidia_uvm_sign_args=" "
213214
214- trap " make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION} /build clean > /dev/null" EXIT
215+ # Skip cleanup trap for DTK builds - modules are copied after this function returns
216+ if [ " ${PACKAGE_TAG:- } " != " builtin" ]; then
217+ trap " make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION} /build clean > /dev/null" EXIT
218+ fi
215219
216220 echo " Compiling NVIDIA driver kernel modules..."
217221 cd /usr/src/nvidia-${DRIVER_VERSION} /${KERNEL_TYPE}
@@ -566,11 +570,7 @@ _install_driver() {
566570 install_args+=(" --skip-module-load" )
567571 fi
568572
569- IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+" ${install_args[@]} " }
570- # May need to add no-cc-check for Rhel, otherwise it complains about cc missing in path
571- # /proc/version and lib/modules/KERNEL_VERSION/proc are different, by default installer looks at /proc/ so, added the proc-mount-point
572- # TODO: remove the -a flag. its not needed. in the new driver version, license-acceptance is implicit
573- # nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check --no-cc-version-check --proc-mount-point /lib/modules/${KERNEL_VERSION}/proc ${install_args[@]+"${install_args[@]}"}
573+ IGNORE_CC_MISMATCH=1 nvidia-installer --silent --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+" ${install_args[@]} " }
574574}
575575
576576# Mount the driver rootfs into the run directory with the exception of sysfs.
@@ -701,6 +701,114 @@ _start_vgpu_topology_daemon() {
701701 nvidia-topologyd
702702}
703703
# Make sure the NVIDIA persistence daemon is running.
#
# If the pid file names a live process nothing is done. Otherwise
# nvidia-persistenced is started when it is available; a failure to start it
# is deliberately ignored (best-effort).
#
# Arguments:
#   $1 - optional pid file path
#        (default: /var/run/nvidia-persistenced/nvidia-persistenced.pid)
# Returns:
#   0 always.
_ensure_persistence() {
    local pid_file=${1:-/var/run/nvidia-persistenced/nvidia-persistenced.pid}
    local pid=""

    # Use the bare "$(<file)" form: bash only reads the file when that single
    # redirection is the whole substitution. With an extra redirection inside
    # (e.g. "$(<file 2>/dev/null)") the result is empty, which made this check
    # fall through and relaunch the daemon on every call.
    if [ -r "${pid_file}" ]; then
        pid=$(<"${pid_file}") || pid=""
    fi

    # kill -0 sends no signal; it only tests that the process exists.
    if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
        return 0
    fi

    if command -v nvidia-persistenced >/dev/null 2>&1; then
        # Best-effort: do not fail the caller if the daemon cannot start.
        nvidia-persistenced --persistence-mode || true
    else
        echo "nvidia-persistenced not found; continuing without persistence"
    fi
}
716+
# Print a textual snapshot of the driver configuration on stdout.
#
# The snapshot consists of nine KEY=VALUE lines (driver version, running kernel
# from `uname -r`, RDMA / MOFED / module-type settings, and the flattened
# contents of each module parameter file) followed by the raw contents of
# every module parameter file that exists. Callers compare two snapshots as
# opaque strings, so the exact output format must stay stable.
#
# Globals:
#   DRIVER_VERSION (read), GPU_DIRECT_RDMA_ENABLED, USE_HOST_MOFED,
#   KERNEL_MODULE_TYPE (read, with defaults false / false / auto)
# Arguments:
#   $1 - optional directory holding the *.conf files (default: /drivers)
_build_driver_config() {
    local conf_dir=${1:-/drivers}
    local nvidia_params="" nvidia_uvm_params="" nvidia_modeset_params="" nvidia_peermem_params=""
    local conf_file

    # Flatten each parameter file onto one line (newlines become spaces).
    if [ -f "${conf_dir}/nvidia.conf" ]; then
        nvidia_params=$(tr '\n' ' ' < "${conf_dir}/nvidia.conf")
    fi
    if [ -f "${conf_dir}/nvidia-uvm.conf" ]; then
        nvidia_uvm_params=$(tr '\n' ' ' < "${conf_dir}/nvidia-uvm.conf")
    fi
    if [ -f "${conf_dir}/nvidia-modeset.conf" ]; then
        nvidia_modeset_params=$(tr '\n' ' ' < "${conf_dir}/nvidia-modeset.conf")
    fi
    if [ -f "${conf_dir}/nvidia-peermem.conf" ]; then
        nvidia_peermem_params=$(tr '\n' ' ' < "${conf_dir}/nvidia-peermem.conf")
    fi

    printf '%s\n' \
        "DRIVER_VERSION=${DRIVER_VERSION}" \
        "KERNEL_VERSION=$(uname -r)" \
        "GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED:-false}" \
        "USE_HOST_MOFED=${USE_HOST_MOFED:-false}" \
        "KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}" \
        "NVIDIA_MODULE_PARAMS=${nvidia_params}" \
        "NVIDIA_UVM_MODULE_PARAMS=${nvidia_uvm_params}" \
        "NVIDIA_MODESET_MODULE_PARAMS=${nvidia_modeset_params}" \
        "NVIDIA_PEERMEM_MODULE_PARAMS=${nvidia_peermem_params}"

    # Append the raw file contents as well (trailing newlines are trimmed by
    # the command substitution, then exactly one is written back).
    for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
        if [ -f "${conf_dir}/${conf_file}" ]; then
            printf '%s\n' "$(cat "${conf_dir}/${conf_file}")"
        fi
    done
}
754+
# Persist the current driver configuration snapshot to a state file.
#
# The snapshot produced by _build_driver_config is written to a temporary
# file next to the target and then renamed over it, so a reader never sees a
# half-written state file. Success is only reported when the write succeeded.
#
# Arguments:
#   $1 - optional state file path (default: /run/nvidia/driver-config.state)
# Returns:
#   0 on success, 1 if the snapshot could not be built or written.
_store_driver_config() {
    local config_file=${1:-/run/nvidia/driver-config.state}
    local tmp_file="${config_file}.tmp.$$"

    echo "Storing driver configuration state..."
    if ! _build_driver_config > "${tmp_file}" || ! mv -f "${tmp_file}" "${config_file}"; then
        rm -f "${tmp_file}"
        echo "Failed to store driver configuration at ${config_file}" >&2
        return 1
    fi
    echo "Driver configuration stored at ${config_file}"
}
761+
# Decide whether the userspace-only ("fast path") install can be used.
#
# The fast path is taken only when the nvidia kernel module is present under
# /sys/module and the stored configuration snapshot is identical to the one
# built from the current environment.
#
# Returns:
#   0 if the fast path may be used, non-zero otherwise (including when the
#   snapshot cannot be built or the state file cannot be read).
_should_use_fast_path() {
    local state_file=/run/nvidia/driver-config.state
    local current_config stored_config

    [ -f /sys/module/nvidia/refcnt ] || return 1
    [ -f "${state_file}" ] || return 1

    # Declared separately from the assignments so that a failing command is
    # not hidden behind the exit status of `local`.
    current_config=$(_build_driver_config) || return 1
    stored_config=$(cat "${state_file}" 2>/dev/null) || return 1

    [ "${current_config}" = "${stored_config}" ]
}
768+
# Install only the userspace driver components (libraries and binaries),
# leaving the already-loaded kernel modules untouched.
#
# Steps: unmount the driver rootfs, refresh the package cache, extract the
# .run package under /drivers if needed, run nvidia-installer with
# --no-kernel-module, copy the kernel module sources to
# /usr/src/nvidia-<version> when absent, then remount the rootfs, ensure the
# persistence daemon, write the kernel update hook and store the config state.
#
# Globals:
#   DRIVER_ARCH, DRIVER_VERSION (read); ACCEPT_LICENSE (read, optional);
#   KERNEL_TYPE (read after _resolve_kernel_type — presumably set by it;
#   confirm against that helper)
# Exits:
#   with status 1 if the installer directory cannot be entered, extraction or
#   the installer fails, or the kernel type cannot be resolved.
_userspace_only_install() {
    local installer_dir="NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}"
    local src_dir="/usr/src/nvidia-${DRIVER_VERSION}"
    # Kept as an array so each option reaches the installer as one word.
    local -a install_args=(
        --silent
        --no-kernel-module
        --no-nouveau-check
        --no-nvidia-modprobe
        --no-drm
        --no-peermem
        --ui=none
    )

    echo "Detected matching loaded driver & config (${DRIVER_VERSION}); performing userspace-only install"

    _unmount_rootfs
    _update_package_cache

    # No kernel headers/devel packages are resolved here: the modules are
    # already loaded, only userspace files are (re)installed.

    # Never continue in the wrong directory: the installer below is invoked
    # through a relative path.
    cd /drivers || { echo "Cannot enter /drivers" >&2; exit 1; }
    if [ ! -d "${installer_dir}" ]; then
        sh "${installer_dir}.run" -x || { echo "Failed to extract ${installer_dir}.run" >&2; exit 1; }
    fi
    cd "${installer_dir}" || { echo "Cannot enter /drivers/${installer_dir}" >&2; exit 1; }

    if [ ! -x ./nvidia-installer ]; then
        echo "nvidia-installer not found in $(pwd)" >&2
        exit 1
    fi

    echo "Installing userspace components (libraries and binaries)..."
    if [ "${ACCEPT_LICENSE:-}" = "yes" ]; then
        install_args+=(--accept-license)
    fi
    IGNORE_CC_MISMATCH=1 ./nvidia-installer "${install_args[@]}" \
        || { echo "Userspace driver installation failed" >&2; exit 1; }

    # Copy kernel module sources if not already present (needed for other containers)
    if [ ! -d "${src_dir}" ]; then
        _resolve_kernel_type || exit 1
        mkdir -p "${src_dir}"
        cp -r LICENSE mkprecompiled "${KERNEL_TYPE}" "${src_dir}/"
        # From line 9 of the manifest onwards keep only kernel*/LICENSE entries.
        sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > "${src_dir}/.manifest"
    fi

    _mount_rootfs
    _ensure_persistence
    _write_kernel_update_hook
    _store_driver_config

    echo "Userspace-only install complete"
}
811+
704812_prepare () {
705813 if [ " ${DRIVER_TYPE} " = " vgpu" ]; then
706814 _find_vgpu_driver_version || exit 1
@@ -758,6 +866,7 @@ _load() {
758866 _load_driver
759867 _mount_rootfs
760868 _write_kernel_update_hook
869+ _store_driver_config
761870
762871 echo " Done, now waiting for signal"
763872 sleep infinity &
@@ -768,7 +877,49 @@ _load() {
768877}
769878
770879init () {
771- _prepare_exclusive
880+ if [ " ${DRIVER_TYPE} " = " vgpu" ]; then
881+ _find_vgpu_driver_version || exit 1
882+ fi
883+
884+ echo -e " \n========== NVIDIA Software Installer ==========\n"
885+ echo -e " Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION} \n"
886+
887+ exec 3> ${PID_FILE}
888+ if ! flock -n 3; then
889+ echo " An instance of the NVIDIA driver is already running, aborting"
890+ exit 1
891+ fi
892+ echo $$ >&3
893+
894+ trap " echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
895+ trap " _shutdown" EXIT
896+
897+ if _should_use_fast_path; then
898+ _userspace_only_install
899+
900+ echo " Userspace-only install complete, now waiting for signal"
901+ sleep infinity &
902+ trap " echo 'Caught signal'; _shutdown && { kill $! ; exit 0; }" HUP INT QUIT PIPE TERM
903+ trap - EXIT
904+ while true ; do wait $! || continue ; done
905+ exit 0
906+ fi
907+
908+ _unload_driver || exit 1
909+ _unmount_rootfs
910+
911+ # Install the userspace components and copy the kernel module sources.
912+ sh NVIDIA-Linux-$DRIVER_ARCH -$DRIVER_VERSION .run -x && \
913+ cd NVIDIA-Linux-$DRIVER_ARCH -$DRIVER_VERSION && \
914+ sh /tmp/install.sh nvinstall
915+
916+ # Determine the kernel module type
917+ _resolve_kernel_type || exit 1
918+
919+ # Copy the kernel module sources
920+ mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
921+ mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
922+ sed ' 9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION /.manifest
772923
773924 _build
774925
0 commit comments