Remote Desktop with GPU Acceleration (Admin Guide)

Using GPUs for remote hardware acceleration

The goal is to let HPC users take advantage of hardware acceleration on GPU-equipped compute nodes in an interactive environment.

TurboVNC + VirtualGL

  • TurboVNC functions as a high-speed virtual X server
  • VirtualGL adds hardware-accelerated OpenGL to TurboVNC
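
With this pairing, 2D drawing goes to the TurboVNC virtual X server, while VirtualGL intercepts an application's OpenGL calls and redirects them to the GPU-attached X server (display :0 by default). Inside a running session you can verify that rendering actually hits the GPU (a minimal check, assuming VirtualGL's default prefix /opt/VirtualGL and an installed glxinfo):

# Should report the NVIDIA GPU, not llvmpipe or another software rasterizer
/opt/VirtualGL/bin/vglrun glxinfo | grep "OpenGL renderer"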

Example Setup (from an xCAT postscript; adapt to your needs)

yum install -y xorg-x11-server-Xorg

# Install drivers from the binary installer (the current CentOS 7.7 NVIDIA RPMs do not run well with RTX 2080 Ti cards)
sh NVIDIA-Linux-x86_64-440.59.run -q --ui=none --install-libglvnd

# Configure X for VirtualGL
nvidia-xconfig -a --allow-empty-initial-configuration --virtual=1920x1200

# Unload the nvidia kernel modules so vglserver_config below can adjust the device permissions
modprobe -r nvidia_drm
modprobe -r nvidia_uvm

# Install VirtualGL
yum install -y VirtualGL-2.6.3.x86_64.rpm

# Configure VirtualGL (with these flags it is very insecure: +s opens the 3D X
# server to all users instead of the vglusers group, +f does the same for the
# framebuffer devices, and +t leaves the XTEST extension enabled)
/opt/VirtualGL/bin/vglserver_config -config +s +f +t

# Load the nvidia kernel modules again
modprobe nvidia_drm
modprobe nvidia_uvm

# Enable persistence mode (keeps the driver loaded even with no clients attached)
nvidia-smi -pm 1

# For constant clock frequencies: allow non-root users to set application clocks
nvidia-smi -acp UNRESTRICTED

# Start an X server to handle 3D OpenGL rendering
nohup Xorg :0 > /root/xorg.0.log &

# Install window manager, menu bar, file manager, and terminal
yum install -y openbox
yum install -y tint2
yum install -y caja adwaita-gtk2-theme mate-terminal caja-open-terminal

# Get custom config files (see the Desktop Environment section below)
cp /Applic.HPC/visualisierung/vis-gpu/openbox/menu.xml /etc/xdg/openbox/menu.xml
cp /Applic.HPC/visualisierung/vis-gpu/openbox/rc.xml /etc/xdg/openbox/rc.xml
cp /Applic.HPC/visualisierung/vis-gpu/openbox/open_scratch.py /etc/xdg/openbox/open_scratch.py
cp /Applic.HPC/visualisierung/vis-gpu/tint2rc /etc/xdg/tint2/tint2rc
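
After the postscript has run, a short sanity check confirms the node is ready (a sketch; glxinfo is assumed to be installed, e.g. from the glx-utils package):

# Persistence mode should show "Enabled" for every GPU
nvidia-smi --query-gpu=name,persistence_mode --format=csv
# The 3D X server on :0 should report the NVIDIA renderer
DISPLAY=:0 glxinfo | grep -i "opengl renderer"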

Desktop Environment

Keep the software installed for a functioning desktop environment as minimal as possible. We went with a combination of:

  • Openbox Window Manager
  • tint2 menubar

and customized both for our needs. Copy the customized config files to the nodes on startup (see the xstartup sketch after this list):

  • tint2:
tint2rc


  • Openbox:
menu.xml
rc.xml
open_scratch.py
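
TurboVNC brings this environment up through the xstartup script passed to vncserver (see the SLURM script below). A minimal sketch of what xstartup.turbovnc could contain (hypothetical; the actual file is site-specific):

#!/bin/sh
# Start the panel in the background, then hand the session over to the window manager
tint2 &
exec openbox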


[Screenshot: Desktop view]

SLURM Integration

We added a module vis-gpu which gives access to a vnc_gpu.sh script. The script automatically reserves one of the dedicated GPU nodes with a fixed number of CPUs, one GPU, and a time limit of 12 hours. It then starts the TurboVNC server and prints instructions to the terminal on how to create an SSH tunnel and connect to the VNC server.
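
From the user's perspective the whole workflow is (a sketch, assuming a standard environment-modules setup on the login node):

module load vis-gpu
vnc_gpu.sh

The script itself: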

#!/bin/bash

# List of usable tunnel ports; the final "0" is a sentinel meaning none is free
ports=( "5910" "5911" "5912" "5913" "5914" "5915" "0" )

# Parse the display number out of "vncserver -list" output
function get_vnc_display() {
    /Applic.HPC/TurboVNC/bin/vncserver -list | \
    sed --quiet --regexp-extended \
    '/X DISPLAY/{n; s/^[[:space:]]*:([[:digit:]]+)[[:space:]]+.*$/\1/p}'
}

function get_tunnel_port() {
    for port in "${ports[@]}"
    do
        # "0" is the sentinel entry: every real port is taken
        if [ "$port" = "0" ]; then
            echo "$port"
            break
        fi
        # A port is free if neither its marker file nor its .bak backup exists.
        # Note that this check-then-touch is not atomic, so two sessions started
        # at exactly the same moment could race for the same port.
        if [ ! -f /Applic.HPC/visualisierung/ports/$port ] && [ ! -f /Applic.HPC/visualisierung/ports/$port.bak ]; then
            touch /Applic.HPC/visualisierung/ports/$port
            echo "$port"
            break
        fi
    done
}

TUNNEL_PORT=$(get_tunnel_port)
if [ "$TUNNEL_PORT" = "0" ]; then
    echo "Too many connections, no port available, aborting..."
    exit 1
fi

# Allocate nodes with slurm
function allocate_nodes_with_slurm() {
    # Release the port marker; the script re-runs inside the allocation
    # and reserves a port again there
    rm /Applic.HPC/visualisierung/ports/$TUNNEL_PORT
    # Submit command
    local -a SUBMIT_COMMAND=()
    SUBMIT_COMMAND+=( "salloc" )
    SUBMIT_COMMAND+=( "--job-name=start_vnc_desktop" )
    SUBMIT_COMMAND+=( "--time=12:00:00" )
    SUBMIT_COMMAND+=( "--partition=vis-gpu" )
    SUBMIT_COMMAND+=( "--gres=gpu:1" )
    SUBMIT_COMMAND+=( "--cpus-per-task=6" )
    # --propagate= (empty) disables resource-limit propagation to the job
    SUBMIT_COMMAND+=( "srun" "--propagate=" "--pty" "/Applic.HPC/visualisierung/vis-gpu/vnc_gpu.sh" )

    exec "${SUBMIT_COMMAND[@]}"
}

# If we are not yet inside a SLURM job, allocate one and re-run this script there
if [[ -z "${SLURM_JOBID}" ]]; then
    allocate_nodes_with_slurm
fi

# Signal handler: make sure the VNC server is cleaned up on exit
# (SIGKILL cannot be trapped, so signal 9 is omitted)
trap terminate_vncserver 0 1 2 3 6 11 15 16 20

function start_vncserver() {
    running=$(get_vnc_display)
    if [[ -n "$running" ]]; then
        echo "#### You can't have more than one session"
        exit
    fi
    /Applic.HPC/TurboVNC/bin/vncserver -xstartup /Applic.HPC/visualisierung/vis-gpu/xstartup.turbovnc
}

function terminate_vncserver() {
    local VNC_DISPLAY=$(get_vnc_display)
    if [[ -z "${VNC_DISPLAY}" ]]; then
        return
    fi
    /Applic.HPC/TurboVNC/bin/vncserver -kill :${VNC_DISPLAY}
    # Keep a .bak marker so the port is not handed out again right away
    mv /Applic.HPC/visualisierung/ports/$TUNNEL_PORT /Applic.HPC/visualisierung/ports/$TUNNEL_PORT.bak
}

function stop_already_running_vncserver() {
    local VNC_DISPLAY
    while VNC_DISPLAY=$(get_vnc_display) && [[ -n "${VNC_DISPLAY}" ]]
    do
        echo "Stopping VNC desktop already running on display :${VNC_DISPLAY}"
        /Applic.HPC/TurboVNC/bin/vncserver -kill :${VNC_DISPLAY}
        sleep 5
    done
}

function report_connect_info() {
    local VNC_DISPLAY=$(get_vnc_display)
    # Check for a display before doing arithmetic on it
    if [[ -z "${VNC_DISPLAY}" ]]; then
        echo "No VNC display found"
        exit 7
    fi
    local VNC_PORT=$((5900 + VNC_DISPLAY))
    local VNC_HOST="$(hostname -s)"
    local VNC_USER="$(whoami)"
    cat <<-EOF
##########################

Your Session is ready:

        On Linux, run:
        ssh -L ${VNC_PORT}:localhost:${TUNNEL_PORT} ${VNC_USER}@palma.uni-muenster.de 'ssh -L ${TUNNEL_PORT}:localhost:${VNC_PORT} ${VNC_HOST}'

        On Windows:
        plink -L ${VNC_PORT}:localhost:${TUNNEL_PORT} ${VNC_USER}@palma.uni-muenster.de ssh -L ${TUNNEL_PORT}:localhost:${VNC_PORT} ${VNC_HOST}
    
        in your terminal and just keep it open, then connect your VNC client to localhost:${VNC_PORT}

        Password: ${VNC_PASSWORD}

##########################

        To end your session, press CTRL + C, once in this terminal and once in your local ssh terminal.

##########################

EOF
}

# Drop the user into an interactive shell when running under slurmstepd
# (defined for optional use; not called in the default flow below)
function exec_interactive_shell() {
    local PPID_COMMAND_NAME=$(ps --no-headers -o comm= --pid "${PPID}")
    if [[ "${PPID_COMMAND_NAME}" =~ slurmstepd ]]; then
        bash -i
        exit
    fi
}

function set_vncpasswd() {
    local PASSWORD="$(    openssl rand -base64 8)"
    local VIEWPASSWORD="$(openssl rand -base64 8)"

    # VNC passwords are at most 8 characters long
    VNC_PASSWORD="${PASSWORD:0:8}"
    VNC_VIEWPASSWORD="${VIEWPASSWORD:0:8}"

    local VNC_PASSWD_FILE="${HOME}/.vnc/passwd"
#    install -D --mode="u=rw,og=" /dev/null "${VNC_PASSWD_FILE}"
    /Applic.HPC/TurboVNC/bin/vncpasswd -f > "${VNC_PASSWD_FILE}" <<-EOF
${VNC_PASSWORD}
${VNC_PASSWORD}
${VNC_VIEWPASSWORD}
EOF
}

set_vncpasswd
start_vncserver
report_connect_info
# Keep the job (and with it the VNC server) alive for the full walltime
sleep 12h
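
The .bak markers left behind by terminate_vncserver keep a port out of rotation after a session ends, so something has to reclaim them eventually. A periodic job on a management node could do it (a hypothetical sketch; the 24-hour threshold is an assumption, tune it to your session policy):

# Remove port marker backups older than 24 hours so the ports become reusable
find /Applic.HPC/visualisierung/ports -name '*.bak' -mmin +1440 -delete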