[[Category:HPC-Admin|Remote Desktop with GPU Acceleration]]
[[Category:HPC.NRW-Best-Practices|Remote Desktop with GPU Acceleration]]
[[Category:Remote-Desktop]]
{{DISPLAYTITLE:Remote Desktop with GPU Acceleration (Admin Guide)}}
  
 
= Using GPUs for remote hardware acceleration =
 
The goal is to enable HPC users to use hardware acceleration on GPU-equipped compute nodes in an interactive environment.

== TurboVNC + VirtualGL ==

  • TurboVNC functions as a high-speed virtual X server
  • VirtualGL adds hardware-accelerated OpenGL to TurboVNC
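
With both pieces in place, OpenGL applications inside a TurboVNC session are launched through vglrun, which redirects their rendering to the GPU-backed X server. A minimal sketch, assuming VirtualGL's default install prefix /opt/VirtualGL and the 3D X server running on display :0:

# Render the VirtualGL demo on the GPU-backed X server (:0) and show it in the TurboVNC session
/opt/VirtualGL/bin/vglrun -d :0 /opt/VirtualGL/bin/glxspheres64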

== Example Setup ==

The following commands are taken from an xCAT postscript; modify them to your needs.

yum install -y xorg-x11-server-Xorg

# Install drivers from the NVIDIA binary installer (the current CentOS 7.7 NVIDIA RPMs do not run well with RTX 2080 Ti cards)
NVIDIA-Linux-x86_64-440.59.run -q --ui=none --install-libglvnd

# Configure X for VirtualGL
nvidia-xconfig -a --allow-empty-initial-configuration --virtual=1920x1200

# Unload nvidia drivers
modprobe -r nvidia_drm
modprobe -r nvidia_uvm

# Install VirtualGL
yum install -y VirtualGL-2.6.3.x86_64.rpm

# Configure VirtualGL (note: with these flags this is very insecure!)
/opt/VirtualGL/bin/vglserver_config -config +s +f +t

# Load back nvidia drivers
modprobe nvidia_drm
modprobe nvidia_uvm

# Enabling Persistence Mode
nvidia-smi -pm 1

# For constant clock frequencies
nvidia-smi -acp UNRESTRICTED

# Start an X server to handle 3D OpenGL rendering
nohup Xorg :0 > /root/xorg.0.log &

# Install WM, menubar, filemanager, terminal
yum install -y openbox
yum install -y tint2
yum install -y caja adwaita-gtk2-theme mate-terminal caja-open-terminal

# get custom config files
cp /Applic.HPC/visualisierung/vis-gpu/openbox/menu.xml /etc/xdg/openbox/menu.xml
cp /Applic.HPC/visualisierung/vis-gpu/openbox/rc.xml /etc/xdg/openbox/rc.xml
cp /Applic.HPC/visualisierung/vis-gpu/openbox/open_scratch.py /etc/xdg/openbox/open_scratch.py
cp /Applic.HPC/visualisierung/vis-gpu/tint2rc /etc/xdg/tint2/tint2rc
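
Before handing the node to users, it is worth verifying that the driver and the 3D X server actually provide hardware-accelerated OpenGL. A quick sanity check, assuming the paths from the setup above:

# GPU and driver visible?
nvidia-smi
# The renderer string should name the NVIDIA GPU, not a software rasterizer
DISPLAY=:0 /opt/VirtualGL/bin/glxinfo | grep "OpenGL renderer"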

== Desktop Environment ==

Try to keep the software installed for a functioning desktop environment as minimal as possible. We went with a combination of:

  • Openbox Window Manager
  • tint2 menubar

and customized them for our needs. The customized config files are copied to the nodes at startup:

  • Tint2: tint2rc
  • Openbox: menu.xml, rc.xml, open_scratch.py
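
TurboVNC starts the desktop through an xstartup script (referenced as xstartup.turbovnc in the SLURM script below). Its exact content is site-specific; a minimal sketch for the Openbox/tint2 combination above could look like this:

#!/bin/sh
# Hypothetical xstartup.turbovnc: start the panel, then hand control to the window manager
tint2 &
exec openbox-session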


(Screenshot: Desktop view)

== SLURM Integration ==

We added a module vis-gpu which gives access to a vnc_gpu.sh script. The script automatically reserves one of the dedicated GPU nodes with a fixed number of CPUs, one GPU and a time limit of 12 h. It then starts the TurboVNC server and prints instructions to the terminal on how to create an SSH tunnel and connect to the VNC server.
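
From the user's perspective, a session is then started by loading the module and running the script; a rough sketch, assuming the module adds the script to the PATH:

module load vis-gpu
vnc_gpu.sh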

#!/bin/bash

# List of available tunnel ports ("0" is a sentinel meaning no port is free)
ports=( "5910" "5911" "5912" "5913" "5914" "5915" "0" )

function get_vnc_display() {
    /Applic.HPC/TurboVNC/bin/vncserver -list | \
    sed --quiet --regexp-extended \
    '/X DISPLAY/{n; s/^[[:space:]]*:([[:digit:]]+)[[:space:]]+.*$/\1/p}'
}

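# Reserve the first free tunnel port by creating a lock file in a shared
# directory; a port counts as taken if either its lock file or its .bak
# file (left behind by a finished session) exists.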
function get_tunnel_port(){
for port in "${ports[@]}"
do
    if [ $port = "0" ];
    then
        echo $port
        break
    fi
    if [ ! -f /Applic.HPC/visualisierung/ports/$port ] && [ ! -f /Applic.HPC/visualisierung/ports/$port.bak ];
    then
        touch /Applic.HPC/visualisierung/ports/$port
        echo $port
        break
    fi
done
}

TUNNEL_PORT=$(get_tunnel_port)
if [ $TUNNEL_PORT = '0' ]; then
    echo "Too many connections, no port available, aborting..."
    exit
fi

# Allocate nodes with slurm
function allocate_nodes_with_slurm() {
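    # Release the port reserved by this (login-node) invocation; the script
    # re-executes itself inside the allocation and reserves a port there.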
    rm /Applic.HPC/visualisierung/ports/$TUNNEL_PORT
    # Submit command
    local -a SUBMIT_COMMAND=()
    SUBMIT_COMMAND+=( "salloc" )
    SUBMIT_COMMAND+=( "--job-name=start_vnc_desktop" )
    SUBMIT_COMMAND+=( "--time=12:00:00" )
    SUBMIT_COMMAND+=( "--partition=vis-gpu" )
    SUBMIT_COMMAND+=( "--gres=gpu:1" )
    SUBMIT_COMMAND+=( "--cpus-per-task=6")
    SUBMIT_COMMAND+=( "srun" "--propagate=" "--pty" "/Applic.HPC/visualisierung/vis-gpu/vnc_gpu.sh" )

    exec "${SUBMIT_COMMAND[@]}"
}

# Check if we are in batch environment
if [[ -z "${SLURM_JOBID}" ]]; then
    allocate_nodes_with_slurm
fi

# Signal handler: clean up the VNC server when the job ends or is interrupted
trap terminate_vncserver 0 1 2 3 6 9 11 15 16 20

function start_vncserver() {
    running=$(get_vnc_display)
    if [[ ! -z  "$running" ]];then
        echo "#### You can't have more than one session"
        exit
    fi  
    /Applic.HPC/TurboVNC/bin/vncserver -xstartup /Applic.HPC/visualisierung/vis-gpu/xstartup.turbovnc 
}

function terminate_vncserver() {
    local VNC_DISPLAY=$(get_vnc_display)
    if [[ -z "${VNC_DISPLAY}" ]]; then
        return
    fi  
    /Applic.HPC/TurboVNC/bin/vncserver -kill :${VNC_DISPLAY}
    mv /Applic.HPC/visualisierung/ports/$TUNNEL_PORT /Applic.HPC/visualisierung/ports/$TUNNEL_PORT.bak
}

function stop_already_running_vncserver() {
    local VNC_DISPLAY
    while VNC_DISPLAY=$(get_vnc_display) && [[ -n "${VNC_DISPLAY}" ]]
    do
        echo "Stopping VNC desktop already running on display :${VNC_DISPLAY}"
        /Applic.HPC/TurboVNC/bin/vncserver -kill :${VNC_DISPLAY}
        sleep 5
    done
}

function report_connect_info() {
    local VNC_DISPLAY=$(get_vnc_display)
    # Bail out before computing the port if no display is running
    if [[ -z "${VNC_DISPLAY}" ]]; then
        echo "No VNC display found"
        exit 7
    fi
    local VNC_PORT=$((5900 + ${VNC_DISPLAY}))
    local VNC_HOST="$(hostname -s)"
    local VNC_USER="$(whoami)"
    cat <<-EOF
##########################

Your Session is ready:

        On Linux, run:
        ssh -L ${VNC_PORT}:localhost:${TUNNEL_PORT} ${VNC_USER}@palma.uni-muenster.de 'ssh -L ${TUNNEL_PORT}:localhost:${VNC_PORT} ${VNC_HOST}'

        On Windows:
        plink -L ${VNC_PORT}:localhost:${TUNNEL_PORT} ${VNC_USER}@palma.uni-muenster.de ssh -L ${TUNNEL_PORT}:localhost:${VNC_PORT} ${VNC_HOST}
    
        in your terminal and just keep it open, then connect your VNC client to localhost:${VNC_PORT}

        Password: ${VNC_PASSWORD}

##########################

        To end your session, press CTRL + C, once in this terminal and once in your local ssh terminal.

##########################

EOF
}

function exec_interactive_shell() {
    local PPID_COMMAND_NAME=$(ps --no-headers -o comm= --pid "${PPID}")
    if [[ "${PPID_COMMAND_NAME}" =~ slurmstepd ]]; then
        bash -i
        exit
    fi
}

function set_vncpasswd() {
    local PASSWORD="$(openssl rand -base64 8)"
    local VIEWPASSWORD="$(openssl rand -base64 8)"

    VNC_PASSWORD="${PASSWORD:0:8}"
    VNC_VIEWPASSWORD="${VIEWPASSWORD:0:8}"

    local VNC_PASSWD_FILE="${HOME}/.vnc/passwd"
#    install -D --mode="u=rw,og=" /dev/null "${VNC_PASSWD_FILE}" 
    /Applic.HPC/TurboVNC/bin/vncpasswd -f > "${VNC_PASSWD_FILE}" <<-EOF
${VNC_PASSWORD}
${VNC_PASSWORD}
${VNC_VIEWPASSWORD}
EOF
}

set_vncpasswd
start_vncserver
report_connect_info
sleep 12h
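
On the client side, once the SSH tunnel from the printed instructions is open, any VNC viewer can connect to the forwarded local port. A sketch with the TurboVNC viewer, assuming its default Linux install path and VNC display :1:

# Display :1 corresponds to TCP port 5901, the local end of the SSH tunnel
/opt/TurboVNC/bin/vncviewer localhost:1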