sr320@Stevens-MacBook-Air-184 ~ % ssh sr320@klone.hyak.uw.edu
(sr320@klone.hyak.uw.edu) Password:
(sr320@klone.hyak.uw.edu) Duo two-factor login for sr320
Enter a passcode or select one of the following options:
1. Duo Push to XXX-XXX-3626
2. Duo Push to iOS
3. Phone call to XXX-XXX-3626
Passcode or option (1-3): 1
Let’s get everyone up and running with Klone and RStudio.
Log in
CD to srlab directory
cd /gscratch/srlab/
Create user directory in srlab (if not present)
mkdir sr320
Add containers to your local directory
-rwxr-xr-x 1 samwhite all 3311583232 2024-12-04 srlab-bioinformatics-container-09dfcb2.sif
-rwxrwxr-x 1 samwhite srlab 2985074688 2024-10-17 srlab-bioinformatics-container-2bd5d44.sif
-rwxr-xr-x 1 samwhite all 3008335872 2024-11-06 srlab-bioinformatics-container-586bf21.sif
-rwxr-xr-x 1 samwhite all 3008442368 2024-11-07 srlab-bioinformatics-container-f4142f4.sif
-rwxr-xr-x 1 samwhite all 3380895744 2025-01-04 srlab-R4.4-bioinformatics-container-703094b.sif
Specifically srlab-R4.4-bioinformatics-container-703094b.sif
#within user directory
cp ../containers/srlab-R4.4-bioinformatics-container-703094b.sif .
See Footnotes for more information on containers.
Users should add the following line in ~/.Renviron to set R_LIBS_USER. If you don’t have a ~/.Renviron, you can create it like this: touch ~/.Renviron
Example of R_LIBS_USER in ~/.Renviron:
cat ~/.Renviron
# Set local library installation path
R_LIBS_USER=/gscratch/srlab/${USER}/R_libs_apptainer
This configuration directs R to install and access packages from a custom directory rather than the system-wide library, which is especially useful in shared or containerized environments.
SLURM JOB SCRIPT
cp ../sr320/rstudio-server.job .
User needs to (maybe) set/change the following in the SLURM script before starting script:
#SBATCH --partition=
#SBATCH --time=
#SBATCH --cpus-per-task=
#SBATCH --mem=
#SBATCH --chdir=
RSTUDIO_CWD="/gscratch/srlab/sr320" # UPDATE THIS LINE
RSTUDIO_SIF="srlab-R4.4-bioinformatics-container-703094b.sif" # UPDATE THIS LINE
Partition Options
cpu-g2-mem2x
or ckpt
[sr320@klone-login03 containers]$ hyakalloc
Account resources available to user: sr320
╭─────────┬──────────────┬──────┬────────┬──────┬───────╮
│ Account │ Partition │ CPUs │ Memory │ GPUs │ │
├─────────┼──────────────┼──────┼────────┼──────┼───────┤
│ srlab │ cpu-g2-mem2x │ 32 │ 490G │ 0 │ TOTAL │
│ │ │ 12 │ 100G │ 0 │ USED │
│ │ │ 20 │ 390G │ 0 │ FREE │
╰─────────┴──────────────┴──────┴────────┴──────┴───────╯
Checkpoint Resources (ckpt)
╭───────┬──────┬──────╮
│ │ CPUs │ GPUs │
├───────┼──────┼──────┤
│ Idle: │ 4557 │ 148 │
╰───────┴──────┴──────╯
Open
nano rstudio-server.job
#!/bin/bash
#SBATCH --job-name=rstudio-server
#SBATCH --account=srlab
#SBATCH --partition=cpu-g2-mem2x #update this line - use hyakalloc to find partitions you can use
#SBATCH --time=15-08:00:00
#SBATCH --ntasks=1 # Number of tasks
#SBATCH --cpus-per-task=12 # CPU threads available to the RStudio session
#SBATCH --nodes=1
#SBATCH --mem=100G
#SBATCH --signal=USR2
#SBATCH --output=%x_%j.out
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/srlab/sr320

# Launches RStudio Server inside an Apptainer container as a SLURM batch job
# and writes connection instructions (SSH tunnel command, user, one-time
# password) to stderr, which lands in the job's --output file.
#
# As configured above, this script requests one task with 12 CPUs and 100GB of
# RAM on a single node for 15 days and 8 hours.
# You can adjust --time, --cpus-per-task, and --mem above to change these
# settings for your session.
# --output=%x_%j.out creates a output file called rstudio-server_NNNNNNNN.out
# where the %x is short hand for --job-name above and the N's are an 8-digit
# jobID assigned by SLURM when our job is submitted.
#
# NOTE: the shebang is bash (not sh) because this script uses `source`.

RSTUDIO_CWD="/gscratch/srlab/sr320" # UPDATE THIS LINE
RSTUDIO_SIF="srlab-R4.4-bioinformatics-container-703094b.sif" # UPDATE THIS LINE

# Create temp directory for ephemeral content to bind-mount in the container
RSTUDIO_TMP=$(/usr/bin/python3 -c 'import tempfile; print(tempfile.mkdtemp())')

mkdir -p -m 700 \
  "${RSTUDIO_TMP}/run" \
  "${RSTUDIO_TMP}/tmp" \
  "${RSTUDIO_TMP}/var/lib/rstudio-server"

# RStudio Server database configuration; bind-mounted over
# /etc/rstudio/database.conf in the container (see APPTAINER_BIND below).
cat > "${RSTUDIO_TMP}/database.conf" <<END
provider=sqlite
directory=/var/lib/rstudio-server
END

# Set OMP_NUM_THREADS to prevent OpenBLAS (and any other OpenMP-enhanced
# libraries used by R) from spawning more threads than the number of processors
# allocated to the job.
#
# Set R_LIBS_USER to a path specific to rocker/rstudio to avoid conflicts with
# personal libraries from any R installation in the host environment
#
# The heredoc delimiter is unquoted on purpose: ${SLURM_JOB_CPUS_PER_NODE} and
# ${RSTUDIO_CWD} are expanded now, while \${@} is escaped so it is written
# literally into the wrapper script.
cat > "${RSTUDIO_TMP}/rsession.sh" <<END
#!/bin/sh
export OMP_NUM_THREADS=${SLURM_JOB_CPUS_PER_NODE}
export R_LIBS_USER=${RSTUDIO_CWD}/R
exec /usr/lib/rstudio-server/bin/rsession "\${@}"
END

chmod +x "${RSTUDIO_TMP}/rsession.sh"

# Host paths bind-mounted into the container (host:container pairs).
export APPTAINER_BIND="${RSTUDIO_CWD}:${RSTUDIO_CWD},/gscratch:/gscratch,${RSTUDIO_TMP}/run:/run,${RSTUDIO_TMP}/tmp:/tmp,${RSTUDIO_TMP}/database.conf:/etc/rstudio/database.conf,${RSTUDIO_TMP}/rsession.sh:/etc/rstudio/rsession.sh,${RSTUDIO_TMP}/var/lib/rstudio-server:/var/lib/rstudio-server"

# Do not suspend idle sessions.
# Alternative to setting session-timeout-minutes=0 in /etc/rstudio/rsession.conf
export APPTAINERENV_RSTUDIO_SESSION_TIMEOUT=0

# Login credentials for this session: current user plus a random password.
export APPTAINERENV_USER=$(id -un)
export APPTAINERENV_PASSWORD=$(openssl rand -base64 15)

# get unused socket per https://unix.stackexchange.com/a/132524
# tiny race condition between the python & apptainer commands
readonly PORT=$(/mmfs1/sw/pyenv/versions/3.9.5/bin/python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()')

# Connection instructions go to stderr so they appear in the job output file.
cat 1>&2 <<END
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:${HOSTNAME}:${PORT} ${APPTAINERENV_USER}@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials:
user: ${APPTAINERENV_USER}
password: ${APPTAINERENV_PASSWORD}
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f ${SLURM_JOB_ID}
END

source /etc/bashrc
module load apptainer

# `rserver` must be the first word after the image path: everything after the
# image is the command executed inside the container, so --www-port has to be
# passed to rserver rather than placed in front of it.
apptainer exec --cleanenv --home "${RSTUDIO_CWD}" "${RSTUDIO_CWD}/${RSTUDIO_SIF}" \
  rserver --www-port "${PORT}" \
    --auth-none=0 \
    --auth-pam-helper-path=pam-helper \
    --auth-stay-signed-in-days=30 \
    --auth-timeout-minutes=0 \
    --rsession-path=/etc/rstudio/rsession.sh \
    --server-user="${APPTAINERENV_USER}"

# Propagate rserver's exit status as the job's exit status.
APPTAINER_EXIT_CODE=$?
echo "rserver exited $APPTAINER_EXIT_CODE" 1>&2
exit $APPTAINER_EXIT_CODE
Run
sbatch rstudio-server.job
Follow the instructions in the output file to connect to RStudio.
e.g.,
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3263:56093 sr320@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials:
user: sr320
password: PDQlr876755PgP3g37c
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 24359989
Footnotes
srlab-bioinformatics-container-<git commit hash>.sif: A "catch-all" container which holds most of the commonly used bioinformatics software
used by the Roberts Lab. The <git commit hash> is taken from the definition file used to construct the container. The definition file can be found at
/gscratch/srlab/gitrepos/RobertsLab/code/apptainer_definition_files/srlab-bioinformatics-container.def.
Definition File
[sr320@klone-login03 containers]$ cat /gscratch/srlab/gitrepos/RobertsLab/code/apptainer_definition_files/srlab-bioinformatics-container.def
# NOTE:
# To use RepeatMasker with this container, RepeatMasker must be installed on the local computer in the following location:
#
# /gscratch/srlab/programs/RepeatMasker
#
# RepeatMasker must be configured on the local computer prior to running via this container.
# The container expects the following files:
#
# /gscratch/srlab/programs/RepeatMasker/Libraries/famdb/dfam38_full.0.h5
# /gscratch/srlab/programs/RepeatMasker/Libraries/famdb/rmlib.config
# Base image: build from the rocker/rstudio:4.4.1 Docker image.
Bootstrap: docker
From: rocker/rstudio:4.4.1
%files
# Load file with R package installation commands in to container at /tmp
# Expects file called "r_packages_installs.R" to be in current directory.
# The copied script is executed later in %post via Rscript.
r_packages_installs.R /tmp/
%setup
%environment
# Runtime environment for the container: records the program versions that
# %post installs under /srlab/programs and puts their directories on PATH.
# The versions here must match the "Set program versions" list in %post,
# otherwise the PATH entries point at directories that do not exist.
### Set program locations/versions for system PATH ###
programs_location="/srlab/programs"
bismark_version="0.24.2"
bowtie2_version="2.5.4"
diamond_version="2.1.9"
fastp_version="0.23.4"
fastqc_version="0.12.1"
gffcompare_version="gffcompare-0.12.6"
hisat2_version="2.2.1"
kallisto_version="0.51.1"
miniforge_version="24.7.1-0"
multiqc_version="1.24.1"
ncbi_blast_version="2.16.0"
# Must equal the version installed in %post (16.34.0).
ncbi_datasets_version="16.34.0"
repeatmasker_version="4.1.7-p1"
rmblast_version="2.14.1"
salmon_version="1.10.0"
samtools_version="1.20"
stringtie_version="2.2.1"
trimmomatic_version="0.39"
mamba_envs_dir="${programs_location}/miniforge3-${miniforge_version}/envs"
######################################################
export LC_ALL=C
### Set container system PATH ###
# Notes on entries below:
# - ncbi-datasets: %post creates the directory as "ncbi-datasets-v<version>",
#   so the "v" prefix is required here.
# - gffcompare: %post unpacks <name>.Linux_x86_64.tar.gz; the release tarball
#   is expected to unpack to "<name>.Linux_x86_64" (same layout as StringTie),
#   so that directory is listed. The un-suffixed entry is kept for backward
#   compatibility.
# - Continuation lines must stay unindented: they are inside a quoted string.
PATH="/usr/local/sbin:\
/usr/local/bin:\
/usr/sbin:\
/usr/bin:\
/sbin:\
/bin:\
${programs_location}:\
${programs_location}/Bismark-${bismark_version}:\
${programs_location}/bowtie2-${bowtie2_version}-sra-linux-x86_64:\
${programs_location}/fastqc-${fastqc_version}:\
${programs_location}/${gffcompare_version}.Linux_x86_64:\
${programs_location}/${gffcompare_version}:\
${programs_location}/hisat2-${hisat2_version}:\
${programs_location}/kallisto-${kallisto_version}/build/src:\
${programs_location}/miniforge3-${miniforge_version}/bin:\
${mamba_envs_dir}/multiqc_env/bin:\
${programs_location}/ncbi-blast-${ncbi_blast_version}+/bin:\
${programs_location}/ncbi-datasets-v${ncbi_datasets_version}:\
${programs_location}/RepeatMasker:\
${programs_location}/rmblast-${rmblast_version}/bin:\
${programs_location}/salmon-latest_linux_x86_64/bin:\
${programs_location}/samtools-${samtools_version}:\
${programs_location}/stringtie-${stringtie_version}.Linux_x86_64:\
${programs_location}/Trimmomatic-${trimmomatic_version}:\
${programs_location}/trf409.linux64"
######################################################
%post
# Build-time installation script: installs system packages with apt, runs the
# R package installation script copied in by %files, then downloads/builds the
# bioinformatics programs into /srlab/programs.

# Install common/base libraries/dependencies
# Every package line except the last must end with a backslash; a missing one
# ends the apt command early and the next package name is run as a command.
apt update && \
apt install -y --no-install-recommends \
  apt-utils \
  aptitude \
  automake \
  build-essential \
  bzip2 \
  cmake \
  coreutils \
  curl \
  debconf \
  default-jdk \
  dialog \
  emboss \
  gcc-multilib \
  gdebi-core \
  gfortran \
  git \
  gobjc++ \
  hmmer \
  htop \
  infernal \
  libapparmor1 \
  libbz2-dev \
  libc6 \
  libcurl4-openssl-dev \
  libedit2 \
  libfontconfig1 \
  liblzma-dev \
  libncurses5-dev \
  libncurses-dev \
  libncursesw5-dev \
  libpango1.0-dev \
  libpng-dev \
  libreadline-dev \
  libsm6 \
  libsparsehash-dev \
  libssl-dev \
  liburi-escape-xs-perl \
  liburi-perl \
  libxml2 \
  libxrender1 \
  libz-dev \
  libxt6 \
  libxtst6 \
  libzmq3-dev \
  locales \
  locales-all \
  man \
  nano \
  ncurses-dev \
  net-tools \
  openssh-client \
  parallel \
  pkg-config \
  psmisc \
  python3 \
  python-setuptools \
  rsync \
  ruby \
  software-properties-common \
  sqlite3 \
  sudo \
  texinfo \
  tree \
  unzip \
  wget \
  xorg-dev \
  zlib1g-dev

# Run R package installation script file
Rscript /tmp/r_packages_installs.R

########## Set program versions ##########
# Keep in sync with the version list in %environment, which builds PATH from
# the same values.
bedtools_version="v2.31.0"
bismark_version="0.24.2"
bowtie2_version="2.5.4"
diamond_version="2.1.9"
fastp_version="0.23.4"
fastqc_version="0.12.1"
gffcompare_version="gffcompare-0.12.6"
hisat2_version="2.2.1"
kallisto_version="0.51.1"
miniforge_version="24.7.1-0"
multiqc_version="1.24.1"
ncbi_blast_version="2.16.0"
ncbi_datasets_version="16.34.0"
repeatmasker_version="4.1.7-p1"
rmblast_version="2.14.1"
salmon_version="1.10.0"
stringtie_version="2.2.1"
samtools_version="1.20"
trimmomatic_version="0.39"
##########################################

# Set threads for faster builds
threads="40"

# Make programs directory
mkdir --parents /srlab/programs

# Change to installation directory
cd /srlab/programs

#Install Miniforge (Conda/Mamba)
wget https://github.com/conda-forge/miniforge/releases/download/${miniforge_version}/Miniforge3-${miniforge_version}-Linux-x86_64.sh
bash Miniforge3-${miniforge_version}-Linux-x86_64.sh -b -p ./miniforge3-${miniforge_version}
export PATH="$PATH:/srlab/programs/miniforge3-${miniforge_version}/bin"
mamba init
. ./miniforge3-${miniforge_version}/etc/profile.d/conda.sh
rm Miniforge3-${miniforge_version}-Linux-x86_64.sh

## Configure conda channels
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda config --set channel_priority strict

# Install bedtools
wget https://github.com/arq5x/bedtools2/releases/download/${bedtools_version}/bedtools.static
mv bedtools.static bedtools
chmod a+x bedtools

# Install Bismark
wget https://github.com/FelixKrueger/Bismark/archive/refs/tags/v${bismark_version}.zip
unzip v${bismark_version}.zip
rm v${bismark_version}.zip

# Install bowtie2
wget https://github.com/BenLangmead/bowtie2/releases/download/v${bowtie2_version}/bowtie2-${bowtie2_version}-sra-linux-x86_64.zip
unzip bowtie2-${bowtie2_version}-sra-linux-x86_64.zip
rm bowtie2-${bowtie2_version}-sra-linux-x86_64.zip

# Install DIAMOND BLAST
wget https://github.com/bbuchfink/diamond/releases/download/v${diamond_version}/diamond-linux64.tar.gz
tar -xzvf diamond-linux64.tar.gz
rm diamond-linux64.tar.gz

# Install fastp
wget http://opengene.org/fastp/fastp.${fastp_version}
mv fastp.${fastp_version} fastp
chmod a+x ./fastp

# Install FastQC
wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip
unzip fastqc_v${fastqc_version}.zip
mv FastQC fastqc-${fastqc_version}
rm fastqc_v${fastqc_version}.zip

# Install gffcompare
wget http://ccb.jhu.edu/software/stringtie/dl/${gffcompare_version}.Linux_x86_64.tar.gz
tar -xzvf ${gffcompare_version}.Linux_x86_64.tar.gz
rm ${gffcompare_version}.Linux_x86_64.tar.gz

# Install HISAT2
wget https://github.com/DaehwanKimLab/hisat2/archive/refs/tags/v${hisat2_version}.tar.gz
tar -xzvf v${hisat2_version}.tar.gz
cd hisat2-${hisat2_version}
make -j ${threads}
cd -
rm v${hisat2_version}.tar.gz

# Install kallisto
wget https://github.com/pachterlab/kallisto/archive/refs/tags/v${kallisto_version}.tar.gz
tar -xzvf v${kallisto_version}.tar.gz
cd kallisto-${kallisto_version}
mkdir build
cd build
cmake ..
make
cd /srlab/programs
rm v${kallisto_version}.tar.gz

# Install MultiQC
# Uses ${multiqc_version} so the version is defined in one place; -y keeps the
# build non-interactive (same as the RepeatMasker environment below).
mamba create -n multiqc_env multiqc=${multiqc_version} -y

# Install NCBI datasets
# The download URL is derived from ${ncbi_datasets_version} so the directory
# name and the downloaded release cannot drift apart.
mkdir --parents ncbi-datasets-v${ncbi_datasets_version}
cd ncbi-datasets-v${ncbi_datasets_version}
wget https://github.com/ncbi/datasets/releases/download/v${ncbi_datasets_version}/linux-amd64.cli.package.zip \
  && unzip linux-amd64.cli.package.zip \
  && rm linux-amd64.cli.package.zip
cd /srlab/programs

# Install NCBI BLAST
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${ncbi_blast_version}/ncbi-blast-${ncbi_blast_version}+-x64-linux.tar.gz \
  && tar -zxvpf ncbi-blast-${ncbi_blast_version}+-x64-linux.tar.gz \
  && rm ncbi-blast-${ncbi_blast_version}+-x64-linux.tar.gz

# RepeatMasker conda environment with Python3 and h5py using mamba
mamba create -n repeatmasker-env python=3.8 h5py -y

# Install RepeatMasker
wget https://www.repeatmasker.org/RepeatMasker/RepeatMasker-${repeatmasker_version}.tar.gz
tar -xzvf RepeatMasker-${repeatmasker_version}.tar.gz
rm RepeatMasker-${repeatmasker_version}.tar.gz

## Remove default RepeatMasker DB
rm /srlab/programs/RepeatMasker/Libraries/famdb/*

# Set up symlinks to RepeatMasker files on Klone
# The link targets live on the host under /gscratch, so they only resolve when
# /gscratch is bind-mounted into the running container.
ln -s /gscratch/srlab/programs/RepeatMasker/Libraries/famdb/dfam38_full.0.h5 /srlab/programs/RepeatMasker/Libraries/famdb/dfam38_full.0.h5
ln -s /gscratch/srlab/programs/RepeatMasker/Libraries/famdb/rmlib.config /srlab/programs/RepeatMasker/Libraries/famdb/rmlib.config

# Install RMBLAST
wget https://www.repeatmasker.org/rmblast/rmblast-${rmblast_version}+-x64-linux.tar.gz
tar -xzvf rmblast-${rmblast_version}+-x64-linux.tar.gz
rm rmblast-${rmblast_version}+-x64-linux.tar.gz

# Install salmon
wget https://github.com/COMBINE-lab/salmon/releases/download/v${salmon_version}/salmon-${salmon_version}_linux_x86_64.tar.gz
tar -xzvf salmon-${salmon_version}_linux_x86_64.tar.gz
rm salmon-${salmon_version}_linux_x86_64.tar.gz

# Install samtools
wget https://github.com/samtools/samtools/releases/download/${samtools_version}/samtools-${samtools_version}.tar.bz2
tar -xjvf samtools-${samtools_version}.tar.bz2
cd samtools-${samtools_version}
./configure
make -j ${threads}
cd -
rm samtools-${samtools_version}.tar.bz2

# Install trimmomatic
wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-${trimmomatic_version}.zip
unzip Trimmomatic-${trimmomatic_version}.zip
chmod +x -R Trimmomatic-${trimmomatic_version}
rm Trimmomatic-${trimmomatic_version}.zip

# Install StringTie
wget http://ccb.jhu.edu/software/stringtie/dl/stringtie-${stringtie_version}.Linux_x86_64.tar.gz
tar -xzvf stringtie-${stringtie_version}.Linux_x86_64.tar.gz
rm stringtie-${stringtie_version}.Linux_x86_64.tar.gz

# Install TRF
wget https://github.com/Benson-Genomics-Lab/TRF/releases/download/v4.09.1/trf409.linux64
chmod +x trf409.linux64
%labels
Author Sam White
Version v0.0.5
%help
This is an Apptainer definition file for an Ubuntu container containing bioinformatics software for the Roberts Lab.