Need for Speed

Can we speed up samtools?

Noting after the fact… samtools has its own threading option, which none of the runs shown below use:

nthreads=INT
Specifies the number of threads to use during encoding and/or decoding. For BAM this will be encoding only. In CRAM the threads are dynamically shared between encoder and decoder.
# Baseline: convert each SAM file to BAM and sort it, one file at a time.
# Each result is written to the same directory as t1<name>.sorted.bam, and
# `time` covers the whole loop.
time \
for file in ../output/05.2-lncRNA/*.sam; do
    # When nothing matches, bash leaves the pattern as a literal string;
    # skip it instead of handing a nonexistent path to samtools.
    [[ -e "$file" ]] || continue
    base=$(basename "$file" .sam)
    /home/shared/samtools-1.12/samtools view -bS "$file" | \
    /home/shared/samtools-1.12/samtools sort \
    -o "../output/05.2-lncRNA/t1${base}.sorted.bam"
done

real 161m45.269s
user 190m4.654s
sys 5m31.658s

# Convert one SAM file to a sorted BAM.
# Arguments: $1 - path to the input .sam file
# Outputs:   writes ../output/05.2-lncRNA/t3<basename>.sorted.bam
# Returns:   0 if both samtools stages succeeded, otherwise the exit
#            status of the first stage that failed
process_file() {
    # Keep these local so the function does not clobber variables named
    # `file` or `base` in the calling shell.
    local file=$1
    local base
    base=$(basename "$file" .sam)
    /home/shared/samtools-1.12/samtools view -bS "$file" | \
    /home/shared/samtools-1.12/samtools sort \
    -o "../output/05.2-lncRNA/t3${base}.sorted.bam"
    # Capture every stage's status right away: a plain pipeline would only
    # report the status of `sort` and hide a failed `view`.
    local -a stage_status=("${PIPESTATUS[@]}")
    local rc
    for rc in "${stage_status[@]}"; do
        (( rc == 0 )) || return "$rc"
    done
    return 0
}
# Exported so that child bash processes can call the function.
export -f process_file

# Run process_file on every .sam file found under the output directory,
# 4 jobs at a time. Paths are handed over NUL-delimited so file names
# containing newlines or other unusual characters arrive intact.
time \
find ../output/05.2-lncRNA/ -name "*.sam" -print0 | parallel -0 -j4 process_file

real 64m0.721s
user 197m30.191s
sys 5m34.827s

# Convert one SAM file to a sorted BAM.
# Arguments: $1 - path to the input .sam file
# Outputs:   writes ../output/05.2-lncRNA/t4<basename>.sorted.bam
# Returns:   0 if both samtools stages succeeded, otherwise the exit
#            status of the first stage that failed
process_file() {
    # Keep these local so the function does not clobber variables named
    # `file` or `base` in the calling shell.
    local file=$1
    local base
    base=$(basename "$file" .sam)
    /home/shared/samtools-1.12/samtools view -bS "$file" | \
    /home/shared/samtools-1.12/samtools sort \
    -o "../output/05.2-lncRNA/t4${base}.sorted.bam"
    # Capture every stage's status right away: a plain pipeline would only
    # report the status of `sort` and hide a failed `view`.
    local -a stage_status=("${PIPESTATUS[@]}")
    local rc
    for rc in "${stage_status[@]}"; do
        (( rc == 0 )) || return "$rc"
    done
    return 0
}
# Exported so that child bash processes can call the function.
export -f process_file

# Run process_file on every .sam file found under the output directory,
# 8 jobs at a time. Paths are handed over NUL-delimited so file names
# containing newlines or other unusual characters arrive intact.
time \
find ../output/05.2-lncRNA/ -name "*.sam" -print0 | parallel -0 -j8 process_file

real 36m52.767s
user 199m38.271s
sys 5m50.486s

# Convert one SAM file to a sorted BAM.
# Arguments: $1 - path to the input .sam file
# Outputs:   writes ../output/05.2-lncRNA/t6<basename>.sorted.bam
# Returns:   0 if both samtools stages succeeded, otherwise the exit
#            status of the first stage that failed
process_file() {
    # Keep these local so the function does not clobber variables named
    # `file` or `base` in the calling shell.
    local file=$1
    local base
    base=$(basename "$file" .sam)
    /home/shared/samtools-1.12/samtools view -bS "$file" | \
    /home/shared/samtools-1.12/samtools sort \
    -o "../output/05.2-lncRNA/t6${base}.sorted.bam"
    # Capture every stage's status right away: a plain pipeline would only
    # report the status of `sort` and hide a failed `view`.
    local -a stage_status=("${PIPESTATUS[@]}")
    local rc
    for rc in "${stage_status[@]}"; do
        (( rc == 0 )) || return "$rc"
    done
    return 0
}
# Exported so that child bash processes can call the function.
export -f process_file

# Run process_file on every .sam file found under the output directory,
# 20 jobs at a time. Paths are handed over NUL-delimited so file names
# containing newlines or other unusual characters arrive intact.
time \
find ../output/05.2-lncRNA/ -name "*.sam" -print0 | parallel -0 -j20 process_file

real 38m47.367s
user 201m18.650s
sys 6m20.804s

Written on August 18, 2023