Need for Speed
Can we speed up samtools?
Noting after the fact: samtools has its own threading option, which none of the runs below use.
nthreads=INT
Specifies the number of threads to use during encoding and/or decoding. For BAM this will be encoding only. In CRAM the threads are dynamically shared between encoder and decoder.
# Baseline: convert and sort each SAM file one after another, timing the whole loop.
time \
for file in ../output/05.2-lncRNA/*.sam; do
  # If nothing matches, the glob is left as the literal pattern; skip it
  # instead of handing samtools a nonexistent "*.sam" path.
  [ -e "$file" ] || continue
  base=$(basename "$file" .sam)
  # view -b emits BAM on stdout; sort reads it from the pipe and writes the
  # sorted result next to the input with a "t1" prefix.
  /home/shared/samtools-1.12/samtools view -bS "$file" \
    | /home/shared/samtools-1.12/samtools sort \
      -o ../output/05.2-lncRNA/t1"$base".sorted.bam
done
real 161m45.269s user 190m4.654s sys 5m31.658s
#######################################
# Convert one SAM file to a sorted BAM.
# Arguments: $1 - path to the input .sam file
# Outputs:   writes ../output/05.2-lncRNA/t3<basename>.sorted.bam
# Returns:   exit status of the samtools sort stage
#######################################
process_file() {
  # Keep the working variables local so they do not leak into the caller.
  local file=$1
  local base
  base=$(basename "$file" .sam)
  # view -b emits BAM on stdout; sort reads it from the pipe.
  /home/shared/samtools-1.12/samtools view -bS "$file" \
    | /home/shared/samtools-1.12/samtools sort \
      -o ../output/05.2-lncRNA/t3"$base".sorted.bam
}
# Export the function so child shells (e.g. those started by parallel) can call it.
export -f process_file
# Time the run with 4 concurrent jobs. Paths are passed NUL-delimited so that
# filenames containing newlines are handed to process_file intact.
time \
find ../output/05.2-lncRNA/ -name "*.sam" -print0 \
  | parallel -0 -j4 process_file
real 64m0.721s user 197m30.191s sys 5m34.827s
#######################################
# Convert one SAM file to a sorted BAM.
# Arguments: $1 - path to the input .sam file
# Outputs:   writes ../output/05.2-lncRNA/t4<basename>.sorted.bam
# Returns:   exit status of the samtools sort stage
#######################################
process_file() {
  # Keep the working variables local so they do not leak into the caller.
  local file=$1
  local base
  base=$(basename "$file" .sam)
  # view -b emits BAM on stdout; sort reads it from the pipe.
  /home/shared/samtools-1.12/samtools view -bS "$file" \
    | /home/shared/samtools-1.12/samtools sort \
      -o ../output/05.2-lncRNA/t4"$base".sorted.bam
}
# Export the function so child shells (e.g. those started by parallel) can call it.
export -f process_file
# Time the run with 8 concurrent jobs. Paths are passed NUL-delimited so that
# filenames containing newlines are handed to process_file intact.
time \
find ../output/05.2-lncRNA/ -name "*.sam" -print0 \
  | parallel -0 -j8 process_file
real 36m52.767s
user 199m38.271s
sys 5m50.486s
#######################################
# Convert one SAM file to a sorted BAM.
# Arguments: $1 - path to the input .sam file
# Outputs:   writes ../output/05.2-lncRNA/t6<basename>.sorted.bam
# Returns:   exit status of the samtools sort stage
#######################################
process_file() {
  # Keep the working variables local so they do not leak into the caller.
  local file=$1
  local base
  base=$(basename "$file" .sam)
  # view -b emits BAM on stdout; sort reads it from the pipe.
  /home/shared/samtools-1.12/samtools view -bS "$file" \
    | /home/shared/samtools-1.12/samtools sort \
      -o ../output/05.2-lncRNA/t6"$base".sorted.bam
}
# Export the function so child shells (e.g. those started by parallel) can call it.
export -f process_file
# Time the run with 20 concurrent jobs. Paths are passed NUL-delimited so that
# filenames containing newlines are handed to process_file intact.
time \
find ../output/05.2-lncRNA/ -name "*.sam" -print0 \
  | parallel -0 -j20 process_file
real 38m47.367s user 201m18.650s sys 6m20.804s
Written on August 18, 2023