Cod alignment

bwa
cod
wgs
Author
Affiliation

Steven Roberts

Published

July 8, 2025

Genome

cd ../data

curl -O https://owl.fish.washington.edu/halfshell/genomic-databank/GCF_031168955.1_ASM3116895v1_genomic.fna
bwa index ../data/GCF_031168955.1_ASM3116895v1_genomic.fna

Reads - getting on raven

ecotype

mkdir -p ../data/fastq-ecotype
cd ../data/fastq-ecotype
wget -r -np -nd -A "*.gz" https://owl.fish.washington.edu/nightingales/G_macrocephalus/30-1149634506/00_fastq/

Align

# Path to reference genome (must be indexed with bwa index)
REFERENCE="../data/GCF_031168955.1_ASM3116895v1_genomic.fna"

# Number of threads per BWA job
THREADS=42


# Directory containing fastq files
FASTQ_DIR="../data/fastq-ecotype"

# Output directory (you can change this)
OUT_DIR="../output/14-BWA"

# File suffixes
R1_SUFFIX="_R1_001.fastq.gz"
R2_SUFFIX="_R2_001.fastq.gz"
OUT_SUFFIX=".sam"

# -------------------------------
# RUN BWA IN PARALLEL
# -------------------------------
# Run loop
for sample in $(ls "${FASTQ_DIR}"/*"${R1_SUFFIX}" | xargs -n 1 basename | sed "s/${R1_SUFFIX}//" | sort -u); do
  R1_FILE="${FASTQ_DIR}/${sample}${R1_SUFFIX}"
  R2_FILE="${FASTQ_DIR}/${sample}${R2_SUFFIX}"
  OUT_FILE="${OUT_DIR}/${sample}${OUT_SUFFIX}"

  echo "Running BWA on ${sample}..."
  bwa mem -t "${THREADS}" "${REFERENCE}" "${R1_FILE}" "${R2_FILE}" > "${OUT_FILE}"
done

Convert SAM to BAM


# Directory containing .sam files
SAM_DIR="../output/14-BWA"

# Output directory (can be same as SAM_DIR or different)
OUT_DIR="../output/14-BWA"  # Change this if you want bams elsewhere


# Number of threads for sorting
THREADS=42


# Loop through all .sam files
for sam in "${SAM_DIR}"/*.sam; do
  # Get base name (e.g., sample1)
  sample=$(basename "${sam}" .sam)

  # Define output file
  sorted_bam="${OUT_DIR}/${sample}.sorted.bam"

  echo "Converting $sam to $sorted_bam..."

  # Convert and sort
  samtools view -@ $THREADS -bS "$sam" | samtools sort -@ $THREADS -o "$sorted_bam" -

  # Index the sorted BAM
  samtools index "$sorted_bam"
done