# Global R options
knitr::opts_chunk$set(echo = TRUE)

# Key paths and the e-value threshold used by the chunks below.
OUT_DIR <- "../output/23-Apul-energy-GO/"
evalue <- "1E-20"
fasta <- "../data/Apulchra-genome.pep.faa"

# Export these as environment variables so bash chunks can read them
# as ${OUT_DIR}, ${evalue}, and ${fasta}.
Sys.setenv(
  OUT_DIR = OUT_DIR,
  evalue = evalue,
  fasta = fasta
)

Tackled two primary tasks
Identified proteins involved in key energy utilization pathways and identified proteins corresponding to epigenetic machinery.
For the energy utilization pathways,
we focused on the following:
- Glycolysis GO:0006096
- Gluconeogenesis GO:0006094
- Lipolysis/lipid catabolism GO:0016042
- Fatty acid beta oxidation GO:0006635
- Starvation GO:0042594
- Lipid biosynthesis GO:0008610
- Protein catabolic process GO:0030163
and ran something on the order of the following:
Variables
Glycolysis GO:0006096
# GO term to process (numeric part of the GO ID only).
GO="0006096"

# Download the reviewed UniProtKB entries annotated with this GO term as FASTA.
curl -H "Accept: text/plain" "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29" -o "${OUT_DIR}SwissProt-GO:${GO}.fa"

# Preview the download and count its sequences (one ">" header per record).
head "${OUT_DIR}SwissProt-GO:${GO}.fa"
echo "Number of Proteins"
grep -c ">" "${OUT_DIR}SwissProt-GO:${GO}.fa"

# Build a protein BLAST database from the downloaded sequences.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in "${OUT_DIR}SwissProt-GO:${GO}.fa" \
  -dbtype prot \
  -out "${OUT_DIR}SwissProt-GO:${GO}"

# blastp the query proteins (${fasta}) against that database.
# Tabular output (-outfmt 6); stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db "${OUT_DIR}SwissProt-GO:${GO}" \
  -out "${OUT_DIR}Apul_blastp-GO:${GO}_out.tab" \
  -evalue "${evalue}" \
  -num_threads 42 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6 \
  2> "${OUT_DIR}blast_warnings${GO}.txt"

# Preview the hits and count them (one line per hit in tabular output).
head "${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"
echo "Number of hits"
wc -l "${OUT_DIR}Apul_blastp-GO:${GO}_out.tab"

For Machinery…
more along the lines of
Apul
https://raw.githubusercontent.com/urol-e5/deep-dive-expression/main/D-Apul/data/Apulchra-genome.pep.faa
# Download the Apulchra-genome protein FASTA into ../data.
cd ../data
curl -o Apulchra-genome.pep.faa https://raw.githubusercontent.com/urol-e5/deep-dive-expression/main/D-Apul/data/Apulchra-genome.pep.faa

# Preview the downloaded file.
head ../data/Apulchra-genome.pep.faa
# Build a protein BLAST database from the Apulchra-genome protein FASTA.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in ../data/Apulchra-genome.pep.faa \
  -dbtype prot \
  -out ../output/25-Apul-epimods-blast/Apul-proteins

# Preview the Machinery sequences.
head ../data/Machinery.fasta

# Query FASTA used by the blastp run that follows.
fasta="../data/Machinery.fasta"
# blastp the query proteins (${fasta}) against the Apul-proteins database,
# writing tabular output (-outfmt 6).
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
  -query "${fasta}" \
  -db ../output/25-Apul-epimods-blast/Apul-proteins \
  -out ../output/25-Apul-epimods-blast/Mach-blastp-Apul_out.tab \
  -evalue 1E-05 \
  -num_threads 48 \
  -max_target_seqs 1 \
  -max_hsps 1 \
  -outfmt 6

# Count the hits (one line per hit), then preview them.
wc -l ../output/25-Apul-epimods-blast/Mach-blastp-Apul_out.tab

head ../output/25-Apul-epimods-blast/Mach-blastp-Apul_out.tab