# Global R options
# echo = TRUE makes each chunk's source code appear in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Define key paths and tool directories
# OUT_DIR keeps its trailing slash: the bash chunks append file names
# directly onto it (e.g. "${OUT_DIR}"SwissProt-GO:...).
OUT_DIR <- "../output/23-Apul-energy-GO/"
# E-value cutoff handed to blastp; kept as a string because it is only
# ever passed through the environment to the shell.
evalue <- "1E-20"
# Protein FASTA used as the blastp query in the bash chunks.
fasta <- "../data/Apulchra-genome.pep.faa"

# Export these as environment variables for bash chunks.
Sys.setenv(
  OUT_DIR = OUT_DIR,
  evalue = evalue,
  fasta = fasta
)
Tackled two primary tasks
First, identified proteins involved in key energy utilization pathways; second, identified proteins corresponding to epigenetic machinery.
For the energy utilization pathways,
we focused on the following:
- Glycolysis GO:0006096
- Gluconeogenesis GO:0006094
- Lipolysis/lipid catabolism GO:0016042
- Fatty acid beta oxidation GO:0006635
- Starvation GO:0042594
- Lipid biosynthesis GO:0008610
- Protein catabolic process GO:0030163
and, for each term, ran something along the lines of the following.
Variables
Glycolysis GO:0006096
# Download the reviewed UniProt entries annotated with this GO term as FASTA,
# then preview the file and count its sequences.
GO="0006096"
go_fasta="${OUT_DIR}SwissProt-GO:${GO}.fa"
uniprot_query="%28%28go%3A${GO}%29%29+AND+%28reviewed%3Atrue%29"

curl \
  -H "Accept: text/plain" \
  -o "${go_fasta}" \
  "https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=${uniprot_query}"

head "${go_fasta}"

echo "Number of Proteins"
# Every FASTA record header contains ">", so this counts the sequences.
grep -c ">" "${go_fasta}"
# Build a protein BLAST database from the GO-term FASTA.
# -in  : FASTA of sequences to index
# -out : database base name (same path as the FASTA, minus the .fa suffix)
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
-in "${OUT_DIR}"SwissProt-GO:"${GO}".fa \
-dbtype prot \
-out "${OUT_DIR}"SwissProt-GO:"${GO}"
# BLAST the protein FASTA in $fasta against the GO-term database.
# $fasta and $evalue are exported from the R setup chunk via Sys.setenv().
# Output is tabular (-outfmt 6) with at most one target and one HSP per query;
# anything blastp writes to stderr is captured in a per-GO warnings file.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
-query "${fasta}" \
-db "${OUT_DIR}"SwissProt-GO:"${GO}" \
-out "${OUT_DIR}"Apul_blastp-GO:"${GO}"_out.tab \
-evalue "${evalue}" \
-num_threads 42 \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
2> "${OUT_DIR}"blast_warnings"${GO}".txt

# Preview the hit table and report how many rows it contains.
head "${OUT_DIR}"Apul_blastp-GO:"${GO}"_out.tab
echo "Number of hits"
wc -l "${OUT_DIR}"Apul_blastp-GO:"${GO}"_out.tab
For the epigenetic machinery proteins, the approach was
more along the lines of the following.
Apul
https://raw.githubusercontent.com/urol-e5/deep-dive-expression/main/D-Apul/data/Apulchra-genome.pep.faa
# Fetch the Apulchra-genome protein FASTA into ../data and preview it.
cd ../data
curl \
  --output Apulchra-genome.pep.faa \
  https://raw.githubusercontent.com/urol-e5/deep-dive-expression/main/D-Apul/data/Apulchra-genome.pep.faa

head ../data/Apulchra-genome.pep.faa
# Build a protein BLAST database from the Apulchra-genome protein FASTA.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
  -in ../data/Apulchra-genome.pep.faa \
  -dbtype prot \
  -out ../output/25-Apul-epimods-blast/Apul-proteins
# Preview the machinery protein FASTA that will be used as the query.
head ../data/Machinery.fasta

# Local to this shell: point $fasta at the machinery proteins.
fasta="../data/Machinery.fasta"

# BLAST the machinery proteins against the Apul-proteins database.
# Output is tabular (-outfmt 6) with at most one target and one HSP per query.
/home/shared/ncbi-blast-2.15.0+/bin/blastp \
-query "${fasta}" \
-db ../output/25-Apul-epimods-blast/Apul-proteins \
-out ../output/25-Apul-epimods-blast/Mach-blastp-Apul_out.tab \
-evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6

# Report the number of rows in the hit table, then preview it.
wc -l ../output/25-Apul-epimods-blast/Mach-blastp-Apul_out.tab
head ../output/25-Apul-epimods-blast/Mach-blastp-Apul_out.tab