Getting appropriate IDs for genes and transcripts

for cod, looking ahead to UniProt accessions
R
blast
cod
Author
Affiliation

Steven Roberts

Published

May 21, 2024

# Preview the first 10 lines of the DEG list written by step 10 (hisat / DESeq2).
head -n 10 ../output/10-hisat-deseq2/DEGlist.tab
"baseMean"  "log2FoldChange"    "lfcSE" "stat"  "pvalue"    "padj"
"gene-abce1|abce1"  262.700269478681    0.105952784879971   0.0281220392741257  3.76760674598215    0.000164820070819158    0.00169579580885594
"gene-si:dkey-6i22.5|si:dkey-6i22.5"    23.1818150225127    -0.27199275490447   0.0723746042802814  -3.75812424274041   0.000171191878435429    0.00174456130354749
"gene-LOC132463139|LOC132463139"    264.894726704886    0.294505009896922   0.0596592538711384  4.93645144361076    7.95568331237438e-07    2.12004635637234e-05
"gene-prkaa1|prkaa1"    3214.61285704688    -0.126782734464496  0.0340426269133444  -3.72423475976815   0.000195908561479354    0.00193395624569665
"gene-snx27a|snx27a"    1220.23705269789    -0.0630710658454225 0.01399905805089    -4.50537926310069   6.62545018246717e-06    0.000125962116117261
"gene-LOC132467924|LOC132467924"    197.522767123278    0.0945071207278742  0.032030373339586   2.9505469613454 0.00317211817956902 0.0171388805882921
"gene-LOC132462420|LOC132462420"    713.695192687075    -0.134317843644132  0.0311548193019831  -4.31130228495925   1.62295854600018e-05    0.00026130311652756
"gene-copa|copa"    1141.65308019589    0.0553273042877379  0.0180028215560976  3.07325738442364    0.00211735779654983 0.0125618143711436
"gene-LOC132461740|LOC132461740"    2138.07063376679    0.125918081978082   0.0341318095843598  3.68917099654104    0.000224985969638598    0.00216220282509821
# List the files that make up the NCBI dataset for assembly GCF_031168955.1.
genome_dir=/home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1
ls "${genome_dir}"
cds_from_genomic.fna
GCF_031168955.1_ASM3116895v1_genomic.fna
genomic.gff
genomic.gtf
protein.faa
rna.fna
sequence_report.jsonl
# Show the first 10 lines of every file in the assembly directory
# (head prints a "==> file <==" banner before each one).
genome_dir=/home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1
head -n 10 "${genome_dir}"/*
==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/cds_from_genomic.fna <==
>lcl|NC_082382.1_cds_XP_059916776.1_1 [gene=rereb] [db_xref=GeneID:132464430] [protein=arginine-glutamic acid dipeptide repeats protein isoform X1] [protein_id=XP_059916776.1] [location=join(44883..44908,45901..45949,46305..46429,50198..50297,50452..50550,50685..50765,50865..51018,51179..51271,57813..58012,59841..60005,64360..64470,67157..68541,70221..70425,70825..71635,72380..72517,72610..72790,75074..75107)] [gbkey=CDS]
ATGGACGACCTCTTCAGTCCGCGGAGGAGCTTGAACAGCACCCAAGGGGAGATACGAGTGGGACCAAGTCACCAGGCCAA
GCTTCCCGAGCTGCAGCCACGGTCAGCCCCTAGCCTCCAGACTCAGACGGAGAGCGAGGAGCTGGTGTGGACCCCGGGAG
TCAATGACTGTGATCTACTCATGTACCTGAGAGCGGCAAGGAGCATGGCAGCGTTTGCAGGGATGTGTGATGGCGGCTCG
ACGGAGGATGGGTGCCTGGCGGCGTCCCGTGATGACACCACTCTCAACGCACTGAACATGTTGCATGCCAGCCATTACGA
TGCAGCCAAAGCTCTCCAGCATCTGGTGAAGAAACCAGTTCCAAAGCTGATTGAGAAGTGCTGGTCAGAGGACGATGTGA
AACGCTTCATCAAAGGCCTTAGGCTTTACGGCAAGAACTTCTTCCGCATTCGAAAAGAGCTTCTGCCCAGCAAAAAGACG
GGTGAGCTGATCACGTTCTACTACCACTGGAAGAAGACGCCGGAGGCCTCAGGAACCAGAGCCTATCGCCAGCACCGTCG
CCAGCCATCTTCACGCAAGGCTAAGACCCGGTCAGCCACGGCCCCCGTCAGCACGCCGTCCCGGTCCCACTCACTGGACA
TGAGCTCCGCCAGTGAGGATGACATGGAAAGTGAAGACAGTGAACAAGACTTGAAGCGGTCCACCTGCAGCCACTGTGGC

==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/GCF_031168955.1_ASM3116895v1_genomic.fna <==
>NC_082382.1 Gadus macrocephalus chromosome 1, ASM3116895v1
CAGACCTCCAATAGAGAGCTGCTCCCCCTTCGCACAAAGCCGCTGCTGGTGAATGCTCGAAGCGTTGTGTGATTGAATCG
CTTTAATGCCGTTCCATGTCACGTTGATCGTTTTTTTGCACAACGAGCAAAAAGCTTCCCGGTCATTTCCCATCACGGGT
TTCAGCCAGTCTTTAAATGTCGGGTCGTCAACCCATGTTTGCGAaaacttgcatttacccattcTCTCATAGAGCCAGAA
ACTCAGCCGTACAGAACTCTGAGGGGAAAAGGCGGAAAATGTTGctggtgttacaccgaattctgtgacccaccgaaaag
tgtggcCCCAGGGGGTGcagttgcacattttctgcaacatgcacgtgtgcattataacaaaaaacataaataaaacataa
gatctctggtgggtctttctttttttgatactaacattaattctaaacactattttacacagtagcctatactaggaaat
gctcaaaggtaaatcagcagAATTTAACcatgactgtagctttttatgggtaaagaagcaacggtgaaggacccTACTAA
ACGCccgtctgtaaaatgctaatcaaatcaatcaaatgtatttattaagcacctttaataaaaaggtaattggttggggg
gagatgttatgttttatgtatgtttttgtcatgcctgtctacgcttcaaactcgtgcatattgcagtaaatatgcaacag

==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/genomic.gff <==
##gff-version 3
#!gff-spec-version 1.21
#!processor NCBI annotwriter
#!genome-build ASM3116895v1
#!genome-build-accession NCBI_Assembly:GCF_031168955.1
#!annotation-date 09/26/2023
#!annotation-source NCBI RefSeq GCF_031168955.1-RS_2023_09
##sequence-region NC_082382.1 1 26289739
##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=80720
NC_082382.1 RefSeq  region  1   26289739    .   +   .   ID=NC_082382.1:1..26289739;Dbxref=taxon:80720;Name=1;chromosome=1;gbkey=Src;genome=chromosome;mol_type=genomic DNA

==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/genomic.gtf <==
#gtf-version 2.2
#!genome-build ASM3116895v1
#!genome-build-accession NCBI_Assembly:GCF_031168955.1
#!annotation-date 09/26/2023
#!annotation-source NCBI RefSeq GCF_031168955.1-RS_2023_09
NC_082382.1 Gnomon  gene    7826    12944   .   +   .   gene_id "LOC132464423"; transcript_id ""; db_xref "GeneID:132464423"; description "uncharacterized LOC132464423"; gbkey "Gene"; gene "LOC132464423"; gene_biotype "lncRNA"; 
NC_082382.1 Gnomon  transcript  7826    12944   .   +   .   gene_id "LOC132464423"; transcript_id "XR_009527261.1"; db_xref "GeneID:132464423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; gbkey "ncRNA"; gene "LOC132464423"; product "uncharacterized LOC132464423"; transcript_biotype "lnc_RNA"; 
NC_082382.1 Gnomon  exon    7826    8896    .   +   .   gene_id "LOC132464423"; transcript_id "XR_009527261.1"; db_xref "GeneID:132464423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; gene "LOC132464423"; product "uncharacterized LOC132464423"; transcript_biotype "lnc_RNA"; exon_number "1"; 
NC_082382.1 Gnomon  exon    12398   12944   .   +   .   gene_id "LOC132464423"; transcript_id "XR_009527261.1"; db_xref "GeneID:132464423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; gene "LOC132464423"; product "uncharacterized LOC132464423"; transcript_biotype "lnc_RNA"; exon_number "2"; 
NC_082382.1 Gnomon  gene    44750   76860   .   +   .   gene_id "rereb"; transcript_id ""; db_xref "GeneID:132464430"; description "arginine-glutamic acid dipeptide (RE) repeats b"; gbkey "Gene"; gene "rereb"; gene_biotype "protein_coding"; 

==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/protein.faa <==
>XP_059891391.1 putative helicase MOV-10 [Gadus macrocephalus]
MVTTLFSASRLVTEGIPPGFYSHIFVDEAGQPAEPEGVIPLAGLLDPKRGQVVLAGDPKQLGPIVKSPLAKKHGLGVSML
ERLMELNVYKKTEETGYNERFITKLLRNYRSHGRLLTIPNELFYESELQVWADKDIRNSLCEWKHLPSKGFPLIFHEVTG
RMRREDNASLFNASLFNEDEVAILMQYLKALLEDVPPEDIGLIAPYRKQVERINKALKIEFPRNTAKLKVCTVDAFQGEE
KRVILLSTVRSTSRDPRPPSSVGFLADPKRFNVAMTRAQALLIVAGNSEALTKDRIWSRFIEYCKEHGGYTKTMTTD
>XP_059891392.1 uncharacterized protein LOC132445431 isoform X1 [Gadus macrocephalus]
MSGTQLNLLWVVAQQLMPGTWTEETGLDIEGVPQQLYGIDCGVFMVMYSWYITMDAHFDFNVLDMPHLRRWWCKLLLDNY
GIEGCGKRFCHFTQEGHQMVNGLLAPVFRVTRKRKVLTKADDVFLKDTIEAAAWCQLQTFTDHVSLPMVIGVEGAEQQAL
LAELKSVDRSCPEESLNRIEPFQFFFNSKKDYEMFCVEMFDRRKLKVFAYWE
>XP_059891393.1 uncharacterized protein LOC132445431 isoform X1 [Gadus macrocephalus]

==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/rna.fna <==
>XM_060035408.1 PREDICTED: Gadus macrocephalus putative helicase MOV-10 (LOC132473465), mRNA
AGTTCAATTGCAACTGGGTAGAGGGCGGCATCTATATCCCCGATAAAGAAGAGCTGGAGAAGCATAAAATCATGGTCACC
ACCCTTTTCTCCGCTTCAAGGCTGGTTACGGAAGGCATCCCTCCAGGCTTTTACAGCCATATCTTTGTTGACGAGGCAGG
ACAACCTGCAGAGCCTGAAGGGGTTATCCCCCTGGCAGGCCTACTGGACCCAAAGCGTGGCCAGGTAGTGTTGGCAGGAG
ACCCCAAACAGTTGGGCCCCATCGTCAAATCCCCCCTAGCCAAGAAGCATGGACTTGGTGTATCAATGCTGGAGCGTCTG
ATGGAGTTGAATGTGTACAAAAAGACAGAGGAAACGGGGTACAACGAGCGTTTCATCACCAAGCTGCTGAGGAACTACAG
GTCTCATGGCAGACTTCTAACGATCCCAAATGAGCTGTTCTACGAGAGCGAACTCCAGGTGTGGGCCGATAAGGATATCC
GCAACTCCTTATGTGAATGGAAGCACCTTCCCAGCAAGGGATTCCCGCTGATCTTCCATGAGGTCACTGGACGCATGCGT
CGCGAGGACAACGCCTCACTGTTCAACGCCTCACTGTTCAACGAAGACGAGGTGGCGATTCTAATGCAGTATCTGAAAGC
ACTGTTGGAAGATGTCCCCCCAGAAGACATAGGCCTCATTGCCCCATACAGGAAACAAGTGGAGAGGATCAACAAGGCTC

==> /home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/sequence_report.jsonl <==
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"1","gcCount":"11966204","gcPercent":45.5,"genbankAccession":"CP133525.1","length":26289739,"refseqAccession":"NC_082382.1","role":"assembled-molecule","sequenceName":"1"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"2","gcCount":"10889746","gcPercent":46.0,"genbankAccession":"CP133526.1","length":23805147,"refseqAccession":"NC_082383.1","role":"assembled-molecule","sequenceName":"2"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"3","gcCount":"12260317","gcPercent":45.5,"genbankAccession":"CP133527.1","length":26984552,"refseqAccession":"NC_082384.1","role":"assembled-molecule","sequenceName":"3"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"4","gcCount":"15920082","gcPercent":45.5,"genbankAccession":"CP133528.1","length":35077496,"refseqAccession":"NC_082385.1","role":"assembled-molecule","sequenceName":"4"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"5","gcCount":"10179291","gcPercent":46.0,"genbankAccession":"CP133529.1","length":22175970,"refseqAccession":"NC_082386.1","role":"assembled-molecule","sequenceName":"5"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"6","gcCount":"12603262","gcPercent":45.0,"genbankAccession":"CP133530.1","length":27900680,"refseqAccession":"NC_082387.1","role":"assembled-molecule","sequenceName":"6"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"7","gcCount":"12814946","gcPercent":45.5,"genbankAccession":"CP133531.1","length":28186669,"refseqAccession":"NC_082388.1","role":"assembled-molecule","sequenceName":"7"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"8","gcCount":"10854035","gcPercent":45.5,"genbankAccession":"CP133532.1","length":23760065,"refseqAccession":"NC_082389.1","role":"assembled-molecule","sequenceName":"8"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"9","gcCount":"10660006","gcPercent":45.5,"genbankAccession":"CP133533.1","length":23393856,"refseqAccession":"NC_082390.1","role":"assembled-molecule","sequenceName":"9"}
{"assemblyAccession":"GCF_031168955.1","assemblyUnit":"Primary Assembly","assignedMoleculeLocationType":"Chromosome","chrName":"10","gcCount":"10758777","gcPercent":45.5,"genbankAccession":"CP133534.1","length":23728004,"refseqAccession":"NC_082391.1","role":"assembled-molecule","sequenceName":"10"}
# First 20 lines containing ">" (the FASTA headers) of the transcript file.
rna_fasta=/home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/rna.fna
grep ">" "${rna_fasta}" | head -n 20
>XM_060035408.1 PREDICTED: Gadus macrocephalus putative helicase MOV-10 (LOC132473465), mRNA
>XM_060035409.1 PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X2, mRNA
>XM_060035410.1 PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X3, mRNA
>XM_060035411.1 PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X4, mRNA
>XM_060035412.1 PREDICTED: Gadus macrocephalus ATP-sensitive inward rectifier potassium channel 12-like (LOC132475957), transcript variant X2, mRNA
>XM_060035413.1 PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X1, mRNA
>XM_060035414.1 PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X2, mRNA
>XM_060035415.1 PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X3, mRNA
>XM_060035416.1 PREDICTED: Gadus macrocephalus Kv channel-interacting protein 4-like (LOC132445433), mRNA
>XM_060035417.1 PREDICTED: Gadus macrocephalus RNA binding motif protein 46 (rbm46), mRNA
>XM_060035418.1 PREDICTED: Gadus macrocephalus elongation factor 1-alpha-like (LOC132445437), mRNA
>XM_060035419.1 PREDICTED: Gadus macrocephalus zona pellucida sperm-binding protein 3-like (LOC132445439), transcript variant X1, mRNA
>XM_060035420.1 PREDICTED: Gadus macrocephalus zona pellucida sperm-binding protein 3-like (LOC132445439), transcript variant X2, mRNA
>XM_060035421.1 PREDICTED: Gadus macrocephalus zona pellucida sperm-binding protein 3-like (LOC132445439), transcript variant X3, mRNA
>XM_060035422.1 PREDICTED: Gadus macrocephalus zona pellucida sperm-binding protein 3-like (LOC132445439), transcript variant X4, mRNA
>XM_060035423.1 PREDICTED: Gadus macrocephalus uncharacterized LOC132445438 (LOC132445438), mRNA
>XM_060035424.1 PREDICTED: Gadus macrocephalus UFM1-specific peptidase 1 (non-functional) (ufsp1), mRNA
>XM_060035425.1 PREDICTED: Gadus macrocephalus HAUS augmin-like complex, subunit 4 (haus4), mRNA
>XM_060035426.1 PREDICTED: Gadus macrocephalus endonuclease domain-containing 1 protein-like (LOC132445442), mRNA
>XM_060035427.1 PREDICTED: Gadus macrocephalus KH and NYN domain containing (khnyn), transcript variant X1, mRNA
# First 20 lines containing ">" (the FASTA headers) of the protein file.
protein_fasta=/home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/protein.faa
grep ">" "${protein_fasta}" | head -n 20
>XP_059891391.1 putative helicase MOV-10 [Gadus macrocephalus]
>XP_059891392.1 uncharacterized protein LOC132445431 isoform X1 [Gadus macrocephalus]
>XP_059891393.1 uncharacterized protein LOC132445431 isoform X1 [Gadus macrocephalus]
>XP_059891394.1 uncharacterized protein LOC132445431 isoform X2 [Gadus macrocephalus]
>XP_059891395.1 ATP-sensitive inward rectifier potassium channel 12-like [Gadus macrocephalus]
>XP_059891396.1 F-box only protein 41-like isoform X1 [Gadus macrocephalus]
>XP_059891397.1 F-box only protein 41-like isoform X1 [Gadus macrocephalus]
>XP_059891398.1 F-box only protein 41-like isoform X2 [Gadus macrocephalus]
>XP_059891399.1 Kv channel-interacting protein 4-like [Gadus macrocephalus]
>XP_059891400.1 probable RNA-binding protein 46 [Gadus macrocephalus]
>XP_059891401.1 elongation factor 1-alpha-like [Gadus macrocephalus]
>XP_059891402.1 zona pellucida sperm-binding protein 3-like isoform X1 [Gadus macrocephalus]
>XP_059891403.1 zona pellucida sperm-binding protein 3-like isoform X2 [Gadus macrocephalus]
>XP_059891404.1 zona pellucida sperm-binding protein 3-like isoform X3 [Gadus macrocephalus]
>XP_059891405.1 zona pellucida sperm-binding protein 3-like isoform X4 [Gadus macrocephalus]
>XP_059891406.1 uncharacterized protein LOC132445438 [Gadus macrocephalus]
>XP_059891407.1 inactive Ufm1-specific protease 1 [Gadus macrocephalus]
>XP_059891408.1 HAUS augmin-like complex subunit 4 [Gadus macrocephalus]
>XP_059891409.1 endonuclease domain-containing 1 protein-like [Gadus macrocephalus]
>XP_059891410.1 protein KHNYN isoform X1 [Gadus macrocephalus]
# First 20 lines containing ">" (the FASTA headers) of the CDS file; each header
# carries gene, GeneID, protein name, protein_id and location tags.
cds_fasta=/home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/cds_from_genomic.fna
grep ">" "${cds_fasta}" | head -n 20
>lcl|NC_082382.1_cds_XP_059916776.1_1 [gene=rereb] [db_xref=GeneID:132464430] [protein=arginine-glutamic acid dipeptide repeats protein isoform X1] [protein_id=XP_059916776.1] [location=join(44883..44908,45901..45949,46305..46429,50198..50297,50452..50550,50685..50765,50865..51018,51179..51271,57813..58012,59841..60005,64360..64470,67157..68541,70221..70425,70825..71635,72380..72517,72610..72790,75074..75107)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916797.1_2 [gene=rereb] [db_xref=GeneID:132464430] [protein=arginine-glutamic acid dipeptide repeats protein isoform X3] [protein_id=XP_059916797.1] [location=join(44883..44908,45901..45949,46305..46429,50198..50297,50452..50550,50685..50765,50865..51018,51179..51271,57813..58012,59841..60005,64363..64470,67157..68541,70221..70425,70825..71635,72380..72517,72610..72790,75074..75107)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916784.1_3 [gene=rereb] [db_xref=GeneID:132464430] [protein=arginine-glutamic acid dipeptide repeats protein isoform X2] [protein_id=XP_059916784.1] [location=join(44883..44908,45901..45949,46305..46429,50198..50297,50452..50550,50685..50765,50865..51018,51179..51271,57813..58012,59841..60005,64360..64470,67157..68541,70221..70425,70825..71635,72380..72517,72610..72824)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916809.1_4 [gene=LOC132464448] [db_xref=GeneID:132464448] [protein=uncharacterized protein LOC132464448 isoform X1] [protein_id=XP_059916809.1] [location=complement(join(73693..74042,77417..77494,77632..77748,77849..81089))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916817.1_5 [gene=LOC132464448] [db_xref=GeneID:132464448] [protein=uncharacterized protein LOC132464448 isoform X1] [protein_id=XP_059916817.1] [location=complement(join(73693..74042,77417..77494,77632..77748,77849..81089))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916826.1_6 [gene=LOC132464448] [db_xref=GeneID:132464448] [protein=uncharacterized protein LOC132464448 isoform X2] [protein_id=XP_059916826.1] [location=complement(join(77409..77494,77632..77748,77849..81089))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916845.1_7 [gene=c1h1orf159] [db_xref=GeneID:132464464] [protein=uncharacterized protein C1orf159 homolog isoform X2] [protein_id=XP_059916845.1] [location=complement(join(84432..84529,86056..86080,86194..86222,86358..86492,86861..86917,87015..87089,87396..87480,87569..87640))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916837.1_8 [gene=c1h1orf159] [db_xref=GeneID:132464464] [protein=uncharacterized protein C1orf159 homolog isoform X1] [protein_id=XP_059916837.1] [location=complement(join(84432..84529,86056..86080,86194..86222,86358..86492,86861..86917,87015..87110,87396..87480,87569..87640))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916912.1_9 [gene=LOC132464512] [db_xref=GeneID:132464512] [protein=interactor of HORMAD1 protein 1 isoform X1] [protein_id=XP_059916912.1] [location=join(89542..89597,89694..89853,90272..90414,90506..90554,90635..90719,90936..91051,91144..91884)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916921.1_10 [gene=LOC132464512] [db_xref=GeneID:132464512] [protein=interactor of HORMAD1 protein 1 isoform X2] [protein_id=XP_059916921.1] [location=join(89542..89597,89694..89853,90272..90414,90506..90554,90635..90719,90936..91051,91455..91589)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916891.1_11 [gene=kbtbd12] [db_xref=GeneID:132464496] [protein=kelch repeat and BTB domain-containing protein 12] [protein_id=XP_059916891.1] [location=complement(join(93437..93609,93716..93913,94092..94242,94443..94713,94861..95927))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916878.1_12 [gene=slc26a6l] [db_xref=GeneID:132464487] [protein=solute carrier family 26 member 6, like] [protein_id=XP_059916878.1] [location=join(97328..97467,97814..97953,98103..98213,100474..100652,100734..100898,101117..101269,101590..101672,101775..101922,102014..102127,102477..102554,103021..103116,103219..103325,103612..103681,109717..109809,109881..109991,110106..110519,110797..110851,111554..111699,111813..111839)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916903.1_13 [gene=p4htmb] [db_xref=GeneID:132464503] [protein=transmembrane prolyl 4-hydroxylase] [protein_id=XP_059916903.1] [location=complement(join(112626..112840,113209..113332,113541..113631,114059..114244,115325..115487,116890..116986,117200..117390,117893..117974,118090..118410))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916864.1_14 [gene=LOC132464480] [db_xref=GeneID:132464480] [protein=myosin heavy chain, fast skeletal muscle] [protein_id=XP_059916864.1] [location=complement(join(122587..122607,123965..124096,124184..124279,126315..126419,126521..126691,126795..126920,127002..127205,127296..127604,127685..127809,127921..128086,128350..128533,128618..128814,128912..129030,129513..129639,130320..130709,130818..130908,130999..131144,131239..131415,131501..131743,131835..132090,132191..132327,132658..132781,132877..132994,133088..133175,133447..133517,133638..133944,134045..134215,134305..134454,134559..134677,134788..134926,135009..135112,135215..135313,135399..135462,135588..135680,135910..136021,136108..136135,136222..136378,136466..136609,136692..136892))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916933.1_15 [gene=LOC132464525] [db_xref=GeneID:132464525] [protein=myosin heavy chain, fast skeletal muscle-like] [protein_id=XP_059916933.1] [location=join(153117..153317,153556..153699,153787..153943,154043..154070,154153..154267,154358..154450,154539..154602,154685..154783,154873..154976,155217..155355,155593..155711,155820..155969,156067..156237,156323..156626,156798..156871,157253..157340,157460..157577,157666..157789,157872..158008,158111..158366,158455..158697,158791..158967,159089..159234,159323..159413,159492..159881,159990..160116,160207..160325,160414..160610,160695..160878,160963..161128,161226..161350,162086..162394,162475..162678,162787..162912,163111..163281,163366..163470,163922..164017,164101..164232,164422..164439)] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916964.1_16 [gene=LOC132464531] [db_xref=GeneID:132464531] [protein=voltage-dependent calcium channel subunit alpha-2/delta-2-like isoform X3] [protein_id=XP_059916964.1] [location=complement(join(180132..180233,181028..181110,183052..183161,183261..183384,183509..183564,183720..183772,183890..184042,184199..184270,185008..185055,185149..185240,185511..185606,188185..188288,188730..188792,189575..189665,189750..189847,189930..189990,190342..190418,190502..190563,192548..192619,192718..192789,193007..193081,193164..193238,193854..193919,194949..195038,195185..195234,195861..195939,196052..196162,196346..196507,196633..196732,198661..198711,199050..199107,199192..199323,199445..199580,199751..199795,216033..216114,226287..226408))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916955.1_17 [gene=LOC132464531] [db_xref=GeneID:132464531] [protein=voltage-dependent calcium channel subunit alpha-2/delta-2-like isoform X2] [protein_id=XP_059916955.1] [location=complement(join(180132..180233,181028..181110,183052..183161,183261..183384,183509..183564,183720..183772,183890..184042,184199..184270,185008..185055,185149..185240,185511..185606,188185..188288,188730..188792,189575..189665,189750..189847,189930..189990,190342..190418,190502..190563,192548..192619,192718..192789,193007..193081,193164..193238,193854..193919,194949..195038,195185..195234,195861..195939,196052..196162,196346..196507,196633..196732,198661..198711,199050..199107,199192..199323,199445..199580,210281..210340,210659..210775,216033..216114,226287..226408))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916945.1_18 [gene=LOC132464531] [db_xref=GeneID:132464531] [protein=voltage-dependent calcium channel subunit alpha-2/delta-2-like isoform X1] [protein_id=XP_059916945.1] [location=complement(join(180132..180233,181028..181110,183052..183161,183261..183384,183509..183564,183720..183772,183890..184042,184199..184270,185008..185055,185149..185240,185511..185606,188185..188288,188730..188792,189575..189665,189750..189847,189930..189990,190342..190418,190502..190563,192548..192619,192718..192789,193007..193081,193164..193238,193854..193919,194949..195038,195185..195234,195861..195939,196052..196162,196346..196507,196633..196732,198661..198711,199050..199107,199192..199323,199445..199580,199751..199795,210281..210340,210659..210775,216033..216114,226287..226408))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059916988.1_19 [gene=LOC132464555] [db_xref=GeneID:132464555] [protein=aminoacylase-1A-like] [protein_id=XP_059916988.1] [location=complement(join(228757..228933,231338..231398,231518..231597,231677..231745,233158..233302,239422..239471,239589..239662,239750..239806,240427..240516,240637..240713,242007..242101,242190..242294,242402..242466,243332..243464))] [gbkey=CDS]
>lcl|NC_082382.1_cds_XP_059917028.1_20 [gene=abhd14a] [db_xref=GeneID:132464582] [protein=protein ABHD14A] [protein_id=XP_059917028.1] [location=complement(join(245995..246177,247432..247667,248014..248129,248241..248500))] [gbkey=CDS]

BLAST the RNA FASTA against UniProt Swiss-Prot


# Download UniProt Swiss-Prot, store it under a release-tagged name, and unpack it.
# The steps are chained with && so a failed step stops the ones after it.
# NOTE(review): the URL points at "current_release", so the r2024_02 tag is only
# accurate if 2024_02 is still the current release at download time — confirm.
cd ../data &&
# --fail: exit non-zero on an HTTP error instead of saving the error page as the .gz
curl --fail -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz &&
mv uniprot_sprot.fasta.gz uniprot_sprot_r2024_02.fasta.gz &&
# -k keeps the compressed archive alongside the extracted FASTA
gunzip -k uniprot_sprot_r2024_02.fasta.gz

# Make sure the Swiss-Prot FASTA is available and create the BLAST database directory.
# Written to be safe to re-run: an unguarded second download/extract would stop at
# gunzip (it refuses to overwrite an existing file) and mkdir would fail on an
# existing directory.
cd ../data
# Only fetch when the release-tagged archive is not already present.
if [ ! -f uniprot_sprot_r2024_02.fasta.gz ]; then
  # --fail: exit non-zero on an HTTP error instead of saving the error page as the .gz
  curl --fail -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz &&
  mv uniprot_sprot.fasta.gz uniprot_sprot_r2024_02.fasta.gz
fi
# -k keeps the archive; -f replaces a FASTA left behind by an earlier extraction.
gunzip -kf uniprot_sprot_r2024_02.fasta.gz
# -p: succeed when ../blastdb already exists.
mkdir -p ../blastdb
# Build a protein BLAST database from the unpacked Swiss-Prot FASTA.
# Uses the BLAST+ 2.15.0 install — the same one the blastx search against this
# database runs from — instead of mixing 2.11.0 and 2.15.0 binaries.
/home/shared/ncbi-blast-2.15.0+/bin/makeblastdb \
-in ../data/uniprot_sprot_r2024_02.fasta \
-dbtype prot \
-out ../blastdb/uniprot_sprot_r2024_02
# Translated search (blastx) of the RefSeq transcripts against the Swiss-Prot
# protein database; results are written in tabular format (-outfmt 6).
#   -evalue 1E-20       keep only hits at or below this E-value
#   -num_threads 30     CPU threads used by the search
#   -max_target_seqs 1  keep a single subject sequence per query
# NOTE(review): with -max_target_seqs 1 and no -max_hsps, a query can still yield
# several rows (one per HSP); BLAST+ also warns about very small -max_target_seqs
# values. Confirm this is the intended "one annotation per transcript" behaviour.
#
# Create the results directory first: blastx cannot open -out in a missing directory.
mkdir -p ../output/11-annotation
/home/shared/ncbi-blast-2.15.0+/bin/blastx \
-query ../data/ncbi_dataset/data/GCF_031168955.1/rna.fna \
-db ../blastdb/uniprot_sprot_r2024_02 \
-out ../output/11-annotation/PcodRNA_uniprot_blastx.tab \
-evalue 1E-20 \
-num_threads 30 \
-max_target_seqs 1 \
-outfmt 6
# Preview the first 10 rows of the blastx result table.
head -n 10 ../output/11-annotation/PcodRNA_uniprot_blastx.tab
XM_060035408.1  sp|Q1LXK4|M10B1_DANRE   50.575  348 153 5   6   1004    623 966 3.85e-97    321
XM_060035412.1  sp|F1NHE9|KCJ12_CHICK   63.014  365 110 2   1513    2607    45  384 1.17e-135   426
XM_060035413.1  sp|Q8TF61|FBX41_HUMAN   53.644  343 153 2   1643    2665    534 872 2.62e-98    331
XM_060035414.1  sp|Q8TF61|FBX41_HUMAN   53.644  343 153 2   1630    2652    534 872 2.42e-98    331
XM_060035415.1  sp|Q8TF61|FBX41_HUMAN   53.644  343 153 2   1573    2595    534 872 1.65e-98    331
XM_060035416.1  sp|Q8HYN7|KCIP4_MACFA   87.037  108 14  0   16  339 143 250 2.04e-59    205
XM_060035417.1  sp|Q08BH5|RBM46_DANRE   76.014  296 66  2   25  909 1   292 3.41e-139   420
XM_060035418.1  sp|P08736|EF1A1_DROME   93.519  108 7   0   4   327 1   108 5.61e-67    213
XM_060035419.1  sp|P97708|ZP3_RAT   33.618  351 202 9   181 1182    37  373 2.03e-45    167
XM_060035420.1  sp|P97708|ZP3_RAT   33.333  351 202 10  181 1179    37  373 4.82e-43    161
# Load the tab-delimited blastx hits. The file has no header row, so R names the
# columns V1, V2, ... automatically.
blast <- read.delim("../output/11-annotation/PcodRNA_uniprot_blastx.tab", header = FALSE)
# Show the first six rows as a sanity check.
head(blast, n = 6)
              V1                    V2     V3  V4  V5 V6   V7   V8  V9 V10
1 XM_060035408.1 sp|Q1LXK4|M10B1_DANRE 50.575 348 153  5    6 1004 623 966
2 XM_060035412.1 sp|F1NHE9|KCJ12_CHICK 63.014 365 110  2 1513 2607  45 384
3 XM_060035413.1 sp|Q8TF61|FBX41_HUMAN 53.644 343 153  2 1643 2665 534 872
4 XM_060035414.1 sp|Q8TF61|FBX41_HUMAN 53.644 343 153  2 1630 2652 534 872
5 XM_060035415.1 sp|Q8TF61|FBX41_HUMAN 53.644 343 153  2 1573 2595 534 872
6 XM_060035416.1 sp|Q8HYN7|KCIP4_MACFA 87.037 108  14  0   16  339 143 250
        V11 V12
1  3.85e-97 321
2 1.17e-135 426
3  2.62e-98 331
4  2.42e-98 331
5  1.65e-98 331
6  2.04e-59 205
# Show the first two header lines of the transcript FASTA.
rna_fasta=/home/shared/8TB_HDD_03/sr320/github/project-cod-temperature/data/ncbi_dataset/data/GCF_031168955.1/rna.fna
grep ">" "${rna_fasta}" | head -n 2
>XM_060035408.1 PREDICTED: Gadus macrocephalus putative helicase MOV-10 (LOC132473465), mRNA
>XM_060035409.1 PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X2, mRNA
# Flatten the multi-line transcript FASTA into one tab-delimited row per record:
#   <sequence ID> TAB <rest of the header line> TAB <sequence on a single line>
# A record count and the total sequence length are reported on stderr.
perl -e '
  $n_records = 0;
  $n_bases   = 0;
  while (<>) {
    s/\r?\n//;                  # drop the line ending (LF or CRLF)
    s/\t/ /g;                   # tabs in the input would break the tabular layout
    if (s/^>//) {
      # Header line: close the previous record unless this is the first input line.
      print "\n" if $. != 1;
      s/ |$/\t/;                # first space (or end of line) separates ID from description
      $n_records++;
      $_ .= "\t";               # separator between description and sequence
    }
    else {
      # Sequence line: remove spaces and add its length to the running total.
      s/ //g;
      $n_bases += length($_);
    }
    print $_;
  }
  print "\n";
  warn "\nConverted $n_records FASTA records in $. lines to tabular format\nTotal sequence length: $n_bases\n\n";
' \
../data/ncbi_dataset/data/GCF_031168955.1/rna.fna > ../output/11-annotation/GCF_031168955.1_rna.tab
# Preview the first 10 lines of the tabular transcript file.
head -n 10 ../output/11-annotation/GCF_031168955.1_rna.tab
XM_060035408.1  PREDICTED: Gadus macrocephalus putative helicase MOV-10 (LOC132473465), mRNA    AGTTCAATTGCAACTGGGTAGAGGGCGGCATCTATATCCCCGATAAAGAAGAGCTGGAGAAGCATAAAATCATGGTCACCACCCTTTTCTCCGCTTCAAGGCTGGTTACGGAAGGCATCCCTCCAGGCTTTTACAGCCATATCTTTGTTGACGAGGCAGGACAACCTGCAGAGCCTGAAGGGGTTATCCCCCTGGCAGGCCTACTGGACCCAAAGCGTGGCCAGGTAGTGTTGGCAGGAGACCCCAAACAGTTGGGCCCCATCGTCAAATCCCCCCTAGCCAAGAAGCATGGACTTGGTGTATCAATGCTGGAGCGTCTGATGGAGTTGAATGTGTACAAAAAGACAGAGGAAACGGGGTACAACGAGCGTTTCATCACCAAGCTGCTGAGGAACTACAGGTCTCATGGCAGACTTCTAACGATCCCAAATGAGCTGTTCTACGAGAGCGAACTCCAGGTGTGGGCCGATAAGGATATCCGCAACTCCTTATGTGAATGGAAGCACCTTCCCAGCAAGGGATTCCCGCTGATCTTCCATGAGGTCACTGGACGCATGCGTCGCGAGGACAACGCCTCACTGTTCAACGCCTCACTGTTCAACGAAGACGAGGTGGCGATTCTAATGCAGTATCTGAAAGCACTGTTGGAAGATGTCCCCCCAGAAGACATAGGCCTCATTGCCCCATACAGGAAACAAGTGGAGAGGATCAACAAGGCTCTTAAGATCGAGTTCCCCAGAAACACAGCAAAGTTAAAGGTTTGCACAGTGGACGCGTTTCAGGGCGAGGAGAAGCGGGTGATTCTGCTGTCCACAGTGAGAAGCACCAGCCGGGACCCCAGACCCCCCTCCTCCGTGGGGTTCCTTGCTGACCCCAAGAGGTTCAACGTGGCCATGACCCGCGCCCAAGCCCTGTTGATCGTGGCGGGGAACTCAGAGGCGCTGACAAAGGACAGGATTTGGAGCAGGTTCATCGAGTACTGCAAAGAACACGGAGGTTACACCAAGACCATGACCACCGATTGACGTGCATGCACGGACACATACGCACACTGACAACCACACTCACGTACACACATACAACCTCCGCACACACACACACACACACACACACAAACACACACATACACCTTCTCCACACAAAATGATATTAAGGACTAAAAACCTCACATTTTATTTGACATTGTTGCATCAGCCTATTAGAGGAGAATTTTTTTGACCATTAGGCCTTCCCATCGTTGTGATGGAAAATCTATCATAATCATAAAGTTTTTCGTTTTGAACTTAGGTTTTGTTTACTATTCCGTGTTTGTTCTTGCCGTCCTGCTCTCCTTCGGGGTCATCTAAAATGTAAACCTTTGTGGTTGATGATGTAATCTGTTCTCACTCTGTTCCTGTGTTATCTTCTGTTGATTTGGGCCTGTTCCACGACCCTGGGGAGCTTCAGGAAACAAAGGGTTACATGGCACGGACGCGACCCCTTATTTATGACATCCTGAGGAACTTCAATCAATAGATTACCATCTGGTTAACAAAGGCTTTTGGTTTATTGGGGGACAGCTGCCCTCAAGAACCCAATCTAAGTGTATAAGAACTCCTGACTTAGCCACTTTAGAGTATGAAGTAAGACACAGGGCAAACTCCCATCTGAGGCCTCAAATTACTGTAGTGTATGCCTGTTTTATTGTTTTTTGATGCATGTTGTGATGTATCTTTGTTCGTACTTTTATTACAAATATATAGGACCACCAATTGCCTAAAA
XM_060035409.1  PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X2, mRNA TTCCTGGTCCTCTCAATATTGCTGATATCATAACCAGAGGGGCCAGTCCTCAAGACCTGGATGAAAGTTCAGAATGGCAGAACGGACCAACATTCTTGAAGCTACCAGTGGATGAGTGGCCAGTTAAATCTGCCAAAGAGTTGGTGATGGCTGCCAGAGACGGTGTCAACAGATTACAGAAGAAAGCGTTTGTTGCTGCACTGACCCGAGCACAAGCCAAGGCACAACTGCTGCCGGATCCAAAGGATACTGACACGCAGAAGCCTTCTGAACTAAAGCAAGACCAGGAACCAACTCAGACCCAAACACAACCAAAGAGGCCACCAGCAGGATCAACTGTCCAAGAACTGGTGGATGTCAAGCGGTTCAGTAACCTAAGCCGACTTGTCAAAACAGTTGCCTGGATCCGGAGAGCAGCAAGGATGTTCATGAAAGGGAACAAGCGAACTGCAAACAATCCAAAGTGGGAGGCAGTGTCGTTTTCAAAAGTCATCTCAGTGACAGAGAGGGAAGACGCCCTAAAAGACATCTTCCTTGCGGCACAGCAAAGTGCATCCTTCCCAAGCACAACCACAGACAGGCTGGTGGTGTACAGAGACCAAGAGACTGGATTGTTGGTCTGTGGGGGTCGTGTGCAGATCTTCAATGAAGATAAAGTTGCTGTCCCCATCTTGCCTTACGAAGCCTGGGTGTCGACACTGTTAGCACGAGAAGCCCACGAGGAGAACCACGATGGAGTGGCCGGGACCTTGCTCAAGATGAGAAGAAGAGCATGGGTCGTGAAAGGTCGGAGAATTGCTCAAAAAGTGGTCGAAAACTGCATGTTCTGCAGGAAAACTAAAGCAAAAAGATGCCAGCAAATAATGGGTGATCTACCTCCAGAGAGGACAGAACCAGCTGCCCCATTCCACTACACAACAGTCGACCTCTTCGGACCCTACCAAGTCAGGGATGATGTAAAGAAAAGAGTGTCACTGAAGGTTTGGGGAATTGTGTTTTGTTGCATGGCCTCCAGAGCTATTCACACCGAGCTGGTGAACTCTCAGTCCACAGAGAGTTTTCTGTTTGCCTACCAGAGGTTCACAGCACTAAGAGGTCATCCAAAGAAAATCTGGTCAGATCCTGGGACCAACTTCATTGGGGCTAAGCCAGTCCTAGAAGAACAGTACCGATTCTTTGCCAATCTTGACAAAGCTACCCTGGAGGAGAGAGCTGCCAAGGATGGCACAGAATGGTCGTGGAAGATTCAACCAGCCGATTCTCCACACCGGAATGGTGCCGCAGAAGCTGCTGTGCGCATTGTCAAGAGAGCACTGCAGAGTCTTGGAGGAGAGTCCGGCCTAAGTTGGAGTGAATTCCAAACAACTCTCTACACGGCTGCCAATCTTGCAAATGAAAGACCAATTGACGCCAGGACACAGAGCCGAGAAGACAGCGTCCAGTTCATCACTCCTAACTGTCTCCTGCTCGGACGAGCATCACAGGGTGGAGATGTCAGAACATTCGACTTCAGTGACTATCCCTACAAAAGGCTTAAAGAAATGCAAGCGCAAGTTAACAAGTTCTGGAGGAACTGGAGTCAACTTGCTGGTCCTAACTTATTCGTAAGGAACAAATGGCATACCGCCAAGAGAAATGTCGCAGTCGGGGACATCGTCTGGATGGCGGACCAAAACGCCCTTAGGGGTCAGTTCAGGATTGCAAGAGTGGTTAGCGTCAACTCGGACAGCAAAGGAGTTGTGAGGGACGTCAATGTCAGAACCTTCCCAAGCTACCCCGTTCCTGTCACAAGGCCTACCGGAGCAAAAGTAAGTCACCGAACATCTAAGAAATTCAAAGAAAAGATCCCAGCAGCAGTTCTTCACAGAGACGTAAGGCGA
CTAGTGATCTTGCTTCCCACCGAAGAACAGAACTAAACATCTGACCCAAGTTGCTCCTGTAACATGCGACCTCCCAGGTGTCTCCACTGGAAGGTCGAGTGGGAGGTGTGAAGTCAACTTGGAAAATCCCTGCTGGATATGAAGGGGTTAATATGCAACGCCGACTGACAGCTGGGGGAATCCCTCCTCAGAGCGTTACCTGGGAAATCCACGTCATTGCGTCACCTTACCGGAAAGGACAGAAAGGGGAAGAGAGACTCTTTCAGGAAACGCCAGTTGCAAACAAATCAGACAGCGTTCGGTCCAGCAGCAGCAAAGCTGCGCTAGAGAACTTGGAAAAAGCAGCGCTGGTACAGCCATAAAAAGCCTGGAGAAACGGCCGGTTTTTTACCTTGCAGTATACCTAGCAGTTTACCACTTGCAACCCACTTACAACTGTACCCACAATACCTACCTGCTACAACTTACCTGTACCTGTGAAAATACAAAAAGAAAAGTAAAGAGGAGTTAAAGAAGGAAAACCGTGTGGTCATTGCATGAGTGGAACCCAGTTAAACCTCCTATGGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
XM_060035410.1  PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X3, mRNA CAGAATTCATGAAAATATGCTTAGACATTAAAAACTATTAATAATATTTATATTTATATTTATATATTTATAAGTATTTAAATGCATATATGAAAAACATGTTTGTATTTACTATTTCATTCTGCACTGCATTTACTGTAGTTATTTTTGGTTTCTTTAAAGGTTTTTTACCTTGCAGTATACCTAGCAGTTTACCACTTGCAACCCACTTACAACTGTACCCACAATACCTACCTGCTACAACTTACCTGTACCTGTGAAAATACAAAAAGAAAAGTAAAGAGGAGTTAAAGAAGGAAAACCGTGTGGTCATTGCATGAGTGGAACCCAGTTAAACCTCCTATGGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
XM_060035411.1  PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X4, mRNA CCCTGCTGTAAGTATTTCTTTCTTAAAGGTACAACATGTCTCTGGCCCGCCGAGTGTCATTGTAGGCCTACTTGTAAATGAAAATTAAAGCTTTCTATGGTGATTAACTTGTCTATTCTAGGTGGGAAACGCTTGTTTGCACTTGGTCAAGGAGATGGCCGAACATCAAGGAAAATATGTATTTCTTCTTGACCTCCACATCCCCCCAACATGGCTCCAATCGCCAAATTCAGATCATCTGCGCAGCTTTCCAGTTGACATCACCACCATGGATGCAGTGATTGTCCCACTGTGGACACCAGGGCATTTTCTGCTTAGTGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
XM_060035412.1  PREDICTED: Gadus macrocephalus ATP-sensitive inward rectifier potassium channel 12-like (LOC132475957), transcript variant X2, mRNA AAGCGTTGTGTGTCGCTGAAGCAACATCCAGTGCCAGGTCTGACTGAAGCTAGCTGGTTAGCATAGCTGACCTATGTATGCTTTGTATGCATTATGAGCGTTAGCCTAAAGGGTAACAGCAGTAATCTGGTCTGATTACAGGCTATTTATAAATCTCTCCCCTCGACCCCAAACTAACCTCGCTGACCACTAGCAGCAGGATCTGAACCATTTTTCCCTTTTCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAGTTTCTATATTGATCGAAAATGGAGGCTCTGGAAACACCTTTAACTCTGTTTCTCTGTTTCAAACTGTGAAATAGGGAGGTCTTTTAGGAGAGGAGAAGGGGGAGGTGTGAGTGGAAGAGGAGGAGAGGGGTGGAAAGAGAAAGAGGGAAAGGTGAGTTCTGGAGGAGTAGAGGATTAGAAGAAGGCCTTCAGGTCCCCATGCTAGTGCGGTGAGGTGAGGGAGCAGGGGGATGGCAGGAGGAGTAAGAGGCTGCATCATGCAGTCCACCCCTTGCAGCACTAGAACCTCCTCCTCTGCCCTTCTGATCCAGTGGAACAACGAGGTGATGATCTACCAGCCAACCGTGACCCTGCCTCCACGAGCCTACGGAATGAGACATGTTTTGATGCAGGTTATGAGAGTTCGGACTTCGGATGAACACATATAAACCGTCTCTCAACTCCTCTGGCCCCCCTCTTCTTACCTCTCCTCTCCTCCCTTCCCATCACCCACCATCACCGCCTTCTCTTCACAGGAGATGGACACAGCAGATCTGACTGGACAGAGCACATCTCCTCCTCCCCTCCTCCTATGGGATTGCGCTGGTGTGGGGGGTGGAGGACAAAGTTCCTCTCCTCTCCGCCTCCACCCCTCCTACTATAGAGTTGTGCTGGTGTGTAGGACAGAGTTAACAGACAGGATTACAGCCAGAGGACGGCAGAGGCAGTGAAGGAGAAAATAAAGGATTTTCTCAGAGGAGGGAAAGCCAAGGAAAGATTGACCAAACAGAACTCTGGGAGAATGAACATGGAACCTTTGAGCTGAACTGAAGCAGAGTGATTTCGACCTTAGCAAGTCAGCGTTTGGACAGGTATAACGCTGGTCTGTGGTGCTAGGGGGCTGACCAGGGCCACCGGGGCCCCTGTGTCCAGAGCGTGGTGCCTGGGAGCCACTGCCAGGGGTGATGGGAACAAGACGATCCAGCAGGTTCAGCCTGGCGTCCGTCGTCCTCCAAGAAGACGAGCACCGTAAGGTCTCCAGCCAGGGTCTCCTGAACGGCCACGACTCCCCGCCGCGCTCCTCCACCACCTCCTCCTTGGCCACCGGCGGGAGGGAGGAGGAGCAGCAGCGGGGAGGGATGAGCGGCTCCAGCGTCCGGGGGGGCCCGACGCGGGGCCCCTCCGGACCGCCGCGGAGCCGCTTCGTGAAGAAGAGCGGCCACTGCAACGTGGCGTTCAGCAACCTGGAGGACCGGAGCCAGCGCTACCTGGCCGACCTCTTCACCACCTGCGTGGACGTCCGCTGGCGCCACCTGCTGCTGCTCTTCTGCGTCAGCTTCCTGCTCTCCTGGCTCTTCTTCGGGCTCGTCTTCTACCTGGTGTCCCTGGCCCACGGGGACTTCCAGGACCCGTCTGACGTGTCGCCGGCGGGAGCGACCCAGGGCCCGGGGGGCCGGAGGGAGCGCACGCCGTGCCTCCTCCACGTGCACGGCCTCCTGGGCGCGCTCCTGTTCTCCATGGAGACCCAGACCACCATCGGCT
ACGGCTGGCGCTGCGTGACGGAGGAGTGCCCGGTGGCCGTGGCGACGGTGGTGGTGCAGTCGGTCGTGGGCTGCATCCTCGACTCCTTCATGATCGGCACCATCATGGCCAAGATGGCGCGGCCCAAGAAGCGGAACCAGACGCTGATGTTCTCGCGGAACGCCGTGATCGCGCTGCGCGACGGCCGGCTCTGCCTCATGTGGCGGGTGGGGAACCTGCGGCACTCGCACATCGTGGAGGCCCACGTGCGCGCCCAGCTCCTCCGGCCCTACGTCACGGAGGAGGGCGAGTTCGTCCCCCTGGAGCAGATGGACCTCAACGTGGGCTACGACGACGGCACCGACCGCATCTTCCTCGTGTCGCCGCTCGTCATCGTCCACGAGATCGACAAGGACAGCCCGCTGTACTCGCTGAGCCGGGCCGACCTGGAGGCGGAGCACTTTGAGATCGTGGTCATCCTCGAGGGCATGGTGGAGGCTACGGCCATGTCCACCCAGTTCCGCAGCTCCTACCTGGCCCGCGAGGTCTTCTGGGGCCACCGCTTCGAGCCCGTGATCTGCGAGGACCGCGACCGCTACAGGGTGGACTACGCGCGCTTCCACCAGACCTACGAGGTGCCGTCCACGCCGCACCTCAGCGCCAAGGAGCTGGACGAGGCCGCCAGCCGGCCCCCCTCCGCCAGGACGACGCGACCGCCACCGTCGGCCAAGGAGGCGCCGAGCTCGTTCTGCTACGACAACGAGGTGGCGCTGATCTGCGGCGAGGACGACGACGAAGACGATGACGACGACGAGAACGACATCTTTGACTTGCGCCGGACACTGTCGTCGCCGGGGGGGAGGCGGGAGGAGAGGAGGACCTCGGTGACCGTGGACCTCCCGAAGGCGGGACAGGACTCGGCTGCCTTGACAACGGGCCGCCAGAGCTTGATGTGCGTGCTGGACATGGACAACCACCAGAGGGAGTTCGATATGCTGCAGACGGCAATTCCGCTCGATCCGCAGTCCTACAAGAGCGAGCAGGAGATGTAGAGGTCTGAGGGACACTGGACGGGGAAGACTGCTCGGGGGTTGTTCTCAGCCTTGCTTCTGTTGGTTGATGTTTGGGGTTGGAGAGTGGGACATATCTATTTTACTGTATGGTGGAGAAACTAGAAAACGGATTGATTATTAAACCACGATGAGCCGTATTGAGGCAGTGGTCATTCAAGAAATGAAATTTTGCTGAGAAGTGAACTTGATTCCGACTGAATAGTCATTCTCCATCACATAAAAACCTCATGACAACAACATTAAGACAAGATACACATGACGGTTGTCTGACTAATGCTCAAATGCTCCTCTATTTCCAAGTCCCTGAGAGGCCAAAATGTGAGGACTGAAAACCGTTGTTCTTGAACTTGAATGTGTGATGATTCTGTGTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTCTCTTCTTCAAATTGCTATTATATTAATATTTTAGTTTTCTTGAGGGTTACAAATATTCAGGTAGTACAAATTGCTAAACATCCACATTAAGCACAACAATAACTTGATGTTGATCATATCATGAGAGCACAGTCACTTTGCTATCATGACAGATGCTACTGTTATGGTTTAGTATTGATTTTTGATATAGCATACTACTGTAAGTGCTCACTAATGTCCCTGCTTATGCACAAAGCCCGGGTTGAGAAGCCTATTTCTGACCAAATGCTTAAAATAAAAAATTACAAAGCACTTTAATTGTTATTGTTAATAAATATTTACACAAATTAACACCCTATAGAAAAAAAATGTTAGGTGACGGCGATGGAGTTTTCAGACCTCTAGTGCCCTCTATTGGTGGTGAGGATTAACTGCTTATTTGTGTTGGTTGGAGAAAATCAGGAAATACACTTCACACAGCAGCTGTGTACAGCAGCTGGACAGAC
GGCTTTTAACCAGAAGACAGGGGTTGTTTTTGTTATTTGATTTGCTTCAGTGCCGCGAGCACGAGCCTATGGCCCATTTCCCCAGGACAGCTCACTTTAGTTACCCATTAATAGAATAGACAATATGTGATGTTAATAGAATGGATTGATCAGAAGTATCGCGATGTTAACTTTGGGGTCAGAACTACAGACAGCAAAAATAGGTCAGGGCCGACTATCATCAAATTGTTTTGGCTGAAGTCACACACAAAGTATCAATCAG
XM_060035413.1  PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X1, mRNA   AGGTCACGTGCGCCACGTCATTAGCACGCTGCACGGCCACAGCGTGCGGAGAGTTTTACCGGAGGGAGGGATGATGTAGCACGCGGGCCAGTCGGTTGACAGTTCTCTGCTTGGCTCGGCTCTAAGAGACACGGCCGTACCGGAGTGGACCTGAATACCGTGCATCCAGGCCAGACAGGGTGCGGATATATGGGCGAATCATCTAATGAATTAGTGAATTAAGCCAGCAGGCCGACGGTCTAAATTCTGGTCGGGTGGTGCTCGGCGGTTAAGGCAGAAGTTTTTACACCGGATAATATCGACTCCATTCCGCGGTGGAATCACAGGGTTGTCGTGAAGGTGCTGGTGCTGGAGCATGCTGGTAGCAGCAGCTGACCAGACCTTCTCATGTCCAGGCTGTGGATCAGAAGGGACCTTCAACTCCGTCCTGGACCTCCAAAACCACCTGGTCAACACACACACCTATCAGACCCTGCTCGGTCTATCCAAGGTGCGTGCCAGGAGCTCTACACCAGGGTTCCTCCTGCGCCTCCCTGGTCCGACTGTCTCCCAGGGACAGAGCTCCTCCCTGGGCATGGAGTCCACCAGAGACCCCCTGCCTCTGGCCGGTCTGGACCTGGCCTCCTCCACCGCCTCCACCCAGCTTCTCAGGGTGATGTTAGGGGCCGCAGGAGGATCTTCTCTGGGAGTCCCCCAGGACCCCTCCAGGGCTCTGGCTCTGCCTAGCCCTACAGGCCTCACATCCACTGCCTTTTTAGCTCTGGAGGACCATATAGGCCTCAGGAGGAGCCTGGGGCTGGAGTTGGGGTATCCCCCAGTGGAGTGCTTCTCCGTGGGGCCAGGCCTGGAAGAAAGACTAGAGCTGAGGCTGGACATGCAGGTGGCCACCGCCGTAGCCGAGTTGGAGGAGAGGGTGAGGGGGCGTGTCCATCACCTGAAGGCGGAGCTACAGGAAAGAGAGGCGGAGCTAGAGCGAGAGAGGAGGAAGGGAGAACGTCTCGTGAGAGAGAAGGACGAAGTGGAGGAGAGAGCGGCGTACCTGTCCAGACAGGCTTCCATAGCCATGGAGATGATGGAGGGAGTAAAGCGAGAACTGAAGGGCAAAGAGGACGAACTGGCCAAACGAAAACAGGATATGCATCAGGTGCAGGTGTTCCTGAGAGATACAGCAGAGAAGGAGGCAGAAGCTAAAATGAAACTACAGATGTTTATGGAGTCGTTACTCGATCGAGCCGACCATGCAGAGAGACAGCTGCTGCAGATCGCACCCGGTCACACGCACCCGCAACGACACGTACACACACCGATGTACACACACACTCCGGGACACACACACAGAGGTGTCTCCTCACCTGTGTGGGGTCGCGCAGGGCGGAGCTTGGATGGCAGTGTGGAGGACATGCTTGGAGCCAGGTCGCCGGTAACCATGGCAACTCAGAGGAGTTACAGTGTTTCTGGATCTTATAGACTTGGAGACCAACTCTACAACCACCATCCATACAACGACTGGGCTGGAGGGAACCGCTGGGTGAACAGCTACCATCGTTACCACAGTACCGAGGAGGAGAAGGAGGAGGAAGAGGATGACGAAGACGACGAGGAGCAGATATGGAACACACCTGAGATGATAAGACGGACTGCTGCACCAGATTTGTCTCCCTCCTCTAATGGTTGCCATAGCACCCATTGCCTGGGGGTGGAGACTCTACGGTTGAGGGCGGGGCTTTTTTGTGTCTTCCCATATTTGGACGTTGCTTCCCTGCTGCATGCGGCTGAGGTGTGCACTGATTGGAGGTGCGTTGCTAGGCACCCGGCTGTATGGACACGCCTACTTCTGGAGAACGTCACGGTGTCCACCAAGTTCCTGGTCACCCTGTCTC
AGTGGTGTACACAGACCCGGTCCCTGGTTCTGAAGAACCTGAGAGGCAGAACCAGACGACCTGGAGAGAGCAGAGAGGACTACCAGACCCTCAAACGGGGCTGCCTGGAGGAAGGGGTGGAGGCTGTCTTGCGCTCAGCGGGGGGCAGTCTGCTCTACCTGTCTGTCTGTCAGTGTTCCAATGTGCTGACAGACAGGTCCCTGTGGCTGGCCAGCTGCTACTCCCCCAACCTACACACCATCACGTACAGGAGTCCCGGTGAGGGGGTGGGTCAGGAGGTCTTGTGGGCGCTGGGAGCCGGCTGCAGAACCATAGCTCACATGAAGTTCACCCCCCTAAACCCCAGCCAGCAGCCTCATCGCCTTGGCAACCGTAGCCTGCAGACCATCGGTCGGTGTTGGCCGGACCTGCGCTCCCTCAGCGTGGGCGGGGCCGGGTGTGGCACTCAGGGATTGGCTGCTGTGGTGCGCAGCTGTGTGTGTCTATTAGAGCTGGAGTTGGAGTGCGTTTCAAAGGTCGACCTGAAGGTGGCGACAGAGCTCTGTAATAACGGACTCACCAACTTGGAGACTTTGACGTTGACGCACACTGCCATCACTGAGGAAGCTATACTGCACTTTCAAAGTAAATGTGTTAATCTCAGGTCCATGGTGGTGCTGATGAGGAAGAGTCATGCTAACGAAGGTTCGCTTGAGGAGGACAGCGTGTTCAGAGATAACCTGGAGGCTCTTAAGGTTCTAACCCGGTCTCCAGGTCTTTGTGGCATCCTGCAGGTCAAAGAAGAATACTGAACTCTTCAAAACACGCACACGCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGACAATCGGTTATCAATGATAAGTGTAATAAATGTCTTGTTTCTA
XM_060035414.1  PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X2, mRNA   AGGTCACGTGCGCCACGTCATTAGCACGCTGCACGGCCACAGCGTGCGGAGAGTTTTACCGGAGGGAGGGATGATGTAGCACGCGGGCCAGTCGGTTGACAGTTCTCTGCTTGGCTCGGCTCTAAGAGACACGGCCGTACCGGAGTGGACCTGAATACCGTGCATCCAGGCCAGACAGGGTGCGGATATATGGGCGAATCATCTAATGAATTAGTGAATTAAGCCAGCAGGCCGACGGTCTAAATTCTGGTCGGGTGGTGCTCGGCGGTTAAGGCAGAAGTTTTTACACCGGATAATATCGACTCCATTCCGCGGTGGAATCACAGGTGCTGGTGCTGGAGCATGCTGGTAGCAGCAGCTGACCAGACCTTCTCATGTCCAGGCTGTGGATCAGAAGGGACCTTCAACTCCGTCCTGGACCTCCAAAACCACCTGGTCAACACACACACCTATCAGACCCTGCTCGGTCTATCCAAGGTGCGTGCCAGGAGCTCTACACCAGGGTTCCTCCTGCGCCTCCCTGGTCCGACTGTCTCCCAGGGACAGAGCTCCTCCCTGGGCATGGAGTCCACCAGAGACCCCCTGCCTCTGGCCGGTCTGGACCTGGCCTCCTCCACCGCCTCCACCCAGCTTCTCAGGGTGATGTTAGGGGCCGCAGGAGGATCTTCTCTGGGAGTCCCCCAGGACCCCTCCAGGGCTCTGGCTCTGCCTAGCCCTACAGGCCTCACATCCACTGCCTTTTTAGCTCTGGAGGACCATATAGGCCTCAGGAGGAGCCTGGGGCTGGAGTTGGGGTATCCCCCAGTGGAGTGCTTCTCCGTGGGGCCAGGCCTGGAAGAAAGACTAGAGCTGAGGCTGGACATGCAGGTGGCCACCGCCGTAGCCGAGTTGGAGGAGAGGGTGAGGGGGCGTGTCCATCACCTGAAGGCGGAGCTACAGGAAAGAGAGGCGGAGCTAGAGCGAGAGAGGAGGAAGGGAGAACGTCTCGTGAGAGAGAAGGACGAAGTGGAGGAGAGAGCGGCGTACCTGTCCAGACAGGCTTCCATAGCCATGGAGATGATGGAGGGAGTAAAGCGAGAACTGAAGGGCAAAGAGGACGAACTGGCCAAACGAAAACAGGATATGCATCAGGTGCAGGTGTTCCTGAGAGATACAGCAGAGAAGGAGGCAGAAGCTAAAATGAAACTACAGATGTTTATGGAGTCGTTACTCGATCGAGCCGACCATGCAGAGAGACAGCTGCTGCAGATCGCACCCGGTCACACGCACCCGCAACGACACGTACACACACCGATGTACACACACACTCCGGGACACACACACAGAGGTGTCTCCTCACCTGTGTGGGGTCGCGCAGGGCGGAGCTTGGATGGCAGTGTGGAGGACATGCTTGGAGCCAGGTCGCCGGTAACCATGGCAACTCAGAGGAGTTACAGTGTTTCTGGATCTTATAGACTTGGAGACCAACTCTACAACCACCATCCATACAACGACTGGGCTGGAGGGAACCGCTGGGTGAACAGCTACCATCGTTACCACAGTACCGAGGAGGAGAAGGAGGAGGAAGAGGATGACGAAGACGACGAGGAGCAGATATGGAACACACCTGAGATGATAAGACGGACTGCTGCACCAGATTTGTCTCCCTCCTCTAATGGTTGCCATAGCACCCATTGCCTGGGGGTGGAGACTCTACGGTTGAGGGCGGGGCTTTTTTGTGTCTTCCCATATTTGGACGTTGCTTCCCTGCTGCATGCGGCTGAGGTGTGCACTGATTGGAGGTGCGTTGCTAGGCACCCGGCTGTATGGACACGCCTACTTCTGGAGAACGTCACGGTGTCCACCAAGTTCCTGGTCACCCTGTCTCAGTGGTGTACACA
GACCCGGTCCCTGGTTCTGAAGAACCTGAGAGGCAGAACCAGACGACCTGGAGAGAGCAGAGAGGACTACCAGACCCTCAAACGGGGCTGCCTGGAGGAAGGGGTGGAGGCTGTCTTGCGCTCAGCGGGGGGCAGTCTGCTCTACCTGTCTGTCTGTCAGTGTTCCAATGTGCTGACAGACAGGTCCCTGTGGCTGGCCAGCTGCTACTCCCCCAACCTACACACCATCACGTACAGGAGTCCCGGTGAGGGGGTGGGTCAGGAGGTCTTGTGGGCGCTGGGAGCCGGCTGCAGAACCATAGCTCACATGAAGTTCACCCCCCTAAACCCCAGCCAGCAGCCTCATCGCCTTGGCAACCGTAGCCTGCAGACCATCGGTCGGTGTTGGCCGGACCTGCGCTCCCTCAGCGTGGGCGGGGCCGGGTGTGGCACTCAGGGATTGGCTGCTGTGGTGCGCAGCTGTGTGTGTCTATTAGAGCTGGAGTTGGAGTGCGTTTCAAAGGTCGACCTGAAGGTGGCGACAGAGCTCTGTAATAACGGACTCACCAACTTGGAGACTTTGACGTTGACGCACACTGCCATCACTGAGGAAGCTATACTGCACTTTCAAAGTAAATGTGTTAATCTCAGGTCCATGGTGGTGCTGATGAGGAAGAGTCATGCTAACGAAGGTTCGCTTGAGGAGGACAGCGTGTTCAGAGATAACCTGGAGGCTCTTAAGGTTCTAACCCGGTCTCCAGGTCTTTGTGGCATCCTGCAGGTCAAAGAAGAATACTGAACTCTTCAAAACACGCACACGCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGACAATCGGTTATCAATGATAAGTGTAATAAATGTCTTGTTTCTA
XM_060035415.1  PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X3, mRNA   GGTCACGTGCGCCACGTCATTAGCACGCTGCACGGCCACAGCGTGCGGAGAGTTTTACCGGAGGGAGGGATGATGTAGCACGCGGGCCAGTCGGTTGACAGTTCTCTGCTTGGCTCGGCTCTAAGAGACACGGCCGTACCGGAGTGGACCTGAATACCGTGCATCCAGGCCAGACAGGGTGCGGATATATGGGCGAATCATCTAATGAATTAGTGAATTAAGCCAGCAGGCCGACGGTCTAAATTCTGGTCGGGTGGTGCTCGGCGGTTAAGGCAGAAGTTTTTACACCGGATAATATCGACTCCATTCCGCGGTGGAATCACAGGCTGTGGATCAGAAGGGACCTTCAACTCCGTCCTGGACCTCCAAAACCACCTGGTCAACACACACACCTATCAGACCCTGCTCGGTCTATCCAAGGTGCGTGCCAGGAGCTCTACACCAGGGTTCCTCCTGCGCCTCCCTGGTCCGACTGTCTCCCAGGGACAGAGCTCCTCCCTGGGCATGGAGTCCACCAGAGACCCCCTGCCTCTGGCCGGTCTGGACCTGGCCTCCTCCACCGCCTCCACCCAGCTTCTCAGGGTGATGTTAGGGGCCGCAGGAGGATCTTCTCTGGGAGTCCCCCAGGACCCCTCCAGGGCTCTGGCTCTGCCTAGCCCTACAGGCCTCACATCCACTGCCTTTTTAGCTCTGGAGGACCATATAGGCCTCAGGAGGAGCCTGGGGCTGGAGTTGGGGTATCCCCCAGTGGAGTGCTTCTCCGTGGGGCCAGGCCTGGAAGAAAGACTAGAGCTGAGGCTGGACATGCAGGTGGCCACCGCCGTAGCCGAGTTGGAGGAGAGGGTGAGGGGGCGTGTCCATCACCTGAAGGCGGAGCTACAGGAAAGAGAGGCGGAGCTAGAGCGAGAGAGGAGGAAGGGAGAACGTCTCGTGAGAGAGAAGGACGAAGTGGAGGAGAGAGCGGCGTACCTGTCCAGACAGGCTTCCATAGCCATGGAGATGATGGAGGGAGTAAAGCGAGAACTGAAGGGCAAAGAGGACGAACTGGCCAAACGAAAACAGGATATGCATCAGGTGCAGGTGTTCCTGAGAGATACAGCAGAGAAGGAGGCAGAAGCTAAAATGAAACTACAGATGTTTATGGAGTCGTTACTCGATCGAGCCGACCATGCAGAGAGACAGCTGCTGCAGATCGCACCCGGTCACACGCACCCGCAACGACACGTACACACACCGATGTACACACACACTCCGGGACACACACACAGAGGTGTCTCCTCACCTGTGTGGGGTCGCGCAGGGCGGAGCTTGGATGGCAGTGTGGAGGACATGCTTGGAGCCAGGTCGCCGGTAACCATGGCAACTCAGAGGAGTTACAGTGTTTCTGGATCTTATAGACTTGGAGACCAACTCTACAACCACCATCCATACAACGACTGGGCTGGAGGGAACCGCTGGGTGAACAGCTACCATCGTTACCACAGTACCGAGGAGGAGAAGGAGGAGGAAGAGGATGACGAAGACGACGAGGAGCAGATATGGAACACACCTGAGATGATAAGACGGACTGCTGCACCAGATTTGTCTCCCTCCTCTAATGGTTGCCATAGCACCCATTGCCTGGGGGTGGAGACTCTACGGTTGAGGGCGGGGCTTTTTTGTGTCTTCCCATATTTGGACGTTGCTTCCCTGCTGCATGCGGCTGAGGTGTGCACTGATTGGAGGTGCGTTGCTAGGCACCCGGCTGTATGGACACGCCTACTTCTGGAGAACGTCACGGTGTCCACCAAGTTCCTGGTCACCCTGTCTCAGTGGTGTACACAGACCCGGTCCCTGGTTCTGAAGAACCTGAGAGGCAGAACCAGACGACCTGGAGAGAG
CAGAGAGGACTACCAGACCCTCAAACGGGGCTGCCTGGAGGAAGGGGTGGAGGCTGTCTTGCGCTCAGCGGGGGGCAGTCTGCTCTACCTGTCTGTCTGTCAGTGTTCCAATGTGCTGACAGACAGGTCCCTGTGGCTGGCCAGCTGCTACTCCCCCAACCTACACACCATCACGTACAGGAGTCCCGGTGAGGGGGTGGGTCAGGAGGTCTTGTGGGCGCTGGGAGCCGGCTGCAGAACCATAGCTCACATGAAGTTCACCCCCCTAAACCCCAGCCAGCAGCCTCATCGCCTTGGCAACCGTAGCCTGCAGACCATCGGTCGGTGTTGGCCGGACCTGCGCTCCCTCAGCGTGGGCGGGGCCGGGTGTGGCACTCAGGGATTGGCTGCTGTGGTGCGCAGCTGTGTGTGTCTATTAGAGCTGGAGTTGGAGTGCGTTTCAAAGGTCGACCTGAAGGTGGCGACAGAGCTCTGTAATAACGGACTCACCAACTTGGAGACTTTGACGTTGACGCACACTGCCATCACTGAGGAAGCTATACTGCACTTTCAAAGTAAATGTGTTAATCTCAGGTCCATGGTGGTGCTGATGAGGAAGAGTCATGCTAACGAAGGTTCGCTTGAGGAGGACAGCGTGTTCAGAGATAACCTGGAGGCTCTTAAGGTTCTAACCCGGTCTCCAGGTCTTTGTGGCATCCTGCAGGTCAAAGAAGAATACTGAACTCTTCAAAACACGCACACGCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGACAATCGGTTATCAATGATAAGTGTAATAAATGTCTTGTTTCTA
XM_060035416.1  PREDICTED: Gadus macrocephalus Kv channel-interacting protein 4-like (LOC132445433), mRNA   TCTTGTTTCAAACGTCAGGATTTCGTCATGGGCCTCTCGATCCTCCTGAGGGGAACCATCACGGAGAAGCTCAACTGGGCCTTCAACCTGTACGACATCAACAAGGACGGCTACATCACCAAAGAGGAAATGCTGGACATCATGAAGGCGATCTACGACATGATGGGGAAGTGCACGTATCCGGTGCTCAAAGAGGAGACACCGCATCAACACGTGGAGATATTCTTCCAGAAAATGGATAAGAACAAAGACGGCGTGGTCACCATCGATGAGTTCATCGACTGTTGCCAAAATGACGATAACATCATGCGGTCGATGCATCTGTTTGAAAACGTCATCTAAACCGTGGAGATGATATGACCGCCGAGGCTATGGTGGGGGGGGGGGACGCCGTCGTGGTCTTCATCCTCATAATCTTCACCCGGGCTCCACTCTGCCCGGTCTGGACTGTAGGGAGCAGACCCACGTTTCAACGTTTCACGCCCACTCGGTGGAAGCAGTCTCAAAATGACATTCTATTAATAATTAACATTTCTAGATCAATGTTCAACACTTTGTTTCATTCCTGAATGGTACACGTGTTGCCTGGGAGGAATACCTTTCACAACAAGGAATAATTACTATGTTTACGGTCACATTGTGTTTCTTTGCAAGCGATAACCCGGGTCAGCTTTCAGCATTATTGACCGAGGAGTTTTATCACTAAGATGAGTGTGTAGTTTGAGAACAAGAGTCATAGCAAGAGATTATTTATTTTTCTGTTGGCCGTAGTATTTGCTTTGAAGGATCAAAATCACTGTGATATAATCCATTCATAATTAACATTAAGGAAATAACTAACGATTATCCTATTGATTTTATATTTGAAACTCAGTTTCCGTCAGAGTTTCAACGTTTCTGGAACATTCTTACCATAAGTTACACGATGTGGTCTGTACCGTGTTCCCCAAAGTGAGCATTCACAGAACTTAGATTGTACAATTTTTCATATTGTACATATTACAGATGATTTAATTGCTTTAGATAAAAACCTCTATGAAAAACTTTTGAACTTTATTCGTGTTTACTTGCCAACTAGAAGGAGACCGCGCAGAAATTGTGTGAAATCAGACCTTTCAATTCTAACTTTATCAACAGAAGCACAGGGATACATATAAATATATATTTACCATGAGTATAAAATTAAACAATATCTTAACCAGAGATAGGCTGCTTTCAATGTTTTTCATTTAATCTGATGTGTATTACCTTGATGCATACTTTCCGTAATTCCTCTATGAGTTTCAAAAGGGTTCATTTTGCTGTTTTTCCCCCGAAAGGCAATGGCTTGAGTTTATTTATATGGCATTGTTGCCATTGGTTAATCGATTGCCAGTTGTACAGTATTACAAGCGATTATCTTACAGCCTTTTTAAATGTTGGCTACACATACTTATTTGCGTGTTTTAGTGAGAGTGTTGTTTGAACGATTGTTTTCAGTCAGTCTCTTACTATGTGGTAGTTTGTATAGGTCTCTTTCGATCTTGGGTAAATGTGCTTTTGGCCCCTCTCATCCTTTTTCCTTGCCATGTTGGTATGGAGTACTAATCCGAGCAGAAGGCCTGCTGGCTAAACCTTTACTTTATTTAATACTTGATCTTAACAGTTGACAAATGGCCCTCTGTCTCAATTAGAAAACTGTATTCATATAATTTAGATACCAGCTGTGTTGGTTCTTACAAATGCTTTTAACCACGAAAAGTTAGTTTTTAGGAAATGCAAACAGATAAAACGCTACATTAATTTCTGAATGAGGCATGCTCGTTATTGATTAAGCATGCATTTCCCTTTTGAGAGGAGTACTTGAAATTGTGTTTCTGAGCAGAGGCAACGTGTTCATTGTTAAGTCAAAA
GAGATGACTCGTCTTTTTCATTGCGTTGTTTTGTTTTGTTTTTTATGCCTTGACCCTTACGGAAATGTAAATTTGATCAACACATCTCTATTTAAAAATTTGATATGTATTTGTGTTTGTCTGAGAAAATGGTTGAATATACTACGGCTGGTGTTTCATCCCATGACTTTCATGTAAGACTGTTTTGGAAATCAAAACATTTCTGGAGGTGGTACATTTAAATATTTTATCATAATACTTTGCGCCTGGTTTGTTAACTCATTTACAAAATATTTTCAGGCTGGACAATTTCTTGATAAGGGACTCATTACCAATCAGAAATAGTTGTATAACCACATTTGTCGATATTTTTTTCTAGTTCAGCCCCTCACATGATACTATTTTGCCCTAACAACATTTGTTGGTTTGGGCCTGGGGTTATTCAAGAAAAAACAATTAAAACAGAAACAAAACTG
XM_060035417.1  PREDICTED: Gadus macrocephalus RNA binding motif protein 46 (rbm46), mRNA   GCCATAACGGAGCGGGACTGTGGTATGGACACCAACCAGGCAGTGCCCGTAGAGGTGGCGCTCCTGGAGCTCATGAAGAAGACCGGATACAGCATGATGCAGGAGAACGGCCAGCGCAAGTACGGCCCACCCCCAGGTTGGGACGGCCCGCCCCCGCCCAGGGGCAGCGAGGTGTTTGTCGGGAAGATTCCCAGGGACATGTATGAGGATGAGCTGGTGCCCGTGTTCGAGAAGGCCGGCTACATCTACACATTCAGACTCATGATGGAGTTCAGCGGAGAGAACCGCGGCTATGCCTTCGTAATGTACACCAAAAGAGAGATGGCGCAGAAGGCCATCCACATGCTGGATAACTATGAGATCCGACCGGGGAAGTACATCGGAGTGTGCGTGAGTCTGGACAACTGCCGTCTCTTTGTGGGCTCACTTCCCAAAGACAAGAACAAAGAGGAGATAATGGAGGAGATGAAGAAGGTAACAGAGGGAGTTGTGGATGTCATAGTGTATCCCAGTGCTACTGATAAGAACAGGAACAGAGGCTTCGCCTTTGTAGAGTACGAGTCCCACAAGGCTGCAGCAATGGCCCGCAGGAAACTCATACCAGAATCACTACAGCTGTGGGGCCTCAACATCCAGGTGGACTGGGCGGAGCCGGAGAAGGACGTGGATGAGGAGACCATGCAGCGCGTCAGGGTCCTATACGTGCGTAACCTGATGCTGAGCACCAGCGAGGAGACCCTGCAGGAGGCGTTCTCCTGCGTGCGCCCCGGCTCAGTGGAGCGCGTGAAGAAGCTGACGGACTACGCCTTCGTCCACTACCGCAGCCGGCAGGACGCGCTGGACGCCCTGCGCCTCATGAACGGCACGCTCATCGACGGCGCCCTGGTCACCGTGTCCCTGGCCAAGCCCGCGGCGGGCAGGGACGGAGGCGGCGCCGCGTGGAGGCACGGCGGCGGCGGCGGGAACCGAGGCTACGTGGGGAGGAACCCTGCCATGGCGGGGGGAGGCAGAGGAGGCGGAGACGGCCTGTTTCTGTTTCAGCGCGAGAACGGAGGAGTGGAGGGGAGGAGGTGCACCACCCCGGGGCCTCTCGGCCTTCTGCCGGCATGCCTGGGAAGCCCCGTGTACCTCGGTGGAGCAGGAGGGTCGGGAGATGGGGCTCAGAGGGTGTTCCCCCTCCCTCCGGGTACGCCCCTCTCCCCAAGCAGCCTGCAAGCCCTGAAGCCCTACCAGATGTCCAGTTACGTCAGCCTCCTGGAACACTTCTGCTATCTGCATTCTTGGAGCCCTCCAGACTACTACCTCTTCTCCACCCCTGGGCCCAAGATACTGCTAATATTTAAGGTGGTGCTGGTCTCTACCCAGAACAGCTACTTGCCAGACAAGCTGTGTGCTCTGGTGGAGGATGCAAGGGAGCTGGCTGCACAGCACGCACTCTGGAGCCTAGACGGTTCCTGGCTGTCTGGTGATAATGGTCCTGGTTCTCCAGCCCCCCTCTCTCCCCCTCTAAGTGCTGTTGGGGTGGGCTACAGCTCTAGAGCCTTGACCCCCTGTTTTACCTCCCCGTACCCCAACTCGTCCCTCTCTCCCCCGCTGCCCTCCCTCTCCCAAATCTACGCCCCCCTGCCGTCCCCCTTCTACTGACGGAACACCTGCGCTTGGTTGAGAGAGGAGAGAGACAGGAGGAGAACAGAGAGAGAGGAGACCAGGAGAGGGCCAGATGAAGAGAGCAAAGAGGGAGACCGGGGGAGAGACACAGGAGGAAAGCAGACGAAGAGAGAGAGAGAGAGGTTACAGAGAACATGAGAGAGAGAGAGAGAGGAGAGAAGACCCTACAGTGACCCGTGACTATTTCTCTCTCCCGTATTCAGGCAGAACAGTAGAGAGGGGGAGAGCCAAACGTTTTTA
TTGTGTGCTTTTCTGTGTGTTCCATGCATGCCGTTGTTGAGGTTATTGTTTGTCTTTTGAACCTTTTTTGTTCCGCCTTGATTGTGAAAAATGAATACTGTTTCTAACGTA
# Read the tab-delimited RNA table produced earlier in this workflow.
# The file has no header row, so the columns are auto-named V1, V2, V3
# (per the preview printed below: accession, description, sequence).
# row.names = NULL keeps plain sequential row numbering.
rnatab <- read.delim(
  file = "../output/11-annotation/GCF_031168955.1_rna.tab",
  header = FALSE,
  row.names = NULL
)
# Show the first six records as a quick sanity check of the import.
head(rnatab, n = 6L)
              V1
1 XM_060035408.1
2 XM_060035409.1
3 XM_060035410.1
4 XM_060035411.1
5 XM_060035412.1
6 XM_060035413.1
                                                                                                                                   V2
1                                                        PREDICTED: Gadus macrocephalus putative helicase MOV-10 (LOC132473465), mRNA
2                             PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X2, mRNA
3                             PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X3, mRNA
4                             PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X4, mRNA
5 PREDICTED: Gadus macrocephalus ATP-sensitive inward rectifier potassium channel 12-like (LOC132475957), transcript variant X2, mRNA
6                               PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X1, mRNA
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                  V3
1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                   AGTTCAATTGCAACTGGGTAGAGGGCGGCATCTATATCCCCGATAAAGAAGAGCTGGAGAAGCATAAAATCATGGTCACCACCCTTTTCTCCGCTTCAAGGCTGGTTACGGAAGGCATCCCTCCAGGCTTTTACAGCCATATCTTTGTTGACGAGGCAGGACAACCTGCAGAGCCTGAAGGGGTTATCCCCCTGGCAGGCCTACTGGACCCAAAGCGTGGCCAGGTAGTGTTGGCAGGAGACCCCAAACAGTTGGGCCCCATCGTCAAATCCCCCCTAGCCAAGAAGCATGGACTTGGTGTATCAATGCTGGAGCGTCTGATGGAGTTGAATGTGTACAAAAAGACAGAGGAAACGGGGTACAACGAGCGTTTCATCACCAAGCTGCTGAGGAACTACAGGTCTCATGGCAGACTTCTAACGATCCCAAATGAGCTGTTCTACGAGAGCGAACTCCAGGTGTGGGCCGATAAGGATATCCGCAACTCCTTATGTGAATGGAAGCACCTTCCCAGCAAGGGATTCCCGCTGATCTTCCATGAGGTCACTGGACGCATGCGTCGCGAGGACAACGCCTCACTGTTCAACGCCTCACTGTTCAACGAAGACGAGGTGGCGATTCTAATGCAGTATCTGAAAGCACTGTTGGAAGATGTCCCCCCAGAAGACATAGGCCTCATTGCCCCATACAGGAAACAAGTGGAGAGGATCAACAAGGCTCTTAAGATCGAGTTCCCCAGAAACACAGCAAAGTTAAAGGTTTGCACAGTGGACGCGTTTCAGGGCGAGGAGAAGCGGGTGATTCTGCTGTCCACAGTGAGAAGCACCAGCCGGGACCCCAGACCCCCCTCCTCCGTGGGGTTCCTTGCTGACCCCAAGAGGTTCAACGTGGCCATGACCCGCGCCCAAGCCCTGTTGATCGTGGCGGGGAACTCAGAGGCGCTGACAAAGGACAGGATTTGGAGCAGGTTCATCGAGTACTGCAAAGAACACGGAGGTTACACCAAGACCATGACCACCGATTGACGTGCATGCACGGACACATACGCACACTGACAACCACACTCACGTACACACATACAACCTCCGCACACACACACACACACACACACACAAACACACACATACACCTTCTCCACACAAAATGATATTAAGGACTAAAAACCTCACATTTTATTTGACATTGTTGCATCAGCCTATTAGAGGAGAATTTTTTTGACCATTAGGCCTTCCCATCGTTGTGATGGAAAATCTATCATAATCATAAAGTTTTTCGTTTTGAACTTAGGTTTTGTTTACTATTCCGTGTTTGTTCTTGCCGTCCTGCTCTCCTTCGGGGTCATCTAAAATGTAAACCTTTGTGGTTGATGATGTAATCTGTTCTCACTCTGTTCCTGTGTTATCTTCTGTTGATTTGGGCCTGTTCCACGACCCTGGGGAGCTTCAGGAAACAAAGGGTTACATGGCACGGACGCGACCCCTTATTTATGACATCCTGAGGAACTTCAATCAATAGATTACCATCTGGTTAACAAAGGCTTTTGGTTTATTGGGGGACAGCTGCCCTCAAGAACCCAATCTAAGTGTATAAGAACTCCTGACTTAGCCACTTTAGAGTATGAAGTAAGACACAGGGCAAACTCCCA
TCTGAGGCCTCAAATTACTGTAGTGTATGCCTGTTTTATTGTTTTTTGATGCATGTTGTGATGTATCTTTGTTCGTACTTTTATTACAAATATATAGGACCACCAATTGCCTAAAA
2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              TTCCTGGTCCTCTCAATATTGCTGATATCATAACCAGAGGGGCCAGTCCTCAAGACCTGGATGAAAGTTCAGAATGGCAGAACGGACCAACATTCTTGAAGCTACCAGTGGATGAGTGGCCAGTTAAATCTGCCAAAGAGTTGGTGATGGCTGCCAGAGACGGTGTCAACAGATTACAGAAGAAAGCGTTTGTTGCTGCACTGACCCGAGCACAAGCCAAGGCACAACTGCTGCCGGATCCAAAGGATACTGACACGCAGAAGCCTTCTGAACTAAAGCAAGACCAGGAACCAACTCAGACCCAAACACAACCAAAGAGGCCACCAGCAGGATCAACTGTCCAAGAACTGGTGGATGTCAAGCGGTTCAGTAACCTAAGCCGACTTGTCAAAACAGTTGCCTGGATCCGGAGAGCAGCAAGGATGTTCATGAAAGGGAACAAGCGAACTGCAAACAATCCAAAGTGGGAGGCAGTGTCGTTTTCAAAAGTCATCTCAGTGACAGAGAGGGAAGACGCCCTAAAAGACATCTTCCTTGCGGCACAGCAAAGTGCATCCTTCCCAAGCACAACCACAGACAGGCTGGTGGTGTACAGAGACCAAGAGACTGGATTGTTGGTCTGTGGGGGTCGTGTGCAGATCTTCAATGAAGATAAAGTTGCTGTCCCCATCTTGCCTTACGAAGCCTGGGTGTCGACACTGTTAGCACGAGAAGCCCACGAGGAGAACCACGATGGAGTGGCCGGGACCTTGCTCAAGATGAGAAGAAGAGCATGGGTCGTGAAAGGTCGGAGAATTGCTCAAAAAGTGGTCGAAAACTGCATGTTCTGCAGGAAAACTAAAGCAAAAAGATGCCAGCAAATAATGGGTGATCTACCTCCAGAGAGGACAGAACCAGCTGCCCCATTCCACTACACAACAGTCGACCTCTTCGGACCCTACCAAGTCAGGGATGATGTAAAGAAAAGAGTGTCACTGAAGGTTTGGGGAATTGTGTTTTGTTGCATGGCCTCCAGAGCTATTCACACCGAGCTGGTGAACTCTCAGTCCACAGAGAG
TTTTCTGTTTGCCTACCAGAGGTTCACAGCACTAAGAGGTCATCCAAAGAAAATCTGGTCAGATCCTGGGACCAACTTCATTGGGGCTAAGCCAGTCCTAGAAGAACAGTACCGATTCTTTGCCAATCTTGACAAAGCTACCCTGGAGGAGAGAGCTGCCAAGGATGGCACAGAATGGTCGTGGAAGATTCAACCAGCCGATTCTCCACACCGGAATGGTGCCGCAGAAGCTGCTGTGCGCATTGTCAAGAGAGCACTGCAGAGTCTTGGAGGAGAGTCCGGCCTAAGTTGGAGTGAATTCCAAACAACTCTCTACACGGCTGCCAATCTTGCAAATGAAAGACCAATTGACGCCAGGACACAGAGCCGAGAAGACAGCGTCCAGTTCATCACTCCTAACTGTCTCCTGCTCGGACGAGCATCACAGGGTGGAGATGTCAGAACATTCGACTTCAGTGACTATCCCTACAAAAGGCTTAAAGAAATGCAAGCGCAAGTTAACAAGTTCTGGAGGAACTGGAGTCAACTTGCTGGTCCTAACTTATTCGTAAGGAACAAATGGCATACCGCCAAGAGAAATGTCGCAGTCGGGGACATCGTCTGGATGGCGGACCAAAACGCCCTTAGGGGTCAGTTCAGGATTGCAAGAGTGGTTAGCGTCAACTCGGACAGCAAAGGAGTTGTGAGGGACGTCAATGTCAGAACCTTCCCAAGCTACCCCGTTCCTGTCACAAGGCCTACCGGAGCAAAAGTAAGTCACCGAACATCTAAGAAATTCAAAGAAAAGATCCCAGCAGCAGTTCTTCACAGAGACGTAAGGCGACTAGTGATCTTGCTTCCCACCGAAGAACAGAACTAAACATCTGACCCAAGTTGCTCCTGTAACATGCGACCTCCCAGGTGTCTCCACTGGAAGGTCGAGTGGGAGGTGTGAAGTCAACTTGGAAAATCCCTGCTGGATATGAAGGGGTTAATATGCAACGCCGACTGACAGCTGGGGGAATCCCTCCTCAGAGCGTTACCTGGGAAATCCACGTCATTGCGTCACCTTACCGGAAAGGACAGAAAGGGGAAGAGAGACTCTTTCAGGAAACGCCAGTTGCAAACAAATCAGACAGCGTTCGGTCCAGCAGCAGCAAAGCTGCGCTAGAGAACTTGGAAAAAGCAGCGCTGGTACAGCCATAAAAAGCCTGGAGAAACGGCCGGTTTTTTACCTTGCAGTATACCTAGCAGTTTACCACTTGCAACCCACTTACAACTGTACCCACAATACCTACCTGCTACAACTTACCTGTACCTGTGAAAATACAAAAAGAAAAGTAAAGAGGAGTTAAAGAAGGAAAACCGTGTGGTCATTGCATGAGTGGAACCCAGTTAAACCTCCTATGGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCG
ACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
CAGAATTCATGAAAATATGCTTAGACATTAAAAACTATTAATAATATTTATATTTATATTTATATATTTATAAGTATTTAAATGCATATATGAAAAACATGTTTGTATTTACTATTTCATTCTGCACTGCATTTACTGTAGTTATTTTTGGTTTCTTTAAAGGTTTTTTACCTTGCAGTATACCTAGCAGTTTACCACTTGCAACCCACTTACAACTGTACCCACAATACCTACCTGCTACAACTTACCTGTACCTGTGAAAATACAAAAAGAAAAGTAAAGAGGAGTTAAAGAAGGAAAACCGTGTGGTCATTGCATGAGTGGAACCCAGTTAAACCTCCTATGGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
CCCTGCTGTAAGTATTTCTTTCTTAAAGGTACAACATGTCTCTGGCCCGCCGAGTGTCATTGTAGGCCTACTTGTAAATGAAAATTAAAGCTTTCTATGGTGATTAACTTGTCTATTCTAGGTGGGAAACGCTTGTTTGCACTTGGTCAAGGAGATGGCCGAACATCAAGGAAAATATGTATTTCTTCTTGACCTCCACATCCCCCCAACATGGCTCCAATCGCCAAATTCAGATCATCTGCGCAGCTTTCCAGTTGACATCACCACCATGGATGCAGTGATTGTCCCACTGTGGACACCAGGGCATTTTCTGCTTAGTGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
5 AAGCGTTGTGTGTCGCTGAAGCAACATCCAGTGCCAGGTCTGACTGAAGCTAGCTGGTTAGCATAGCTGACCTATGTATGCTTTGTATGCATTATGAGCGTTAGCCTAAAGGGTAACAGCAGTAATCTGGTCTGATTACAGGCTATTTATAAATCTCTCCCCTCGACCCCAAACTAACCTCGCTGACCACTAGCAGCAGGATCTGAACCATTTTTCCCTTTTCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAGTTTCTATATTGATCGAAAATGGAGGCTCTGGAAACACCTTTAACTCTGTTTCTCTGTTTCAAACTGTGAAATAGGGAGGTCTTTTAGGAGAGGAGAAGGGGGAGGTGTGAGTGGAAGAGGAGGAGAGGGGTGGAAAGAGAAAGAGGGAAAGGTGAGTTCTGGAGGAGTAGAGGATTAGAAGAAGGCCTTCAGGTCCCCATGCTAGTGCGGTGAGGTGAGGGAGCAGGGGGATGGCAGGAGGAGTAAGAGGCTGCATCATGCAGTCCACCCCTTGCAGCACTAGAACCTCCTCCTCTGCCCTTCTGATCCAGTGGAACAACGAGGTGATGATCTACCAGCCAACCGTGACCCTGCCTCCACGAGCCTACGGAATGAGACATGTTTTGATGCAGGTTATGAGAGTTCGGACTTCGGATGAACACATATAAACCGTCTCTCAACTCCTCTGGCCCCCCTCTTCTTACCTCTCCTCTCCTCCCTTCCCATCACCCACCATCACCGCCTTCTCTTCACAGGAGATGGACACAGCAGATCTGACTGGACAGAGCACATCTCCTCCTCCCCTCCTCCTATGGGATTGCGCTGGTGTGGGGGGTGGAGGACAAAGTTCCTCTCCTCTCCGCCTCCACCCCTCCTACTATAGAGTTGTGCTGGTGTGTAGGACAGAGTTAACAGACAGGATTACAGCCAGAGGACGGCAGAGGCAGTGAAGGAGAAAATAAAGGATTTTCTCAGAGGAGGGAAAGCCAAGGAAAGATTGACCAAACAGAACTCTGGGAGAATGAACATGGAACCTTTGAGCTGAACTGAAGCAGAGTGATTTCGACCTTAGCAAGTCAGCGTTTGGACAGGTATAACGCTGGTCTGTGGTGCTAGGGGGCTGACCAGGGCCACCGGGGCCCCTGTGTCCAGAGCGTGGTGCCTGGGAGCCACTGCCAGGGGTGATGGGAACAAGACGATCCAGCAGGTTCAGCCTGGCGTCCGTCGTCCTCCAAGAAGACGAGCACCGTAAGGTCTCCAGCCAGGGTCTCCTGAACGGCCACGACTCCCCGCCGCGCTCCTCCACCACCTCCTCCTTGGCCACCGGCGGGAGGGAGGAGGAGCAGCAGCGGGGAGGGATGAGCGGCTCCAGCGTCCGGGGGGGCCCGACGCGGGGCCCCTCCGGACCGCCGCGGAGCCGCTTCGTGAAGAAGAGCGGCCACTGCAACGTGGCGTTCAGCAACCTGGAGGACCGGAGCCAGCGCTACCTGGCCGACCTCTTCACCACCTGCGTGGACGTCCGCTGGCGCCACCTGCTGCTGCTCTTCTGCGTCAGCTTCCTGCTCTCCTGGCTCTTCTTCGGGCTCGTCTTCTACCTGGTGTCCCTGGCCCACGGGGACTTCCAGGACCCGTCTGACGTGTCGCCGGCGGGAGCGACCCAGGGCCCGGGGGGCCGGAGGGAGCGCACGCCGTGCCTCCTCCACGTGCACGGCCTCCTGGGCGCGCTCCTGTTCTCCATGGAGACCCAGACCACCATCGGCTACGGCTGGCGCTGCGTGACGGAGGAGTGCCCGGTGGCCGTGGCGACGGTGGTGGTGCAGTCGGTCGTGGGCTGCATCCTCGACTCCTTCATGATCGGCACCATCATGGCCAAGATGGCGCGGCCCAAGAAGCGGAACCAGACGCTG
ATGTTCTCGCGGAACGCCGTGATCGCGCTGCGCGACGGCCGGCTCTGCCTCATGTGGCGGGTGGGGAACCTGCGGCACTCGCACATCGTGGAGGCCCACGTGCGCGCCCAGCTCCTCCGGCCCTACGTCACGGAGGAGGGCGAGTTCGTCCCCCTGGAGCAGATGGACCTCAACGTGGGCTACGACGACGGCACCGACCGCATCTTCCTCGTGTCGCCGCTCGTCATCGTCCACGAGATCGACAAGGACAGCCCGCTGTACTCGCTGAGCCGGGCCGACCTGGAGGCGGAGCACTTTGAGATCGTGGTCATCCTCGAGGGCATGGTGGAGGCTACGGCCATGTCCACCCAGTTCCGCAGCTCCTACCTGGCCCGCGAGGTCTTCTGGGGCCACCGCTTCGAGCCCGTGATCTGCGAGGACCGCGACCGCTACAGGGTGGACTACGCGCGCTTCCACCAGACCTACGAGGTGCCGTCCACGCCGCACCTCAGCGCCAAGGAGCTGGACGAGGCCGCCAGCCGGCCCCCCTCCGCCAGGACGACGCGACCGCCACCGTCGGCCAAGGAGGCGCCGAGCTCGTTCTGCTACGACAACGAGGTGGCGCTGATCTGCGGCGAGGACGACGACGAAGACGATGACGACGACGAGAACGACATCTTTGACTTGCGCCGGACACTGTCGTCGCCGGGGGGGAGGCGGGAGGAGAGGAGGACCTCGGTGACCGTGGACCTCCCGAAGGCGGGACAGGACTCGGCTGCCTTGACAACGGGCCGCCAGAGCTTGATGTGCGTGCTGGACATGGACAACCACCAGAGGGAGTTCGATATGCTGCAGACGGCAATTCCGCTCGATCCGCAGTCCTACAAGAGCGAGCAGGAGATGTAGAGGTCTGAGGGACACTGGACGGGGAAGACTGCTCGGGGGTTGTTCTCAGCCTTGCTTCTGTTGGTTGATGTTTGGGGTTGGAGAGTGGGACATATCTATTTTACTGTATGGTGGAGAAACTAGAAAACGGATTGATTATTAAACCACGATGAGCCGTATTGAGGCAGTGGTCATTCAAGAAATGAAATTTTGCTGAGAAGTGAACTTGATTCCGACTGAATAGTCATTCTCCATCACATAAAAACCTCATGACAACAACATTAAGACAAGATACACATGACGGTTGTCTGACTAATGCTCAAATGCTCCTCTATTTCCAAGTCCCTGAGAGGCCAAAATGTGAGGACTGAAAACCGTTGTTCTTGAACTTGAATGTGTGATGATTCTGTGTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTCTCTTCTTCAAATTGCTATTATATTAATATTTTAGTTTTCTTGAGGGTTACAAATATTCAGGTAGTACAAATTGCTAAACATCCACATTAAGCACAACAATAACTTGATGTTGATCATATCATGAGAGCACAGTCACTTTGCTATCATGACAGATGCTACTGTTATGGTTTAGTATTGATTTTTGATATAGCATACTACTGTAAGTGCTCACTAATGTCCCTGCTTATGCACAAAGCCCGGGTTGAGAAGCCTATTTCTGACCAAATGCTTAAAATAAAAAATTACAAAGCACTTTAATTGTTATTGTTAATAAATATTTACACAAATTAACACCCTATAGAAAAAAAATGTTAGGTGACGGCGATGGAGTTTTCAGACCTCTAGTGCCCTCTATTGGTGGTGAGGATTAACTGCTTATTTGTGTTGGTTGGAGAAAATCAGGAAATACACTTCACACAGCAGCTGTGTACAGCAGCTGGACAGACGGCTTTTAACCAGAAGACAGGGGTTGTTTTTGTTATTTGATTTGCTTCAGTGCCGCGAGCACGAGCCTATGGCCCATTTCCCCAGGACAGCTCACTTTAGTTACCCATTAATAGAATAGACAATATGTGATGTTAATAGAATGGAT
TGATCAGAAGTATCGCGATGTTAACTTTGGGGTCAGAACTACAGACAGCAAAAATAGGTCAGGGCCGACTATCATCAAATTGTTTTGGCTGAAGTCACACACAAAGTATCAATCAG
6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
AGGTCACGTGCGCCACGTCATTAGCACGCTGCACGGCCACAGCGTGCGGAGAGTTTTACCGGAGGGAGGGATGATGTAGCACGCGGGCCAGTCGGTTGACAGTTCTCTGCTTGGCTCGGCTCTAAGAGACACGGCCGTACCGGAGTGGACCTGAATACCGTGCATCCAGGCCAGACAGGGTGCGGATATATGGGCGAATCATCTAATGAATTAGTGAATTAAGCCAGCAGGCCGACGGTCTAAATTCTGGTCGGGTGGTGCTCGGCGGTTAAGGCAGAAGTTTTTACACCGGATAATATCGACTCCATTCCGCGGTGGAATCACAGGGTTGTCGTGAAGGTGCTGGTGCTGGAGCATGCTGGTAGCAGCAGCTGACCAGACCTTCTCATGTCCAGGCTGTGGATCAGAAGGGACCTTCAACTCCGTCCTGGACCTCCAAAACCACCTGGTCAACACACACACCTATCAGACCCTGCTCGGTCTATCCAAGGTGCGTGCCAGGAGCTCTACACCAGGGTTCCTCCTGCGCCTCCCTGGTCCGACTGTCTCCCAGGGACAGAGCTCCTCCCTGGGCATGGAGTCCACCAGAGACCCCCTGCCTCTGGCCGGTCTGGACCTGGCCTCCTCCACCGCCTCCACCCAGCTTCTCAGGGTGATGTTAGGGGCCGCAGGAGGATCTTCTCTGGGAGTCCCCCAGGACCCCTCCAGGGCTCTGGCTCTGCCTAGCCCTACAGGCCTCACATCCACTGCCTTTTTAGCTCTGGAGGACCATATAGGCCTCAGGAGGAGCCTGGGGCTGGAGTTGGGGTATCCCCCAGTGGAGTGCTTCTCCGTGGGGCCAGGCCTGGAAGAAAGACTAGAGCTGAGGCTGGACATGCAGGTGGCCACCGCCGTAGCCGAGTTGGAGGAGAGGGTGAGGGGGCGTGTCCATCACCTGAAGGCGGAGCTACAGGAAAGAGAGGCGGAGCTAGAGCGAGAGAGGAGGAAGGGAGAACGTCTCGTGAGAGAGAAGGACGAAGTGGAGGAGAGAGCGGCGTACCTGTCCAGACAGGCTTCCATAGCCATGGAGATGATGGAGGGAGTAAAGCGAGAACTGAAGGGCAAAGAGGACGAACTGGCCAAACGAAAACAGGATATGCATCAGGTGCAGGTGTTCCTGAGAGATACAGCAGAGAAGGAGGCAGAAGCTAAAATGAAACTACAGATGTTTATGGAGTCGTTACTCGATCGAGCCGACCATGCAGAGAGACAGCTGCTGCAGATCGCACCCGGTCACACGCACCCGCAACGACACGTACACACACCGATGTACACACACACTCCGGGACACACACACAGAGGTGTCTCCTCACCTGTGTGGGGTCGCGCAGGGCGGAGCTTGGATGGCAGTGTGGAGGACATGCTTGGAGCCAGGTCGCCGGTAACCATGGCAACTCAGAGGAGTTACAGTGTTTCTGGATCTTATAGACTTGGAGACCAACTCTACAACCACCATCCATACAACGACTGGGCTGGAGGGAACCGCTGGGTGAACAGCTACCATCGTTACCACAGTACCGAGGAGGAGAAGGAGGAGGAAGAGGATGACGAAGACGACGAGGAGCAGATATGGAACACACCTGAGATGATAAGACGGACTGCTGCACCAGATTTGTCTCCCTCCTCTAATGGTTGCCATAGCACCCATTGCCTGGGGGTGGAGACTCTACGGTTGAGGGCGGGGCTTTTTTGTGTCTTCCCATATTTGGACGTTGCTTCCCTGCTGCATGCGGCTGAGGTGTGCACTGATTGGAGGTGCGTTGCTAGGCACCCGGCTGTATGGACACGCCTACTTCTGGAGAACGTCACGGTGTCCACCAAGTTCCTGGTCACCCTGTCTCAGTGGTGTACACAGACCCGGTCCCTGGTTCTGAAGAACCTGAGAGGCAGAACCAGACGACCTGGAGAGAGCAGAGAGGACTACCAGACCCTCAAACGGGGCTGCCTGGAGGAAGGGGTGG
AGGCTGTCTTGCGCTCAGCGGGGGGCAGTCTGCTCTACCTGTCTGTCTGTCAGTGTTCCAATGTGCTGACAGACAGGTCCCTGTGGCTGGCCAGCTGCTACTCCCCCAACCTACACACCATCACGTACAGGAGTCCCGGTGAGGGGGTGGGTCAGGAGGTCTTGTGGGCGCTGGGAGCCGGCTGCAGAACCATAGCTCACATGAAGTTCACCCCCCTAAACCCCAGCCAGCAGCCTCATCGCCTTGGCAACCGTAGCCTGCAGACCATCGGTCGGTGTTGGCCGGACCTGCGCTCCCTCAGCGTGGGCGGGGCCGGGTGTGGCACTCAGGGATTGGCTGCTGTGGTGCGCAGCTGTGTGTGTCTATTAGAGCTGGAGTTGGAGTGCGTTTCAAAGGTCGACCTGAAGGTGGCGACAGAGCTCTGTAATAACGGACTCACCAACTTGGAGACTTTGACGTTGACGCACACTGCCATCACTGAGGAAGCTATACTGCACTTTCAAAGTAAATGTGTTAATCTCAGGTCCATGGTGGTGCTGATGAGGAAGAGTCATGCTAACGAAGGTTCGCTTGAGGAGGACAGCGTGTTCAGAGATAACCTGGAGGCTCTTAAGGTTCTAACCCGGTCTCCAGGTCTTTGTGGCATCCTGCAGGTCAAAGAAGAATACTGAACTCTTCAAAACACGCACACGCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGACAATCGGTTATCAATGATAAGTGTAATAAATGTCTTGTTTCTA
library(stringr)
library(dplyr)
# Derive a geneID column from the transcript description held in V2.
# Step 1 (inner call): grab the parenthesised token that is immediately
#   followed by a comma, e.g. "(LOC132473465)" in
#   "... putative helicase MOV-10 (LOC132473465), mRNA".
# Step 2 (outer call): drop the surrounding parentheses so only the bare
#   identifier remains. Rows with no match stay NA.
rnatabID <- mutate(
  rnatab,
  geneID = str_replace_all(
    str_extract(V2, "\\(([^)]+)\\)(?=,)"),
    "\\(|\\)",
    ""
  )
)

# Preview the first rows to confirm the new column was populated.
head(rnatabID)
              V1
1 XM_060035408.1
2 XM_060035409.1
3 XM_060035410.1
4 XM_060035411.1
5 XM_060035412.1
6 XM_060035413.1
                                                                                                                                   V2
1                                                        PREDICTED: Gadus macrocephalus putative helicase MOV-10 (LOC132473465), mRNA
2                             PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X2, mRNA
3                             PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X3, mRNA
4                             PREDICTED: Gadus macrocephalus uncharacterized LOC132445431 (LOC132445431), transcript variant X4, mRNA
5 PREDICTED: Gadus macrocephalus ATP-sensitive inward rectifier potassium channel 12-like (LOC132475957), transcript variant X2, mRNA
6                               PREDICTED: Gadus macrocephalus F-box only protein 41-like (LOC132445432), transcript variant X1, mRNA
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                  V3
1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                   AGTTCAATTGCAACTGGGTAGAGGGCGGCATCTATATCCCCGATAAAGAAGAGCTGGAGAAGCATAAAATCATGGTCACCACCCTTTTCTCCGCTTCAAGGCTGGTTACGGAAGGCATCCCTCCAGGCTTTTACAGCCATATCTTTGTTGACGAGGCAGGACAACCTGCAGAGCCTGAAGGGGTTATCCCCCTGGCAGGCCTACTGGACCCAAAGCGTGGCCAGGTAGTGTTGGCAGGAGACCCCAAACAGTTGGGCCCCATCGTCAAATCCCCCCTAGCCAAGAAGCATGGACTTGGTGTATCAATGCTGGAGCGTCTGATGGAGTTGAATGTGTACAAAAAGACAGAGGAAACGGGGTACAACGAGCGTTTCATCACCAAGCTGCTGAGGAACTACAGGTCTCATGGCAGACTTCTAACGATCCCAAATGAGCTGTTCTACGAGAGCGAACTCCAGGTGTGGGCCGATAAGGATATCCGCAACTCCTTATGTGAATGGAAGCACCTTCCCAGCAAGGGATTCCCGCTGATCTTCCATGAGGTCACTGGACGCATGCGTCGCGAGGACAACGCCTCACTGTTCAACGCCTCACTGTTCAACGAAGACGAGGTGGCGATTCTAATGCAGTATCTGAAAGCACTGTTGGAAGATGTCCCCCCAGAAGACATAGGCCTCATTGCCCCATACAGGAAACAAGTGGAGAGGATCAACAAGGCTCTTAAGATCGAGTTCCCCAGAAACACAGCAAAGTTAAAGGTTTGCACAGTGGACGCGTTTCAGGGCGAGGAGAAGCGGGTGATTCTGCTGTCCACAGTGAGAAGCACCAGCCGGGACCCCAGACCCCCCTCCTCCGTGGGGTTCCTTGCTGACCCCAAGAGGTTCAACGTGGCCATGACCCGCGCCCAAGCCCTGTTGATCGTGGCGGGGAACTCAGAGGCGCTGACAAAGGACAGGATTTGGAGCAGGTTCATCGAGTACTGCAAAGAACACGGAGGTTACACCAAGACCATGACCACCGATTGACGTGCATGCACGGACACATACGCACACTGACAACCACACTCACGTACACACATACAACCTCCGCACACACACACACACACACACACACAAACACACACATACACCTTCTCCACACAAAATGATATTAAGGACTAAAAACCTCACATTTTATTTGACATTGTTGCATCAGCCTATTAGAGGAGAATTTTTTTGACCATTAGGCCTTCCCATCGTTGTGATGGAAAATCTATCATAATCATAAAGTTTTTCGTTTTGAACTTAGGTTTTGTTTACTATTCCGTGTTTGTTCTTGCCGTCCTGCTCTCCTTCGGGGTCATCTAAAATGTAAACCTTTGTGGTTGATGATGTAATCTGTTCTCACTCTGTTCCTGTGTTATCTTCTGTTGATTTGGGCCTGTTCCACGACCCTGGGGAGCTTCAGGAAACAAAGGGTTACATGGCACGGACGCGACCCCTTATTTATGACATCCTGAGGAACTTCAATCAATAGATTACCATCTGGTTAACAAAGGCTTTTGGTTTATTGGGGGACAGCTGCCCTCAAGAACCCAATCTAAGTGTATAAGAACTCCTGACTTAGCCACTTTAGAGTATGAAGTAAGACACAGGGCAAACTCCCA
TCTGAGGCCTCAAATTACTGTAGTGTATGCCTGTTTTATTGTTTTTTGATGCATGTTGTGATGTATCTTTGTTCGTACTTTTATTACAAATATATAGGACCACCAATTGCCTAAAA
2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              TTCCTGGTCCTCTCAATATTGCTGATATCATAACCAGAGGGGCCAGTCCTCAAGACCTGGATGAAAGTTCAGAATGGCAGAACGGACCAACATTCTTGAAGCTACCAGTGGATGAGTGGCCAGTTAAATCTGCCAAAGAGTTGGTGATGGCTGCCAGAGACGGTGTCAACAGATTACAGAAGAAAGCGTTTGTTGCTGCACTGACCCGAGCACAAGCCAAGGCACAACTGCTGCCGGATCCAAAGGATACTGACACGCAGAAGCCTTCTGAACTAAAGCAAGACCAGGAACCAACTCAGACCCAAACACAACCAAAGAGGCCACCAGCAGGATCAACTGTCCAAGAACTGGTGGATGTCAAGCGGTTCAGTAACCTAAGCCGACTTGTCAAAACAGTTGCCTGGATCCGGAGAGCAGCAAGGATGTTCATGAAAGGGAACAAGCGAACTGCAAACAATCCAAAGTGGGAGGCAGTGTCGTTTTCAAAAGTCATCTCAGTGACAGAGAGGGAAGACGCCCTAAAAGACATCTTCCTTGCGGCACAGCAAAGTGCATCCTTCCCAAGCACAACCACAGACAGGCTGGTGGTGTACAGAGACCAAGAGACTGGATTGTTGGTCTGTGGGGGTCGTGTGCAGATCTTCAATGAAGATAAAGTTGCTGTCCCCATCTTGCCTTACGAAGCCTGGGTGTCGACACTGTTAGCACGAGAAGCCCACGAGGAGAACCACGATGGAGTGGCCGGGACCTTGCTCAAGATGAGAAGAAGAGCATGGGTCGTGAAAGGTCGGAGAATTGCTCAAAAAGTGGTCGAAAACTGCATGTTCTGCAGGAAAACTAAAGCAAAAAGATGCCAGCAAATAATGGGTGATCTACCTCCAGAGAGGACAGAACCAGCTGCCCCATTCCACTACACAACAGTCGACCTCTTCGGACCCTACCAAGTCAGGGATGATGTAAAGAAAAGAGTGTCACTGAAGGTTTGGGGAATTGTGTTTTGTTGCATGGCCTCCAGAGCTATTCACACCGAGCTGGTGAACTCTCAGTCCACAGAGAG
TTTTCTGTTTGCCTACCAGAGGTTCACAGCACTAAGAGGTCATCCAAAGAAAATCTGGTCAGATCCTGGGACCAACTTCATTGGGGCTAAGCCAGTCCTAGAAGAACAGTACCGATTCTTTGCCAATCTTGACAAAGCTACCCTGGAGGAGAGAGCTGCCAAGGATGGCACAGAATGGTCGTGGAAGATTCAACCAGCCGATTCTCCACACCGGAATGGTGCCGCAGAAGCTGCTGTGCGCATTGTCAAGAGAGCACTGCAGAGTCTTGGAGGAGAGTCCGGCCTAAGTTGGAGTGAATTCCAAACAACTCTCTACACGGCTGCCAATCTTGCAAATGAAAGACCAATTGACGCCAGGACACAGAGCCGAGAAGACAGCGTCCAGTTCATCACTCCTAACTGTCTCCTGCTCGGACGAGCATCACAGGGTGGAGATGTCAGAACATTCGACTTCAGTGACTATCCCTACAAAAGGCTTAAAGAAATGCAAGCGCAAGTTAACAAGTTCTGGAGGAACTGGAGTCAACTTGCTGGTCCTAACTTATTCGTAAGGAACAAATGGCATACCGCCAAGAGAAATGTCGCAGTCGGGGACATCGTCTGGATGGCGGACCAAAACGCCCTTAGGGGTCAGTTCAGGATTGCAAGAGTGGTTAGCGTCAACTCGGACAGCAAAGGAGTTGTGAGGGACGTCAATGTCAGAACCTTCCCAAGCTACCCCGTTCCTGTCACAAGGCCTACCGGAGCAAAAGTAAGTCACCGAACATCTAAGAAATTCAAAGAAAAGATCCCAGCAGCAGTTCTTCACAGAGACGTAAGGCGACTAGTGATCTTGCTTCCCACCGAAGAACAGAACTAAACATCTGACCCAAGTTGCTCCTGTAACATGCGACCTCCCAGGTGTCTCCACTGGAAGGTCGAGTGGGAGGTGTGAAGTCAACTTGGAAAATCCCTGCTGGATATGAAGGGGTTAATATGCAACGCCGACTGACAGCTGGGGGAATCCCTCCTCAGAGCGTTACCTGGGAAATCCACGTCATTGCGTCACCTTACCGGAAAGGACAGAAAGGGGAAGAGAGACTCTTTCAGGAAACGCCAGTTGCAAACAAATCAGACAGCGTTCGGTCCAGCAGCAGCAAAGCTGCGCTAGAGAACTTGGAAAAAGCAGCGCTGGTACAGCCATAAAAAGCCTGGAGAAACGGCCGGTTTTTTACCTTGCAGTATACCTAGCAGTTTACCACTTGCAACCCACTTACAACTGTACCCACAATACCTACCTGCTACAACTTACCTGTACCTGTGAAAATACAAAAAGAAAAGTAAAGAGGAGTTAAAGAAGGAAAACCGTGTGGTCATTGCATGAGTGGAACCCAGTTAAACCTCCTATGGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCG
ACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
CAGAATTCATGAAAATATGCTTAGACATTAAAAACTATTAATAATATTTATATTTATATTTATATATTTATAAGTATTTAAATGCATATATGAAAAACATGTTTGTATTTACTATTTCATTCTGCACTGCATTTACTGTAGTTATTTTTGGTTTCTTTAAAGGTTTTTTACCTTGCAGTATACCTAGCAGTTTACCACTTGCAACCCACTTACAACTGTACCCACAATACCTACCTGCTACAACTTACCTGTACCTGTGAAAATACAAAAAGAAAAGTAAAGAGGAGTTAAAGAAGGAAAACCGTGTGGTCATTGCATGAGTGGAACCCAGTTAAACCTCCTATGGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
CCCTGCTGTAAGTATTTCTTTCTTAAAGGTACAACATGTCTCTGGCCCGCCGAGTGTCATTGTAGGCCTACTTGTAAATGAAAATTAAAGCTTTCTATGGTGATTAACTTGTCTATTCTAGGTGGGAAACGCTTGTTTGCACTTGGTCAAGGAGATGGCCGAACATCAAGGAAAATATGTATTTCTTCTTGACCTCCACATCCCCCCAACATGGCTCCAATCGCCAAATTCAGATCATCTGCGCAGCTTTCCAGTTGACATCACCACCATGGATGCAGTGATTGTCCCACTGTGGACACCAGGGCATTTTCTGCTTAGTGGTTGTTGCGCAACAATTGATGCCTGGCACCTGGACCGAGGAGACTGGCTTGGACATTGAGGGTGTTCCACAACAACTATATGGCATCGACTGTGGAGTCTTTATGGTCATGTACTCCTGGTACATTACCATGGACGCACACTTCGATTTCAACGTTTTGGATATGCCTCATCTCAGGAGGTGGTGGTGCAAGCTCCTTTTGGACAACTACGGGATTGAAGGATGTGGCAAAAGATTCTGTCATTTCACCCAAGAAGGACACCAGATGGTGAATGGACTCCTGGCACCGGTGTTCCGGGTAACACGGAAGCGGAAGGTTTTGACCAAGGCAGATGATGTTTTCCTGAAGGACACAATTGAGGCAGCAGCATGGTGCCAGTTGCAGACCTTCACCGACCACGTTTCTCTCCCGATGGTCATTGGGGTGGAAGGTGCAGAGCAGCAGGCATTGTTAGCAGAGCTGAAATCCGTTGACCGCAGTTGCCCGGAAGAAAGCCTAAACAGGATTGAACCGTTCCAGTTCTTCTTCAATTCAAAGAAGGACTACGAAATGTTCTGTGTGGAGATGTTTGACCGGAGAAAGCTGAAAGTGTTTGCTTATTGGGAGTAGCGACATTTTACCCATTGTTTTAATTGAACTGGGGGACCGGGGGCCGATTCTTTATGTTATCATTTATATAACCAGTTCTAAAACCATTTTCATTTGTCTTTATAAATAAAATGATGTC
5 AAGCGTTGTGTGTCGCTGAAGCAACATCCAGTGCCAGGTCTGACTGAAGCTAGCTGGTTAGCATAGCTGACCTATGTATGCTTTGTATGCATTATGAGCGTTAGCCTAAAGGGTAACAGCAGTAATCTGGTCTGATTACAGGCTATTTATAAATCTCTCCCCTCGACCCCAAACTAACCTCGCTGACCACTAGCAGCAGGATCTGAACCATTTTTCCCTTTTCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAGTTTCTATATTGATCGAAAATGGAGGCTCTGGAAACACCTTTAACTCTGTTTCTCTGTTTCAAACTGTGAAATAGGGAGGTCTTTTAGGAGAGGAGAAGGGGGAGGTGTGAGTGGAAGAGGAGGAGAGGGGTGGAAAGAGAAAGAGGGAAAGGTGAGTTCTGGAGGAGTAGAGGATTAGAAGAAGGCCTTCAGGTCCCCATGCTAGTGCGGTGAGGTGAGGGAGCAGGGGGATGGCAGGAGGAGTAAGAGGCTGCATCATGCAGTCCACCCCTTGCAGCACTAGAACCTCCTCCTCTGCCCTTCTGATCCAGTGGAACAACGAGGTGATGATCTACCAGCCAACCGTGACCCTGCCTCCACGAGCCTACGGAATGAGACATGTTTTGATGCAGGTTATGAGAGTTCGGACTTCGGATGAACACATATAAACCGTCTCTCAACTCCTCTGGCCCCCCTCTTCTTACCTCTCCTCTCCTCCCTTCCCATCACCCACCATCACCGCCTTCTCTTCACAGGAGATGGACACAGCAGATCTGACTGGACAGAGCACATCTCCTCCTCCCCTCCTCCTATGGGATTGCGCTGGTGTGGGGGGTGGAGGACAAAGTTCCTCTCCTCTCCGCCTCCACCCCTCCTACTATAGAGTTGTGCTGGTGTGTAGGACAGAGTTAACAGACAGGATTACAGCCAGAGGACGGCAGAGGCAGTGAAGGAGAAAATAAAGGATTTTCTCAGAGGAGGGAAAGCCAAGGAAAGATTGACCAAACAGAACTCTGGGAGAATGAACATGGAACCTTTGAGCTGAACTGAAGCAGAGTGATTTCGACCTTAGCAAGTCAGCGTTTGGACAGGTATAACGCTGGTCTGTGGTGCTAGGGGGCTGACCAGGGCCACCGGGGCCCCTGTGTCCAGAGCGTGGTGCCTGGGAGCCACTGCCAGGGGTGATGGGAACAAGACGATCCAGCAGGTTCAGCCTGGCGTCCGTCGTCCTCCAAGAAGACGAGCACCGTAAGGTCTCCAGCCAGGGTCTCCTGAACGGCCACGACTCCCCGCCGCGCTCCTCCACCACCTCCTCCTTGGCCACCGGCGGGAGGGAGGAGGAGCAGCAGCGGGGAGGGATGAGCGGCTCCAGCGTCCGGGGGGGCCCGACGCGGGGCCCCTCCGGACCGCCGCGGAGCCGCTTCGTGAAGAAGAGCGGCCACTGCAACGTGGCGTTCAGCAACCTGGAGGACCGGAGCCAGCGCTACCTGGCCGACCTCTTCACCACCTGCGTGGACGTCCGCTGGCGCCACCTGCTGCTGCTCTTCTGCGTCAGCTTCCTGCTCTCCTGGCTCTTCTTCGGGCTCGTCTTCTACCTGGTGTCCCTGGCCCACGGGGACTTCCAGGACCCGTCTGACGTGTCGCCGGCGGGAGCGACCCAGGGCCCGGGGGGCCGGAGGGAGCGCACGCCGTGCCTCCTCCACGTGCACGGCCTCCTGGGCGCGCTCCTGTTCTCCATGGAGACCCAGACCACCATCGGCTACGGCTGGCGCTGCGTGACGGAGGAGTGCCCGGTGGCCGTGGCGACGGTGGTGGTGCAGTCGGTCGTGGGCTGCATCCTCGACTCCTTCATGATCGGCACCATCATGGCCAAGATGGCGCGGCCCAAGAAGCGGAACCAGACGCTG
ATGTTCTCGCGGAACGCCGTGATCGCGCTGCGCGACGGCCGGCTCTGCCTCATGTGGCGGGTGGGGAACCTGCGGCACTCGCACATCGTGGAGGCCCACGTGCGCGCCCAGCTCCTCCGGCCCTACGTCACGGAGGAGGGCGAGTTCGTCCCCCTGGAGCAGATGGACCTCAACGTGGGCTACGACGACGGCACCGACCGCATCTTCCTCGTGTCGCCGCTCGTCATCGTCCACGAGATCGACAAGGACAGCCCGCTGTACTCGCTGAGCCGGGCCGACCTGGAGGCGGAGCACTTTGAGATCGTGGTCATCCTCGAGGGCATGGTGGAGGCTACGGCCATGTCCACCCAGTTCCGCAGCTCCTACCTGGCCCGCGAGGTCTTCTGGGGCCACCGCTTCGAGCCCGTGATCTGCGAGGACCGCGACCGCTACAGGGTGGACTACGCGCGCTTCCACCAGACCTACGAGGTGCCGTCCACGCCGCACCTCAGCGCCAAGGAGCTGGACGAGGCCGCCAGCCGGCCCCCCTCCGCCAGGACGACGCGACCGCCACCGTCGGCCAAGGAGGCGCCGAGCTCGTTCTGCTACGACAACGAGGTGGCGCTGATCTGCGGCGAGGACGACGACGAAGACGATGACGACGACGAGAACGACATCTTTGACTTGCGCCGGACACTGTCGTCGCCGGGGGGGAGGCGGGAGGAGAGGAGGACCTCGGTGACCGTGGACCTCCCGAAGGCGGGACAGGACTCGGCTGCCTTGACAACGGGCCGCCAGAGCTTGATGTGCGTGCTGGACATGGACAACCACCAGAGGGAGTTCGATATGCTGCAGACGGCAATTCCGCTCGATCCGCAGTCCTACAAGAGCGAGCAGGAGATGTAGAGGTCTGAGGGACACTGGACGGGGAAGACTGCTCGGGGGTTGTTCTCAGCCTTGCTTCTGTTGGTTGATGTTTGGGGTTGGAGAGTGGGACATATCTATTTTACTGTATGGTGGAGAAACTAGAAAACGGATTGATTATTAAACCACGATGAGCCGTATTGAGGCAGTGGTCATTCAAGAAATGAAATTTTGCTGAGAAGTGAACTTGATTCCGACTGAATAGTCATTCTCCATCACATAAAAACCTCATGACAACAACATTAAGACAAGATACACATGACGGTTGTCTGACTAATGCTCAAATGCTCCTCTATTTCCAAGTCCCTGAGAGGCCAAAATGTGAGGACTGAAAACCGTTGTTCTTGAACTTGAATGTGTGATGATTCTGTGTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTCTCTTCTTCAAATTGCTATTATATTAATATTTTAGTTTTCTTGAGGGTTACAAATATTCAGGTAGTACAAATTGCTAAACATCCACATTAAGCACAACAATAACTTGATGTTGATCATATCATGAGAGCACAGTCACTTTGCTATCATGACAGATGCTACTGTTATGGTTTAGTATTGATTTTTGATATAGCATACTACTGTAAGTGCTCACTAATGTCCCTGCTTATGCACAAAGCCCGGGTTGAGAAGCCTATTTCTGACCAAATGCTTAAAATAAAAAATTACAAAGCACTTTAATTGTTATTGTTAATAAATATTTACACAAATTAACACCCTATAGAAAAAAAATGTTAGGTGACGGCGATGGAGTTTTCAGACCTCTAGTGCCCTCTATTGGTGGTGAGGATTAACTGCTTATTTGTGTTGGTTGGAGAAAATCAGGAAATACACTTCACACAGCAGCTGTGTACAGCAGCTGGACAGACGGCTTTTAACCAGAAGACAGGGGTTGTTTTTGTTATTTGATTTGCTTCAGTGCCGCGAGCACGAGCCTATGGCCCATTTCCCCAGGACAGCTCACTTTAGTTACCCATTAATAGAATAGACAATATGTGATGTTAATAGAATGGAT
TGATCAGAAGTATCGCGATGTTAACTTTGGGGTCAGAACTACAGACAGCAAAAATAGGTCAGGGCCGACTATCATCAAATTGTTTTGGCTGAAGTCACACACAAAGTATCAATCAG
6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
AGGTCACGTGCGCCACGTCATTAGCACGCTGCACGGCCACAGCGTGCGGAGAGTTTTACCGGAGGGAGGGATGATGTAGCACGCGGGCCAGTCGGTTGACAGTTCTCTGCTTGGCTCGGCTCTAAGAGACACGGCCGTACCGGAGTGGACCTGAATACCGTGCATCCAGGCCAGACAGGGTGCGGATATATGGGCGAATCATCTAATGAATTAGTGAATTAAGCCAGCAGGCCGACGGTCTAAATTCTGGTCGGGTGGTGCTCGGCGGTTAAGGCAGAAGTTTTTACACCGGATAATATCGACTCCATTCCGCGGTGGAATCACAGGGTTGTCGTGAAGGTGCTGGTGCTGGAGCATGCTGGTAGCAGCAGCTGACCAGACCTTCTCATGTCCAGGCTGTGGATCAGAAGGGACCTTCAACTCCGTCCTGGACCTCCAAAACCACCTGGTCAACACACACACCTATCAGACCCTGCTCGGTCTATCCAAGGTGCGTGCCAGGAGCTCTACACCAGGGTTCCTCCTGCGCCTCCCTGGTCCGACTGTCTCCCAGGGACAGAGCTCCTCCCTGGGCATGGAGTCCACCAGAGACCCCCTGCCTCTGGCCGGTCTGGACCTGGCCTCCTCCACCGCCTCCACCCAGCTTCTCAGGGTGATGTTAGGGGCCGCAGGAGGATCTTCTCTGGGAGTCCCCCAGGACCCCTCCAGGGCTCTGGCTCTGCCTAGCCCTACAGGCCTCACATCCACTGCCTTTTTAGCTCTGGAGGACCATATAGGCCTCAGGAGGAGCCTGGGGCTGGAGTTGGGGTATCCCCCAGTGGAGTGCTTCTCCGTGGGGCCAGGCCTGGAAGAAAGACTAGAGCTGAGGCTGGACATGCAGGTGGCCACCGCCGTAGCCGAGTTGGAGGAGAGGGTGAGGGGGCGTGTCCATCACCTGAAGGCGGAGCTACAGGAAAGAGAGGCGGAGCTAGAGCGAGAGAGGAGGAAGGGAGAACGTCTCGTGAGAGAGAAGGACGAAGTGGAGGAGAGAGCGGCGTACCTGTCCAGACAGGCTTCCATAGCCATGGAGATGATGGAGGGAGTAAAGCGAGAACTGAAGGGCAAAGAGGACGAACTGGCCAAACGAAAACAGGATATGCATCAGGTGCAGGTGTTCCTGAGAGATACAGCAGAGAAGGAGGCAGAAGCTAAAATGAAACTACAGATGTTTATGGAGTCGTTACTCGATCGAGCCGACCATGCAGAGAGACAGCTGCTGCAGATCGCACCCGGTCACACGCACCCGCAACGACACGTACACACACCGATGTACACACACACTCCGGGACACACACACAGAGGTGTCTCCTCACCTGTGTGGGGTCGCGCAGGGCGGAGCTTGGATGGCAGTGTGGAGGACATGCTTGGAGCCAGGTCGCCGGTAACCATGGCAACTCAGAGGAGTTACAGTGTTTCTGGATCTTATAGACTTGGAGACCAACTCTACAACCACCATCCATACAACGACTGGGCTGGAGGGAACCGCTGGGTGAACAGCTACCATCGTTACCACAGTACCGAGGAGGAGAAGGAGGAGGAAGAGGATGACGAAGACGACGAGGAGCAGATATGGAACACACCTGAGATGATAAGACGGACTGCTGCACCAGATTTGTCTCCCTCCTCTAATGGTTGCCATAGCACCCATTGCCTGGGGGTGGAGACTCTACGGTTGAGGGCGGGGCTTTTTTGTGTCTTCCCATATTTGGACGTTGCTTCCCTGCTGCATGCGGCTGAGGTGTGCACTGATTGGAGGTGCGTTGCTAGGCACCCGGCTGTATGGACACGCCTACTTCTGGAGAACGTCACGGTGTCCACCAAGTTCCTGGTCACCCTGTCTCAGTGGTGTACACAGACCCGGTCCCTGGTTCTGAAGAACCTGAGAGGCAGAACCAGACGACCTGGAGAGAGCAGAGAGGACTACCAGACCCTCAAACGGGGCTGCCTGGAGGAAGGGGTGG
AGGCTGTCTTGCGCTCAGCGGGGGGCAGTCTGCTCTACCTGTCTGTCTGTCAGTGTTCCAATGTGCTGACAGACAGGTCCCTGTGGCTGGCCAGCTGCTACTCCCCCAACCTACACACCATCACGTACAGGAGTCCCGGTGAGGGGGTGGGTCAGGAGGTCTTGTGGGCGCTGGGAGCCGGCTGCAGAACCATAGCTCACATGAAGTTCACCCCCCTAAACCCCAGCCAGCAGCCTCATCGCCTTGGCAACCGTAGCCTGCAGACCATCGGTCGGTGTTGGCCGGACCTGCGCTCCCTCAGCGTGGGCGGGGCCGGGTGTGGCACTCAGGGATTGGCTGCTGTGGTGCGCAGCTGTGTGTGTCTATTAGAGCTGGAGTTGGAGTGCGTTTCAAAGGTCGACCTGAAGGTGGCGACAGAGCTCTGTAATAACGGACTCACCAACTTGGAGACTTTGACGTTGACGCACACTGCCATCACTGAGGAAGCTATACTGCACTTTCAAAGTAAATGTGTTAATCTCAGGTCCATGGTGGTGCTGATGAGGAAGAGTCATGCTAACGAAGGTTCGCTTGAGGAGGACAGCGTGTTCAGAGATAACCTGGAGGCTCTTAAGGTTCTAACCCGGTCTCCAGGTCTTTGTGGCATCCTGCAGGTCAAAGAAGAATACTGAACTCTTCAAAACACGCACACGCACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGACAATCGGTTATCAATGATAAGTGTAATAAATGTCTTGTTTCTA
        geneID
1 LOC132473465
2 LOC132445431
3 LOC132445431
4 LOC132445431
5 LOC132475957
6 LOC132445432