Read in Cross-Species Methylation miRNA Comparison Data
# Load the cross-species methylation/miRNA comparison table directly from the
# remote server; readr infers the column types and reports them on load.
biomin_csv_url <- "https://gannet.fish.washington.edu/v1_web/owlshell/bu-github/ConTra/output/biomin_comparison_20251130_052506/cross_species_methylation_mirna_comparison.csv"
biomin_data <- read_csv(biomin_csv_url)
Rows: 611 Columns: 15
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): og_id
dbl (11): n_species, apul_context_strength, apul_improvement, apul_r2, peve_...
lgl (3): apul_context_dependent, peve_context_dependent, ptua_context_depen...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact overview of the table: one line per column with its type
# and leading values.
dplyr::glimpse(biomin_data)
# Identify the context-dependence flag columns: names containing "context"
# (case-insensitive) whose values are not numeric. The name match alone also
# catches numeric metrics such as `apul_context_strength`, which must not be
# treated as TRUE/FALSE flags by the steps that follow.
context_cols <- Filter(
  function(col) !is.numeric(biomin_data[[col]]),
  grep("context", names(biomin_data), ignore.case = TRUE, value = TRUE)
)

# List the columns under the header (sprintf() yields nothing when the
# vector is empty, so no stray blank entry is printed).
cat("Context-related columns found:\n")
cat(sprintf("  %s\n", context_cols), sep = "")
# Inspect the distinct values stored in each context column, then keep the
# rows where at least one context-dependence flag is TRUE.
if (length(context_cols) > 0) {
  for (col in context_cols) {
    cat("\nUnique values in", col, ":\n")
    print(unique(biomin_data[[col]]))
  }

  # A flag is set when it is logical TRUE or the text "TRUE"/"true". Numeric
  # columns whose names also contain "context" (e.g. `apul_context_strength`)
  # are metrics, not flags, and never count. `%in%` returns FALSE for NA, so
  # missing values are treated as "not set".
  # The summary paragraph reads `context_true_data` for its quantitative
  # highlights, so it has to be created here.
  context_true_data <- biomin_data %>%
    filter(
      rowSums(across(all_of(context_cols), function(x) {
        !is.numeric(x) & as.character(x) %in% c("TRUE", "true")
      })) > 0
    )
}
# Species-specific context columns: drop any aggregate column whose name
# marks it as "shared" or "common" rather than belonging to one species.
species_context_cols <- context_cols[
  !grepl("shared|common", context_cols, ignore.case = TRUE)
]

# Same table with the OG id and context columns moved to the front.
species_summary <- biomin_data %>%
  select(og_id, all_of(context_cols), everything())

# Count, for each OG, how many species flag it as context-dependent.
if (length(species_context_cols) > 0) {
  # Vectorised over whole columns instead of rowwise() + c_across(), which
  # re-evaluated the comparison three times per row. A flag is set when it is
  # logical TRUE or the text "TRUE"/"true"; numeric columns that merely have
  # "context" in their name (e.g. `apul_context_strength`) contribute 0, and
  # NA counts as "not set".
  og_species_count <- biomin_data %>%
    mutate(
      species_true_count = as.integer(rowSums(
        across(all_of(species_context_cols), function(x) {
          !is.numeric(x) & as.character(x) %in% c("TRUE", "true")
        })
      ))
    )

  # OGs with TRUE in exactly one species (species-specific)
  single_species_ogs <- og_species_count %>%
    filter(species_true_count == 1)

  # OGs shared across two species
  two_species_ogs <- og_species_count %>%
    filter(species_true_count == 2)

  # OGs shared across three species
  three_species_ogs <- og_species_count %>%
    filter(species_true_count == 3)

  cat(
    "OGs with context-dependent = TRUE in exactly 1 species:",
    nrow(single_species_ogs), "\n"
  )
  cat(
    "OGs with context-dependent = TRUE in exactly 2 species:",
    nrow(two_species_ogs), "\n"
  )
  cat(
    "OGs with context-dependent = TRUE in all 3 species:",
    nrow(three_species_ogs), "\n"
  )
}
OGs with context-dependent = TRUE in exactly 1 species: 205
OGs with context-dependent = TRUE in exactly 2 species: 32
OGs with context-dependent = TRUE in all 3 species: 2
# Tabulate the OGs flagged in exactly two species: OG id, the context
# columns, and every numeric metric. Skipped when the subset was never
# built or is empty.
if (exists("two_species_ogs") && nrow(two_species_ogs) > 0) {
  cat("\n=== OGs Shared Across Two Species ===\n")
  two_species_summary <- select(
    two_species_ogs,
    og_id, all_of(context_cols), where(is.numeric)
  )
  print(two_species_summary)
}
# Tabulate the OGs flagged in all three species: OG id, the context columns,
# and every numeric metric. Skipped when the subset was never built or is
# empty.
if (exists("three_species_ogs") && nrow(three_species_ogs) > 0) {
  cat("\n=== OGs Shared Across All Three Species ===\n")
  three_species_summary <- select(
    three_species_ogs,
    og_id, all_of(context_cols), where(is.numeric)
  )
  print(three_species_summary)
}
# Generate the dynamic summary paragraph (markdown written with cat()).
# Reads `biomin_data` and `context_cols`, and, when they exist,
# `two_species_ogs`, `three_species_ogs` and `context_true_data`.
if (exists("context_cols") && length(context_cols) > 0) {
  # TRUE where a context flag is set: logical TRUE or the text "TRUE"/"true".
  # Numeric columns that merely have "context" in their name (e.g.
  # `apul_context_strength`) are metrics, not flags, so they never match;
  # `%in%` returns FALSE for NA, so missing values are "not set".
  is_flag_true <- function(x) {
    !is.numeric(x) & as.character(x) %in% c("TRUE", "true")
  }

  # Number of flagged OGs per context column (named by column).
  # vapply() guarantees an integer vector whatever the input looks like.
  species_counts <- vapply(
    context_cols,
    function(col) sum(is_flag_true(biomin_data[[col]])),
    integer(1)
  )

  # Ids of the flagged OGs per context column (named by column).
  species_ogs_list <- lapply(context_cols, function(col) {
    biomin_data[["og_id"]][is_flag_true(biomin_data[[col]])]
  })
  names(species_ogs_list) <- context_cols

  # Build paragraph
  cat("\n## Cross-Species Context-Dependent Methylation and miRNA Analysis\n\n")
  cat("This analysis examined orthogroup (OG) patterns across species for context-dependent methylation and miRNA associations. ")

  # Species-specific findings: one sentence per column with any flagged OG,
  # naming at most the first five OG ids.
  for (col in context_cols) {
    # Strip the "_context..." / "context_dependent_" / "_methylation..."
    # parts of the column name to leave the species label.
    species_name <- gsub(
      "_context.*|context_dependent_|_methylation.*", "", col,
      ignore.case = TRUE
    )
    ogs <- species_ogs_list[[col]]
    if (length(ogs) > 0) {
      cat(
        "**", species_name, "** showed context-dependent patterns in ",
        length(ogs), " OGs (", paste(head(ogs, 5), collapse = ", "),
        if (length(ogs) > 5) ", ..." else "", "). ",
        sep = ""
      )
    }
  }

  # Shared patterns: OG ids are spelled out only for short lists (<= 10).
  if (exists("two_species_ogs") && nrow(two_species_ogs) > 0) {
    cat("\n\nA total of **", nrow(two_species_ogs), " OGs** exhibited context-dependent patterns shared across exactly two species", sep = "")
    if (nrow(two_species_ogs) <= 10) {
      cat(" (", paste(two_species_ogs$og_id, collapse = ", "), ")", sep = "")
    }
    cat(". ")
  }

  if (exists("three_species_ogs") && nrow(three_species_ogs) > 0) {
    cat("Notably, **", nrow(three_species_ogs), " OGs** showed conserved context-dependent patterns across all three species", sep = "")
    if (nrow(three_species_ogs) <= 10) {
      cat(" (", paste(three_species_ogs$og_id, collapse = ", "), ")", sep = "")
    }
    cat(", suggesting evolutionary conservation of these regulatory mechanisms. \n")
  }

  # Quantitative highlights: mean and SD of the first three numeric columns
  # of `context_true_data`, when that table exists and has rows.
  if (exists("context_true_data") && nrow(context_true_data) > 0) {
    # vapply() keeps this a logical vector even for a zero-column table,
    # where sapply() would return list() and break the indexing.
    numeric_cols <- names(context_true_data)[
      vapply(context_true_data, is.numeric, logical(1))
    ]
    if (length(numeric_cols) > 0) {
      cat("\n\nQuantitatively, context-dependent OGs showed the following characteristics: ")
      for (nc in head(numeric_cols, 3)) {
        mean_val <- mean(context_true_data[[nc]], na.rm = TRUE)
        sd_val <- sd(context_true_data[[nc]], na.rm = TRUE)
        cat(
          "**", nc, "** (mean ± SD: ", round(mean_val, 3), " ± ",
          round(sd_val, 3), "); ",
          sep = ""
        )
      }
    }
  }

  cat("\n")
}
Cross-Species Context-Dependent Methylation and miRNA Analysis
This analysis examined orthogroup (OG) patterns across species for context-dependent methylation and miRNA associations. apul showed context-dependent patterns in 47 OGs (OG_01452, OG_02190, OG_06616, OG_06592, OG_10121, …). peve showed context-dependent patterns in 69 OGs (OG_06119, OG_07153, OG_01753, OG_04467, OG_07150, …). ptua showed context-dependent patterns in 159 OGs (OG_02619, OG_06119, OG_05637, OG_07153, OG_01414, …).
A total of 32 OGs exhibited context-dependent patterns shared across exactly two species. Notably, 2 OGs showed conserved context-dependent patterns across all three species (OG_06592, OG_04243), suggesting evolutionary conservation of these regulatory mechanisms.