---
title: "Context dependent DNA methylation :: miRNA regulation of gene expression"
description: "ConTra: cross-species summary of context-dependent DNA methylation and miRNA regulation"
categories: ["Epigenetics", "Transcriptomics"]
#citation:
date: 2025-12-01
image: https://gannet.fish.washington.edu/seashell/snaps/Monosnap_Image_2025-12-01_19-30-14.png # finding a good image
author:
  - name: Steven Roberts
    url:
    orcid: 0000-0001-8302-1138
    affiliation: Professor, UW - School of Aquatic and Fishery Sciences
    affiliation-url: https://robertslab.info
#url: # self-defined
draft: false # setting this to `true` will prevent your post from appearing on your listing page until you're ready!
format:
  html:
    code-fold: FALSE
    code-tools: true
    code-copy: true
    highlight-style: github
    code-overflow: wrap
#runtime: shiny
---
```{r setup, include=FALSE}
# Make every chunk echo its source code in the rendered output by default
knitr::opts_chunk$set(echo = TRUE)
# Attach the tidyverse; later chunks call read_csv(), glimpse(), filter(),
# summarise(), across() and pivot_longer() from it
library(tidyverse)
```
# Read in Cross-Species Methylation miRNA Comparison Data
```{r read-data}
# Remote location of the cross-species methylation / miRNA comparison table
comparison_url <- "https://gannet.fish.washington.edu/v1_web/owlshell/bu-github/ConTra/output/biomin_comparison_20251130_052506/cross_species_methylation_mirna_comparison.csv"

# Load the table into `biomin_data`, which every later chunk reads
biomin_data <- read_csv(comparison_url)

# Show each column with its type and first few values
glimpse(biomin_data)
```
```{r view-columns}
# List all column names so the context-dependent flag columns can be spotted
names(biomin_data)
```
# Summary of Context Dependent Groups
```{r context-dependent-summary}
# Keep the column names that mention "context", ignoring letter case.
# `context_cols` is reused by every later chunk.
all_columns <- names(biomin_data)
context_cols <- all_columns[grepl("context", all_columns, ignore.case = TRUE)]

cat("Context-related columns found:\n")
print(context_cols)
```
```{r filter-context-true}
# Show the distinct values stored in each context column, one column at a time,
# so the TRUE/FALSE encoding can be checked before filtering.
# Looping over an empty `context_cols` does nothing, so no length guard is needed.
for (flag_col in context_cols) {
  cat("\nUnique values in", flag_col, ":\n")
  print(unique(biomin_data[[flag_col]]))
}
```
```{r summarize-context-dependent}
# Collect the rows flagged in at least one context column into
# `context_true_data`; when an `og_id` column exists, also report the row count
# and the mean of every numeric column per og_id.
if (length(context_cols) > 0) {
  # A cell counts as flagged when it compares equal to TRUE, "TRUE" or "true"
  context_true_data <- filter(
    biomin_data,
    if_any(
      all_of(context_cols),
      function(flag) flag == TRUE | flag == "TRUE" | flag == "true"
    )
  )
  cat("Number of rows with context dependent = TRUE:", nrow(context_true_data), "\n\n")

  if (!("og_id" %in% names(context_true_data))) {
    # No identifier column to group on: show the first rows instead
    cat("Column 'og_id' not found in data\n")
    print(head(context_true_data))
  } else {
    # One output row per og_id; numeric means are named mean_<column>
    context_summary <- summarise(
      group_by(context_true_data, og_id),
      n_records = n(),
      across(
        where(is.numeric),
        function(v) mean(v, na.rm = TRUE),
        .names = "mean_{.col}"
      ),
      .groups = "drop"
    )
    print(context_summary)
  }
}
```
```{r detailed-view}
# Print the flagged rows, provided an earlier chunk created a non-empty
# `context_true_data`
if (exists("context_true_data")) {
  if (nrow(context_true_data) > 0) {
    cat("\nDetailed view of context-dependent records:\n")
    print(context_true_data)
  }
}
```
# Species-Specific and Shared Context-Dependent OGs
```{r species-analysis}
# Classify each OG by the number of species that flag it as context dependent.
#
# Creates `og_species_count` (all input rows plus `species_true_count`) and its
# subsets `single_species_ogs`, `two_species_ogs` and `three_species_ogs`,
# which later chunks look up with exists().

# Per-species flag columns: context columns whose name does not mention
# "shared" or "common". Only these are counted as species below, so an
# aggregate column can no longer inflate the per-OG species count.
species_context_cols <- context_cols[
  !grepl("shared|common", context_cols, ignore.case = TRUE)
]

# Same data with the identifier and flag columns moved to the front.
# any_of() keeps this from erroring when `og_id` is absent.
species_summary <- biomin_data %>%
  select(any_of("og_id"), all_of(context_cols), everything())

if (length(species_context_cols) > 0) {
  # Column-wise comparison followed by rowSums() replaces the rowwise()/
  # c_across() version: it is vectorised and does not need the flag columns to
  # share a single type. A cell counts when it compares equal to TRUE, "TRUE"
  # or "true"; NA cells are ignored.
  og_species_count <- biomin_data %>%
    mutate(
      species_true_count = as.integer(rowSums(
        across(
          all_of(species_context_cols),
          ~ .x == TRUE | .x == "TRUE" | .x == "true"
        ),
        na.rm = TRUE
      ))
    )

  # OGs with TRUE in exactly one species (species-specific)
  single_species_ogs <- og_species_count %>%
    filter(species_true_count == 1)

  # OGs shared across two species
  two_species_ogs <- og_species_count %>%
    filter(species_true_count == 2)

  # OGs shared across three species
  three_species_ogs <- og_species_count %>%
    filter(species_true_count == 3)

  cat("OGs with context-dependent = TRUE in exactly 1 species:", nrow(single_species_ogs), "\n")
  cat("OGs with context-dependent = TRUE in exactly 2 species:", nrow(two_species_ogs), "\n")
  cat("OGs with context-dependent = TRUE in all 3 species:", nrow(three_species_ogs), "\n")
}
```
```{r species-specific-details}
# For each context column: report how many rows are flagged, then print the
# identifier, the flag column and all numeric columns for those rows
for (flag_col in context_cols) {
  # Strip the context/methylation parts of the column name to get a short label
  species_name <- gsub("_context.*|context_dependent_|_methylation.*", "", flag_col, ignore.case = TRUE)

  # Evaluate the flag once; NA cells are neither counted nor selected
  flag_values <- biomin_data[[flag_col]]
  is_hit <- flag_values == TRUE | flag_values == "TRUE" | flag_values == "true"

  true_count <- sum(is_hit, na.rm = TRUE)
  cat("\n", species_name, "- OGs with context-dependent = TRUE:", true_count, "\n")

  species_ogs <- select(
    slice(biomin_data, which(is_hit)),
    og_id, all_of(flag_col), where(is.numeric)
  )

  if (nrow(species_ogs) > 0) {
    print(species_ogs)
  }
}
```
```{r shared-ogs-summary}
# Print identifier, flag and numeric columns for the two-species and
# three-species OG tables, when an earlier chunk created them and they have rows
if (exists("two_species_ogs")) {
  if (nrow(two_species_ogs) > 0) {
    cat("\n=== OGs Shared Across Two Species ===\n")
    two_species_summary <- select(
      two_species_ogs,
      og_id, all_of(context_cols), where(is.numeric)
    )
    print(two_species_summary)
  }
}

if (exists("three_species_ogs")) {
  if (nrow(three_species_ogs) > 0) {
    cat("\n=== OGs Shared Across All Three Species ===\n")
    three_species_summary <- select(
      three_species_ogs,
      og_id, all_of(context_cols), where(is.numeric)
    )
    print(three_species_summary)
  }
}
```
```{r quantitative-summary}
# Mean, SD, minimum and maximum of every numeric column across the flagged
# rows, printed as one row per <column>_<statistic>
if (exists("context_true_data") && nrow(context_true_data) > 0) {
  numeric_cols <- names(Filter(is.numeric, context_true_data))

  if (length(numeric_cols) > 0) {
    cat("\n=== Quantitative Summary of Context-Dependent OGs ===\n")

    # All four statistics ignore missing values
    stat_fns <- list(
      mean = function(v) mean(v, na.rm = TRUE),
      sd = function(v) sd(v, na.rm = TRUE),
      min = function(v) min(v, na.rm = TRUE),
      max = function(v) max(v, na.rm = TRUE)
    )
    quant_summary <- summarise(
      context_true_data,
      across(all_of(numeric_cols), stat_fns, .names = "{.col}_{.fn}")
    )

    # Reshape the single wide row into metric/value pairs for readability
    quant_summary_long <- pivot_longer(
      quant_summary,
      everything(),
      names_to = "metric",
      values_to = "value"
    )
    print(quant_summary_long)
  }
}
```
# Summary Paragraph
```{r generate-paragraph, results='asis'}
# Emit a markdown summary (the chunk uses results='asis') built from the
# objects created by earlier chunks: per-column OG lists, the two- and
# three-species tables, and mean/SD of the first three numeric columns.
if (exists("context_cols") && length(context_cols) > 0) {
  # og_id values flagged in each context column, keyed by column name
  species_ogs_list <- setNames(
    lapply(context_cols, function(flag_col) {
      flagged_rows <- filter(
        biomin_data,
        .data[[flag_col]] == TRUE | .data[[flag_col]] == "TRUE" | .data[[flag_col]] == "true"
      )
      pull(flagged_rows, og_id)
    }),
    context_cols
  )

  # Number of flagged rows per column, taken from the lists just built
  species_counts <- lengths(species_ogs_list)

  # Heading and opening sentence
  cat("\n## Cross-Species Context-Dependent Methylation and miRNA Analysis\n\n")
  cat("This analysis examined orthogroup (OG) patterns across species for context-dependent methylation and miRNA associations. ")

  # One sentence per column that has flagged OGs, listing at most five ids
  for (flag_col in context_cols) {
    species_name <- gsub("_context.*|context_dependent_|_methylation.*", "", flag_col, ignore.case = TRUE)
    ogs <- species_ogs_list[[flag_col]]
    if (length(ogs) == 0) {
      next
    }
    og_preview <- paste(head(ogs, 5), collapse = ", ")
    og_ellipsis <- if (length(ogs) > 5) ", ..." else ""
    cat("**", species_name, "** showed context-dependent patterns in ", length(ogs), " OGs (",
        og_preview, og_ellipsis, "). ", sep = "")
  }

  # OGs shared by exactly two species; ids are spelled out only for short lists
  if (exists("two_species_ogs") && nrow(two_species_ogs) > 0) {
    n_two <- nrow(two_species_ogs)
    cat("\n\nA total of **", n_two, " OGs** exhibited context-dependent patterns shared across exactly two species", sep = "")
    if (n_two <= 10) {
      cat(" (", paste(two_species_ogs$og_id, collapse = ", "), ")", sep = "")
    }
    cat(". ")
  }

  # OGs shared by all three species; ids are spelled out only for short lists
  if (exists("three_species_ogs") && nrow(three_species_ogs) > 0) {
    n_three <- nrow(three_species_ogs)
    cat("Notably, **", n_three, " OGs** showed conserved context-dependent patterns across all three species", sep = "")
    if (n_three <= 10) {
      cat(" (", paste(three_species_ogs$og_id, collapse = ", "), ")", sep = "")
    }
    cat(", suggesting evolutionary conservation of these regulatory mechanisms. ")
  }

  # Mean and SD for the first three numeric columns of the flagged rows
  if (exists("context_true_data") && nrow(context_true_data) > 0) {
    numeric_cols <- names(Filter(is.numeric, context_true_data))
    if (length(numeric_cols) > 0) {
      cat("\n\nQuantitatively, context-dependent OGs showed the following characteristics: ")
      for (nc in head(numeric_cols, 3)) {
        column_values <- context_true_data[[nc]]
        mean_val <- mean(column_values, na.rm = TRUE)
        sd_val <- sd(column_values, na.rm = TRUE)
        cat("**", nc, "** (mean ± SD: ", round(mean_val, 3), " ± ", round(sd_val, 3), "); ", sep = "")
      }
    }
  }

  cat("\n")
}
```