# FGSEA analysis
library(dplyr)
library(ggplot2)
library(fgsea)
# Step 1: Compute a combined score using avg_log2FC and p-value
# The score is signed by the direction of change (avg_log2FC) and weighted by
# significance (-log10 of the p-value).
ranked_genes1 <- sheet4_data %>%
  # Rows with a missing gene name, fold change or p-value cannot be ranked.
  dplyr::filter(!is.na(gene), !is.na(avg_log2FC), !is.na(p_val)) %>%
  dplyr::mutate(
    # A p-value of exactly 0 would give -log10(0) = Inf (and 0 * Inf = NaN),
    # and fgsea requires finite statistics, so floor p-values at the smallest
    # positive double before taking the logarithm.
    combined_score = avg_log2FC * -log10(pmax(p_val, .Machine$double.xmin))
  ) %>%
  # The stats vector is keyed by gene name, so each gene may appear only once;
  # when a gene is repeated, keep the row with the strongest absolute score.
  dplyr::arrange(dplyr::desc(abs(combined_score))) %>%
  dplyr::distinct(gene, .keep_all = TRUE) %>%
  dplyr::arrange(dplyr::desc(combined_score)) %>% # Rank by the combined score
  dplyr::mutate(gene_id = gene) %>%
  dplyr::select(gene_id, combined_score)
# Step 2: Convert the ranked genes into a named vector for GSEA
# (names are gene identifiers, values are the combined scores)
ranked_genes_vector <- setNames(
  ranked_genes1$combined_score,
  ranked_genes1$gene_id
)
# Step 3: Load Hallmark pathways using msigdbr
# Install msigdbr only when it is missing, instead of reinstalling it on every
# run of the script.
if (!requireNamespace("msigdbr", quietly = TRUE)) {
  install.packages("msigdbr")
}
library(msigdbr)
# NOTE(review): newer msigdbr releases reportedly rename `category` to
# `collection` — confirm against the installed version.
hallmark_sets <- msigdbr(species = "Homo sapiens", category = "H")
# Step 4: Prepare gene sets for GSEA
# Build a named list with one character vector of gene symbols per gene set
# (list names are the `gs_name` values).
hallmark_sets_list <- split(
  x = hallmark_sets$gene_symbol,
  f = hallmark_sets$gs_name
)
# Step 5: Run GSEA with the new ranking
# fgsea estimates p-values by random sampling, so fix the RNG seed to make the
# results reproducible from run to run.
set.seed(42)
# Only gene sets with between minSize and maxSize genes are tested.
gsea_results <- fgsea(
  pathways = hallmark_sets_list,
  stats = ranked_genes_vector,
  minSize = 10,
  maxSize = 500
)
# Step 6: View and visualize results
# Explicit print() so the preview also appears when the script is source()'d.
print(head(gsea_results))
# Visualization of top 10 pathways using ggplot2
# The fgsea result table is not ordered by significance, so sort by p-value
# (NA p-values go last) before taking the top rows. head() also copes with
# fewer than 10 pathways, where `[1:10, ]` would produce all-NA rows.
ranked_results <- gsea_results[order(gsea_results$pval), ]
top_pathways <- head(ranked_results, 10)
# Pathways are ordered along the axis by NES; point colour encodes the p-value.
top_pathways_plot <- ggplot(
  top_pathways,
  aes(x = reorder(pathway, NES), y = NES, color = pval)
) +
  geom_point(size = 3) +
  scale_color_gradient(low = "blue", high = "red") +
  coord_flip() +
  labs(
    title = "Top 10 GSEA Pathways (Combined Ranking)",
    x = "Pathway",
    y = "Normalized Enrichment Score (NES)",
    color = "P-value"
  ) +
  theme_minimal()
# Explicit print() so the figure is drawn when the script is source()'d.
print(top_pathways_plot)
# Comments
# Write a comment