FGSEA analysis

 library(dplyr)

library(ggplot2)

library(fgsea)


# Step 1: Rank genes by a signed significance score.
#
# combined_score = avg_log2FC * -log10(p_val): the sign comes from the fold
# change and the magnitude grows as the p-value shrinks.
#
# A p-value of exactly 0 would give -log10(0) = Inf and therefore a
# non-finite score. Such p-values are floored at the smallest positive double
# so the most significant genes stay in the ranking with a finite score.
ranked_genes1 <- sheet4_data %>%
  dplyr::mutate(
    p_val_floored = pmax(p_val, .Machine$double.xmin),
    combined_score = avg_log2FC * -log10(p_val_floored)
  ) %>%
  # Rows with a missing fold change or p-value cannot be scored; drop them.
  dplyr::filter(is.finite(combined_score)) %>%
  # Highest score first.
  dplyr::arrange(dplyr::desc(combined_score)) %>%
  dplyr::mutate(gene_id = gene) %>%
  dplyr::select(gene_id, combined_score)


# Step 2: Convert the ranked genes into a named numeric vector
# (names = gene ids, values = combined scores) for GSEA.
ranked_genes_vector <- setNames(
  ranked_genes1$combined_score,
  ranked_genes1$gene_id
)


# Step 3: Load Hallmark pathways using msigdbr
#
# Install msigdbr only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
if (!requireNamespace("msigdbr", quietly = TRUE)) {
  install.packages("msigdbr")
}

library(msigdbr)

# Human gene sets from the "H" category.
# NOTE(review): newer msigdbr releases appear to prefer `collection = "H"`
# over `category = "H"` - confirm against the installed version.
hallmark_sets <- msigdbr(species = "Homo sapiens", category = "H")


# Step 4: Build the pathway list for GSEA: a named list with one vector of
# gene symbols per gene set, keyed by gs_name.
hallmark_sets_list <- split(hallmark_sets$gene_symbol, hallmark_sets$gs_name)


# Step 5: Run fgsea on the combined-score ranking against the Hallmark gene
# sets, with gene-set size limits of 10 (minSize) and 500 (maxSize).
gsea_results <- fgsea(
  stats = ranked_genes_vector,
  pathways = hallmark_sets_list,
  maxSize = 500,
  minSize = 10
)


# Step 6: View and visualize results
head(gsea_results)


# Visualization of top 10 pathways using ggplot2
#
# Sort by p-value explicitly instead of relying on row order: no ordering has
# been applied to gsea_results, so rows 1:10 are not the ten most significant
# pathways. head() also copes with fewer than 10 result rows, where indexing
# with 1:10 would pad the table with all-NA rows.
top_pathways <- head(gsea_results[order(gsea_results$pval), ], 10)


# Dot plot: one point per pathway, ordered by NES along the (flipped) axis
# and coloured by p-value.
ggplot(top_pathways, aes(x = reorder(pathway, NES), y = NES, color = pval)) +
  geom_point(size = 3) +
  scale_color_gradient(low = "blue", high = "red") +
  coord_flip() +
  labs(
    title = "Top 10 GSEA Pathways (Combined Ranking)",
    x = "Pathway",
    y = "Normalized Enrichment Score (NES)",
    color = "P-value"
  ) +
  theme_minimal()


Inf 값이 포함된 경우
# Step 1: Flag the entries of ranked_genes_vector that are not finite
# (Inf, -Inf, NA or NaN).
non_finite_indices <- !is.finite(ranked_genes_vector)

# Drop the flagged entries; when nothing is flagged the vector is unchanged.
ranked_genes_vector <- ranked_genes_vector[!non_finite_indices]

# Step 2: Re-run GSEA on the cleaned ranking
gsea_results <- fgsea(
  pathways = hallmark_sets_list,
  stats = ranked_genes_vector,
  minSize = 10,
  maxSize = 500
)


댓글

이 블로그의 인기 게시물

#single cell sequencing 기초 분석 - #1 R 설치 및 package 설치

리눅스 기초 #10 GATK calling을 사용하기 위하여, reference file indexing하는 방법

Single cell 분석을 위한 package 소개