Loop 구문으로 cluster별 분석

 Idents(seurat_combined1) <- seurat_combined1$seurat_clusters # 결과 저장용 리스트 초기화 comparison_markers <- list() comparison_pathways <- list() comparison_GO <- list() # 0부터 19까지 모든 클러스터에 대해 반복 작업 수행 for (cluster_id in 0:19) {      cat("Processing: Cluster", cluster_id, "vs others (Clusters excluding", cluster_id, ")\n")      # 기준 클러스터 (예: Cluster 0)와 나머지 클러스터를 "others"로 합침   reference_cluster <- as.character(cluster_id)      # 나머지 클러스터들을 "others"로 설정 (현재 클러스터 제외)   seurat_combined1$combined_cluster <- ifelse(seurat_combined1$seurat_clusters == reference_cluster, reference_cluster, "others")      # Idents를 combined_cluster로 설정하여 "others"와 특정 클러스터를 기준으로 분석   Idents(seurat_combined1) <- "combined_cluster"      # ==========================   # 마커 분석   # ==========================   markers <- FindMarkers(seurat_combined1, ident.1 = reference_cluster, ident.2 = "ot...

FGSEA analysis

 library(dplyr) library(ggplot2) library(fgsea) # Step 1: Compute a combined score using avg_log2FC and p-value ranked_genes1 <- sheet4_data %>%     dplyr::mutate(combined_score = avg_log2FC * -log10(p_val)) %>%  # Create the combined score     dplyr::arrange(desc(combined_score)) %>%  # Rank by the combined score     dplyr::mutate(gene_id = gene) %>%     dplyr::select(gene_id, combined_score) # Step 2: Convert the ranked genes into a named vector for GSEA ranked_genes_vector <- setNames(ranked_genes1$combined_score, ranked_genes1$gene_id) # Step 3: Load Hallmark pathways using msigdbr install.packages("msigdbr") library(msigdbr) hallmark_sets <- msigdbr(species = "Homo sapiens", category = "H") # Step 4: Prepare gene sets for GSEA hallmark_sets_list <- hallmark_sets %>%   split(.$gs_name) %>%   lapply(function(x) x$gene_symbol) # Step 5: Run GSEA with the new ranking gsea_results <- fgsea(...

CCLE_database 비교

library(depmap) library(dplyr) CCLE <- depmap::depmap_TPM() depmap_metadata <- depmap::depmap_metadata() CCLE_M <- depmap::depmap_mutationCalls() kras_mutation <- CCLE_M %>% filter(gene_name == "KRAS") colon_samples <- depmap_metadata %>% filter(primary_disease == "Colon/Colorectal Cancer") %>% left_join(kras_mutation %>% select(depmap_id, var_class), by = "depmap_id") %>% mutate(KRAS_status = ifelse(is.na(var_class), "No", "Yes")) expression_data <- CCLE %>% filter(depmap_id %in% colon_samples$depmap_id) expression_data_with_kras <- expression_data %>% + left_join(colon_samples %>% select(depmap_id, KRAS_status), by = "depmap_id") t_test_results <- expression_data_with_kras %>% group_by(gene_name) %>% summarize(p_value = t.test(rna_expression ~ KRAS_status)$p.value) # p-value가 0.05 이하인 유전자만 선택 significant_genes <- t_test_results %>% filter(p_value < 0.05) %...

pre-selected gene vs TCGA expression 비교

library(TCGAbiolinks) library(limma) library(edgeR) library(ggrepel) #CRC data download query <- GDCquery( project = "TCGA-COAD", data.category = "Transcriptome Profiling", data.type = "Gene Expression Quantification", workflow.type = "STAR - Counts" ) GDCdownload(query) colon_data <- GDCprepare ( query ) #TCGA-CRC data에서 gene_name & FPKM 값 추출 gene_name <- rowData(colon_data)$gene_name fpkm_values <- assay(colon_data, "fpkm_unstrand") df <- data.frame(values = gene_name) expression_data <- data.frame(gene_name = df, fpkm = fpkm_values) #pre-selected gene list 생성 #기존의 output으로 만든 파일 활용 file_path <- "top_markers_통합.xlsx" sheet1_data <- read_excel(file_path, sheet = 1) sheet2_data <- read_excel(file_path, sheet = 2) sheet3_data <- read_excel(file_path, sheet = 3) sheet4_data <- read_excel(file_path, sheet = 4) F1 <- sheet1_data %>%filter(p_val < 0.05 & abs(avg_log2FC) > 0.5)...
  *excel sheet 불러오기 file_path <- "top_markers_통합.xlsx" sheet1_data <- read_excel(file_path, sheet = 1) sheet2_data <- read_excel(file_path, sheet = 2) sheet3_data <- read_excel(file_path, sheet = 3) sheet4_data <- read_excel(file_path, sheet = 4) top_markers의 구조 Gene/p_val/avg_log2FC/pct.1/pct.2/p_val_adj *Gene filtering filtered_genes <- sheet1_data %>%filter(p_val < 0.05 & abs(avg_log2FC) > 0.5) %>% select(Gene) filtered_genes2 <- sheet2_data %>%filter(p_val < 0.05 & abs(avg_log2FC) > 0.5) %>% select(gene) filtered_genes3 <- sheet3_data %>%filter(p_val < 0.05 & abs(avg_log2FC) > 0.5) %>% select(gene) filtered_genes4 <- sheet4_data %>%filter(p_val < 0.05 & abs(avg_log2FC) > 0.5) %>% select(gene) *TCGA data filtering gene_name 추출 gene_name <- rowData(colon_data)$gene_name fpkm_values <- assay(colon_data, "fpkm_unstrand") df <- data.frame(values = gene_name) intersecti...

추출한 TCGA data를 기반으로 분석하기

library(limma) # 샘플 이름에서 '01A'는 암 (Cancer), '01B'는 정상 (Normal)으로 구분 sample_ids <- colnames(expr_data_filtered) # 발현 데이터의 샘플 이름 # '01A'와 '01B'를 사용하여 정상/암 샘플 구분 normal_samples <- sample_ids[grepl("11A", sample_ids)] # '11A'가 포함된 샘플 cancer_samples <- sample_ids[grepl("01A", sample_ids)] # '01A'가 포함된 샘플 # 정상 샘플과 암 샘플을 사용하여 발현 데이터 분리 expr_data_normal <- expr_data_filtered[, normal_samples] expr_data_cancer <- expr_data_filtered[, cancer_samples] # 결과 확인 print(dim(expr_data_normal)) # 정상 샘플의 차원 [1] 115 15 print(dim(expr_data_cancer)) # 암 샘플의 차원 [1] 115 465 # 조건 설정: 정상 vs 암 (두 그룹 비교) group <- factor(c(rep("Normal", length(normal_samples)), rep("Cancer", length(cancer_samples)))) # 데이터 결합 expr_data_combined <- cbind(expr_data_normal, expr_data_cancer) # 설계 행렬 생성 (두 그룹을 비교) design <- model.matrix(~ group) # limma 분석 fit <- lmFit(expr_data_combined, design) fit <- e...