SJ's blog

글

10월, 2024의 게시물 표시

FGSEA data table로 저장하기

- 10월 28, 2024

fgsea file의 경우, list로 구성되어 있기 때문에 vector 구조로 변환해야 함. library(tidyr) GESA_output_df <- as.data.frame(GESA_output ) GESA_output_df <- unnest(GESA_output_df , everything()) write.table(GESA_output_df , file = "이름.txt", row.names = F, quote = F, sep = "\t") 혹시 error가 발생할 경우 (S4 클래스를 벡터로 강제형변환하는 방법은 없습니다) library(tidyr) library(dplyr) # gsea_results를 데이터프레임으로 변환하는 방법 GESA_output_df <- as_tibble(gsea_results) # as.data.frame 대신 as_tibble 사용 # 데이터 구조를 펼치는 방법 (필요에 따라 조정) GESA_output_df <- GESA_output_df %>% unnest(cols = everything()) # 파일로 저장 write.table(GESA_output_df, file = "HCT116_H.txt", row.names = FALSE, sep = "\t")

자세한 내용 보기

Single cell analysis #8 데이터 시각화 (GSEA-2)

- 10월 22, 2024

# 메타데이터에서 클러스터 정보 할당 seurat_combined <- SetIdent(seurat_combined, value = seurat_combined$seurat_clusters) # 클러스터 ID 확인 unique(Idents(seurat_combined)) # 이제 클러스터 ID를 확인 target_cluster <- "7" # 예: 클러스터 ID가 문자열인 경우 seurat_clustered <- subset(seurat_combined, idents = target_cluster) # 해당 클러스터의 마커 찾기 # 현재 클러스터 ID 확인 all_clusters <- unique(Idents(seurat_combined)) # "7"을 제외한 나머지 클러스터 ID 수집 other_clusters <- all_clusters[all_clusters != "7"] # FindMarkers 실행 markers_target <- FindMarkers(seurat_combined, ident.1 = "7", ident.2 = other_clusters) # 유전자 랭킹 생성 ranks_target <- markers_target$avg_log2FC names(ranks_target) <- rownames(markers_target) # HALLMARK 경로 가져오기 gene_sets_hallmark <- msigdbr(species = "Homo sapiens", category = "H") hallmark_pathways <- split(x = gene_sets_hallmark$gene_symbol, f = gene_sets_hallmark$gs_name) # FGSEA 실행 fgsea_results_target <- fgsea(pathways = hallmark_pathways...

자세한 내용 보기

Single cell analysis #7 데이터 시각화 (GSEA-1)

- 10월 22, 2024

# 필요한 패키지 로드 library(fgsea) library(msigdbr) library(patchwork) library(ggplot2) # ggplot2 패키지 로드 # 유전자 랭킹 생성 ranks_ht04 <- markers_ht04$avg_log2FC names(ranks_ht04) <- rownames(markers_ht04) # HALLMARK 경로 가져오기 gene_sets_hallmark <- msigdbr(species = "Homo sapiens", category = "H") hallmark_pathways <- split(x = gene_sets_hallmark$gene_symbol, f = gene_sets_hallmark$gs_name) # FGSEA 실행 fgsea_results_ht04_hallmark <- fgsea(pathways = hallmark_pathways, stats = ranks_ht04, nperm = 1000) # 상위 경로 선택 (예: p-value 기준 상위 6개 경로 선택) top_pathways <- fgsea_results_ht04_hallmark[order(fgsea_results_ht04_hallmark$pval), ]$pathway[1:6] # 여러 경로에 대한 plotEnrichment 결과 생성 p1 <- plotEnrichment(hallmark_pathways[[top_pathways[1]]], ranks_ht04) + labs(title = top_pathways[1]) + theme_minimal() + theme(plot.title = element_text(size = 14, face = "bold")) + # 제목 스타일 조정 scale_color_manual(values = c("blue")) + # 선 색상 변경 theme(...

자세한 내용 보기

Single cell analysis #7 데이터 시각화 (heatmap)

- 10월 22, 2024

#6번의 과정을 통해 clustering generation을 완료한 이후, 상위/하위(top/bottom) 유전자를 분석하고 시각화하는 코드는 아래와 같음. # combine된 데이터가 있다는 가정하에 다음 분석 수행 table(seurat_combined$orig.ident) seurat_combined$sample <- ifelse(seurat_combined$orig.ident == "ht04", "ht04", "other") # sample 컬럼 확인 table(seurat_combined$sample) Idents(seurat_combined) <- "sample" # 데이터 레이어 결합 seurat_combined <- JoinLayers(seurat_combined) markers_ht04 <- FindMarkers(seurat_combined, ident.1 = "ht04", ident.2 = "other") top_markers_ht04 <- markers_ht04 %>% filter(p_val_adj < 0.05) %>% top_n(10, avg_log2FC) #내가 원하는 유전자 expression 관찰 FeaturePlot(seurat_combined, features = c("CDKN1A", "CDKN1B", "PMAIP1", "BBC3"), reduction = "umap", split.by = "orig.ident", ncol=2) # 상위 100개 유전자 선택 top_genes <- rownames(markers_ht04[order(markers_ht04$avg_log2FC, decreasing = TRUE), ])[1:100] # 하위 20개 유전자 선택 bottom_...

자세한 내용 보기

Single cell RNA analysis _ #6 조건별 차이가 나는 클러스터 generation

- 10월 22, 2024

#data load가 끝난 이후부터 data 이름 하단 ht01, ht02, ht03, ht04로 명명 [2개의 data를 분석하는 방법 - 시간 소요가 적기에 training 시 적절함] #data normalization seurat_A <- NormalizeData(ht01) seurat_B <- NormalizeData(ht02) seurat_combined <- merge(seurat_A, y = seurat_B, add.cell.ids = c("A", "B")) #변수 유전자 identify seurat_combined <- FindVariableFeatures(seurat_combined) #차원 축소 후 이미지화 seurat_combined <- ScaleData(seurat_combined) seurat_combined <- RunPCA(seurat_combined) seurat_combined <- RunTSNE(seurat_combined, dims = 1:15) seurat_combined <- RunUMAP(seurat_combined, dims = 1:10) seurat_combined <- FindNeighbors(seurat_combined, dims = 1:10) seurat_combined <- FindClusters(seurat_combined, resolution = 0.8) DimPlot(seurat_combined, reduction = "umap", group.by = "seurat_clusters",split.by = "orig.ident") [4개의 data 통합 분석] #data normalization seurat_A <- NormalizeData(ht01) seurat_B <- NormalizeData(ht02) seurat_C <- Norma...

자세한 내용 보기