CCLE_database 비교

library(depmap)
library(dplyr)
library(ggplot2)

# Fetch the three depmap tables used by this analysis.
# Sample-level annotations; `primary_disease` is read from here below.
depmap_metadata <- depmap::depmap_metadata()

# Per-cell-line mutation calls; used below to look up KRAS variants.
CCLE_M <- depmap::depmap_mutationCalls()

# Expression table; downstream code reads `rna_expression` by
# `gene_name` and `depmap_id`.
CCLE <- depmap::depmap_TPM()

# All mutation calls recorded for KRAS, one row per called variant.
kras_mutation <- CCLE_M %>%
  filter(gene_name == "KRAS")

# Collapse the KRAS calls to a single row per cell line before joining.
# Joining the raw calls would duplicate any cell line that carries more than
# one KRAS variant, and those duplicates would propagate into the expression
# table and inflate the sample size of the t-tests further down.
# Multiple variant classes for one cell line are concatenated with ";".
kras_by_cell_line <- kras_mutation %>%
  filter(!is.na(var_class)) %>%
  group_by(depmap_id) %>%
  summarize(var_class = paste(sort(unique(var_class)), collapse = ";"))

# Colorectal cell lines annotated with KRAS mutation status:
# "Yes" when at least one KRAS call with a variant class exists, else "No".
# NOTE(review): every variant class counts as mutated here, including any
# silent calls if the table contains them. Confirm this is intended.
colon_samples <- depmap_metadata %>%
  filter(primary_disease == "Colon/Colorectal Cancer") %>%
  left_join(kras_by_cell_line, by = "depmap_id") %>%
  mutate(KRAS_status = ifelse(is.na(var_class), "No", "Yes"))

# Keep only the expression rows that belong to the colorectal cell lines.
expression_data <- CCLE %>%
  filter(depmap_id %in% colon_samples$depmap_id)

# Attach the KRAS status to every expression row.
# The original line began with a stray "+" (a console continuation prompt
# pasted into the script), which turned the right-hand side of the pipe into
# a unary-plus call instead of the intended join.
# distinct() keeps the lookup at one row per (cell line, status) pair so a
# cell line listed more than once in `colon_samples` cannot duplicate its
# expression rows.
expression_data_with_kras <- expression_data %>%
  left_join(
    colon_samples %>%
      select(depmap_id, KRAS_status) %>%
      distinct(),
    by = "depmap_id"
  )

# Welch two-sample t-test p-value for `values` split by `groups`.
# t.test() raises an error when a gene cannot be tested (for example constant
# expression in every sample, or fewer than two observations in a group).
# Such genes get NA instead of aborting the whole per-gene summary.
safe_t_test_p_value <- function(values, groups) {
  tryCatch(
    t.test(values ~ groups)$p.value,
    error = function(e) NA_real_
  )
}

# One row per gene: p-value of expression in KRAS-mutant vs. non-mutant
# colorectal cell lines. Untestable genes carry NA.
# NOTE(review): these p-values are unadjusted; with one test per gene a
# multiple-testing correction (e.g. p.adjust(method = "BH")) is worth
# considering before interpreting them.
t_test_results <- expression_data_with_kras %>%
  group_by(gene_name) %>%
  summarize(p_value = safe_t_test_p_value(rna_expression, KRAS_status))

 # Keep only the genes whose p-value is below 0.05 and extract their names.
 # which() drops NA comparisons, matching filter()'s handling of NA.
significant_genes <- with(
  t_test_results,
  gene_name[which(p_value < 0.05)]
)

# Expression rows restricted to the genes that passed the p-value cut-off.
expression_data_significant <- filter(
  expression_data_with_kras,
  gene_name %in% significant_genes
)

# Boxplot of expression by KRAS status, one facet per significant gene.
ggplot(
  data = expression_data_significant,
  mapping = aes(x = KRAS_status, y = rna_expression, fill = KRAS_status)
) +
  geom_boxplot() +
  # One panel per gene, each with its own y-axis range.
  facet_wrap(~ gene_name, scales = "free_y") +
  ggtitle("KRAS Mutation Status vs Gene Expression (Significant Genes)") +
  xlab("KRAS Status") +
  ylab("RNA Expression") +
  theme_minimal() +
  # Fill colour repeats the x-axis, so the legend is hidden.
  theme(legend.position = "none")

# Genes listed in `F1` that are also in the significant set.
# NOTE(review): `F1` is not defined in the visible part of this script; it is
# presumably a data frame with a `gene` column created elsewhere. Confirm.
f1_significant_rows <- filter(F1, gene %in% significant_genes)
matching_genes <- pull(f1_significant_rows, gene)

# Expression rows restricted to those overlapping genes.
expression_data_matching <- filter(
  expression_data_with_kras,
  gene_name %in% matching_genes
)

# Boxplot of expression by KRAS status, one facet per overlapping gene.
ggplot(
  data = expression_data_matching,
  mapping = aes(x = KRAS_status, y = rna_expression, fill = KRAS_status)
) +
  geom_boxplot() +
  # One panel per gene, each with its own y-axis range.
  facet_wrap(~ gene_name, scales = "free_y") +
  ggtitle("KRAS Mutation Status vs Gene Expression (Significant F1 Genes)") +
  xlab("KRAS Status") +
  ylab("RNA Expression") +
  theme_minimal() +
  # Fill colour repeats the x-axis, so the legend is hidden.
  theme(legend.position = "none")

# Expression rows for the single gene LGALS3.
expression_data_LGALS3 <- filter(
  expression_data_with_kras,
  gene_name == "LGALS3"
)

# t-test of LGALS3 expression between the two KRAS status groups.
t_test_result <- t.test(rna_expression ~ KRAS_status, data = expression_data_LGALS3)
p_value <- t_test_result[["p.value"]]

# Boxplot of LGALS3 expression by KRAS status with the p-value printed on it.
ggplot(
  data = expression_data_LGALS3,
  mapping = aes(x = KRAS_status, y = rna_expression, fill = KRAS_status)
) +
  geom_boxplot() +
  # Show the p-value between the two boxes (x = 1.5), at the height of the
  # largest observed expression value.
  annotate(
    "text",
    x = 1.5,
    y = max(expression_data_LGALS3$rna_expression),
    label = paste("p-value =", round(p_value, 3)),
    size = 5,
    hjust = 1
  ) +
  ggtitle("KRAS Mutation Status vs LGALS3 Gene Expression (t-test)") +
  xlab("KRAS Status") +
  ylab("RNA Expression") +
  theme_minimal() +
  # Fill colour repeats the x-axis, so the legend is hidden.
  theme(legend.position = "none")


댓글

이 블로그의 인기 게시물

#single cell sequencing 기초 분석 - #1 R 설치 및 package 설치

리눅스 기초 #10 GATK calling을 사용하기 위하여, reference file indexing하는 방법

Single cell 분석을 위한 package 소개