genes_p50_path <- file.path(PROJHOME, "reference",
"genes.protein_coding.1kb_flank.p50_overlap.bed")
genes_p50 <-
readr::read_tsv(genes_p50_path, col_names = c("chrom", "start", "end", "name")) %>%
tidyr::separate(name, c("gene_id", "gene"), sep = "_")
is_defined <- col_data$coo_ns %in% c("ABC", "GCB")
texpr_p50 <- texpr[ , is_defined]
gois <- rownames(texpr)[gene_ids_filt %in% genes_p50$gene_id]
nfkbiz_mutants <- muts %>% dplyr::filter(gene == "NFKBIZ") %$% patient %>% unique()
data <-
texpr_p50 %>%
as.data.frame() %>%
tibble::rownames_to_column("gene") %>%
tidyr::gather(patient, expr, -gene) %>%
dplyr::left_join(col_data, by = "patient") %>%
dplyr::mutate(nfkbiz = ifelse(patient %in% nfkbiz_mutants, "mutated", "not_mutated"))
p50_abc_vs_gcb <-
data %>%
dplyr::select(-patient) %>%
dplyr::group_by(coo_ns, gene, nfkbiz) %>%
tidyr::nest() %>%
tidyr::spread(nfkbiz, data) %>%
dplyr::mutate(
wilcox = purrr::map2(mutated, not_mutated, ~wilcox.test(.x$expr, .y$expr)),
pval = purrr::map_dbl(wilcox, "p.value")) %>%
dplyr::group_by(coo_ns) %>%
dplyr::mutate(qval = p.adjust(pval, "BH")) %>%
dplyr::select(coo_ns, gene, pval, qval) %>%
dplyr::ungroup() %>%
dplyr::arrange(pval)
knitr::kable(head(p50_abc_vs_gcb, 30))
ABC |
LTB_ENSG00000227507 |
0.0000150 |
0.2320012 |
ABC |
HIST1H2BH |
0.0000537 |
0.4154006 |
ABC |
SLC22A5 |
0.0002830 |
0.9541278 |
ABC |
ACADM |
0.0003267 |
0.9541278 |
GCB |
CCL24_ENSG00000106178 |
0.0003817 |
1.0000000 |
ABC |
POU4F1 |
0.0004777 |
0.9541278 |
ABC |
NOL4 |
0.0005008 |
0.9541278 |
ABC |
HSF5 |
0.0006447 |
0.9541278 |
GCB |
DLGAP1 |
0.0006483 |
1.0000000 |
ABC |
PLAC8 |
0.0006559 |
0.9541278 |
ABC |
TSHZ2 |
0.0006925 |
0.9541278 |
GCB |
ADA |
0.0007679 |
1.0000000 |
GCB |
AC015660.1 |
0.0008767 |
1.0000000 |
ABC |
GMPPA |
0.0011191 |
0.9541278 |
GCB |
OXCT2 |
0.0011294 |
1.0000000 |
ABC |
CCL28 |
0.0011486 |
0.9541278 |
ABC |
KIAA1919 |
0.0012103 |
0.9541278 |
ABC |
KPNA5 |
0.0012422 |
0.9541278 |
GCB |
MAP7D2 |
0.0012511 |
1.0000000 |
ABC |
PLK3 |
0.0013083 |
0.9541278 |
ABC |
USP53 |
0.0014504 |
0.9541278 |
ABC |
NKX3-1 |
0.0015994 |
0.9541278 |
ABC |
SMOC1 |
0.0016281 |
0.9541278 |
ABC |
DNAJA2 |
0.0016480 |
0.9541278 |
ABC |
ALG1L |
0.0017505 |
0.9541278 |
ABC |
SH3YL1 |
0.0017782 |
0.9541278 |
ABC |
SLC36A4 |
0.0018702 |
0.9541278 |
ABC |
GADD45A |
0.0019178 |
0.9541278 |
ABC |
IBTK |
0.0019665 |
0.9541278 |
ABC |
TNF_ENSG00000232810 |
0.0019665 |
0.9541278 |
p50_abc_vs_gcb %>%
dplyr::mutate(is_goi = gene %in% gois) %>%
ggplot(aes(x = pval, fill = is_goi)) +
geom_histogram(bins = 25) +
scale_fill_manual(values = c(`FALSE` = "grey", `TRUE` = "red")) +
facet_grid(~coo_ns)