genes_p50_path <- file.path(PROJHOME, "reference", 
                            "genes.protein_coding.1kb_flank.p50_overlap.bed")

genes_p50 <- 
  readr::read_tsv(genes_p50_path, col_names = c("chrom", "start", "end", "name")) %>% 
  tidyr::separate(name, c("gene_id", "gene"), sep = "_")
is_defined <- col_data$coo_ns %in% c("ABC", "GCB")
texpr_p50 <- texpr[ , is_defined]
gois <- rownames(texpr)[gene_ids_filt %in% genes_p50$gene_id]

nfkbiz_mutants <- muts %>% dplyr::filter(gene == "NFKBIZ") %$% patient %>% unique()

data <- 
  texpr_p50 %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column("gene") %>% 
  tidyr::gather(patient, expr, -gene) %>% 
  dplyr::left_join(col_data, by = "patient") %>% 
  dplyr::mutate(nfkbiz = ifelse(patient %in% nfkbiz_mutants, "mutated", "not_mutated"))

p50_abc_vs_gcb <- 
  data %>% 
  dplyr::select(-patient) %>% 
  dplyr::group_by(coo_ns, gene, nfkbiz) %>% 
  tidyr::nest() %>% 
  tidyr::spread(nfkbiz, data) %>% 
  dplyr::mutate(
    wilcox = purrr::map2(mutated, not_mutated, ~wilcox.test(.x$expr, .y$expr)), 
    pval = purrr::map_dbl(wilcox, "p.value")) %>% 
  dplyr::group_by(coo_ns) %>% 
  dplyr::mutate(qval = p.adjust(pval, "BH")) %>% 
  dplyr::select(coo_ns, gene, pval, qval) %>% 
  dplyr::ungroup() %>% 
  dplyr::arrange(pval)

knitr::kable(head(p50_abc_vs_gcb, 30))
coo_ns gene pval qval
ABC LTB_ENSG00000227507 0.0000150 0.2320012
ABC HIST1H2BH 0.0000537 0.4154006
ABC SLC22A5 0.0002830 0.9541278
ABC ACADM 0.0003267 0.9541278
GCB CCL24_ENSG00000106178 0.0003817 1.0000000
ABC POU4F1 0.0004777 0.9541278
ABC NOL4 0.0005008 0.9541278
ABC HSF5 0.0006447 0.9541278
GCB DLGAP1 0.0006483 1.0000000
ABC PLAC8 0.0006559 0.9541278
ABC TSHZ2 0.0006925 0.9541278
GCB ADA 0.0007679 1.0000000
GCB AC015660.1 0.0008767 1.0000000
ABC GMPPA 0.0011191 0.9541278
GCB OXCT2 0.0011294 1.0000000
ABC CCL28 0.0011486 0.9541278
ABC KIAA1919 0.0012103 0.9541278
ABC KPNA5 0.0012422 0.9541278
GCB MAP7D2 0.0012511 1.0000000
ABC PLK3 0.0013083 0.9541278
ABC USP53 0.0014504 0.9541278
ABC NKX3-1 0.0015994 0.9541278
ABC SMOC1 0.0016281 0.9541278
ABC DNAJA2 0.0016480 0.9541278
ABC ALG1L 0.0017505 0.9541278
ABC SH3YL1 0.0017782 0.9541278
ABC SLC36A4 0.0018702 0.9541278
ABC GADD45A 0.0019178 0.9541278
ABC IBTK 0.0019665 0.9541278
ABC TNF_ENSG00000232810 0.0019665 0.9541278
p50_abc_vs_gcb %>% 
  dplyr::mutate(is_goi = gene %in% gois) %>% 
  ggplot(aes(x = pval, fill = is_goi)) +
  geom_histogram(bins = 25) +
  scale_fill_manual(values = c(`FALSE` = "grey", `TRUE` = "red")) +
  facet_grid(~coo_ns)