Section 2: DESeq2 analysis
DESeq2 analyses steps
This section uses the count data (for selected datasets) generated in Section 1 to do differential expression (DE) analyses using DESeq2. Briefly, the count data are imported in R, batch corrected using ComBat_seq, then DE analyses were performed for various contrasts using DESeq2. Results were visualized as volcano plots, and cell enrichment performed using PlacentaCellEnrich (PCE).
Prerequisites
R packages required for this section are loaded
setwd("~/github/BAPvsTrophoblast_Amnion")
# load the modules
library(sva)
library(tidyverse)
library(DESeq2)
library(vsn)
library(pheatmap)
library(ggrepel)
library(RColorBrewer)
library(reshape2)
require(biomaRt)
library(EnhancedVolcano)
library(TissueEnrich)
library(plotly)
library(DT)
library(cowplot)
library(biomaRt)
library(tidytext)
library(ggpubr)
library(scales)Import datasets
The counts data and its associated metadata (coldata) are imported for analyses.
counts = 'assets/counts-subset-v5.txt'
groupFile = 'assets/batch-subset-v5.txt'
coldata <-
read.csv(
groupFile,
row.names = 1,
sep = "\t",
stringsAsFactors = TRUE
)
cts <- as.matrix(read.csv(counts, sep = "\t", row.names = "gene.ids"))Inspect the coldata.
DT::datatable(coldata)Reorder columns of cts according to coldata rows. Check if samples in both files match.
colnames(cts)
#> [1] "Naive_H9_hESCs_1" "Naive_H9_hESCs_2"
#> [3] "nTE_D1.Naive_H9_hESCs_1" "nTE_D1.Naive_H9_hESCs_2"
#> [5] "nTE_D2.Naive_H9_hESCs_1" "nTE_D2.Naive_H9_hESCs_2"
#> [7] "nTE_D3.Naive_H9_hESCs_1" "nTE_D3.Naive_H9_hESCs_2"
#> [9] "nCT_P3.Naive_H9_hESCs_1" "nCT_P3.Naive_H9_hESCs_2"
#> [11] "nCT_P10.Naive_H9_hESCs_1" "nCT_P10.Naive_H9_hESCs_2"
#> [13] "nCT_P15.Naive_H9_hESCs_1" "nCT_P15.Naive_H9_hESCs_2"
#> [15] "cR_nCT_P15.Naive_H9_hESCs_1" "cR_nCT_P15.Naive_H9_hESCs_2"
#> [17] "nCT_P15.409B2_iPSC_hESCs_1" "nCT_P15.409B2_iPSC_hESCs_2"
#> [19] "Placenta.derived_tbSCs_CT30_Ex1" "Placenta.derived_tbSCs_CT30_Ex2"
#> [21] "nST.Naive_H9_Ex1" "nST.Naive_H9_Ex2"
#> [23] "nEVT.Naive_H9_Ex1" "nEVT.Naive_H9_Ex2"
#> [25] "Primed_H9_hESCs_1" "Primed_H9_hESCs_2"
#> [27] "pBAP_D1.Primed_H9_hESCs_1" "pBAP_D1.Primed_H9_hESCs_2"
#> [29] "pBAP_D2.Primed_H9_hESCs_1" "pBAP_D2.Primed_H9_hESCs_2"
#> [31] "pBAP_D3.Primed_H9_hESCs_1" "pBAP_D3.Primed_H9_hESCs_2"
#> [33] "CytoTB_7_gestational_wks_1" "CytoTB_7_gestational_wks_2"
#> [35] "CytoTB_9_gestational_wks_1" "CytoTB_11_gestational_wks_1"
#> [37] "hESC_H1_STB_gt70um_D8_BAP_1" "hESC_H1_STB_gt70um_D8_BAP_2"
#> [39] "hESC_H1_STB_gt70um_D8_BAP_3" "hESC_H1_STB_40.70um_D8_BAP_1"
#> [41] "hESC_H1_STB_40.70um_D8_BAP_2" "hESC_H1_STB_40.70um_D8_BAP_3"
#> [43] "hESC_H1_STB_lt40um_D8_BAP_1" "hESC_H1_STB_lt40um_D8_BAP_2"
#> [45] "hESC_H1_STB_lt40um_D8_BAP_3" "hESC_H1_D8_MEF.CM.and.FGF2_1"
#> [47] "hESC_H1_D8_MEF.CM.and.FGF2_2" "hESC_H1_D8_MEF.CM.and.FGF2_3"
#> [49] "hESC_H9_untr_0h.1" "hESC_H9_untr_0h.2"
#> [51] "hESC_H9_BMP4_8h.1" "hESC_H9_BMP4_8h.2"
#> [53] "hESC_H9_BMP4_16h.1" "hESC_H9_BMP4_16h.2"
#> [55] "hESC_H9_BMP4_24h.1" "hESC_H9_BMP4_24h.2"
#> [57] "hESC_H9_BMP4_48h.1" "hESC_H9_BMP4_48h.2"
#> [59] "hESC_H9_BMP4_72h.1" "hESC_H9_BMP4_72h.2"
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]Batch correction
Using ComBat_seq (SVA package) to run batch correction - using bioproject IDs as variable (dataset origin).
cov1 <- as.factor(coldata$BioProject)
adjusted_counts <- ComBat_seq(cts, batch = cov1, group = NULL)
#> Found 3 batches
#> Using null model in ComBat-seq.
#> Adjusting for 0 covariate(s) or covariate level(s)
#> Estimating dispersions
#> Fitting the GLM model
#> Shrinkage off - using GLM estimates for parameters
#> Adjusting the data
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]DESeq2
The batch corrected read counts are then used for running DESeq2 analyses
dds <- DESeqDataSetFromMatrix(countData = adjusted_counts,
colData = coldata,
design = ~ condition)
vsd <- vst(dds, blind = FALSE)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
dds <- DESeq(dds)
dds
#> class: DESeqDataSet
#> dim: 16692 60
#> metadata(1): version
#> assays(4): counts mu H cooks
#> rownames(16692): ENSG00000000003.15 ENSG00000000005.6 ...
#> ENSG00000288695.1 ENSG00000288698.1
#> rowData names(130): baseMean baseVar ... deviance maxCooks
#> colnames(60): cR_nCT_P15.Naive_H9_hESCs_1 cR_nCT_P15.Naive_H9_hESCs_2
#> ... hESC_H9_BMP4_72h.1 hESC_H9_BMP4_72h.2
#> colData names(3): BioProject condition sizeFactorVarious contrasts are set up as follows (a total of 8 combinations)
res.PH9vsBAP <-
results(dds,
contrast = c(
"condition",
"Primed_H9_hESCs",
"pBAP_D3.Primed_H9_hESCs"))
res.K00vsK72 <-
results(dds,
contrast = c(
"condition",
"hESC_H9_untr_0h",
"hESC_H9_BMP4_72h"))
res.UNDvsSTB <-
results(dds,
contrast = c(
"condition",
"hESC_H1_D8_MEF.CM.and.FGF2",
"hESC_H1_STB_gt70um_D8_BAP"))
res.K72vsSTB <-
results(dds,
contrast = c(
"condition",
"hESC_H9_BMP4_72h",
"hESC_H1_STB_gt70um_D8_BAP"))
res.BAPvsSTB <-
results(
dds,
contrast = c(
"condition",
"pBAP_D3.Primed_H9_hESCs",
"hESC_H1_STB_gt70um_D8_BAP"))
res.BAPvsK72 <-
results(dds,
contrast = c(
"condition",
"pBAP_D3.Primed_H9_hESCs",
"hESC_H9_BMP4_72h"))
res.K72vsL40 <-
results(dds,
contrast = c(
"condition",
"hESC_H9_BMP4_72h",
"hESC_H1_STB_lt40um_D8_BAP"))
res.BAPvsL40 <-
results(
dds,
contrast = c(
"condition",
"pBAP_D3.Primed_H9_hESCs",
"hESC_H1_STB_lt40um_D8_BAP"
)
)The following function is to save DESeq2 results as well as generate variables to hold the gene lists for running PCE later on.
processDE <- function(res.se, string) {
res.se <- res.se[order(res.se$padj),]
res.data <-
merge(as.data.frame(res.se),
as.data.frame(counts(dds, normalized = TRUE)),
by = "row.names",
sort = FALSE)
names(res.data)[1] <- "Gene"
write_delim(res.data,
file = paste0("DESeq2results-", string, "_fc.tsv"),
delim = "\t")
res.up <-
res.data %>%
filter(log2FoldChange >= 1) %>%
filter(padj <= 0.05) %>%
arrange(desc(log2FoldChange)) %>%
dplyr::select(Gene)
res.dw <-
res.data %>%
filter(log2FoldChange <= -1) %>%
filter(padj <= 0.05) %>%
arrange(desc(log2FoldChange)) %>%
dplyr::select(Gene)
res.up.new <-
annot[annot$ensembl_gene_id_version %in% res.up$Gene, ]
res.dw.new <-
annot[annot$ensembl_gene_id_version %in% res.dw$Gene, ]
pce.up1 <- paste0(string, ".up.pce", 1)
pce.dw1 <- paste0(string, ".dw.pce", 1)
pce.up2 <- paste0(string, ".up.pce", 2)
pce.dw2 <- paste0(string, ".dw.pce", 2)
assign(pce.up1, res.up.new$ensembl_gene_id, envir = .GlobalEnv)
assign(pce.dw1, res.dw.new$ensembl_gene_id, envir = .GlobalEnv)
assign(pce.up2, res.up.new$external_gene_name, envir = .GlobalEnv)
assign(pce.dw2, res.dw.new$external_gene_name, envir = .GlobalEnv)
}Creating gene lists
The gene lists have Ensembl gene-ID-version. We need them as gene-IDs. We also need other metadata later for these lists. From Ensembl we will download metadata and attach to these lists.
ensembl = useMart("ENSEMBL_MART_ENSEMBL")
listDatasets(ensembl) %>%
filter(str_detect(description, "Human"))
#> dataset description version
#> 1 hsapiens_gene_ensembl Human genes (GRCh38.p13) GRCh38.p13
ensembl = useDataset("hsapiens_gene_ensembl", mart = ensembl)
listFilters(ensembl) %>%
filter(str_detect(name, "ensembl"))
#> name
#> 1 ensembl_gene_id
#> 2 ensembl_gene_id_version
#> 3 ensembl_transcript_id
#> 4 ensembl_transcript_id_version
#> 5 ensembl_peptide_id
#> 6 ensembl_peptide_id_version
#> 7 ensembl_exon_id
#> description
#> 1 Gene stable ID(s) [e.g. ENSG00000000003]
#> 2 Gene stable ID(s) with version [e.g. ENSG00000000003.15]
#> 3 Transcript stable ID(s) [e.g. ENST00000000233]
#> 4 Transcript stable ID(s) with version [e.g. ENST00000000233.10]
#> 5 Protein stable ID(s) [e.g. ENSP00000000233]
#> 6 Protein stable ID(s) with version [e.g. ENSP00000000233.5]
#> 7 Exon ID(s) [e.g. ENSE00000000003]
filterType <- "ensembl_gene_id_version"
filterValues <- rownames(cts)
listAttributes(ensembl) %>%
head(20)
#> name description
#> 1 ensembl_gene_id Gene stable ID
#> 2 ensembl_gene_id_version Gene stable ID version
#> 3 ensembl_transcript_id Transcript stable ID
#> 4 ensembl_transcript_id_version Transcript stable ID version
#> 5 ensembl_peptide_id Protein stable ID
#> 6 ensembl_peptide_id_version Protein stable ID version
#> 7 ensembl_exon_id Exon stable ID
#> 8 description Gene description
#> 9 chromosome_name Chromosome/scaffold name
#> 10 start_position Gene start (bp)
#> 11 end_position Gene end (bp)
#> 12 strand Strand
#> 13 band Karyotype band
#> 14 transcript_start Transcript start (bp)
#> 15 transcript_end Transcript end (bp)
#> 16 transcription_start_site Transcription start site (TSS)
#> 17 transcript_length Transcript length (including UTRs and CDS)
#> 18 transcript_tsl Transcript support level (TSL)
#> 19 transcript_gencode_basic GENCODE basic annotation
#> 20 transcript_appris APPRIS annotation
#> page
#> 1 feature_page
#> 2 feature_page
#> 3 feature_page
#> 4 feature_page
#> 5 feature_page
#> 6 feature_page
#> 7 feature_page
#> 8 feature_page
#> 9 feature_page
#> 10 feature_page
#> 11 feature_page
#> 12 feature_page
#> 13 feature_page
#> 14 feature_page
#> 15 feature_page
#> 16 feature_page
#> 17 feature_page
#> 18 feature_page
#> 19 feature_page
#> 20 feature_page
attributeNames <- c('ensembl_gene_id_version',
'ensembl_gene_id',
'external_gene_name')
annot <- getBM(
attributes = attributeNames,
filters = filterType,
values = filterValues,
mart = ensembl
)
isDup <- duplicated(annot$ensembl_gene_id)
dup <- annot$ensembl_gene_id[isDup]
annot <- annot[!annot$ensembl_gene_id %in% dup, ] #this object will be saved and used laterThe results are saved as tsv files.
processDE(res.PH9vsBAP, "PH9vsBAP")
processDE(res.K00vsK72, "K00vsK72")
processDE(res.UNDvsSTB, "UNDvsSTB")
processDE(res.K72vsSTB, "K72vsSTB")
processDE(res.BAPvsSTB, "BAPvsSTB")
processDE(res.BAPvsK72, "BAPvsK72")
processDE(res.K72vsL40, "K72vsL40")
processDE(res.BAPvsL40, "BAPvsL40")mart <-
read.csv(
"assets/mart-genes.tsv",
sep = "\t",
stringsAsFactors = TRUE,
header = TRUE
) #this object was obtained from Ensembl as we illustrated in "Creating gene lists"volcanoPlots <-
function(res.se,
string,
first,
second,
color1,
color2,
color3,
ChartTitle) {
res.se <- res.se[order(res.se$padj), ]
res.se <-
rownames_to_column(as.data.frame(res.se[order(res.se$padj), ]))
names(res.se)[1] <- "Gene"
res.data <-
merge(res.se,
mart,
by.x = "Gene",
by.y = "ensembl_gene_id_version")
res.data <- res.data %>% mutate_all(na_if, "")
res.data <- res.data %>% mutate_all(na_if, " ")
res.data <-
res.data %>% mutate(gene_symbol = coalesce(gene_symbol, Gene))
res.data$diffexpressed <- "other.genes"
res.data$diffexpressed[res.data$log2FoldChange >= 1 &
res.data$padj <= 0.05] <-
paste("Higher expression in", first)
res.data$diffexpressed[res.data$log2FoldChange <= -1 &
res.data$padj <= 0.05] <-
paste("Higher expression in", second)
res.data$delabel <- ""
res.data$delabel[res.data$log2FoldChange >= 1
& res.data$padj <= 0.05
&
!is.na(res.data$padj)] <-
res.data$gene_symbol[res.data$log2FoldChange >= 1
&
res.data$padj <= 0.05
&
!is.na(res.data$padj)]
res.data$delabel[res.data$log2FoldChange <= -1
& res.data$padj <= 0.05
&
!is.na(res.data$padj)] <-
res.data$gene_symbol[res.data$log2FoldChange <= -1
&
res.data$padj <= 0.05
&
!is.na(res.data$padj)]
outpath <- "interactive/"
gg <-
ggplot(res.data,
aes(
x = log2FoldChange,
y = -log10(padj),
col = diffexpressed,
label = delabel
)) +
geom_point(alpha = 0.5) +
xlim(-20, 20) +
theme_classic() +
scale_color_manual(name = "Expression", values = c(color1, color2, color3)) +
ggtitle(ChartTitle) +
xlab(paste("log2 fold change")) +
ylab("-log10 pvalue (adjusted)") +
theme(legend.text.align = 0)
saveWidget(ggplotly(gg), file = paste0(outpath, "/Figure_volcano_", string, ".html"))
}Volcano Plots (interactive)
Running Volcano plots for each comparison are shown below.
volcanoPlots(
res.PH9vsBAP,
"PH9vsBAP",
"pH9_Io",
"H9_pBAP_D3_Io",
"#0571B0",
"#483D8B",
"#4d4d4d",
ChartTitle = "pH9_Io vs. H9_pBAP_D3_Io"
)
volcanoPlots(
res.K00vsK72,
"K00vsK72",
"H9_BMP4.0h_Krendl",
"H9_BMP4.72h_Krendl",
"#FF1493",
"#EE82EE",
"#4d4d4d",
ChartTitle = "H9_BMP4.0h_Krendl vs. H9_BMP4.72h_Krendl"
)
volcanoPlots(
res.UNDvsSTB,
"UNDvsSTB",
"H1_Yabe",
"H1_BAP_D8_>70_Yabe",
"#598234",
"#006400",
"#4d4d4d",
ChartTitle = "H1_Yabe vs. H1_BAP_D8_>70_Yabe"
)
volcanoPlots(
res.K72vsSTB,
"K72vsSTB",
"H9_BMP4.72h_Krendl",
"H1_BAP_D8_>70_Yabe",
"#598234",
"#EE82EE",
"#4d4d4d",
ChartTitle = "H9_BMP4.72h_Krendl vs. H1_BAP_D8_>70_Yabe"
)
volcanoPlots(
res.BAPvsSTB,
"BAPvsSTB",
"H9_pBAP_D3_Io",
"H1_BAP_D8_>70_Yabe",
"#598234",
"#0571B0",
"#4d4d4d",
ChartTitle = "H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe"
)
volcanoPlots(
res.BAPvsK72,
"BAPvsK72",
"H9_pBAP_D3_Io",
"H9_BMP4.72h_Krendl",
"#EE82EE",
"#0571B0",
"#4d4d4d",
ChartTitle = "H9_pBAP_D3_Io vs. H9_BMP4.72h_Krendl"
)
volcanoPlots(
res.K72vsL40,
"K72vsL40",
"H9_BMP4.72h_Krendl",
"H1_BAP_D8_<40_Yabe",
"#AEBD38",
"#EE82EE",
"#4d4d4d",
ChartTitle = "H9_BMP4.72h_Krendl vs. H1_BAP_D8_<40_Yabe"
)
volcanoPlots(
res.BAPvsL40,
"BAPvsL40",
"H9_pBAP_D3_Io",
"H1_BAP_D8_<40_Yabe",
"#AEBD38",
"#0571B0",
"#4d4d4d",
ChartTitle = "H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe"
)Interactive Volcano Plots:
Static Volcano Plots:
volcanoPlots2 <-
function(res.se,
string,
first,
second,
color1,
color2,
color3,
ChartTitle) {
res.se <- res.se[order(res.se$padj), ]
res.se <-
rownames_to_column(as.data.frame(res.se[order(res.se$padj), ]))
names(res.se)[1] <- "Gene"
res.data <-
merge(res.se,
mart,
by.x = "Gene",
by.y = "ensembl_gene_id_version")
res.data <- res.data %>% mutate_all(na_if, "")
res.data <- res.data %>% mutate_all(na_if, " ")
res.data <-
res.data %>% mutate(gene_symbol = coalesce(gene_symbol, Gene))
res.data$diffexpressed <- "other.genes"
res.data$diffexpressed[res.data$log2FoldChange >= 1 &
res.data$padj <= 0.05] <-
paste("Higher expression in", first)
res.data$diffexpressed[res.data$log2FoldChange <= -1 &
res.data$padj <= 0.05] <-
paste("Higher expression in", second)
res.data$delabel <- ""
res.data$delabel[res.data$log2FoldChange >= 1
& res.data$padj <= 0.05
&
!is.na(res.data$padj)] <-
res.data$gene_symbol[res.data$log2FoldChange >= 1
&
res.data$padj <= 0.05
&
!is.na(res.data$padj)]
res.data$delabel[res.data$log2FoldChange <= -1
& res.data$padj <= 0.05
&
!is.na(res.data$padj)] <-
res.data$gene_symbol[res.data$log2FoldChange <= -1
&
res.data$padj <= 0.05
&
!is.na(res.data$padj)]
ggplot(res.data,
aes(
x = log2FoldChange,
y = -log10(padj),
col = diffexpressed,
label = delabel
)) +
geom_point(alpha = 0.5) +
xlim(-20, 20) +
theme_classic() +
scale_color_manual(name = "Expression", values = c(color1, color2, color3)) +
geom_text_repel(
data = subset(res.data, padj <= 0.05),
max.overlaps = 15,
show.legend = F,
min.segment.length = Inf,
seed = 42,
box.padding = 0.5
) +
ggtitle(ChartTitle) +
xlab(paste("log2 fold change")) +
ylab("-log10 pvalue (adjusted)") +
theme(legend.text.align = 0)
}volcanoPlots2(
res.PH9vsBAP,
"PH9vsBAP",
"pH9_Io",
"H9_pBAP_D3_Io",
"#0571B0",
"#483D8B",
"#4d4d4d",
ChartTitle = "pH9_Io vs. H9_pBAP_D3_Io"
)
#> Warning: Removed 1637 rows containing missing values (geom_point).
#> Warning: Removed 5 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 3956 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.1: pH9_Io vs. H9_pBAP_D3_Io
volcanoPlots2(
res.K00vsK72,
"K00vsK72",
"H9_BMP4.0h_Krendl",
"H9_BMP4.72h_Krendl",
"#FF1493",
"#EE82EE",
"#4d4d4d",
ChartTitle = "H9_BMP4.0h_Krendl vs. H9_BMP4.72h_Krendl"
)
#> Warning: Removed 1312 rows containing missing values (geom_point).
#> Warning: Removed 4 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 7556 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.2: H9_BMP4.0h_Krendl vs. H9_BMP4.72h_Krendl
volcanoPlots2(
res.UNDvsSTB,
"UNDvsSTB",
"H1_Yabe",
"H1_BAP_D8_>70_Yabe",
"#598234",
"#006400",
"#4d4d4d",
ChartTitle = "H1_Yabe vs. H1_BAP_D8_>70_Yabe"
)
#> Warning: Removed 985 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 6855 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.3: H1_Yabe vs. H1_BAP_D8_>70_Yabe
volcanoPlots2(
res.K72vsSTB,
"K72vsSTB",
"H9_BMP4.72h_Krendl",
"H1_BAP_D8_>70_Yabe",
"#598234",
"#EE82EE",
"#4d4d4d",
ChartTitle = "H9_BMP4.72h_Krendl vs. H1_BAP_D8_>70_Yabe"
)
#> Warning: Removed 1309 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 3540 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.4: H9_BMP4.72h_Krendl vs. H1_BAP_D8_>70_Yabe
volcanoPlots2(
res.BAPvsSTB,
"BAPvsSTB",
"H9_pBAP_D3_Io",
"H1_BAP_D8_>70_Yabe",
"#598234",
"#0571B0",
"#4d4d4d",
ChartTitle = "H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe"
)
#> Warning: Removed 1634 rows containing missing values (geom_point).
#> Warning: Removed 2 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 2884 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.5: H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe
volcanoPlots2(
res.BAPvsK72,
"BAPvsK72",
"H9_pBAP_D3_Io",
"H9_BMP4.72h_Krendl",
"#EE82EE",
"#0571B0",
"#4d4d4d",
ChartTitle = "H9_pBAP_D3_Io vs. H9_BMP4.72h_Krendl"
)
#> Warning: Removed 1308 rows containing missing values (geom_point).
#> Warning: ggrepel: 3784 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.6: H9_pBAP_D3_Io vs. H9_BMP4.72h_Krendl
volcanoPlots2(
res.K72vsL40,
"K72vsL40",
"H9_BMP4.72h_Krendl",
"H1_BAP_D8_<40_Yabe",
"#AEBD38",
"#EE82EE",
"#4d4d4d",
ChartTitle = "H9_BMP4.72h_Krendl vs. H1_BAP_D8_<40_Yabe"
)
#> Warning: Removed 1633 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 3333 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.7: H9_BMP4.72h_Krendl vs. H1_BAP_D8_<40_Yabe
volcanoPlots2(
res.BAPvsL40,
"BAPvsL40",
"H9_pBAP_D3_Io",
"H1_BAP_D8_<40_Yabe",
"#AEBD38",
"#0571B0",
"#4d4d4d",
ChartTitle = "H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe"
)
#> Warning: Removed 1634 rows containing missing values (geom_point).
#> Warning: Removed 2 rows containing missing values (geom_text_repel).
#> Warning: ggrepel: 2556 unlabeled data points (too many overlaps). Consider
#> increasing max.overlapsFig 2.8: H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe
PlacentaCellEnrich (PCE) analyses
The above gene lists are used for running PCE. The function used for running PCE is below.
# Vento-Tormo et al., dataset
l <-
load(file = "assets/combine-test-expression1.Rdata")
humanGeneMapping <- dataset$GRCH38$humanGeneMapping
d <- dataset$PlacentaDeciduaBloodData
data <- d$expressionData
cellDetails <- d$cellDetails
# Xiang et al., dataset
te.dataset.xiang <- readRDS("assets/te.dataset.xiang.rds")
# Castel et al., dataset
te.dataset.castel <- readRDS("assets/te.dataset.castel.rds")
# full names for cell types
xi.md <-
read.csv(
"assets/md-xi.tsv",
sep = "\t",
header = TRUE,
row.names = 1
)
vt.md <-
read.csv(
"assets/md-vt.tsv",
sep = "\t",
header = TRUE,
row.names = 1
)
zp.md <-
read.csv(
"assets/md-zp.tsv",
sep = "\t",
header = TRUE,
row.names = 1
)run.all.PCE <- function(geneList1, geneList2, filename, ChartTitle, barcolor) {
expressionData <-
data[intersect(row.names(data), humanGeneMapping$Gene),]
se <-
SummarizedExperiment(
assays = SimpleList(as.matrix(expressionData)),
rowData = row.names(expressionData),
colData = colnames(expressionData)
)
cellSpecificGenesExp <-
teGeneRetrieval(se, expressedGeneThreshold = 1)
print(length(geneList1))
gs.vt <- GeneSet(geneIds = toupper(geneList1))
output.vt <- teEnrichmentCustom(gs.vt, cellSpecificGenesExp)
en.output.vt <-
setNames(data.frame(assay(output.vt[[1]]), row.names = rowData(output.vt[[1]])[, 1]),
colData(output.vt[[1]])[, 1])
row.names(cellDetails) <- cellDetails$RName
en.output.vt$Tissue <-
cellDetails[row.names(en.output.vt), "CellName"]
gs <- GeneSet(unique(geneList2))
output.xi <- teEnrichmentCustom(gs, te.dataset.xiang)
output.zp <- teEnrichmentCustom(gs, te.dataset.castel)
en.output.xi <-
setNames(data.frame(assay(output.xi[[1]]), row.names = rowData(output.xi[[1]])[, 1]),
colData(output.xi[[1]])[, 1])
en.output.xi$Tissue <- rownames(en.output.xi)
en.output.zp <-
setNames(data.frame(assay(output.zp[[1]]), row.names = rowData(output.zp[[1]])[, 1]),
colData(output.zp[[1]])[, 1])
en.output.zp$Tissue <- rownames(en.output.zp)
en.output.zp$source <- "ZP"
en.output.zp <- en.output.zp[order(-en.output.zp$Log10PValue), ]
en.output.zp <-
merge(en.output.zp, zp.md, by = "row.names", all.x = TRUE)
en.output.zp <- rownames_to_column(en.output.zp, var = "Name")
en.output.vt$source <- "VT"
en.output.vt <- en.output.vt[order(-en.output.vt$Log10PValue), ]
en.output.vt <-
merge(en.output.vt, vt.md, by = "row.names", all.x = TRUE)
en.output.vt <- rownames_to_column(en.output.vt, var = "Name")
en.output.xi$source <- "Xi"
en.output.xi <- en.output.xi[order(-en.output.xi$Log10PValue), ]
en.output.xi <-
merge(en.output.xi, xi.md, by = "row.names", all.x = TRUE)
en.output.xi <- rownames_to_column(en.output.xi, var = "Name")
en.conbined <- rbind(en.output.vt, en.output.xi, en.output.zp)
p <- 0.05
logp <- -log10(p)
en.conbined <- en.conbined %>%
mutate(Log10PValue = replace(Log10PValue, Log10PValue < logp, 0))
en.conbined %>%
group_by(source) %>%
arrange(source, desc(Log10PValue)) %>% dplyr::slice(1:7) %>%
ungroup %>%
mutate(
source = as.factor(source),
CellNames = tidytext::reorder_within(CellNames, Log10PValue, source, sep = ":")
) %>%
ggplot(aes(CellNames, Log10PValue)) + geom_bar(stat = 'identity', fill = barcolor) + theme_minimal() +
theme(
axis.text.x = element_text(
vjust = 1,
hjust = 1,
size = 12
),
axis.text.y = element_text(size = 12),
plot.margin = margin(10, 10, 10, 100),
legend.position = "none",
plot.title = element_text(
color = "black",
size = 18,
face = "bold.italic"
),
axis.title.y = element_blank(),
axis.line.x = element_line(
colour = 'black',
size = 0.5,
linetype = 'solid'
),
axis.ticks.x = element_line(
colour = 'black',
size = 1,
linetype = 'solid'
),
axis.title.x = element_text(
color = "black",
size = 14,
face = "bold"
)
) +
scale_y_continuous(expand = expansion(mult = c(0, .1)), breaks = pretty_breaks()) +
facet_wrap(~ source, scales = "free", ncol = 3) +
coord_flip() +
ggtitle(ChartTitle)
}The PCE is run on each of the gene lists as follows (up and down pairs are displayed together).
PCE plots
a <-
run.all.PCE(
PH9vsBAP.dw.pce1,
PH9vsBAP.dw.pce2,
"PH9vsBAP.down_allPCE.v2",
"Overexpressed in H9_pBAP_D3_Io",
"#0571B0"
)
#> [1] 1500
b <-
run.all.PCE(
PH9vsBAP.up.pce1,
PH9vsBAP.up.pce2,
"PH9vsBAP.up_allPCE.v2",
"Overexpressed in pH9_Io",
"#483F8E"
)
#> [1] 2067
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.9: PCE results for pH9_Io vs. H9_pBAP_D3_Io
a <-
run.all.PCE(
K00vsK72.dw.pce1,
K00vsK72.dw.pce2,
"K00vsK72.down_allPCE.v2",
"Overexpressed in H9_BMP4.72h_Krendl",
"#EE82EE"
)
#> [1] 3688
b <-
run.all.PCE(
K00vsK72.up.pce1,
K00vsK72.up.pce2,
"K00vsK72.up_allPCE.v2",
"Overexpressed in H9_BMP4.0h_Krendl",
"#FF1493"
)
#> [1] 3145
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.10: PCE results for H9_BMP4.0h_Krendl vs. H9_BMP4.72h_Krendl
a <-
run.all.PCE(
UNDvsSTB.dw.pce1,
UNDvsSTB.dw.pce2,
"UNDvsSTB.down_allPCE.v2",
"Overexpressed in H1_BAP_D8_>70_Yabe",
"#598234"
)
#> [1] 3473
b <-
run.all.PCE(
UNDvsSTB.up.pce1,
UNDvsSTB.up.pce2,
"UNDvsSTB.up_allPCE.v2",
"Overexpressed in H1_Yabe",
"#006400"
)
#> [1] 2717
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.11: PCE results for H1_Yabe vs. H1_BAP_D8_>70_Yabe
a <-
run.all.PCE(
K72vsSTB.dw.pce1,
K72vsSTB.dw.pce2,
"K72vsSTB.down_allPCE.v2",
"Overexpressed in H1_BAP_D8_>70_Yabe",
"#598234"
)
#> [1] 1627
b <-
run.all.PCE(
K72vsSTB.up.pce1,
K72vsSTB.up.pce2,
"K72vsSTB.up_allPCE.v2",
"Overexpressed in H9_BMP4.72h_Krendl",
"#EE82EE"
)
#> [1] 1560
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.12: PCE results for H9_BMP4.72h_Krendl vs. H1_BAP_D8_>70_Yabe
a <-
run.all.PCE(
BAPvsSTB.dw.pce1,
BAPvsSTB.dw.pce2,
"BAPvsSTB.down_allPCE.v2",
"Overexpressed in H1_BAP_D8_>70_Yabe",
"#598234"
)
#> [1] 1134
b <-
run.all.PCE(
BAPvsSTB.up.pce1,
BAPvsSTB.up.pce2,
"BAPvsSTB.up_allPCE.v2",
"Overexpressed in H9_pBAP_D3_Io",
"#0571B0"
)
#> [1] 1463
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.13: PCE results for H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe
a <-
run.all.PCE(
BAPvsK72.dw.pce1,
BAPvsK72.dw.pce2,
"BAPvsK72.down_allPCE.v2",
"Overexpressed in H9_BMP4.72h_Krendl",
"#EE82EE"
)
#> [1] 1529
b <-
run.all.PCE(
BAPvsK72.up.pce1,
BAPvsK72.up.pce2,
"BAPvsK72.up_allPCE.v2",
"Overexpressed in H9_pBAP_D3_Io",
"#0571B0"
)
#> [1] 1873
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.14: PCE results for H9_pBAP_D3_Io vs. H9_BMP4.72h_Krendl
a <-
run.all.PCE(
K72vsL40.dw.pce1,
K72vsL40.dw.pce2,
"K72vsL40.down_allPCE.v2",
"Overexpressed in H1_BAP_D8_<40_Yabe",
"#AEBD38"
)
#> [1] 1663
b <-
run.all.PCE(
K72vsL40.up.pce1,
K72vsL40.up.pce2,
"K72vsL40.up_allPCE.v2",
"Overexpressed in H9_BMP4.72h_Krendl",
"#EE82EE"
)
#> [1] 1342
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.15: PCE results for H9_BMP4.72h_Krendl vs. H1_BAP_D8_<40_Yabe
a <-
run.all.PCE(
BAPvsL40.dw.pce1,
BAPvsL40.dw.pce2,
"BAPvsL40.down_allPCE.v2",
"Overexpressed in H1_BAP_D8_<40_Yabe",
"#AEBD38"
)
#> [1] 1045
b <-
run.all.PCE(
BAPvsL40.up.pce1,
BAPvsL40.up.pce2,
"BAPvsL40.up_allPCE.v2",
"Overexpressed in H9_pBAP_D3_Io",
"#0571B0"
)
#> [1] 1259
panel_plot <-
plot_grid(a,
b,
labels = c("A", "B"),
ncol = 1,
nrow = 2)
panel_plotFig 2.16: PCE results for H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe
Session Information
sessionInfo()
#> R version 4.0.5 (2021-03-31)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19044)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=English_United States.1252
#> [2] LC_CTYPE=English_United States.1252
#> [3] LC_MONETARY=English_United States.1252
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.1252
#>
#> attached base packages:
#> [1] parallel stats4 stats graphics grDevices utils datasets
#> [8] methods base
#>
#> other attached packages:
#> [1] scales_1.1.1 ggpubr_0.4.0
#> [3] tidytext_0.3.2 cowplot_1.1.1
#> [5] DT_0.18 plotly_4.9.3
#> [7] TissueEnrich_1.10.1 GSEABase_1.52.1
#> [9] graph_1.68.0 annotate_1.68.0
#> [11] XML_3.99-0.6 AnnotationDbi_1.52.0
#> [13] ensurer_1.1 EnhancedVolcano_1.8.0
#> [15] biomaRt_2.46.3 reshape2_1.4.4
#> [17] RColorBrewer_1.1-2 ggrepel_0.9.1
#> [19] pheatmap_1.0.12 vsn_3.58.0
#> [21] DESeq2_1.30.1 SummarizedExperiment_1.20.0
#> [23] Biobase_2.50.0 MatrixGenerics_1.2.1
#> [25] matrixStats_0.58.0 GenomicRanges_1.42.0
#> [27] GenomeInfoDb_1.26.7 IRanges_2.24.1
#> [29] S4Vectors_0.28.1 BiocGenerics_0.36.1
#> [31] forcats_0.5.1 stringr_1.4.0
#> [33] dplyr_1.0.7 purrr_0.3.4
#> [35] readr_2.1.1 tidyr_1.1.4
#> [37] tibble_3.1.1 ggplot2_3.3.5
#> [39] tidyverse_1.3.1 sva_3.38.0
#> [41] BiocParallel_1.24.1 genefilter_1.72.1
#> [43] mgcv_1.8-35 nlme_3.1-152
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.3.1 backports_1.2.1 BiocFileCache_1.14.0
#> [4] plyr_1.8.6 lazyeval_0.2.2 splines_4.0.5
#> [7] crosstalk_1.1.1 SnowballC_0.7.0 digest_0.6.27
#> [10] htmltools_0.5.2 fansi_0.4.2 magrittr_2.0.1
#> [13] memoise_2.0.1 openxlsx_4.2.4 tzdb_0.2.0
#> [16] limma_3.46.0 modelr_0.1.8 extrafont_0.17
#> [19] vroom_1.5.7 extrafontdb_1.0 askpass_1.1
#> [22] rmdformats_1.0.3 prettyunits_1.1.1 colorspace_2.0-1
#> [25] blob_1.2.2 rvest_1.0.0 rappdirs_0.3.3
#> [28] haven_2.4.1 xfun_0.29 crayon_1.4.2
#> [31] RCurl_1.98-1.3 jsonlite_1.7.3 survival_3.2-11
#> [34] glue_1.4.2 gtable_0.3.0 zlibbioc_1.36.0
#> [37] XVector_0.30.0 DelayedArray_0.16.3 proj4_1.0-10.1
#> [40] car_3.0-11 Rttf2pt1_1.3.8 maps_3.3.0
#> [43] abind_1.4-5 DBI_1.1.2 edgeR_3.32.1
#> [46] rstatix_0.7.0 Rcpp_1.0.8 viridisLite_0.4.0
#> [49] xtable_1.8-4 progress_1.2.2 foreign_0.8-81
#> [52] bit_4.0.4 preprocessCore_1.52.1 htmlwidgets_1.5.3
#> [55] httr_1.4.2 ellipsis_0.3.2 farver_2.1.0
#> [58] pkgconfig_2.0.3 sass_0.4.0 dbplyr_2.1.1
#> [61] locfit_1.5-9.4 utf8_1.2.1 labeling_0.4.2
#> [64] tidyselect_1.1.1 rlang_0.4.11 munsell_0.5.0
#> [67] cellranger_1.1.0 tools_4.0.5 cachem_1.0.5
#> [70] cli_3.1.1 generics_0.1.1 RSQLite_2.2.9
#> [73] broom_0.7.6 evaluate_0.14 fastmap_1.1.0
#> [ reached getOption("max.print") -- omitted 48 entries ]