Human dataset RNAseq analyses

Files copy

#copy commands

Packages

library(data.table)
library(tidyverse)
library(sva)
library(DESeq2)
library(pheatmap)
library(viridis)
library(biomaRt)
library(ggvenn)
library(ggpubr)

Input

setwd("/work/LAS/geetu-lab/arnstrm/HumanDatasets")
bewo <- fread("assets/BeWo_PLAC1_KD_counts_genes.tsv")
CT29 <- fread("assets/CT29_ST3D_PLAC1_KD_counts_genes.tsv")
colnames(CT29) <-
  c(
    "Geneid",
    "CT29_SCR.1",
    "CT29_SCR.2",
    "CT29_SCR.3",
    "CT29_shPLAC1.1",
    "CT29_shPLAC1.2",
    "CT29_shPLAC1.3"
  )
CT27 <- fread("assets/hTS_CT27_ST3D_PLAC1_KD_counts_genes.tsv")
colnames(CT27) <-
  c(
    "Geneid",
    "CT27_PLAC1.4_1",
    "CT27_PLAC1.4_2",
    "CT27_PLAC1.4_3",
    "CT27_SCR_1",
    "CT27_SCR_2",
    "CT27_SCR_3"
  )
bewo_CT27 <- left_join(bewo, CT27, by = "Geneid")
bewo_CT27_CT29 <- left_join(bewo_CT27, CT29, by = "Geneid")
counts <- bewo_CT27_CT29 %>%
  as_tibble() %>%
  column_to_rownames(var = "Geneid") %>%
  as.matrix() 
info <- fread("assets/info.tsv")
info <- info[c(-1),]
names <- info %>% 
  column_to_rownames("name") %>% 
  mutate(across(where(is.character),
                as.factor))

Read previous experiment DE list

# human datasets
makeFilePath <- function(fileName = "", projDir = "") {
  paste("/work/LAS/geetu-lab/arnstrm",
        projDir,
        "assets",
        fileName,
        sep = "/")
}
BeWo.de <- makeFilePath(fileName = "DESeq2results-KDvsCT_fc.tsv",
                        projDir = "BeWo_PLAC1_KD")
CT29.de <-
  makeFilePath(fileName = "DESeq2results-PLAC1vsSCR_fc.tsv",
               projDir = "CT29_ST3D_PLAC1_KD")
CT27.de <-
  makeFilePath(fileName = "DESeq2results-PLAC1vsSCR_fc.tsv",
               projDir = "hTS_CT27_ST3D_PLAC1_KD")
BeWo.obj <- makeFilePath(fileName = "BeWo_PLAC1_KD_genelists.RData",
                         projDir = "BeWo_PLAC1_KD")
CT29.obj <-
  makeFilePath(fileName = "CT29_ST3D_PLAC1_KD_KD_genelists.RData",
               projDir = "CT29_ST3D_PLAC1_KD")
CT27.obj <-
  makeFilePath(fileName = "hTS_CT27_ST3D_PLAC1_KD_genelists.RData",
               projDir = "hTS_CT27_ST3D_PLAC1_KD")

de.BeWo <- fread(BeWo.de)
colnames(de.BeWo)[3] <- "BeWo.log2FC"
de.CT29 <- fread(CT29.de)
colnames(de.CT29)[3] <- "CT29.log2FC"
de.CT27 <- fread(CT27.de)
colnames(de.CT27)[3] <- "CT27.log2FC"

load(BeWo.obj)
BeWo_PLAC1_KD.KDvsCT.dw.pce1 <-  KDvsCT.dw.pce1
BeWo_PLAC1_KD.KDvsCT.up.pce1 <-  KDvsCT.up.pce1
load(CT29.obj)
CT29_ST3D_PLAC1_KD.PLAC1vsSCR.dw.pce1 <- PLAC1vsSCR.dw.pce1
CT29_ST3D_PLAC1_KD.PLAC1vsSCR.up.pce1 <- PLAC1vsSCR.up.pce1
load(CT27.obj)
hTS_CT27_ST3D_PLAC1_KD.PLAC1vsSCR.up.pce1 <- PLAC1vsSCR.up.pce1
hTS_CT27_ST3D_PLAC1_KD.PLAC1vsSCR.dw.pce1 <- PLAC1vsSCR.dw.pce1

Batch correction

all(rownames(names) %in% colnames(counts))
#> [1] TRUE
counts <- counts[, rownames(names)]
cov1 <- as.factor(names$expt)
adjusted_counts <- ComBat_seq(counts, batch = cov1, group = NULL)
#> Found 3 batches
#> Using null model in ComBat-seq.
#> Adjusting for 0 covariate(s) or covariate level(s)
#> Estimating dispersions
#> Fitting the GLM model
#> Shrinkage off - using GLM estimates for parameters
#> Adjusting the data

DESeq2

dds <- DESeqDataSetFromMatrix(countData = adjusted_counts,
                              colData = names,
                              design = contrasts ~ sample)
#> converting counts to integer mode
vst <- assay(vst(dds))
vsd <- vst(dds, blind = FALSE)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
dds <- DESeq(dds)
#> estimating size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> final dispersion estimates
#> fitting model and testing

PCA

pcaData <- plotPCA(vsd,
                   intgroup = c("expt", "sample"),
                   returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

rv <- rowVars(assay(vsd))
select <- order(rv, 
                decreasing = TRUE)[seq_len(min(500,
                                               length(rv)))]

pca <- prcomp(t(assay(vsd)[select, ]))
percentVar <- pca$sdev ^ 2 / sum(pca$sdev ^ 2)
intgroup = c("expt", "sample")
intgroup.df <- as.data.frame(colData(vsd)[, intgroup, drop = FALSE])
group <- if (length(intgroup) > 1) {
  factor(apply(intgroup.df, 1, paste, collapse = " : "))
}
d <- data.frame(
  PC1 = pca$x[, 1],
  PC2 = pca$x[, 2],
  PC3 = pca$x[, 3],
  PC4 = pca$x[, 4],
  PC5 = pca$x[, 5],
  group = group,
  intgroup.df,
  name = colnames(vsd)
)

PC1 vs PC2

ggplot(d, aes(PC1, PC2, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC1 vs PC3

ggplot(d, aes(PC1, PC3, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC1 vs PC4

ggplot(d, aes(PC1, PC4, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC1 vs PC5

ggplot(d, aes(PC1, PC5, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC2 vs PC3

ggplot(d, aes(PC2, PC3, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC2 vs PC4

ggplot(d, aes(PC2, PC4, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC2 vs PC5

ggplot(d, aes(PC2, PC5, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC3 vs PC4

ggplot(d, aes(PC3, PC4, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC3 vs PC5

ggplot(d, aes(PC3, PC5, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

PC4 vs PC5

ggplot(d, aes(PC4, PC5, color = sample, shape = expt)) +
  scale_shape_manual(values = 1:3) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  geom_point(size = 2, stroke = 2) +
  xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
  ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))

Sample distance

sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- colnames(vsd)
colnames(sampleDistMatrix) <- NULL
pheatmap(sampleDistMatrix,
         clustering_distance_rows = sampleDists,
         clustering_distance_cols = sampleDists,
         col = magma(9))

Gene Information

ensembl = useMart("ENSEMBL_MART_ENSEMBL")
listDatasets(ensembl) %>%
  filter(str_detect(description, "Human"))
ensembl = useDataset("hsapiens_gene_ensembl", mart = ensembl)
listFilters(ensembl) %>%
  filter(str_detect(name, "ensembl"))
filterType <- "ensembl_gene_id_version"
head(rownames(counts))
counts <- read.delim("assets/counts_genes.tsv", row.names = 1, header = TRUE)
head(rownames(counts))
filterValues <- rownames(counts)
listAttributes(ensembl) %>%
  head(20)
attributeNames <- c('ensembl_gene_id_version',
                    'ensembl_gene_id',
                    'external_gene_name')
annot <- getBM(
  attributes = attributeNames,
  filters = filterType,
  values = filterValues,
  mart = ensembl
)
attributeNames <- c('ensembl_gene_id_version',
                    'gene_biotype',
                    'external_gene_name',
                    'description')
mart <- getBM(
  attributes = attributeNames,
  filters = filterType,
  values = filterValues,
  mart = ensembl
)
write_delim(
  annot,
  file = "assets/annot.tsv",
  delim = "\t"
)
write_delim(
  mart,
  file = "assets/mart.tsv",
  delim = "\t"
)    

Files were saved, so we don’t query BioMart everytime we run the markdown. The files will be loaded, instead

mart <-
  read.csv("assets/mart.tsv",
           sep = "\t",
           header = TRUE)
annot <-
  read.csv("assets/annot.tsv",
           sep = "\t",
           header = TRUE)

Comparing DE genes from CT27, CT29 and BeWo experiments

Downregulated genes (Ct and SCRs)

myCol = viridis(3)
down = list(BeWo = BeWo_PLAC1_KD.KDvsCT.dw.pce1,
         CT29 = CT29_ST3D_PLAC1_KD.PLAC1vsSCR.dw.pce1,
         CT27 = hTS_CT27_ST3D_PLAC1_KD.PLAC1vsSCR.dw.pce1)
ggvenn(
    down, 
    fill_color = myCol,
    stroke_size = 0.5,
    set_name_size = 4
)

Upregulated genes (KD & PLAC1’s)

mycol = viridis(3)
up = list(BeWo = BeWo_PLAC1_KD.KDvsCT.up.pce1,
         CT29 = CT29_ST3D_PLAC1_KD.PLAC1vsSCR.up.pce1,
         CT27 = hTS_CT27_ST3D_PLAC1_KD.PLAC1vsSCR.up.pce1)
ggvenn(
    up, 
    fill_color = myCol,
    stroke_size = 0.5,
    set_name_size = 4
)

Top 500 variable genes

myCol = viridis(3)
topVar = list(BeWo = BeWo.var.genes,
         CT29 = CT29.var.genes,
         CT27 = CT27.var.genes)
ggvenn(
    topVar, 
    fill_color = myCol,
    stroke_size = 0.5,
    set_name_size = 4
)

Create intersecting genelists

getFCtable <-
  function(de1 = de.BeWo,
           de2 = de.CT27,
           int1 = down$BeWo,
           int2 = down$CT27) {
    intList = intersect(int1, int2)
    cond1 <- de1 %>%
      mutate(newGene = gsub("\\..*", "", Gene)) %>%
      filter(newGene %in% intList) %>%
      dplyr::select(external_gene_name, 3) %>%
      column_to_rownames("external_gene_name")
    cond2 <- de2 %>%
      mutate(newGene = gsub("\\..*", "", Gene)) %>%
      filter(newGene %in% intList) %>%
      dplyr::select(external_gene_name, 3) %>%
      column_to_rownames("external_gene_name")
      log2FCtable <- merge(cond1,
                           cond2,
                           by = "row.names",
                           all = TRUE)
    return(log2FCtable)
  }

Genes with FC for down

log2FC.dw_BeWo.CT27 <-
  getFCtable(
    de1 = de.BeWo,
    de2 = de.CT27,
    int1 = down$BeWo,
    int2 = down$CT27
  )
log2FC.dw_BeWo.CT29 <-
  getFCtable(
    de1 = de.BeWo,
    de2 = de.CT29,
    int1 = down$BeWo,
    int2 = down$CT29
  )
log2FC.dw_CT29.CT27 <-
  getFCtable(
    de1 = de.CT29,
    de2 = de.CT27,
    int1 = down$CT29,
    int2 = down$CT27
  )

Genes with FC for up

log2FC.up_BeWo.CT27 <-
  getFCtable(
    de1 = de.BeWo,
    de2 = de.CT27,
    int1 = up$BeWo,
    int2 = up$CT27
  )
log2FC.up_BeWo.CT29 <-
  getFCtable(
    de1 = de.BeWo,
    de2 = de.CT29,
    int1 = up$BeWo,
    int2 = up$CT29
  )
log2FC.up_CT29.CT27 <-
  getFCtable(
    de1 = de.CT29,
    de2 = de.CT27,
    int1 = up$CT29,
    int2 = up$CT27
  )

top 500 variable genes

log2FC.vr_BeWo.CT27 <-
  getFCtable(
    de1 = de.BeWo,
    de2 = de.CT27,
    int1 = gsub("\\..*", "", topVar$BeWo),
    int2 = gsub("\\..*", "", topVar$CT27)
  )
log2FC.vr_BeWo.CT29 <-
  getFCtable(
    de1 = de.BeWo,
    de2 = de.CT29,
    int1 = gsub("\\..*", "", topVar$BeWo),
    int2 = gsub("\\..*", "", topVar$CT29)
  )
log2FC.vr_CT29.CT27 <-
  getFCtable(
    de1 = de.CT29,
    de2 = de.CT27,
    int1 = gsub("\\..*", "", topVar$CT29),
    int2 = gsub("\\..*", "", topVar$CT27)
  )

Correlation plots (down regulated)

BeWo and CT27

ggscatter(
  log2FC.dw_BeWo.CT27,
  x = "BeWo.log2FC",
  y = "CT27.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

BeWo and CT29

ggscatter(
  log2FC.dw_BeWo.CT29,
  x = "BeWo.log2FC",
  y = "CT29.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

CT29 and CT27

ggscatter(
  log2FC.dw_CT29.CT27,
  x = "CT29.log2FC",
  y = "CT27.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

Correlation plots (up regulated)

BeWo and CT27

ggscatter(
  log2FC.up_BeWo.CT27,
  x = "BeWo.log2FC",
  y = "CT27.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

BeWo and CT29

ggscatter(
  log2FC.up_BeWo.CT29,
  x = "BeWo.log2FC",
  y = "CT29.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

CT29 and CT27

ggscatter(
  log2FC.up_CT29.CT27,
  x = "CT29.log2FC",
  y = "CT27.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

Correlation plots (most variable genes)

BeWo and CT27

ggscatter(
  log2FC.vr_BeWo.CT27,
  x = "BeWo.log2FC",
  y = "CT27.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

BeWo and CT29

ggscatter(
  log2FC.vr_BeWo.CT29,
  x = "BeWo.log2FC",
  y = "CT29.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")

CT29 and CT27

ggscatter(
  log2FC.vr_CT29.CT27,
  x = "CT29.log2FC",
  y = "CT27.log2FC",
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue",
                    fill = "lightgray")) +
  stat_cor(method = "pearson")