CT27 ST3D PLAC1 KD Bulk RNAseq analyses
Environment Setup
salloc -N 1 --exclusive -p amd -t 8:00:00
conda activate star
# working dir
mkdir -p /work/LAS/geetu-lab/arnstrm/hTS_CT27_ST3D_PLAC1_KD
cd /work/LAS/geetu-lab/arnstrm/hTS_CT27_ST3D_PLAC1_KD
mkdir -p 1_data
mkdir -p 2_fastqc
mkdir -p 3_STAR-mapping
mkdir -p 4_featureCounts
mkdir -p 5_multiqc
# file structure
tree -L 1
.
├── 1_data
├── 2_fastqc
├── 3_STAR-mapping
├── 4_featureCounts
└── 5_multiqcRaw data
Raw data was downloaded from the LSS using rsync
command.
cd 1_data
rsync -avP /lss/folder/path/hTS_CT27_ST3D_PLAC1_KD ./1_data
# GEO link will be included laterGenome/annotation
Additional files required for the analyses were downloaded from GenCode. The downloaded files are as follows:
cd 3_STAR-mapping
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_42/GRCh38.p13.genome.fa.gz
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_42/gencode.v42.primary_assembly.annotation.gff3.gz
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_42/gencode.v42.primary_assembly.annotation.gtf.gz
gunzip GRCh38.p13.genome.fa.gz
gunzip gencode.v42.primary_assembly.annotation.gtf.gz
gunzip gencode.v42.primary_assembly.annotation.gff3.gz
# ids for prot coding genes
awk '$3=="gene" && $9 ~/gene_type=protein_coding;/ {
split($9,a,";"); print gensub(/gene_id=/, "", 1, a[2])
}' gencode.v42.primary_assembly.annotation.gff3 > GRCh38.protein_coding
# you should have 20,054 ids (protein-coding genes)FastQC
Quality inspection of the reads. The multiqc report,
collating all samples together are provided as html file.
cd 2_fastqc
for fq in ../1_data/*.fq.gz; do
fastqc --threads $SLURM_JOB_CPUS_PER_NODE $fq;
doneMapping
To index the genome, following command was run (in an interactive session).
fastaGenome="GRCh38.p13.genome.fa"
gtf="gencode.v42.primary_assembly.annotation.gtf"
STAR --runThreadN $SLURM_JOB_CPUS_PER_NODE \
--runMode genomeGenerate \
--genomeDir $(pwd) \
--genomeFastaFiles $fastaGenome \
--sjdbGTFfile $gtf \
--sjdbOverhang 1Each fastq file was mapped to the indexed genome as
using runSTAR_map.sh script shown below:
#!/bin/bash
conda activate star
index=/work/LAS/geetu-lab/arnstrm/GRCh38_index
read1=$1
read2=$(echo ${read1} | sed 's/_R1_/_R2_/g')
cpus=${SLURM_JOB_CPUS_PER_NODE}
out=$(basename ${read1} | cut -f 1-2 -d "_")
STAR \
--runThreadN ${cpus} \
--genomeDir ${index} \
--outSAMtype BAM SortedByCoordinate \
--quantMode GeneCounts \
--outFilterScoreMinOverLread 0.3 \
--outFilterMatchNminOverLread 0.3 \
--outFileNamePrefix ${out}_ \
--readFilesCommand zcat \
--outWigType bedGraph \
--outWigStrand Unstranded \
--outWigNorm RPM \
--readFilesIn ${read1} ${read2}Mapping was run with a simple loop:
for fq in *_R1_*fastq.gz; do
runSTAR_map.sh $fq;
doneCounts
For generating counts from the mapped reads, we used
subread package program featureCounts. All bam
files were supplied together to generate a single count file for
individual samples.
cd 3_STAR-mapping
realpath *.bam > ../4_featureCounts/bam.fofn
cd ../4_featureCounts
gtf="gencode.v42.primary_assembly.annotation.gtf"
while read line; do
ln -s $line;
done
featureCounts \
-T ${SLURM_CPUS_ON_NODE} \
-a ${gtf} \
-t exon \
-g gene_id \
-p \
-B \
--countReadPairs \
-o merged_counts.txt \
--tmpDir ./tmp *.bamThe generated counts file was processed to use it direclty with
DESeq2
grep -v "^#" merged_counts.txt |\
cut -f 1,7- |\
sed 's/_Aligned.sortedByCoord.out.bam//g' > merged_clean-counts.txt
head -n 1 merged_clean-counts.txt > header
grep -Fw -f GRCh38.protein_coding merged_clean-counts.txt > body
cat header body > counts_genes.tsv
rm body headCreate a info file:
head -n 1 counts_genes.tsv |\
tr "\t" "\n" |\
grep -v "^Geneid" |\
awk '{print $1"\t"gensub(/..$/, "", 1,$1)}' > info.tsvDifferential expression analysis
Differential expression (DE) analyses using DESeq2 was
performed as shown below.
Prerequisites
R packages required for this section are loaded
# set path
setwd("/work/LAS/geetu-lab/arnstrm/hTS_CT27_ST3D_PLAC1_KD")
# load the modules
library(tidyverse)
library(DESeq2)
library(pheatmap)
library(ggrepel)
library(RColorBrewer)
library(reshape2)
library(TissueEnrich)
library(plotly)
library(cowplot)
library(biomaRt)
library(scales)
library(kableExtra)
library(htmlwidgets)
library(DT)
library(enrichR)
library(clusterProfiler)
library(enrichplot)
library(org.Hs.eg.db)
#> Warning: package 'org.Hs.eg.db' was built under R version 4.2.3
library(org.Rn.eg.db)
#> Warning: package 'org.Rn.eg.db' was built under R version 4.2.3
library(data.table)
library(GOSemSim)
library(DOSE)
library(pathview)
#> Warning: package 'pathview' was built under R version 4.2.3Import datasets
The counts data and its associated metadata
(coldata) are imported for analyses.
countsFile = 'assets/counts_genes.tsv'
groupFile = 'assets/info.tsv'
coldata <-
read.csv(
groupFile,
row.names = 1,
sep = "\t",
header = FALSE,
stringsAsFactors = TRUE
)
colnames(coldata) <- "condition"
cts <- as.matrix(read.delim(countsFile, row.names = 1, header = TRUE))Reorder columns of cts according to coldata
rows. Check if samples in both files match.
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]DESeq2
The batch corrected read counts are then used for running DESeq2 analyses
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ condition)
vsd <- vst(dds, blind = FALSE)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
dds <- DESeq(dds)PCA plot for QC
PCA plot for the dataset that includes all libraries.
rv <- rowVars(assay(vsd))
select <-
order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(vsd)[select, ]))
percentVar <- pca$sdev ^ 2 / sum(pca$sdev ^ 2)
intgroup = "condition"
intgroup.df <- as.data.frame(colData(vsd)[, intgroup, drop = FALSE])
group <- if (length(intgroup) == 1) {
factor(apply(intgroup.df, 1, paste, collapse = " : "))
}
d <- data.frame(
PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
intgroup.df,
name = colnames(vsd)
)
ggplot(d, aes(PC1, PC2, color = condition)) +
scale_shape_manual(values = 1:12) +
scale_color_manual(values = c('PLAC1.4' = '#00b462',
'SCR' = '#980c80')) +
theme_bw() +
theme(legend.title = element_blank()) +
geom_point(size = 2, stroke = 2) +
geom_text_repel(aes(label = name)) +
xlab(paste("PC1", round(percentVar[1] * 100, 2), "% variance")) +
ylab(paste("PC2", round(percentVar[2] * 100, 2), "% variance"))Figure 4: PCA plot for the first 2 principal components
Sample distance for QC
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- colnames(vsd)
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists,
col = colors)Figure 5: Euclidean distance between samples
Set contrasts and find DE genes
res.PLCvsSCR <-
results(dds,
contrast = c(
"condition",
"PLAC1.4",
"SCR"))Functions
Import functions for processing and plotting
source("/work/LAS/geetu-lab/arnstrm/myR_functions/plot_enrichR.R")
source("/work/LAS/geetu-lab/arnstrm/myR_functions/processDE.R")
source("/work/LAS/geetu-lab/arnstrm/myR_functions/rat_id_conversion.R")
source("/work/LAS/geetu-lab/arnstrm/myR_functions/run_pce.R")
source("/work/LAS/geetu-lab/arnstrm/myR_functions/save_pheatmap.R")
source("/work/LAS/geetu-lab/arnstrm/myR_functions/theme_clean.R")
source("/work/LAS/geetu-lab/arnstrm/myR_functions/volcano_plots.R")Gene information
ensembl = useMart("ENSEMBL_MART_ENSEMBL")
listDatasets(ensembl) %>%
filter(str_detect(description, "Human"))
ensembl = useDataset("hsapiens_gene_ensembl", mart = ensembl)
listFilters(ensembl) %>%
filter(str_detect(name, "ensembl"))
filterType <- "ensembl_gene_id_version"
head(rownames(counts))
counts <- read.delim("assets/counts_genes.tsv", row.names = 1, header = TRUE)
head(rownames(counts))
filterValues <- rownames(counts)
listAttributes(ensembl) %>%
head(20)
attributeNames <- c('ensembl_gene_id_version',
'ensembl_gene_id',
'external_gene_name')
annot <- getBM(
attributes = attributeNames,
filters = filterType,
values = filterValues,
mart = ensembl
)
attributeNames <- c('ensembl_gene_id_version',
'gene_biotype',
'external_gene_name',
'description')
mart <- getBM(
attributes = attributeNames,
filters = filterType,
values = filterValues,
mart = ensembl
)
write_delim(
annot,
file = "assets/annot.tsv",
delim = "\t"
)
write_delim(
mart,
file = "assets/mart.tsv",
delim = "\t"
) Files were saved, so we don’t query BioMart everytime we run the markdown. The files will be loaded, instead
mart <-
read.csv(
"assets/mart.tsv",
sep = "\t",
header = TRUE,
)
annot <-
read.csv(
"assets/annot.tsv",
sep = "\t",
header = TRUE,
)Results
Write files
processDE.ver(res.PLCvsSCR, "PLAC1vsSCR")Volcano plots
PLAC1 vs. SCR (fc)
g <- volcanoPlots(
res.se = res.PLCvsSCR,
string = "PLAC1vsSCR",
first = "PLAC1",
second = "SCR",
color1 = "#00b462",
color2 = "#4d4d4d",
color3 = "#980c80",
ChartTitle = "PLAC1 vs. SCR",
labelType = "fc"
)
g
#> Warning: Removed 1992 rows containing missing values (`geom_point()`).
#> Warning: Removed 14687 rows containing missing values (`geom_label_repel()`).Fig X: PLAC1 vs SCR (purple/negFC = overexpressed in SCR; green/posFC = overexpressed in PLAC1)
PLAC1 vs. SCR (interactive)
ggplotly(g)
#> Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#> If you'd like to see this geom implemented,
#> Please open an issue with your example code at
#> https://github.com/ropensci/plotly/issues
#> Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#> If you'd like to see this geom implemented,
#> Please open an issue with your example code at
#> https://github.com/ropensci/plotly/issues
#> Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#> If you'd like to see this geom implemented,
#> Please open an issue with your example code at
#> https://github.com/ropensci/plotly/issuesFig X: PLAC1 vs SCR (purple/negFC = overexpressed in SCR; green/posFC = overexpressed in PLAC1)
PLAC1 vs. SCR (padj)
g <- volcanoPlots(
res.se = res.PLCvsSCR,
string = "PLAC1vsSCR",
first = "PLAC1",
second = "SCR",
color1 = "#00b462",
color2 = "#4d4d4d",
color3 = "#980c80",
ChartTitle = "PLAC1 vs. SCR",
labelType = "padj"
)
g
#> Warning: Removed 1992 rows containing missing values (`geom_point()`).
#> Warning: Removed 14687 rows containing missing values (`geom_label_repel()`).Fig X: PLAC1 vs SCR (purple/negFC = overexpressed in SCR; green/posFC = overexpressed in PLAC1)
enrichR
setEnrichrSite("Enrichr")
websiteLive <- TRUE
myDBs <-
c(
"DisGeNET",
"GO_Biological_Process_2021",
"HDSigDB_Human_2021",
"KEGG_2021_Human",
"GTEx_Tissues_V8_2023",
"MGI_Mammalian_Phenotype_Level_3",
"MGI_Mammalian_Phenotype_Level_4_2021",
"WikiPathways_2019_Human",
"Panther_2015"
)
if (websiteLive) {
PLAC1vsSCR.up.enriched <- enrichr(PLAC1vsSCR.up.pce2, myDBs)
PLAC1vsSCR.dw.enriched <- enrichr(PLAC1vsSCR.dw.pce2, myDBs)
}
#> Uploading data to Enrichr... Done.
#> Querying DisGeNET... Done.
#> Querying GO_Biological_Process_2021... Done.
#> Querying HDSigDB_Human_2021... Done.
#> Querying KEGG_2021_Human... Done.
#> Querying GTEx_Tissues_V8_2023... Done.
#> Querying MGI_Mammalian_Phenotype_Level_3... Done.
#> Querying MGI_Mammalian_Phenotype_Level_4_2021... Done.
#> Querying WikiPathways_2019_Human... Done.
#> Querying Panther_2015... Done.
#> Parsing results... Done.
#> Uploading data to Enrichr... Done.
#> Querying DisGeNET... Done.
#> Querying GO_Biological_Process_2021... Done.
#> Querying HDSigDB_Human_2021... Done.
#> Querying KEGG_2021_Human... Done.
#> Querying GTEx_Tissues_V8_2023... Done.
#> Querying MGI_Mammalian_Phenotype_Level_3... Done.
#> Querying MGI_Mammalian_Phenotype_Level_4_2021... Done.
#> Querying WikiPathways_2019_Human... Done.
#> Querying Panther_2015... Done.
#> Parsing results... Done.PCE tests (green/posFC = overexpressed in PLAC1)
VentoTormo
plotPCE(inputGenes = PLAC1vsSCR.up.pce1, dataset = "Vt", myColor = "#00b462")
#> [1] "requires human gene names as input"Xiang
plotPCE(inputGenes = PLAC1vsSCR.up.pce2, dataset = "Xi", myColor = "#00b462")
#> [1] "requires human ensembl gene ids"Zhou&Petropoulos (Castel)
plotPCE(inputGenes = PLAC1vsSCR.up.pce2, dataset = "Zp", myColor = "#00b462")
#> [1] "requires human ensembl gene ids"Rostovskaya (Hs)
plotPCE(inputGenes = PLAC1vsSCR.up.pce2, dataset = "Ro.Hs", myColor = "#00b462")
#> [1] "requires human ensembl gene ids"Rostovskaya (Cy)
plotPCE(inputGenes = PLAC1vsSCR.up.pce2, dataset = "Ro.Cy", myColor = "#00b462")
#> [1] "requires human ensembl gene ids"PCE tests (purple/negFC = overexpressed in SCR)
VentoTormo
plotPCE(inputGenes = PLAC1vsSCR.dw.pce1, dataset = "Vt", myColor = "#980c80")
#> [1] "requires human gene names as input"Xiang
plotPCE(inputGenes = PLAC1vsSCR.dw.pce2, dataset = "Xi", myColor = "#980c80")
#> [1] "requires human ensembl gene ids"Zhou&Petropoulos (Castel)
plotPCE(inputGenes = PLAC1vsSCR.dw.pce2, dataset = "Zp", myColor = "#980c80")
#> [1] "requires human ensembl gene ids"Rostovskaya (Hs)
plotPCE(inputGenes = PLAC1vsSCR.dw.pce2, dataset = "Ro.Hs", myColor = "#980c80")
#> [1] "requires human ensembl gene ids"Rostovskaya (Cy)
plotPCE(inputGenes = PLAC1vsSCR.dw.pce2, dataset = "Ro.Cy", myColor = "#980c80")
#> [1] "requires human ensembl gene ids"Enrichment tests (green/posFC = overexpressed in PLAC1)
DisGeNET
plotEnrichR(PLAC1vsSCR.up.enriched, table="DisGeNET" , myColor = "#00b462")GO_Biological_Process_2021
plotEnrichR(PLAC1vsSCR.up.enriched, table="GO_Biological_Process_2021" , "#00b462")HDSigDB_Human_2021
plotEnrichR(PLAC1vsSCR.up.enriched, table="HDSigDB_Human_2021" , "#00b462")KEGG_2021_Human
plotEnrichR(PLAC1vsSCR.up.enriched, table="KEGG_2021_Human" , "#00b462")GTEx_Tissues_V8_2023
plotEnrichR(PLAC1vsSCR.up.enriched, table="GTEx_Tissues_V8_2023" , "#00b462")MGI_Mammalian_Phenotype_Level_3
plotEnrichR(PLAC1vsSCR.up.enriched, table="MGI_Mammalian_Phenotype_Level_3" , "#00b462")MGI_Mammalian_Phenotype_Level_4_2021
plotEnrichR(PLAC1vsSCR.up.enriched, table="MGI_Mammalian_Phenotype_Level_4_2021" , "#00b462")WikiPathways_2019_Human
plotEnrichR(PLAC1vsSCR.up.enriched, table="WikiPathways_2019_Human" , "#00b462")Panther_2015
plotEnrichR(PLAC1vsSCR.up.enriched, table="Panther_2015" , "#00b462")Enrichment tests (purple/negFC = overexpressed in SCR)
DisGeNET
plotEnrichR(PLAC1vsSCR.dw.enriched, table="DisGeNET" , myColor = "#980c80")GO_Biological_Process_2021
plotEnrichR(PLAC1vsSCR.dw.enriched, table="GO_Biological_Process_2021" , "#980c80")HDSigDB_Human_2021
plotEnrichR(PLAC1vsSCR.dw.enriched, table="HDSigDB_Human_2021" , "#980c80")KEGG_2021_Human
plotEnrichR(PLAC1vsSCR.dw.enriched, table="KEGG_2021_Human" , "#980c80")GTEx_Tissues_V8_2023
plotEnrichR(PLAC1vsSCR.dw.enriched, table="GTEx_Tissues_V8_2023" , "#980c80")MGI_Mammalian_Phenotype_Level_3
plotEnrichR(PLAC1vsSCR.dw.enriched, table="MGI_Mammalian_Phenotype_Level_3" , "#980c80")MGI_Mammalian_Phenotype_Level_4_2021
plotEnrichR(PLAC1vsSCR.dw.enriched, table="MGI_Mammalian_Phenotype_Level_4_2021" , "#980c80")WikiPathways_2019_Human
plotEnrichR(PLAC1vsSCR.dw.enriched, table="WikiPathways_2019_Human" , "#980c80")Panther_2015
plotEnrichR(PLAC1vsSCR.dw.enriched, table="Panther_2015" , "#980c80")GSEA
Prepare GeneList
filteredDE <- fread("assets/DE_PLAC1vsSCR_filtered.tsv")
original_gene_list <- filteredDE$log2FoldChange
names(original_gene_list) <- gsub("\\..*", "", filteredDE$Gene)
gene_list<-na.omit(original_gene_list)
gene_list = sort(gene_list, decreasing = TRUE)Run GSEA
gse <- gseGO(
geneList = gene_list,
ont = "ALL",
keyType = "ENSEMBL",
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = org.Hs.eg.db,
pAdjustMethod = "BH",
eps = 0,
exponent = 1
)
#> preparing geneSet collections...
#> GSEA analysis...
#> leading edge analysis...
#> done...
saveRDS(gse, file = "assets/gsea.rds")Results table
gse <- readRDS(file = "assets/gsea.rds")
gseTable <- as_tibble(gse)
DT::datatable(
gseTable,
filter = "top",
options = list(pageLength = 10)) %>%
DT::formatRound(c("enrichmentScore", "NES", "pvalue", "p.adjust", "qvalue"), 2) %>%
DT::formatStyle("core_enrichment",
"white-space" = "nowrap")Plots
Blobplot
dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)Enrichment Map
For the first 15 categories
mapGO <- godata('org.Hs.eg.db', ont = "BP")
emaGSE <- pairwise_termsim(gse, method="Wang", semData = mapGO)
emapplot(emaGSE, showCategory = 15)Category Netplot
Showing 5 categories
cnetplot(gse, categorySize="pvalue", foldChange=gene_list, showCategory = 5)
#> Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
#> The foldChange parameter will be removed in the next version.Ridgeplot
ridgeplot(gse) + labs(x = "enrichment distribution")GSEA plot
For the top 20 categories
set1
gseaplot(gse, by = "all", title = gse$Description[1], geneSetID = 1)set2
gseaplot(gse, by = "all", title = gse$Description[2], geneSetID = 2)set3
gseaplot(gse, by = "all", title = gse$Description[3], geneSetID = 3)set4
gseaplot(gse, by = "all", title = gse$Description[4], geneSetID = 4)set5
gseaplot(gse, by = "all", title = gse$Description[5], geneSetID = 5)set6
gseaplot(gse, by = "all", title = gse$Description[6], geneSetID = 6)set7
gseaplot(gse, by = "all", title = gse$Description[7], geneSetID = 7)set8
gseaplot(gse, by = "all", title = gse$Description[8], geneSetID = 8)set9
gseaplot(gse, by = "all", title = gse$Description[9], geneSetID = 9)set10
gseaplot(gse, by = "all", title = gse$Description[10], geneSetID = 10)set11
gseaplot(gse, by = "all", title = gse$Description[11], geneSetID = 11)set12
gseaplot(gse, by = "all", title = gse$Description[12], geneSetID = 12)set13
gseaplot(gse, by = "all", title = gse$Description[13], geneSetID = 13)set14
gseaplot(gse, by = "all", title = gse$Description[14], geneSetID = 14)set15
gseaplot(gse, by = "all", title = gse$Description[15], geneSetID = 15)KEGG
Prepare GeneList
ids <- bitr(
names(original_gene_list),
fromType = "ENSEMBL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db
)
dedup_ids = ids[!duplicated(ids[c("ENSEMBL")]),]
filteredDE$Gene <- gsub("\\..*", "", filteredDE$Gene)
filteredDE2 <- left_join(dedup_ids,
filteredDE,
by = c('ENSEMBL' = 'Gene'))
kegg_gene_list <- filteredDE2$log2FoldChange
names(kegg_gene_list) <- filteredDE2$ENTREZID
kegg_gene_list <- na.omit(kegg_gene_list)
kegg_gene_list = sort(kegg_gene_list,
decreasing = TRUE)run KEGG
orgCode <- "hsa"
keggPW <-gseKEGG(
kegg_gene_list,
organism = orgCode,
keyType = "ncbi-geneid",
exponent = 1,
minGSSize = 3,
maxGSSize = 800,
eps = 1e-10,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
verbose = TRUE,
by = "fgsea")
#> Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
#> Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
#> Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
#> preparing geneSet collections...
#> GSEA analysis...
#> leading edge analysis...
#> done...
saveRDS(keggPW, file = "assets/kegg.rds")Results table
keggPW <- readRDS(file = "assets/kegg.rds")
keggTable <- as_tibble(keggPW)
DT::datatable(keggTable,
filter = "top",
options = list(pageLength = 10)) %>%
DT::formatRound(c("enrichmentScore", "NES", "pvalue", "p.adjust", "qvalue"), 2) %>%
DT::formatStyle("core_enrichment",
"white-space" = "nowrap")Plots
Blobplot
dotplot(keggPW,
showCategory = 10,
title = "Enriched Pathways",
split = ".sign") + facet_grid(. ~ .sign)Category Netplot
Showing 5 categories
cnetplot(keggPW, categorySize="pvalue", foldChange=kegg_gene_list, showCategory = 5)
#> Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
#> The foldChange parameter will be removed in the next version.Ridgeplot
ridgeplot(keggPW) + labs(x = "enrichment distribution")KEGG GSEA plot
For the top 20 categories
set1
gseaplot(keggPW, by = "all", title = keggPW$Description[1], geneSetID = 1)Pathway
pathview(gene.data=kegg_gene_list, pathway.id="hsa04814", species = orgCode)
knitr::include_graphics("hsa04814.pathview.png")Save RData
Saving the entire session as well as the DEseq2 object (as
rds) for doing the overlap analyses.
save.image(file = "assets/hTS_CT27_ST3D_PLAC1_KD.RData")
save(PLAC1vsSCR.up.pce1, PLAC1vsSCR.dw.pce1, file = "assets/hTS_CT27_ST3D_PLAC1_KD_genelists.RData")MultiQC report:
NOT YET MultiQC report is available at this link
Session Information
sessionInfo()
#> R version 4.2.2 (2022-10-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] pathview_1.38.0 DOSE_3.24.2
#> [3] GOSemSim_2.24.0 data.table_1.14.6
#> [5] org.Rn.eg.db_3.16.0 org.Hs.eg.db_3.16.0
#> [7] enrichplot_1.18.4 clusterProfiler_4.6.2
#> [9] enrichR_3.1 DT_0.27
#> [11] htmlwidgets_1.5.4 kableExtra_1.3.4
#> [13] scales_1.2.1 biomaRt_2.54.1
#> [15] cowplot_1.1.1 plotly_4.10.1
#> [17] TissueEnrich_1.18.0 GSEABase_1.60.0
#> [19] graph_1.76.0 annotate_1.76.0
#> [21] XML_3.99-0.12 AnnotationDbi_1.60.0
#> [23] ensurer_1.1 reshape2_1.4.4
#> [25] RColorBrewer_1.1-3 ggrepel_0.9.3
#> [27] pheatmap_1.0.12 DESeq2_1.38.3
#> [29] SummarizedExperiment_1.28.0 Biobase_2.58.0
#> [31] MatrixGenerics_1.10.0 matrixStats_0.63.0
#> [33] GenomicRanges_1.50.1 GenomeInfoDb_1.34.3
#> [35] IRanges_2.32.0 S4Vectors_0.36.0
#> [37] BiocGenerics_0.44.0 forcats_0.5.2
#> [39] stringr_1.5.0 dplyr_1.0.10
#> [41] purrr_1.0.1 readr_2.1.3
#> [43] tidyr_1.3.0 tibble_3.1.8
#> [45] ggplot2_3.4.0 tidyverse_1.3.2
#>
#> loaded via a namespace (and not attached):
#> [1] utf8_1.2.2 tidyselect_1.2.0 RSQLite_2.2.18
#> [4] grid_4.2.2 BiocParallel_1.32.1 scatterpie_0.1.8
#> [7] munsell_0.5.0 codetools_0.2-18 withr_2.5.0
#> [10] colorspace_2.0-3 filelock_1.0.2 highr_0.9
#> [13] knitr_1.41 rstudioapi_0.14 labeling_0.4.2
#> [16] KEGGgraph_1.58.3 GenomeInfoDbData_1.2.9 polyclip_1.10-4
#> [19] bit64_4.0.5 farver_2.1.1 downloader_0.4
#> [22] vctrs_0.6.2 treeio_1.22.0 generics_0.1.3
#> [25] gson_0.1.0 xfun_0.35 timechange_0.1.1
#> [28] BiocFileCache_2.6.1 R6_2.5.1 graphlayouts_0.8.4
#> [31] rmdformats_1.0.4 locfit_1.5-9.7 bitops_1.0-7
#> [34] cachem_1.0.6 fgsea_1.24.0 gridGraphics_0.5-1
#> [37] DelayedArray_0.24.0 assertthat_0.2.1 vroom_1.6.0
#> [40] ggraph_2.1.0 googlesheets4_1.0.1 gtable_0.3.1
#> [43] tidygraph_1.2.3 rlang_1.1.1 systemfonts_1.0.4
#> [46] splines_4.2.2 lazyeval_0.2.2 gargle_1.2.1
#> [49] broom_1.0.1 yaml_2.3.6 modelr_0.1.10
#> [52] crosstalk_1.2.0 backports_1.4.1 qvalue_2.30.0
#> [55] tools_4.2.2 bookdown_0.33 ggplotify_0.1.0
#> [58] ellipsis_0.3.2 jquerylib_0.1.4 ggridges_0.5.4
#> [61] Rcpp_1.0.9 plyr_1.8.8 progress_1.2.2
#> [64] zlibbioc_1.44.0 RCurl_1.98-1.9 prettyunits_1.1.1
#> [67] viridis_0.6.2 haven_2.5.1 fs_1.5.2
#> [70] magrittr_2.0.3 reprex_2.0.2 googledrive_2.0.0
#> [73] ggnewscale_0.4.8 hms_1.1.2 patchwork_1.1.2
#> [76] evaluate_0.18 xtable_1.8-4 HDO.db_0.99.1
#> [79] readxl_1.4.1 gridExtra_2.3 compiler_4.2.2
#> [82] shadowtext_0.1.2 crayon_1.5.2 htmltools_0.5.3
#> [85] ggfun_0.0.9 tzdb_0.3.0 geneplotter_1.76.0
#> [88] aplot_0.1.10 lubridate_1.9.0 DBI_1.1.3
#> [91] tweenr_2.0.2 dbplyr_2.2.1 MASS_7.3-58.1
#> [94] rappdirs_0.3.3 Matrix_1.5-3 cli_3.4.1
#> [97] parallel_4.2.2 igraph_1.3.5 pkgconfig_2.0.3
#> [100] xml2_1.3.3 ggtree_3.6.2 svglite_2.1.0
#> [103] bslib_0.4.1 webshot_0.5.4 XVector_0.38.0
#> [106] rvest_1.0.3 yulab.utils_0.0.6 digest_0.6.30
#> [109] Biostrings_2.66.0 rmarkdown_2.18 cellranger_1.1.0
#> [112] fastmatch_1.1-3 tidytree_0.4.2 curl_4.3.3
#> [115] rjson_0.2.21 lifecycle_1.0.3 nlme_3.1-160
#> [118] jsonlite_1.8.3 viridisLite_0.4.1 fansi_1.0.3
#> [121] pillar_1.8.1 lattice_0.20-45 KEGGREST_1.38.0
#> [124] fastmap_1.1.0 httr_1.4.4
#> [ reached getOption("max.print") -- omitted 11 entries ]