library(Seurat)
library(ggplot2)
## library(SeuratData)
## The Seurat vignettes show how to load hdf5 files and the metadata required.
## They are using a pacreas dataset, I presume it is the 'panc8' data in SeuratData.
## Thus I will poke at that to see if I can learn about how they handle the metadata.
la_data <- Seurat::Read10X("khamidza_158974/KH_LA/outs/filtered_feature_bc_matrix")
lp_data <- Seurat::Read10X("khamidza_158974/KH_LP/outs/filtered_feature_bc_matrix")
lps_data <- Seurat::Read10X("khamidza_158974/KH_LPS/outs/filtered_feature_bc_matrix")
ns_data <- Seurat::Read10X("khamidza_158974/KH_NS/outs/filtered_feature_bc_matrix")
la <- Seurat::CreateSeuratObject(la_data, project="la")
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
## I think this is redundant, but interesting for me to understand sc metadata
cluster_letters <- as.factor(LETTERS[Idents(object=all)])
names(cluster_letters) <- colnames(x=all)
stimulated <- as.character(cluster_letters)
ns_idx <- stimulated == "D"
stimulated[ns_idx] <- "unstimulated"
stimulated[!ns_idx] <- "stimulated"
stimulated <- as.factor(stimulated)
stimulation <- as.character(cluster_letters)
ns_idx <- stimulation == "D"
stimulation[ns_idx] <- "Unstimulated"
la_idx <- stimulation == "A"
stimulation[la_idx] <- "L+Ado"
lp_idx <- stimulation == "B"
stimulation[lp_idx] <- "L+PGE2"
lps_idx <- stimulation == "C"
stimulation[lps_idx] <- "LPS"
stimulation <- as.factor(stimulation)
all <- AddMetaData(
object=all,
metadata=cluster_letters,
col.name="cluster_letters")
all <- AddMetaData(
object=all,
metadata=stimulated,
col.name="stimulatedp")
all <- AddMetaData(
object=all,
metadata=stimulation,
col.name="stimulation")
all[["percent_mt"]] <- PercentageFeatureSet(all, pattern="^MT-")
VlnPlot(all, features="nFeature_RNA", pt.size=0.1)
all <- NormalizeData(object=all)
all <- FindVariableFeatures(object=all)
all <- ScaleData(object=all)
## Centering and scaling data matrix
## PC_ 1
## Positive: PLIN2, SGK1, DAB2, GLRX, RAP2B, MBP, ZFYVE16, ZFP36L2, SQSTM1, ZFP36L1
## ZMIZ1, BRI3, KIAA0930, MAP3K2, HMOX1, CALM2, EVI2B, JAKMIP2, NCEH1, CD84
## CYP1B1, NOP10, FABP5, HS3ST2, ARHGAP18, CNBP, DDI2, H2AFY, CORO1C, RAB11FIP1
## Negative: IL1B, ISG15, CCL4, EREG, SOD2, CCL3, PTGS2, IFIT3, GBP1, TNFAIP6
## ISG20, IL8, MT2A, MX1, TNIP3, CCL20, SERPINB9, IL1RN, BTG1, IER3
## INHBA, NAMPT, IL7R, TNFAIP3, MARCKS, SAMD9, SAMD9L, IFI44, ZC3HAV1, DDX58
## PC_ 2
## Positive: FTL, TMSB10, RPS8, LGALS1, CSTB, LGALS3, DBI, RPL26, CD63, SRGN
## SH3BGRL3, TXN, PRDX1, NACA, S100A9, CTSD, CTSZ, TSPO, LIPA, GM2A
## BRI3, ACP5, DYNLL1, MARCKS, CYP1B1, IGSF6, NCF2, CALM2, TXNRD1, C5AR1
## Negative: MCOLN2, KCNQ1OT1, RP11-561O23.5, MIR29A, BX255923.3, RP11-214O1.3, LL22NC03-2H8.5, AKAP12, ATP8B4, PAPLN
## AC005477.1, N4BP2L1, HSPA6, ITGA1, FAM65C, RAVER2, AIRN, NABP1, RBPMS, MS4A1
## TMC5, MIR503HG, GATA2, STX19, KB-1507C5.4, EXTL3-AS1, RP11-30L15.6, COBL, RP11-665N17.4, LINC01125
## PC_ 3
## Positive: CREM, THBS1, ATP1B3, THAP2, CD300E, BNIP3L, G0S2, ATP1B1, DUSP4, SAMSN1
## INHBA, VEGFA, HES4, TIMP1, MMP19, GK, ELL2, OLR1, TFPI, CXCR4
## LYPD3, CHMP1B, CSRP2, SATB1, DUSP2, PLAUR, STK4, ID2, TFRC, TMEM2
## Negative: CCL2, NBN, TNF, BHLHE41, NCF2, SNX10, RDX, CCL8, NCF1, SLAMF7
## SGTB, DDX21, P2RX7, PLEK, CXCL10, KLF6, AK4, IL8, CCL4, CCL3
## IRF8, ACSL1, CDC42EP3, CSF1, TBC1D9, TNFAIP2, PSTPIP2, IL1A, RIPK2, FAM129A
## PC_ 4
## Positive: CD52, FBP1, CCL20, CHI3L1, CD63, TSPO, RP11-20G13.3, IGSF6, APOC1, CIR1
## PRDX1, TXN, EIF1B, DBI, ACAT2, OLR1, CLEC4E, HCST, G0S2, TMSB10
## IL23A, CSTB, RPL26, TIMP1, CYCS, NACA, DYNLL1, LGALS2, PHLDA2, SELK
## Negative: RNASE1, FUCA1, SEPP1, STAB1, CXCL10, KCTD12, LACC1, IRG1, TGFBI, MPEG1
## CTSZ, CXCL11, FPR3, RIN2, MS4A6A, MMP9, FCN1, ARL4C, CD163, MAFB
## LMNA, GBP4, RSAD2, FGL2, CD14, RGL1, CTSD, LHFPL2, SLCO2B1, CTSC
## PC_ 5
## Positive: CXCL3, CXCL2, SERPINB2, CXCL1, IL1A, CCL3L3, DUSP6, CCL3, IL8, CCL20
## PHLDA1, INHBA, IER3, MAFB, CSF3, CCL2, PTGS2, MLTK, IL1R1, CXCL5
## CCL4, FCN1, PLAUR, CTSD, PNP, GJB2, HBEGF, IL10, CCL4L1, F3
## Negative: TNFSF13B, TNFSF10, CXCL10, IFIT2, CCR7, IDO1, CXCL11, GBP4, SAMD9L, HERC5
## NUB1, NT5C3A, WARS, A2M, IFIT1, RSAD2, RABGAP1L, LIPA, ATF3, RP11-20G13.3
## TNFAIP2, CD38, APOC1, CIR1, TCF4, IL27, GBP5, SAT1, REL, GBP1
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 17306
## Number of edges: 530664
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8296
## Number of communities: 10
## Elapsed time: 3 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:25:18 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:25:18 Read 17306 rows and found 20 numeric columns
## 15:25:18 Using Annoy for neighbor search, n_neighbors = 30
## 15:25:18 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:25:21 Writing NN index file to temp file /tmp/RtmpAT6sfS/file771224dbcf382
## 15:25:21 Searching Annoy index using 1 thread, search_k = 3000
## 15:25:28 Annoy recall = 100%
## 15:25:28 Commencing smooth kNN distance calibration using 1 thread
## 15:25:30 Initializing from normalized Laplacian + noise
## 15:25:31 Commencing optimization for 200 epochs, with 759148 positive edges
## 15:25:41 Optimization finished
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
## Try to set some metrics to drop crappy stuff.
FeatureScatter(all, feature1="nCount_RNA", feature2="percent_mt")
## Suggests that we want ~ < 15% mt
## Suggests we want > 1000 nCounts
FeatureScatter(all, feature1="nCount_RNA", feature2="nFeature_RNA")
## Suggests we want > 200 nFeature
FeatureScatter(all, feature1="percent_mt", feature2="nFeature_RNA")
all_sub <- subset(all,
subset=nFeature_RNA > 200 & percent_mt < 15)
all_sub <- NormalizeData(object=all_sub)
all_sub <- FindVariableFeatures(object=all_sub)
most_var <- head(VariableFeatures(all_sub), 30)
variable_plot <- VariableFeaturePlot(all_sub)
variable_plot <- LabelPoints(plot=variable_plot, points=most_var, repel=TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning: Transformation introduced infinite values in continuous x-axis
## Centering and scaling data matrix
## PC_ 1
## Positive: PLIN2, SGK1, GLRX, DAB2, CYP1B1, BRI3, ZFYVE16, MBP, SQSTM1, FTL
## RAP2B, ZFP36L2, CALM2, ZFP36L1, KIAA0930, ZMIZ1, MAP3K2, NOP10, CSTB, HMOX1
## CD84, JAKMIP2, FABP5, EVI2B, NCEH1, FBP1, CNBP, H2AFY, ARHGAP18, DDI2
## Negative: ISG15, IL1B, CCL4, EREG, SOD2, IFIT3, CCL3, PARP14, PTGS2, GBP1
## ISG20, TNFAIP6, MARCKS, IL8, MT2A, MX1, TNIP3, BTG1, SERPINB9, IER3
## SRGN, IL1RN, CCL20, XAF1, CD44, C15orf48, NAMPT, WTAP, SAMD9L, TNFAIP3
## PC_ 2
## Positive: CREM, THBS1, ATP1B3, THAP2, CD300E, BNIP3L, G0S2, ATP1B1, SAMSN1, DUSP4
## FTL, VEGFA, INHBA, TIMP1, HES4, GK, OLR1, ELL2, MMP19, TFPI
## PLAUR, PRDX1, LYPD3, SATB1, STK4, ID2, CHMP1B, DUSP2, CSRP2, CXCR4
## Negative: CCL2, NCF2, NBN, SNX10, RDX, BHLHE41, TNF, NCF1, CYP1B1, SGTB
## SLAMF7, CCL8, PLEK, DDX21, P2RX7, KLF6, ACSL1, IL8, TNFAIP2, AK4
## CDC42EP3, CXCL10, CSF1, TBC1D9, IRF8, PSTPIP2, CCL4, CCL3, FAM129A, PTPRE
## PC_ 3
## Positive: CXCL3, CCL20, CXCL1, CXCL2, TNFAIP6, SERPINB2, TIMP1, FPR1, IL1B, IL8
## S100A8, C15orf48, HCST, CCL7, IL1A, MT1X, CCL4, RETN, CCL3, ALOX5AP
## CLEC4E, FPR2, CSF3, LIPN, CAMP, PHLDA2, C19orf59, GAPT, TNFRSF4, MT1F
## Negative: RNASE1, CTSZ, LIPA, GM2A, FUCA1, CTSD, LGMN, FNIP2, NABP1, CCR7
## APOE, MACF1, LMNA, MMP9, A2M, SCD, ACP5, EMP1, MALAT1, SEPP1
## SAMD4A, RSAD2, ADAMDEC1, GBP4, WARS, IFIT2, APOL6, CXCL10, ANXA1, OGFRL1
## PC_ 4
## Positive: RNASE1, FUCA1, FCN1, CTSD, LACC1, KCTD12, SEPP1, TGFBI, MAFB, MPEG1
## CCL2, STAB1, MS4A6A, IRG1, CD14, FPR3, TNFRSF1B, RIN2, ARL4C, FGL2
## CXCL10, CTSZ, RGL1, CD163, WIPF1, MMP9, LHFPL2, SLCO2B1, CXCL11, RASSF2
## Negative: CD63, FBP1, CD52, TXN, RP11-20G13.3, CSTB, CHI3L1, PRDX1, CIR1, APOC1
## PSD3, TXNRD1, LGALS3, SPP1, REL, IGSF6, APOE, GLRX, MT-ND6, TMEM14C
## ACAT2, BCL2A1, EIF1B, TMEM14B, FABP5, CHIT1, CDK5RAP2, ATP1B1, CES1, CCL20
## PC_ 5
## Positive: TNFSF13B, TNFSF10, CXCL10, IFIT2, HERC5, MT2A, CXCL11, IDO1, GBP4, NT5C3A
## NCF1, GBP5, IFIT1, LILRB2, TCF4, S100A9, CD86, RABGAP1L, NUB1, LILRB1
## RSAD2, CD38, IFITM3, MNDA, RNF213, LYSMD2, APOL6, SAMD9L, FAM26F, WARS
## Negative: CXCL2, SERPINB2, CXCL3, IL1A, CTSL, DUSP6, CCL3L3, INHBA, TNF, CCL3
## CXCL1, PHLDA1, IL8, MLTK, EMP1, ASPH, CSF2, FNIP2, F3, INSIG1
## IL1R1, IL10, CCL20, NRIP3, MIR155HG, RGCC, IER3, TNFSF14, TSC22D1, CTSD
all_sub <- JackStraw(all_sub, num.replicate=100)
all_sub <- ScoreJackStraw(all_sub, dims=1:20)
JackStrawPlot(all_sub, dims=1:15)
## Warning: Removed 21000 rows containing missing values (geom_point).
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12893
## Number of edges: 388733
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7907
## Number of communities: 9
## Elapsed time: 2 seconds
## 15:35:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:35:32 Read 12893 rows and found 20 numeric columns
## 15:35:32 Using Annoy for neighbor search, n_neighbors = 30
## 15:35:32 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:35:34 Writing NN index file to temp file /tmp/RtmpAT6sfS/file771224e90442d
## 15:35:34 Searching Annoy index using 1 thread, search_k = 3000
## 15:35:38 Annoy recall = 100%
## 15:35:39 Commencing smooth kNN distance calibration using 1 thread
## 15:35:41 Initializing from normalized Laplacian + noise
## 15:35:41 Commencing optimization for 200 epochs, with 544078 positive edges
## 15:35:49 Optimization finished
DefaultAssay(all) <- "RNA"
markers <- FindConservedMarkers(all_sub, ident.1=4, grouping.var="stimulation", verbose=TRUE)
## Warning: Identity: 4 not present in group LPS. Skipping LPS
## Warning: Identity: 4 not present in group L+Ado. Skipping L+Ado
## Testing group L+PGE2: (4) vs (2_L+PGE2, 1_L+PGE2, 3_L+PGE2, 5_L+PGE2, 0_L+PGE2, 7_L+PGE2, 8_L+PGE2, 6_L+PGE2)
## Testing group Unstimulated: (4) vs (6, 8, 7, 1, 3, 0)
## Note that I renamed them according to Dave's suggestion.
lps_vs_unstim <- FindMarkers(all_sub,
ident.1="LPS",
ident.2="Unstimulated",
group.by="stimulation",
min.pct=0.25)
head(lps_vs_unstim, n=20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## CCL4 0 4.072 0.997 0.063 0
## CCL3 0 3.693 0.995 0.131 0
## IL1B 0 3.379 0.993 0.215 0
## TNFAIP6 0 2.866 0.920 0.034 0
## PTGS2 0 2.840 0.851 0.037 0
## CCL20 0 2.711 0.761 0.020 0
## ISG15 0 2.690 0.956 0.097 0
## IFIT3 0 2.565 0.963 0.052 0
## IL8 0 2.457 0.999 0.833 0
## CXCL1 0 2.450 0.531 0.020 0
## GBP1 0 2.377 0.916 0.063 0
## RSAD2 0 2.363 0.737 0.012 0
## EREG 0 2.334 0.968 0.237 0
## CCL5 0 2.333 0.712 0.015 0
## CXCL10 0 2.270 0.408 0.002 0
## TNIP3 0 2.228 0.868 0.043 0
## SOD2 0 2.212 0.999 0.822 0
## IL1A 0 2.192 0.702 0.026 0
## IFIT2 0 2.017 0.764 0.013 0
## SLAMF7 0 1.969 0.941 0.209 0
ade_vs_unstim <- FindMarkers(all_sub,
ident.1="L+Ado",
ident.2="Unstimulated",
group.by="stimulation",
min.pct=0.25)
head(ade_vs_unstim, n=20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## ISG15 0 4.042 0.995 0.097 0
## CCL20 0 3.617 0.921 0.020 0
## CCL4 0 3.449 0.980 0.063 0
## EREG 0 3.413 0.997 0.237 0
## CCL3 0 3.300 0.968 0.131 0
## INHBA 0 3.213 0.866 0.008 0
## IL1B 0 3.208 0.984 0.215 0
## PTGS2 0 3.074 0.922 0.037 0
## G0S2 0 2.941 0.839 0.055 0
## THBS1 0 2.931 0.889 0.059 0
## TNFAIP6 0 2.856 0.931 0.034 0
## IL1RN 0 2.630 0.922 0.111 0
## MT2A 0 2.566 0.954 0.260 0
## ISG20 0 2.517 0.931 0.022 0
## TNIP3 0 2.509 0.900 0.043 0
## IFIT3 0 2.479 0.949 0.052 0
## GBP1 0 2.299 0.920 0.063 0
## CXCL1 0 2.249 0.448 0.020 0
## SOD2 0 2.218 1.000 0.822 0
## CXCL3 0 2.188 0.817 0.359 0
pge_vs_unstim <- FindMarkers(all_sub,
ident.1="L+PGE2",
ident.2="Unstimulated",
group.by="stimulation",
min.pct=0.25)
head(pge_vs_unstim, n=20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## IL1B 0 3.138 0.992 0.215 0
## ISG15 0 3.078 0.977 0.097 0
## CCL4 0 2.973 0.959 0.063 0
## EREG 0 2.912 0.984 0.237 0
## CCL20 0 2.728 0.792 0.020 0
## CCL3 0 2.633 0.938 0.131 0
## PTGS2 0 2.589 0.858 0.037 0
## TNFAIP6 0 2.583 0.903 0.034 0
## INHBA 0 2.334 0.619 0.008 0
## MT2A 0 2.293 0.957 0.260 0
## G0S2 0 2.275 0.744 0.055 0
## IFIT3 0 2.257 0.929 0.052 0
## THBS1 0 2.171 0.712 0.059 0
## ISG20 0 2.133 0.847 0.022 0
## GBP1 0 2.124 0.885 0.063 0
## CCL5 0 2.116 0.638 0.015 0
## RSAD2 0 2.107 0.683 0.012 0
## CXCL1 0 2.102 0.489 0.020 0
## SOD2 0 2.086 0.999 0.822 0
## IL7R 0 2.013 0.698 0.020 0
ado_lps <- FindMarkers(all_sub,
ident.1="L+Ado",
ident.2="LPS",
group.by="stimulation", min.pct=0.25)
head(ado_lps, n=20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## THBS1 0.000e+00 2.2714 0.889 0.181 0.000e+00
## G0S2 0.000e+00 1.9418 0.839 0.315 0.000e+00
## CREM 0.000e+00 1.8509 0.892 0.200 0.000e+00
## INHBA 0.000e+00 1.5476 0.866 0.366 0.000e+00
## ATP1B3 0.000e+00 1.4492 0.926 0.569 0.000e+00
## CD300E 0.000e+00 1.3897 0.700 0.160 0.000e+00
## TIMP1 0.000e+00 1.3845 0.945 0.745 0.000e+00
## ISG15 0.000e+00 1.3525 0.995 0.956 0.000e+00
## PLAUR 0.000e+00 1.1642 0.936 0.675 0.000e+00
## EREG 0.000e+00 1.0787 0.997 0.968 0.000e+00
## SAMSN1 0.000e+00 1.0552 0.832 0.369 0.000e+00
## BTG1 0.000e+00 0.8131 0.980 0.866 0.000e+00
## PRDX1 0.000e+00 0.7540 0.981 0.930 0.000e+00
## FTH1 0.000e+00 0.4474 1.000 1.000 0.000e+00
## NCF2 0.000e+00 -0.9195 0.644 0.933 0.000e+00
## CYP1B1 0.000e+00 -0.9657 0.892 0.991 0.000e+00
## CCL2 0.000e+00 -1.5774 0.523 0.923 0.000e+00
## BHLHE41 3.782e-292 -1.1658 0.061 0.495 1.238e-287
## NBN 1.461e-275 -0.8257 0.602 0.899 4.784e-271
## REL 2.826e-268 0.8714 0.919 0.750 9.251e-264
pge2_lps <- FindMarkers(all_sub,
ident.1="L+PGE2",
ident.2="LPS",
group.by="stimulation", min.pct=0.25)
head(pge2_lps, n=20)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## THBS1 0.000e+00 1.5116 0.712 0.181 0.000e+00
## FTH1 0.000e+00 0.3905 1.000 1.000 0.000e+00
## CCL3 0.000e+00 -1.0606 0.938 0.995 0.000e+00
## CCL4 0.000e+00 -1.0991 0.959 0.997 0.000e+00
## TNF 2.071e-289 -1.4675 0.088 0.493 6.781e-285
## G0S2 2.485e-283 1.2761 0.744 0.315 8.134e-279
## IL8 2.055e-252 -0.5784 0.996 0.999 6.728e-248
## FTL 4.242e-217 0.3096 1.000 1.000 1.389e-212
## SLAMF7 1.680e-212 -0.6433 0.762 0.941 5.499e-208
## NBN 3.916e-197 -0.6211 0.691 0.899 1.282e-192
## PRDX1 2.295e-191 0.4720 0.978 0.930 7.514e-187
## TIMP1 1.439e-176 0.7003 0.905 0.745 4.711e-172
## PLEK 1.671e-175 -0.7291 0.392 0.708 5.472e-171
## EREG 2.041e-173 0.5781 0.984 0.968 6.681e-169
## CYBA 2.862e-172 0.4637 0.953 0.857 9.370e-168
## PTX3 1.792e-171 -1.1279 0.287 0.620 5.866e-167
## RPS2 1.041e-164 0.4049 0.977 0.924 3.408e-160
## FNIP2 1.925e-162 -0.6648 0.511 0.783 6.300e-158
## MIR155HG 1.778e-150 -0.6839 0.358 0.668 5.820e-146
## CD300E 7.293e-144 0.8286 0.466 0.160 2.388e-139
FeaturePlot(all_sub, features=c("THBS1"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
FeaturePlot(all_sub, features=c("PLEK"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
FeaturePlot(all_sub, features=c("FNIP2"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
FeaturePlot(all_sub, features=c("G0S2"),
split.by="stimulation", max.cutoff=3, cols=c("darkgreen", "darkred"))
This looks to me like a bar plot of the top 10 up/down genes. But I have no clue where these numbers are coming from, a range of -5 < x < 5 logFC just does not seem to me to exist in this data. The range of logFCs on my sheet goes from -1.5 < x < 1.7. In addition, I do not see where in the Seurat data structures the error bars are coming from.
ado_lps <- FindMarkers(all_sub, test.use="DESeq2",
ident.1="L+Ado",
ident.2="LPS",
group.by="stimulation", min.pct=0.25)
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
Reading from the text, this appears to me to be from the total cell RNASeq? No, that cannot be true, the bulk data did not compare these things. It must be this data.
I did generate a couple of Venns using the scRNA data and got similar numbers in the opposite orientation.
ado <- rownames(ado_lps)
ado_up <- ado[ado_lps[["avg_logFC"]] > 0]
ado_down <- ado[ado_lps[["avg_logFC"]] < 0]
pge2 <- rownames(pge2_lps)
pge_up <- pge2[pge2_lps[["avg_logFC"]] > 0]
pge_down <- pge2[pge2_lps[["avg_logFC"]] < 0]
library(Vennerable)
ups <- list(ado_up, pge_up)
up_data <- Venn(ups, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2)
plot(up_data, doWeights=FALSE)
downs <- list(ado_down, pge_down)
v_data <- Venn(downs, SetNames=c("LPS+Ado", "LPS+PGE2"), numberOfSets=2)
plot(v_data, doWeights=FALSE)
input <- thbs1$data
thbs1_ggstats <- ggstatsplot::ggbetweenstats(
data=input, x=ident, y=THBS1,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
## Registered S3 methods overwritten by 'broom.mixed':
## method from
## augment.lme broom
## augment.merMod broom
## glance.lme broom
## glance.merMod broom
## glance.stanreg broom
## tidy.brmsfit broom
## tidy.gamlss broom
## tidy.lme broom
## tidy.merMod broom
## tidy.rjags broom
## tidy.stanfit broom
## tidy.stanreg broom
## Registered S3 method overwritten by 'jmvcore':
## method from
## as.data.frame.Array DelayedArray
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Registered S3 method overwritten by 'DescTools':
## method from
## reorder.factor gdata
## Note: Bartlett's test for homogeneity of variances for factor ident: p-value = < 0.001
##
## pairwise.comparisons=TRUE, # display significant pairwise comparisons
## p.adjust.method="bonferroni", # method for adjusting p-values for multiple comparisons
## ## adding new components to `ggstatsplot` default
## ##ggplot.component = list(ggplot2::scale_y_continuous(sec.axis = ggplot2::dup_axis())),
## k=3, title.prefix="THBS1", palette="default_jama",
## package="ggsci", messages=TRUE, plotgrid.args=list(nrow=2))
vegfa <- VlnPlot(all, features="VEGFA", group.by="stimulation", pt.size=0.1)
vegfa
vegfa_ggstats <- ggstatsplot::ggbetweenstats(
data=vegfa$data, x=ident, y=VEGFA,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
## Note: Bartlett's test for homogeneity of variances for factor ident: p-value = < 0.001
##
## notch went outside hinges. Try setting notch=FALSE.
cd300e <- VlnPlot(all, features="CD300E", group.by="stimulation", pt.size=0.1)
cd300e_ggstats <- ggstatsplot::ggbetweenstats(
data=cd300e$data, x=ident, y=CD300E,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
## Note: Bartlett's test for homogeneity of variances for factor ident: p-value = < 0.001
##
## notch went outside hinges. Try setting notch=FALSE.
plaur <- VlnPlot(all, features="PLAUR", group.by="stimulation", pt.size=0.1)
plaur_ggstats <- ggstatsplot::ggbetweenstats(
data=plaur$data, x=ident, y=PLAUR,
notch=TRUE, mean.ci=TRUE, k=3,
pairwise.comparisons=TRUE)
## Note: Bartlett's test for homogeneity of variances for factor ident: p-value = < 0.001
##
## notch went outside hinges. Try setting notch=FALSE.
R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: Vennerable(v.3.1.0.9000), ggplot2(v.3.3.0), Seurat(v.3.1.4), hpgltools(v.1.0), Biobase(v.2.46.0) and BiocGenerics(v.0.32.0)
loaded via a namespace (and not attached): rcompanion(v.2.3.25), TMB(v.1.7.16), pander(v.0.6.3), graphlayouts(v.0.6.0), pbapply(v.1.4-2), lattice(v.0.20-41), haven(v.2.2.0), expm(v.0.999-4), vctrs(v.0.2.4), usethis(v.1.6.0), mgcv(v.1.8-31), blob(v.1.2.1), survival(v.3.1-12), RBGL(v.1.62.1), later(v.1.0.0), nloptr(v.1.2.2.1), DBI(v.1.1.0), rappdirs(v.0.3.1), uwot(v.0.1.8), jpeg(v.0.1-8.1), zlibbioc(v.1.32.0), MatrixModels(v.0.4-1), sjmisc(v.2.8.4), htmlwidgets(v.1.5.1), mvtnorm(v.1.1-0), future(v.1.16.0), inline(v.0.3.15), broomExtra(v.3.0.0), leiden(v.0.3.3), pairwiseComparisons(v.0.3.0), pbkrtest(v.0.4-8.6), irlba(v.2.3.3), ez(v.4.4-0), tidygraph(v.1.1.2), Rcpp(v.1.0.4.6), KernSmooth(v.2.23-16), promises(v.1.1.0), gdata(v.2.18.0), DelayedArray(v.0.12.3), limma(v.3.42.2), pkgload(v.1.0.2), statsExpressions(v.0.4.0), graph(v.1.64.0), Hmisc(v.4.4-0), RSpectra(v.0.16-0), fs(v.1.4.1), fastmatch(v.1.1-0), fastGHQuad(v.1.0), mnormt(v.1.5-6), digest(v.0.6.25), png(v.0.1-7), jcolors(v.0.0.4), sctransform(v.0.2.1), cowplot(v.1.0.0), DOSE(v.3.12.0), ggraph(v.2.0.2), pkgconfig(v.2.0.3), GO.db(v.3.10.0), estimability(v.1.3), iterators(v.1.0.12), minqa(v.1.2.4), reticulate(v.1.15), clusterProfiler(v.3.14.3), SummarizedExperiment(v.1.16.1), rstan(v.2.19.3), nortest(v.1.0-4), modeltools(v.0.2-23), xfun(v.0.13), zoo(v.1.8-7), tidyselect(v.1.0.0), performance(v.0.4.5), reshape2(v.1.4.4), purrr(v.0.3.4), ica(v.1.0-2), viridisLite(v.0.3.0), rtracklayer(v.1.46.0), pkgbuild(v.1.0.6), rlang(v.0.4.5), groupedstats(v.0.2.2), glue(v.1.4.0), metap(v.1.3), RColorBrewer(v.1.1-2), palr(v.0.2.0), pals(v.1.6), variancePartition(v.1.16.1), modelr(v.0.1.6), matrixStats(v.0.56.0), emmeans(v.1.4.5), ggcorrplot(v.0.1.3), multcompView(v.0.1-8), stringr(v.1.4.0), europepmc(v.0.3), sva(v.3.34.0), ggsignif(v.0.6.0), bayestestR(v.0.5.3), DESeq2(v.1.26.0), LaplacesDemon(v.16.1.4), labeling(v.0.3), gbRd(v.0.4-11), mutoss(v.0.1-12), httpuv(v.1.5.2), TH.data(v.1.0-10), DO.db(v.2.9), annotate(v.1.64.0), jsonlite(v.1.6.1), XVector(v.0.26.0), bit(v.1.1-15.2), mime(v.0.9), gridExtra(v.2.3), gplots(v.3.0.3), Rsamtools(v.2.2.3), stringi(v.1.4.6), insight(v.0.8.2), processx(v.3.4.2), logspline(v.2.1.15), bitops(v.1.0-6), cli(v.2.0.2), Rdpack(v.0.11-1), maps(v.3.3.0), RSQLite(v.2.2.0), tidyr(v.1.0.2), libcoin(v.1.0-5), broom.mixed(v.0.2.4), data.table(v.1.12.8), correlation(v.0.2.0), rstudioapi(v.0.11), GenomicAlignments(v.1.22.1), nlme(v.3.1-145), qvalue(v.2.18.0), locfit(v.1.5-9.4), listenv(v.0.8.0), miniUI(v.0.1.1.1), scico(v.1.1.0), gridGraphics(v.0.5-0), dbplyr(v.1.4.2), readxl(v.1.3.1), sessioninfo(v.1.1.1), lifecycle(v.0.2.0), cellranger(v.1.1.0), munsell(v.0.5.0), mapproj(v.1.2.7), caTools(v.1.18.0), codetools(v.0.2-16), coda(v.0.19-3), GenomeInfoDb(v.1.22.1), lmtest(v.0.9-37), htmlTable(v.1.13.3), triebeard(v.0.3.0), rstantools(v.2.0.0), tidyBF(v.0.2.0), lsei(v.1.2-0), xtable(v.1.8-4), ROCR(v.1.0-7), BiocManager(v.1.30.10), StanHeaders(v.2.19.2), abind(v.1.4-5), farver(v.2.0.3), ggExtra(v.0.9), RANN(v.2.6.1), askpass(v.1.1), GenomicRanges(v.1.38.0), bibtex(v.0.4.2.2), sjstats(v.0.17.9), RcppAnnoy(v.0.0.16), patchwork(v.1.0.0), tibble(v.3.0.0), dichromat(v.2.0-0), cluster(v.2.1.0), future.apply(v.1.4.0), zeallot(v.0.1.0), Matrix(v.1.2-18), ellipsis(v.0.3.0), prettyunits(v.1.1.1), ggridges(v.0.5.2), prismatic(v.0.2.0), EMT(v.1.1), colorRamps(v.2.3), igraph(v.1.2.5), multtest(v.2.42.0), fgsea(v.1.12.0), sjlabelled(v.1.1.3), remotes(v.2.1.1), TFisher(v.0.2.0), paletteer(v.1.1.0), parameters(v.0.6.1), testthat(v.2.3.2), mc2d(v.0.1-18), htmltools(v.0.4.0), BiocFileCache(v.1.10.2), yaml(v.2.2.1), ipmisc(v.2.0.0), GenomicFeatures(v.1.38.2), loo(v.2.2.0), plotly(v.4.9.2.1), XML(v.3.99-0.3), foreign(v.0.8-76), withr(v.2.1.2), fitdistrplus(v.1.0-14), BiocParallel(v.1.20.1), BayesFactor(v.0.9.12-4.2), bit64(v.0.9-7), effectsize(v.0.3.0), jmvcore(v.1.2.5), multcomp(v.1.4-13), foreach(v.1.5.0), Biostrings(v.2.54.0), GOSemSim(v.2.12.1), rsvd(v.1.0.3), devtools(v.2.3.0), memoise(v.1.1.0), evaluate(v.0.14), rio(v.0.5.16), forcats(v.0.5.0), geneplotter(v.1.64.0), callr(v.3.4.3), ps(v.1.3.2), metafor(v.2.4-0), curl(v.4.3), fansi(v.0.4.1), urltools(v.1.7.3), acepack(v.1.4.1), checkmate(v.2.0.0), desc(v.1.2.0), npsurv(v.0.4-0), rjson(v.0.2.20), openxlsx(v.4.1.4), jmv(v.1.2.5), ggrepel(v.0.8.2), oompaBase(v.3.2.9), rprojroot(v.1.3-2), tools(v.3.6.1), sandwich(v.2.5-1), magrittr(v.1.5), RCurl(v.1.98-1.1), car(v.3.0-7), ape(v.5.3), ggplotify(v.0.0.5), xml2(v.1.3.1), httr(v.1.4.1), assertthat(v.0.2.1), rmarkdown(v.2.1), boot(v.1.3-24), globals(v.0.12.5), R6(v.2.4.1), nnet(v.7.3-13), progress(v.1.2.2), genefilter(v.1.68.0), Brobdingnag(v.1.2-6), gtools(v.3.8.2), statmod(v.1.4.34), coin(v.1.3-1), repr(v.1.1.0), rematch2(v.2.1.1), splines(v.3.6.1), carData(v.3.0-3), colorspace(v.1.4-1), generics(v.0.0.2), stats4(v.3.6.1), base64enc(v.0.1-3), metaBMA(v.0.6.2), pillar(v.1.4.3), sn(v.1.6-1), tweenr(v.1.0.1), WRS2(v.1.0-0), rvcheck(v.0.1.8), GenomeInfoDbData(v.1.2.2), plyr(v.1.8.6), gtable(v.0.3.0), bdsmatrix(v.1.3-4), zip(v.2.0.4), knitr(v.1.28), latticeExtra(v.0.6-29), biomaRt(v.2.42.1), IRanges(v.2.20.2), fastmap(v.1.0.1), metaplus(v.0.7-11), doParallel(v.1.0.15), AnnotationDbi(v.1.48.0), broom(v.0.5.5), openssl(v.1.4.1), scales(v.1.1.0), backports(v.1.1.6), plotrix(v.3.7-7), S4Vectors(v.0.24.4), lme4(v.1.1-23), enrichplot(v.1.6.1), hms(v.0.5.3), ggforce(v.0.3.1), Rtsne(v.0.15), dplyr(v.0.8.5), shiny(v.1.4.0.2), polyclip(v.1.10-0), grid(v.3.6.1), numDeriv(v.2016.8-1.1), ggstatsplot(v.0.4.0.9000), DescTools(v.0.99.34), bbmle(v.1.0.23.1), lazyeval(v.0.2.2), Formula(v.1.2-3), tsne(v.0.1-3), crayon(v.1.3.4), MASS(v.7.3-51.5), skimr(v.2.1.1), viridis(v.0.5.1), reshape(v.0.8.8), bridgesampling(v.1.0-0), rpart(v.4.1-15) and compiler(v.3.6.1)
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset e0b7cae0257816079fa8e67c59028a10d066fd10
## This is hpgltools commit: Thu Apr 9 14:19:47 2020 -0400: e0b7cae0257816079fa8e67c59028a10d066fd10
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to scrnaseq_analyses-v202004.rda.xz