Directly build the SingleCellExperiment object loaded in Tutorial 01 and the Seurat object loaded in Supplementary Tutorial X:
[33]:
# Seurat builds the per-sample objects; SingleCellExperiment is the target of
# the final conversion. TRUE is spelled out because `T` can be reassigned.
library(Seurat, quietly = TRUE)
library(SingleCellExperiment, quietly = TRUE)
# paths (both end in a slash: file names are appended to them with paste0())
data.path <- '../../data/'
covid.input.path <- paste0(data.path, 'raw/covid_balf/')
Attaching SeuratObject
Attaching package: ‘Seurat’
The following object is masked from ‘package:SummarizedExperiment’:
Assays
Loading
The 12 samples can be downloaded as .h5 files from here. You can also download the cell metadata from here.
We download these files directly in the following cell:
[ ]:
# Make sure the download directory exists: `wget -O` does not create missing
# parent directories, so the metadata download would otherwise fail unnoticed.
dir.create(covid.input.path, recursive = TRUE, showWarnings = FALSE)

# Run a shell command with its output hidden, but stop if it reports failure.
# system() returns the command's exit status; anything non-zero is an error.
run.quietly <- function(cmd) {
  status <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
  if (status != 0) {
    stop('Command failed with exit status ', status, ': ', cmd, call. = FALSE)
  }
  invisible(status)
}

# download the metadata
metadata.link <- 'https://raw.githubusercontent.com/zhangzlab/covid_balf/master/all.cell.annotation.meta.txt'
cmd <- paste(
  'wget', shQuote(metadata.link),
  '-O', shQuote(paste0(covid.input.path, 'metadata.txt'))
)
run.quietly(cmd)

# download the expression data
sample.links <- c(
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339769/suppl/GSM4339769%5FC141%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339770/suppl/GSM4339770%5FC142%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339771/suppl/GSM4339771%5FC143%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339772/suppl/GSM4339772%5FC144%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339773/suppl/GSM4339773%5FC145%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339774/suppl/GSM4339774%5FC146%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475048/suppl/GSM4475048%5FC51%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475049/suppl/GSM4475049%5FC52%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475050/suppl/GSM4475050%5FC100%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475051/suppl/GSM4475051%5FC148%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475052/suppl/GSM4475052%5FC149%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
  'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475053/suppl/GSM4475053%5FC152%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5'
)
# one wget call per sample; -P saves each file into covid.input.path
for (sl in sample.links) {
  cmd <- paste('wget', shQuote(sl), '-P', shQuote(covid.input.path))
  run.quietly(cmd)
}
We can then format the downloaded files:
[ ]:
# format the metadata
md <- read.table(paste0(covid.input.path, 'metadata.txt'), header = TRUE, row.names = 'ID')

# Columns are renamed by position, so fail loudly if the file layout differs
# from the seven columns expected here (the ID column became the row names).
md.columns <- c('sample', 'sample_new', 'condition', 'disease', 'hasnCoV', 'cluster', 'cell.type')
stopifnot(ncol(md) == length(md.columns))
colnames(md) <- md.columns

# Expand the condition codes into readable labels. as.character() forces the
# lookup to go by name even if the column was read in as a factor.
condition.map <- c(HC = 'Control', M = 'Moderate COVID-19', S = 'Severe COVID-19')
md$condition <- factor(
  unname(condition.map[as.character(md$condition)]),
  levels = unname(condition.map)
)

md <- md[md$sample != 'GSM3660650', ] # drop the non-scRNAseq dataset included in this file

# Sort the cells by sample, following the sample order fixed here
sample.order <- c('C100', 'C144', 'C149', 'C51', 'C141', 'C145', 'C152', 'C143', 'C142', 'C146', 'C148', 'C52')
md$sample <- factor(md$sample, levels = sample.order)
md <- md[order(md$sample), ]

# Move 'condition' to the last column
md <- md[c('sample', 'sample_new', 'disease', 'hasnCoV', 'cluster', 'cell.type', 'condition')]
[ ]:
# Read every downloaded .h5 count matrix, keep the cells listed in the
# metadata, and store one Seurat object per sample (named by sample ID).
balf.samples <- list()
# messages and warnings raised inside the loop are deliberately silenced
suppressMessages({
  suppressWarnings({
    for (filename in list.files(covid.input.path, pattern = '\\.h5$')) {
      # the sample ID is the second '_'-separated field of the file name
      sample <- strsplit(filename, '_', fixed = TRUE)[[1]][[2]]

      # subset and format metadata: rebuild each row name as the text before
      # the first '_' plus a '-1' suffix, so it can index the matrix columns
      md.sample <- md[md[['sample']] == sample, ]
      rownames(md.sample) <- vapply(
        rownames(md.sample),
        function(x) paste0(strsplit(x, '_', fixed = TRUE)[[1]][[1]], '-1'),
        character(1),
        USE.NAMES = FALSE
      )

      # load the counts
      counts <- Seurat::Read10X_h5(
        filename = paste0(covid.input.path, filename),
        unique.features = TRUE
      )
      counts <- counts[, rownames(md.sample)] # only include cells present in the metadata

      # preprocess
      so <- CreateSeuratObject(counts = counts, project = sample, meta.data = md.sample)
      balf.samples[[sample]] <- so
    }
  })
})
Merge the per-sample objects into a single Seurat object and save it to a file
[ ]:
# Every sample in sample.order must have been loaded: indexing a list by a
# missing name yields a NULL entry, which would otherwise surface as an
# obscure error further down.
missing.samples <- setdiff(sample.order, names(balf.samples))
if (length(missing.samples) > 0) {
  stop('Samples not loaded: ', paste(missing.samples, collapse = ', '), call. = FALSE)
}
balf.samples <- balf.samples[sample.order]

# Merge the first object with all the others. `[-1]` is used instead of
# `2:length(...)` because the latter counts downward when only one is present.
covid_data <- merge(balf.samples[[1]], y = balf.samples[-1],
                    project = "balf.covid")

# remove the feature named 'nCoV' before saving
covid_data <- covid_data[rownames(covid_data) != 'nCoV', ]
saveRDS(covid_data, paste0(data.path, 'BALF-COVID19-Liao_et_al-NatMed-2020.rds'))
Convert into SingleCellExperiment
[19]:
# covid_data<-readRDS('/data/hratch/ccc_protocols/raw/BALF-COVID19-Liao_et_al-NatMed-2020.rds')
# Convert the merged Seurat object to a SingleCellExperiment
covid_data_sce <- Seurat::as.SingleCellExperiment(covid_data)

# Drop the nCount_RNA / nFeature_RNA columns from the cell-level metadata
qc.columns <- c('nCount_RNA', 'nFeature_RNA')
is.qc.column <- colnames(colData(covid_data_sce)) %in% qc.columns
colData(covid_data_sce) <- colData(covid_data_sce)[!is.qc.column]

# Remove the 'logcounts' assay from the object
assay(covid_data_sce, 'logcounts') <- NULL

sce.path <- paste0(data.path, 'BALF-COVID19-Liao_et_al-NatMed-2020_SCE.rds')
saveRDS(covid_data_sce, sce.path)