Directly build the SingleCellExperiment object loaded in Tutorial 01 and the Seurat object loaded in Supplementary Tutorial X:

[33]:
# Attach the packages used below; quietly = TRUE suppresses the attach
# confirmation messages. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
library(Seurat, quietly = TRUE)
library(SingleCellExperiment, quietly = TRUE)

# paths
data.path <- '../../data/'  # data directory, relative to the working directory
covid.input.path <- paste0(data.path, 'raw/covid_balf/')  # raw inputs are downloaded to / read from here
Attaching SeuratObject


Attaching package: ‘Seurat’


The following object is masked from ‘package:SummarizedExperiment’:

    Assays


Loading

The 12 samples can be downloaded as .h5 files from here. You can also download the cell metadata from here.

We download these files directly in the following cell:

[ ]:
# Download the cell metadata and the 12 per-sample expression matrices into
# covid.input.path. Requires `wget` to be available and network access.

# `wget -O <file>` does not create missing parent directories, so make sure the
# target directory exists before the first download
dir.create(covid.input.path, recursive = TRUE, showWarnings = FALSE)

# download the metadata
metadata.link <- 'https://raw.githubusercontent.com/zhangzlab/covid_balf/master/all.cell.annotation.meta.txt'
cmd <- paste0('wget ', shQuote(metadata.link), ' -O ', shQuote(paste0(covid.input.path, 'metadata.txt')))
# wget's output is silenced, so the exit status is the only failure signal
status <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
if (status != 0) {
    stop('Failed to download the cell metadata (wget exit status ', status, ')', call. = FALSE)
}

# download the expression data
sample.links <- c(
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339769/suppl/GSM4339769%5FC141%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339770/suppl/GSM4339770%5FC142%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339771/suppl/GSM4339771%5FC143%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339772/suppl/GSM4339772%5FC144%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339773/suppl/GSM4339773%5FC145%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339774/suppl/GSM4339774%5FC146%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475048/suppl/GSM4475048%5FC51%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475049/suppl/GSM4475049%5FC52%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475050/suppl/GSM4475050%5FC100%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475051/suppl/GSM4475051%5FC148%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475052/suppl/GSM4475052%5FC149%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475053/suppl/GSM4475053%5FC152%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5'
    )

# try every link, then report all failures at once instead of stopping at the first
failed.links <- character(0)
for (sl in sample.links){
    cmd <- paste0('wget ', shQuote(sl), ' -P ', shQuote(covid.input.path))
    status <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
    if (status != 0) {
        failed.links <- c(failed.links, sl)
    }
}
if (length(failed.links) > 0) {
    stop('Failed to download ', length(failed.links), ' sample file(s):\n',
         paste(failed.links, collapse = '\n'), call. = FALSE)
}

We can then format the downloaded files:

[ ]:
# format the metadata
# Reads the downloaded cell annotation table (row names taken from its 'ID'
# column), renames the columns, recodes the condition labels, drops one sample,
# and orders the rows by sample.
md <- read.table(paste0(covid.input.path, 'metadata.txt'), header = TRUE, row.names = 'ID')

# columns are renamed by position, so fail early if the layout is not the expected one
stopifnot(ncol(md) == 7)
colnames(md) <- c('sample', 'sample_new', 'condition', 'disease', 'hasnCoV', 'cluster', 'cell.type')

# recode the short condition codes into descriptive labels
condition.map <- c('Control', 'Moderate COVID-19', 'Severe COVID-19')
names(condition.map) <- c('HC', 'M', 'S')
# as.character() guarantees a lookup by name even if the column was read as a factor
md$condition <- factor(unname(condition.map[as.character(md$condition)]), levels = condition.map)

# drop the non-scRNAseq dataset included in this file
# (%in% never returns NA, so rows with a missing sample label do not turn into all-NA rows)
md <- md[!(md$sample %in% 'GSM3660650'), ]

# fix the sample order and sort the cells accordingly
sample.order <- c('C100', 'C144', 'C149', 'C51', 'C141', 'C145', 'C152', 'C143', 'C142', 'C146', 'C148', 'C52')
md$sample <- factor(md$sample, levels = sample.order)
md <- md[order(md$sample), ]

# move 'condition' to the last column
md <- md[c('sample', 'sample_new', 'disease', 'hasnCoV', 'cluster', 'cell.type', 'condition')]
[ ]:
# Build one Seurat object per downloaded .h5 file, stored in a list keyed by sample ID
balf.samples <- list()

suppressMessages({
    suppressWarnings({
        for (filename in list.files(covid.input.path)){
            if (endsWith(filename, '.h5')){
                # the sample ID is the second '_'-separated token of the file name
                sample <- unlist(strsplit(filename, '_'))[[2]]

                # subset and format metadata
                # which() discards NA comparisons, so cells with a missing sample
                # label cannot leak in as all-NA rows
                md.sample <- md[which(md[['sample']] == sample), ]
                # row names: keep the part before the first '_' and append '-1';
                # they are used below to select columns of the count matrix.
                # vapply() always returns a character vector, even for zero rows
                rownames(md.sample) <- vapply(rownames(md.sample),
                                              function(x) paste0(unlist(strsplit(x, '_'))[[1]], '-1'),
                                              character(1), USE.NAMES = FALSE)
                # load the counts
                so <- Seurat::Read10X_h5(filename = paste0(covid.input.path, filename), unique.features = TRUE)
                # only include cells present in the metadata;
                # drop = FALSE keeps a matrix even if a single cell is selected
                so <- so[, rownames(md.sample), drop = FALSE]

                # wrap the counts and the per-cell metadata into a Seurat object
                so <- CreateSeuratObject(counts = so, project = sample, meta.data = md.sample)
                balf.samples[[sample]] <- so
            }
        }
    })
})

Merge into a single file

[ ]:
# put the per-sample objects into the order given by sample.order
balf.samples <- balf.samples[sample.order]

# merge the first object with all of the remaining ones
covid_data <- merge(
    balf.samples[[1]],
    y = balf.samples[-1],
    project = "balf.covid"
)

# remove the 'nCoV' feature row from the merged object
covid_data <- covid_data[!(rownames(covid_data) %in% 'nCoV'), ]

# save the merged object
saveRDS(covid_data, file = paste0(data.path, 'BALF-COVID19-Liao_et_al-NatMed-2020.rds'))

Convert into SingleCellExperiment

[19]:
# covid_data<-readRDS('/data/hratch/ccc_protocols/raw/BALF-COVID19-Liao_et_al-NatMed-2020.rds')
# convert the merged Seurat object into a SingleCellExperiment
covid_data_sce <- Seurat::as.SingleCellExperiment(covid_data)

# remove the 'nCount_RNA' and 'nFeature_RNA' columns from the cell metadata
is.count.column <- colnames(covid_data_sce@colData) %in% c('nCount_RNA', 'nFeature_RNA')
covid_data_sce@colData <- covid_data_sce@colData[!is.count.column]

# remove the 'logcounts' assay
assay(covid_data_sce, 'logcounts') <- NULL

# save the SingleCellExperiment object
saveRDS(covid_data_sce, file = paste0(data.path, 'BALF-COVID19-Liao_et_al-NatMed-2020_SCE.rds'))