Directly build the SingleCellExperiment object loaded in Tutorial 01 and the Seurat object loaded in Supplementary Tutorial X:

[33]:

library(Seurat, quietly = T)
library(SingleCellExperiment, quietly = T)

# Locations used by every cell below. Both strings keep their trailing slash
# because file names are appended to them with paste0().
data.path <- '../../data/'                                 # shared data root
covid.input.path <- paste0(data.path, 'raw/covid_balf/')   # raw BALF inputs

Attaching SeuratObject


Attaching package: ‘Seurat’


The following object is masked from ‘package:SummarizedExperiment’:

    Assays

Loading

The 12 samples can be downloaded as .h5 files from here. You can also download the cell metadata from here

We download these files directly in the following cell:

[ ]:

# Make sure the download directory exists: wget cannot write into a missing
# directory, and with its console output silenced that failure would otherwise
# go unnoticed.
dir.create(covid.input.path, recursive = TRUE, showWarnings = FALSE)

# Fetch one URL with wget. Console output stays silenced, but the exit status
# returned by system() is checked so that a failed download stops here with a
# clear message instead of surfacing later as a missing or empty file.
#   url  : address of the file to fetch
#   flag : wget option that says where the file goes
#          ('-O' = exact output file, '-P' = directory prefix)
#   dest : value passed to that option
# The URL and destination are shell-quoted so special characters in either
# cannot be interpreted by the shell.
run.wget <- function(url, flag, dest) {
    cmd <- paste('wget', shQuote(url), flag, shQuote(dest))
    status <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
    if (status != 0) {
        stop('wget failed (exit status ', status, ') for ', url, call. = FALSE)
    }
    invisible(status)
}

# download the metadata
metadata.link <- 'https://raw.githubusercontent.com/zhangzlab/covid_balf/master/all.cell.annotation.meta.txt'
run.wget(metadata.link, '-O', paste0(covid.input.path, 'metadata.txt'))

# download the expression data (one filtered feature-barcode .h5 per sample)
sample.links <- c(
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339769/suppl/GSM4339769%5FC141%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339770/suppl/GSM4339770%5FC142%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339771/suppl/GSM4339771%5FC143%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339772/suppl/GSM4339772%5FC144%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339773/suppl/GSM4339773%5FC145%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4339nnn/GSM4339774/suppl/GSM4339774%5FC146%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475048/suppl/GSM4475048%5FC51%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475049/suppl/GSM4475049%5FC52%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475050/suppl/GSM4475050%5FC100%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475051/suppl/GSM4475051%5FC148%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475052/suppl/GSM4475052%5FC149%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5',
    'https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4475nnn/GSM4475053/suppl/GSM4475053%5FC152%5Ffiltered%5Ffeature%5Fbc%5Fmatrix%2Eh5'
    )

# '-P' stores each file under covid.input.path using the name wget derives
# from the URL.
for (sl in sample.links){
    run.wget(sl, '-P', covid.input.path)
}

We can then format the downloaded files:

[ ]:

# format the metadata
# Read the per-cell annotation table; the 'ID' column becomes the row names
# (later cells turn these into the barcodes used to subset the count matrices).
md <- read.table(paste0(covid.input.path, 'metadata.txt'), header = TRUE, row.names = 'ID')

# The columns are renamed by position, so fail early if the layout differs
# from the seven annotation columns expected here.
md.columns <- c('sample', 'sample_new', 'condition', 'disease', 'hasnCoV', 'cluster', 'cell.type')
stopifnot(ncol(md) == length(md.columns))
colnames(md) <- md.columns

# Translate the short group codes into readable condition labels.
# as.character() guarantees a lookup by name even if the column was read in as
# a factor (indexing with a factor would use its integer codes instead).
condition.map <- c('Control', 'Moderate COVID-19', 'Severe COVID-19')
names(condition.map) <- c('HC', 'M', 'S')
md$condition <- unname(condition.map[as.character(md$condition)])
md$condition <- factor(md$condition, levels = condition.map)

md <- md[md$sample != 'GSM3660650', ] # drop the non-scRNAseq dataset included in this file

# Fix the sample order (reused when the per-sample objects are merged) and
# sort the cells accordingly.
sample.order <- c('C100', 'C144', 'C149', 'C51', 'C141', 'C145', 'C152', 'C143', 'C142', 'C146', 'C148', 'C52')
md$sample <- factor(md$sample, levels = sample.order)
md <- md[order(md$sample), ]

# Final column order, with 'condition' moved to the end.
md <- md[c('sample', 'sample_new', 'disease', 'hasnCoV', 'cluster', 'cell.type', 'condition')]

[ ]:

# Build one Seurat object per downloaded sample, keyed by sample name.
balf.samples <- list()

suppressMessages({
    suppressWarnings({
        # only the 10x .h5 matrices are of interest in the input directory
        for (filename in list.files(covid.input.path, pattern = '\\.h5$')){
            # the sample name is the second '_'-separated field of the file
            # name; 'sample.id' avoids shadowing base::sample()
            sample.id <- unlist(strsplit(filename, '_'))[[2]]

            # subset and format metadata: keep the text before the first '_'
            # of each cell ID and append '-1' so the row names can be used to
            # index the columns of the count matrix. sub() is vectorized and,
            # unlike sapply(), always returns a character vector (also for a
            # sample without any annotated cells).
            md.sample <- md[md[['sample']] == sample.id, ]
            rownames(md.sample) <- paste0(sub('_.*$', '', rownames(md.sample)), '-1')

            # load the counts
            counts <- Seurat::Read10X_h5(filename = paste0(covid.input.path, filename),
                                         unique.features = TRUE)
            counts <- counts[, rownames(md.sample)] # only include cells present in the metadata

            # wrap counts and per-cell metadata in a Seurat object
            balf.samples[[sample.id]] <- CreateSeuratObject(counts = counts, project = sample.id,
                                                            meta.data = md.sample)
        }
    })
})

Merge the samples into a single Seurat object and save it to a file

[ ]:

# Every sample listed in sample.order must have been loaded; otherwise the
# reordering below would insert NULL entries and merge() would fail with an
# obscure error.
missing.samples <- setdiff(sample.order, names(balf.samples))
if (length(missing.samples) > 0) {
    stop('No loaded data for sample(s): ', paste(missing.samples, collapse = ', '),
         call. = FALSE)
}

# Put the per-sample objects in the fixed sample order, then merge the first
# one with all the others ([-1] is the safe spelling of "everything but the
# first", unlike 2:length(x)).
balf.samples <- balf.samples[sample.order]
covid_data <- merge(balf.samples[[1]], y = balf.samples[-1],
                    project = "balf.covid")

# Drop the feature named 'nCoV' (presumably the viral transcript - confirm).
covid_data <- covid_data[rownames(covid_data) != 'nCoV', ]

saveRDS(covid_data, paste0(data.path, 'BALF-COVID19-Liao_et_al-NatMed-2020.rds'))

Convert into SingleCellExperiment

[19]:

# covid_data<-readRDS('/data/hratch/ccc_protocols/raw/BALF-COVID19-Liao_et_al-NatMed-2020.rds')
# Convert the merged Seurat object into a SingleCellExperiment.
covid_data_sce <- Seurat::as.SingleCellExperiment(covid_data)

# Remove the 'nCount_RNA' and 'nFeature_RNA' columns from the per-cell table,
# keeping every other column.
colData(covid_data_sce) <- colData(covid_data_sce)[
    , !(colnames(colData(covid_data_sce)) %in% c('nCount_RNA', 'nFeature_RNA')),
    drop = FALSE
]

# Discard the 'logcounts' assay before saving.
assay(covid_data_sce, 'logcounts') <- NULL

saveRDS(covid_data_sce, paste0(data.path, 'BALF-COVID19-Liao_et_al-NatMed-2020_SCE.rds'))