Source code for scvi.dataset.seqfishplus

import os
import zipfile

import pandas as pd

from scvi.dataset import DownloadableDataset, CellMeasurement


class SeqFishPlusDataset(DownloadableDataset):
    """seqFISH+ dataset of mouse brain tissue.

    seqFISH+ can image mRNAs for 10,000 genes in single cells—with high accuracy
    and sub-diffraction-limit resolution—in the cortex, subventricular zone and
    olfactory bulb of mouse brain.

    Parameters
    ----------
    tissue_region
        Region of the mouse brain, either "subventricular cortex" or
        "olfactory bulb".
    save_path
        Location to use when saving/loading the SeqFish+ data.
    delayed_populating
        Switch for delayed populating mechanism.

    Raises
    ------
    ValueError
        If ``tissue_region`` is not one of the two supported regions.
    """

    def __init__(
        self,
        tissue_region: str = "subventricular cortex",
        save_path: str = "data",
        delayed_populating: bool = False,
    ):
        # NOTE(review): these attributes are assigned before calling the base
        # constructor because ``populate`` reads them; presumably the base
        # class may trigger ``populate`` during ``__init__`` — confirm against
        # ``DownloadableDataset``.
        self.tissue_region = tissue_region
        # ``file_prefix`` selects which CSV files inside the archive are read.
        if tissue_region == "subventricular cortex":
            self.file_prefix = "cortex_svz"
        elif tissue_region == "olfactory bulb":
            self.file_prefix = "ob"
        else:
            raise ValueError(
                '`tissue_region` must be "subventricular cortex" or "olfactory bulb", but got {}'.format(
                    tissue_region
                )
            )
        super().__init__(
            urls="https://github.com/CaiGroup/seqFISH-PLUS/raw/master/sourcedata.zip",
            filenames="seqfishplus.zip",
            save_path=save_path,
            delayed_populating=delayed_populating,
        )

    def populate(self):
        """Extract the counts and centroid tables and populate the dataset.

        Extracts ``sourcedata/<file_prefix>_counts.csv`` and
        ``sourcedata/<file_prefix>_cellcentroids.csv`` from the archive stored
        under ``save_path`` into ``<save_path>/seqfishplus``, reads both with
        pandas, and forwards them to ``populate_from_data``:

        - the counts table provides ``X`` (its values) and ``gene_names``
          (its column labels);
        - the ``X``/``Y`` columns of the centroid table are wrapped in a
          ``CellMeasurement`` named ``"coords"``;
        - the ``Cell ID`` and ``Field of View`` columns (plus ``Region`` for
          the "subventricular cortex" tissue) become cell attributes.
        """
        counts_filename = "sourcedata/{}_counts.csv".format(self.file_prefix)
        coordinates_filename = "sourcedata/{}_cellcentroids.csv".format(
            self.file_prefix
        )

        data_path = os.path.join(self.save_path, "seqfishplus")
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(data_path, exist_ok=True)

        archive_path = os.path.join(self.save_path, self.filenames[0])
        with zipfile.ZipFile(archive_path) as archive:
            archive.extract(counts_filename, path=data_path)
            archive.extract(coordinates_filename, path=data_path)

        df_counts = pd.read_csv(os.path.join(data_path, counts_filename))
        df_coordinates = pd.read_csv(os.path.join(data_path, coordinates_filename))

        coordinates = CellMeasurement(
            name="coords",
            data=df_coordinates[["X", "Y"]],
            columns_attr_name="axis",
            columns=["x", "y"],
        )

        # Maps CSV column headers to the attribute names used on the dataset.
        cell_attributes_name_mapping = {
            "Cell ID": "cell_id",
            "Field of View": "field_of_view",
        }
        # The "Region" column is only read for the cortex/SVZ table.
        if self.tissue_region == "subventricular cortex":
            cell_attributes_name_mapping["Region"] = "region"

        cell_attributes_dict = {
            attribute_name: df_coordinates[column_name]
            for column_name, attribute_name in cell_attributes_name_mapping.items()
        }

        self.populate_from_data(
            X=df_counts.values,
            gene_names=df_counts.columns,
            Ys=[coordinates],
            cell_attributes_dict=cell_attributes_dict,
        )