import logging
import os
import numpy as np
from scvi.dataset.dataset import DownloadableDataset
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class SmfishDataset(DownloadableDataset):
    """Loads osmFISH data of mouse cortex cells from the Linnarsson lab.

    Parameters
    ----------
    save_path
        Location to use when saving/loading the data.
    use_high_level_cluster
        If True, use higher-level agglomerate clusters.
        The resulting cell types are "Astrocytes", "Endothelials", "Inhibitory",
        "Microglias", "Oligodendrocytes" and "Pyramidals".
    delayed_populating
        Switch for delayed populating mechanism.
    """

    def __init__(
        self,
        save_path: str = "data/",
        use_high_level_cluster: bool = True,
        delayed_populating: bool = False,
    ):
        # Stored before the base-class constructor runs because ``populate``
        # reads it (presumably the base class may call ``populate`` right
        # away when ``delayed_populating`` is False -- confirm in
        # ``DownloadableDataset``).
        self.use_high_level_cluster = use_high_level_cluster
        # NOTE(review): the two positional strings look like (download URL,
        # local file name); the local name intentionally differs from the
        # remote one ("all_cell" vs "all_cells") and is kept as-is so that
        # already-downloaded files are still found.
        super().__init__(
            "http://linnarssonlab.org/osmFISH/osmFISH_SScortex_mouse_all_cells.loom",
            "osmFISH_SScortex_mouse_all_cell.loom",
            save_path,
            delayed_populating=delayed_populating,
        )

    def populate(self):
        """Read the loom file and fill the dataset from its contents.

        Reads the row attribute ``Gene`` and the column attributes
        ``ClusterID``, ``ClusterName``, ``X`` and ``Y`` from the loom file at
        ``save_path/filenames[0]``, hands everything to
        ``populate_from_data`` and, when ``use_high_level_cluster`` is set,
        merges the fine-grained clusters into six major cell types and keeps
        only those.

        Raises
        ------
        ImportError
            If the optional ``loompy`` dependency is not installed.
        """
        try:
            import loompy
        except ImportError as err:
            raise ImportError(
                "Please install loompy package via `pip install --user loompy`"
            ) from err

        logger.info("Loading smFISH dataset")
        loom_path = os.path.join(self.save_path, self.filenames[0])
        # Read everything needed inside the ``with`` block so the underlying
        # file handle is released even if one of the reads fails.
        with loompy.connect(loom_path) as ds:
            # ``np.str`` was removed from NumPy; the builtin ``str`` is the
            # exact type it used to alias.
            gene_names = ds.ra["Gene"].astype(str)
            labels = ds.ca["ClusterID"].reshape(-1, 1)
            tmp_cell_types = np.asarray(ds.ca["ClusterName"])
            x_coord = ds.ca["X"].reshape((-1, 1))
            y_coord = ds.ca["Y"].reshape((-1, 1))
            # Rows carry the "Gene" attribute in the file, so transpose to get
            # one row per cell.
            data = ds[:, :].T

        # Build a lookup list indexed by cluster id: for every distinct id,
        # take the cluster name of the first cell carrying that id. Ids that
        # never occur keep an empty-string name.
        u_labels, u_index = np.unique(labels.ravel(), return_index=True)
        cell_types = [""] * (int(u_labels.max()) + 1)
        for cluster_id, first_cell in zip(u_labels, u_index):
            cell_types[cluster_id] = tmp_cell_types[first_cell]
        cell_types = np.asarray(cell_types, dtype=str)

        self.populate_from_data(
            X=data,
            labels=labels,
            gene_names=gene_names,
            cell_types=cell_types,
            cell_attributes_dict={"x_coord": x_coord, "y_coord": y_coord},
            remap_attributes=False,
        )

        # Groups of fine-grained cluster ids -> name of the major cell type
        # they are merged into.
        major_clusters = {
            (3, 2): "Astrocytes",
            (7, 26): "Endothelials",
            (18, 17, 14, 19, 15, 16, 20): "Inhibitory",
            (29, 28): "Microglias",
            (32, 33, 30, 22, 21): "Oligodendrocytes",
            (9, 8, 10, 6, 5, 4, 12, 1, 13): "Pyramidals",
        }
        if self.use_high_level_cluster:
            self.map_cell_types(major_clusters)
            self.filter_cell_types(
                [
                    "Astrocytes",
                    "Endothelials",
                    "Inhibitory",
                    "Microglias",
                    "Oligodendrocytes",
                    "Pyramidals",
                ]
            )

        self.remap_categorical_attributes()