Vitessce#

This tutorial has been adapted from the data preparation examples in the Vitessce documentation.

Setup#

Install dependencies:

pip install vitessce
pip install 'lamindb[jupyter,aws,bionty]'
!lamin load laminlabs/lamindata  # load your instance
💡 connected lamindb: laminlabs/lamindata
from urllib.request import urlretrieve
from pathlib import Path
from anndata import read_h5ad
from vitessce import (
    VitessceConfig,
    Component as cm,
    AnnDataWrapper,
)
from vitessce.data_utils import (
    to_uint8,
    sort_var_axis,
    optimize_adata,
)
import lamindb as ln
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/vitessce/__init__.py:42: UserWarning: Extra installs are necessary to use widgets: No module named 'anywidget'
  warn(f'Extra installs are necessary to use widgets: {e}')
💡 connected lamindb: laminlabs/lamindata
# [optional] track the current notebook or script
# The transform's stem uid and version are set before ln.track() is called;
# the log output that follows shows a Transform whose uid starts with this
# stem (version 2.1) and a Run being loaded.
ln.settings.transform.stem_uid = "BZhZQ6uIbkWv"
ln.settings.transform.version = "2.1"
ln.track()
💡 notebook imports: anndata==0.10.7 lamindb==0.70.4 vitessce==3.2.5
💡 loaded: Transform(uid='BZhZQ6uIbkWvFKml', name='Vitessce', key='vitessce', version='2.1', type='notebook', updated_at=2024-04-22 07:51:15 UTC, created_by_id=9)
💡 loaded: Run(uid='RBnByquj667LpuNQf25u', started_at=2024-04-24 11:35:29 UTC, is_consecutive=True, transform_id=81, created_by_id=18)

Pre-process the dataset#

For this example, we use a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17.

# From https://github.com/vitessce/vitessce-python/blob/main/demos/habib-2017/src/convert_to_zarr.py
def convert_h5ad_to_zarr(input_path, output_path):
    """Convert an ``.h5ad`` file into a slimmed-down AnnData zarr store.

    Steps, in order:

    1. Read the AnnData object from ``input_path``.
    2. Keep only the variables flagged in ``var["highly_variable"]``.
    3. Reorder the variable axis using the ordering returned by
       ``sort_var_axis`` (presumably a clustering-based order -- confirm
       against the vitessce ``data_utils`` documentation).
    4. Store a ``uint8`` version of ``X`` in ``layers["X_uint8"]``.
    5. Pass the object through ``optimize_adata``, naming the ``CellType``
       obs column, the ``X_umap`` obsm entry and the ``X_uint8`` layer.
    6. Write the result to ``output_path`` as zarr.

    Args:
        input_path: Path of the source ``.h5ad`` file.
        output_path: Destination path of the zarr store.

    The function expects the input to provide ``var["highly_variable"]``,
    ``obs["CellType"]`` and ``obsm["X_umap"]``.
    """
    source = read_h5ad(input_path)

    # Restrict to the highly variable features; copy so we own the data
    # rather than holding a view of the full object.
    hvg_mask = source.var["highly_variable"]
    subset = source[:, hvg_mask].copy()

    # Reorder the var axis according to the order computed by sort_var_axis.
    var_order = sort_var_axis(subset.X, subset.var.index.values)
    subset = subset[:, var_order].copy()

    # Add the uint8 layer that the Vitessce config later references as
    # "layers/X_uint8".
    subset.layers["X_uint8"] = to_uint8(subset.X, norm_along="var")

    optimized = optimize_adata(
        subset,
        obs_cols=["CellType"],
        obsm_keys=["X_umap"],
        layer_keys=["X_uint8"],
    )
    optimized.write_zarr(output_path)
# Local locations of the converted zarr store and of the downloaded h5ad file.
zarr_filepath = "./hhabib_2017_nature_methods.anndata.zarr"
adata_filepath = "./habib17.processed.h5ad"

# Download the processed dataset only when it is not already on disk.
if not Path(adata_filepath).exists():
    urlretrieve(
        url="https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad",
        filename=adata_filepath,
    )

convert_h5ad_to_zarr(adata_filepath, zarr_filepath)
Hide code cell output
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/anndata/compat/__init__.py:311: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].

This is where adjacency matrices should go now.
  warn(
/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/anndata/compat/__init__.py:311: FutureWarning: Moving element from .uns['neighbors']['connectivities'] to .obsp['connectivities'].

This is where adjacency matrices should go now.
  warn(

Save dataset#

# Wrap the local zarr store in a LaminDB artifact. Its description is reused
# further down as the description of the VitessceConfig.
zarr_artifact = ln.Artifact(
    zarr_filepath,
    description="Habib et al., 2017 Nature Methods, optimized anndata zarr",
)
# Persist the artifact; the config output further down references it through
# an S3 URL, so this presumably uploads the store to the instance's storage.
zarr_artifact.save()

Create a VitessceConfig object#

Note

Here is a note on folder upload speed and why we chose not to use the .export(to="s3") functionality of Vitessce.

Set up the configuration by adding the views and datasets of interest.

# Create the config; its description is taken from the saved zarr artifact.
vc = VitessceConfig(
    schema_version="1.0.15",
    description=zarr_artifact.description,
)
# Add a single dataset backed by the artifact's URL. The path arguments point
# at locations inside the zarr store written by convert_h5ad_to_zarr: the
# uint8 layer, the UMAP embedding and the CellType obs column.
dataset = vc.add_dataset(name="Habib 2017").add_object(
    AnnDataWrapper(
        adata_url=zarr_artifact.path.to_url(),
        obs_feature_matrix_path="layers/X_uint8",
        obs_embedding_paths=["obsm/X_umap"],
        obs_embedding_names=["UMAP"],
        obs_set_paths=["obs/CellType"],
        obs_set_names=["Cell Type"],
    )
)
# One view per component, all bound to the same dataset. The scatterplot is
# mapped to the embedding named "UMAP" above.
obs_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)
obs_sets_sizes = vc.add_view(cm.OBS_SET_SIZES, dataset=dataset)
scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP")
heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)
genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)
# Arrange the views on the grid: `|` places views side by side and `/` stacks
# them vertically, as the x/y/w/h values in the to_dict() output show.
vc.layout(((scatterplot | obs_sets) / heatmap) | (obs_sets_sizes / genes))

# inspect the config
vc.to_dict()
Hide code cell output
{'version': '1.0.15',
 'name': '',
 'description': 'Habib et al., 2017 Nature Methods, optimized anndata zarr',
 'datasets': [{'uid': 'A',
   'name': 'Habib 2017',
   'files': [{'fileType': 'anndata.zarr',
     'url': 'https://lamindata.s3.amazonaws.com/.lamindb/KNSe0OZffU9yarq9.anndata.zarr',
     'options': {'obsEmbedding': [{'path': 'obsm/X_umap',
        'dims': [0, 1],
        'embeddingType': 'UMAP'}],
      'obsSets': [{'name': 'Cell Type', 'path': 'obs/CellType'}],
      'obsFeatureMatrix': {'path': 'layers/X_uint8'}}}]}],
 'coordinationSpace': {'dataset': {'A': 'A'}, 'embeddingType': {'A': 'UMAP'}},
 'layout': [{'component': 'obsSets',
   'coordinationScopes': {'dataset': 'A'},
   'x': 3.0,
   'y': 0.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'obsSetSizes',
   'coordinationScopes': {'dataset': 'A'},
   'x': 6.0,
   'y': 0.0,
   'w': 6.0,
   'h': 6.0},
  {'component': 'scatterplot',
   'coordinationScopes': {'dataset': 'A', 'embeddingType': 'A'},
   'x': 0.0,
   'y': 0.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'heatmap',
   'coordinationScopes': {'dataset': 'A'},
   'x': 0.0,
   'y': 6.0,
   'w': 6.0,
   'h': 6.0},
  {'component': 'featureList',
   'coordinationScopes': {'dataset': 'A'},
   'x': 6.0,
   'y': 6.0,
   'w': 6.0,
   'h': 6.0}],
 'initStrategy': 'auto'}

Save VitessceConfig object#

from lamindb.integrations import save_vitessce_config
# Save the config as its own artifact; the log line that follows links to it
# on lamin.ai.
vc_artifact = save_vitessce_config(vc, description="View Habib17 in Vitessce")
💡 go to: https://lamin.ai/laminlabs/lamindata/artifact/aVH2kNVuQUQJjfTvuwv2
# Display the lineage graph of the config artifact (rendered as the SVG below).
vc_artifact.view_lineage()
_images/3a77daf13ad706dabf0aa7e7cfd1a3489fcc5c4f6c3b1a56cd0557f5faa0e5b6.svg
# [optional] save run report to share notebook with collaborators
# ln.finish()
Hide code cell content
# clean up artifacts in CI run
# NOTE(review): permanent=True presumably bypasses any trash / soft-delete
# step so nothing is left behind -- confirm against the LaminDB docs.
zarr_artifact.delete(permanent=True)
vc_artifact.delete(permanent=True)