import lamindb as ln
import wetlab as wl

# Start run tracking for this notebook; the argument is the notebook's
# transform uid (the recorded output reports the loaded Transform and Run).
ln.track("PtTXoc0RbOIq0001")
→ connected lamindb: laminlabs/lamindata
→ loaded Transform('PtTXoc0R'), re-started Run('jrCu89c7') at 2024-12-04 12:55:59 UTC
→ notebook imports: lamindb==0.77.2 wetlab==0.36.0

Download assay readout from wetlab

# Assay readout artifact, browsable at:
# https://lamin.ai/laminlabs/lamindata/artifact/MDG7BbeFVPvEyyUb0000
readout_uid = "MDG7BbeFVPvEyyUb0000"
artifact = ln.Artifact.get(readout_uid)
# Print the artifact's metadata summary (features and labels).
artifact.describe()
Artifact .csv
├── General
│   ├── .uid = 'MDG7BbeFVPvEyyUb0000'
│   ├── .size = 1729694
│   ├── .hash = 'WhvwTUnFe29PopBqMp5UkQ'
│   ├── .path = s3://lamindata/.lamindb/MDG7BbeFVPvEyyUb0000.csv
│   ├── .created_by = sunnyosun (Sunny Sun)
│   ├── .created_at = 2024-12-03 13:14:50
│   └── .transform = 'Upload GWS CRISPRa result'
├── Internal features/.feature_sets
│   └── columns • 14                [Feature]                                                           
│       gene_target                 cat[wetlab.TreatmentTarg…  A1BG, A1CF, A2M, A2ML1, A3GALT2, A4GALT,…
│       num                         float                                                               
│       neg|score                   float                                                               
│       neg|p-value                 float                                                               
│       neg|fdr                     float                                                               
│       neg|rank                    float                                                               
│       neg|goodsgrna               float                                                               
│       neg|lfc                     float                                                               
│       pos|score                   float                                                               
│       pos|p-value                 float                                                               
│       pos|fdr                     float                                                               
│       pos|rank                    float                                                               
│       pos|goodsgrna               float                                                               
│       pos|lfc                     float                                                               
├── External features
│   └── assay                       cat[bionty.ExperimentalF…  gRNA-seq                                 
│       biosample                   cat[wetlab.Biosample]      S001                                     
│       experiment                  cat[wetlab.Experiment]     EXP001                                   
│       readout                     cat[bionty.ExperimentalF…  interferon gamma                         
└── Labels
    └── .experimental_factors       bionty.ExperimentalFactor  'interferon gamma', 'gRNA-seq'           
        .experiments                wetlab.Experiment          'EXP001'                                 
        .treatment_targets          wetlab.TreatmentTarget     'STAT1', 'CD86', 'BRD4', 'ATF2', 'SPI1',…
        .biosamples                 wetlab.Biosample           'S001'                                   

Load the assay data and perform the analysis

# Load the artifact's content into memory; the cells below treat the result
# as a dataframe (column indexing and .copy()).
df_raw = artifact.load()

Perform analysis and ingest results into LaminDB

Let's assume we have analyzed the raw data `df_raw` and generated a new data object, `hits_df`: a subset of the dataframe containing the hits of the screen.

# A gene counts as a hit when its "pos|fdr" value is strictly below the cutoff.
fdr_cutoff = 0.01
is_hit = df_raw["pos|fdr"] < fdr_cutoff
# Copy so that later changes to the hits table do not touch df_raw.
hits_df = df_raw[is_hit].copy()
hits_df
gene_target num neg|score neg|p-value neg|fdr neg|rank neg|goodsgrna neg|lfc pos|score pos|p-value pos|fdr pos|rank pos|goodsgrna pos|lfc
458 GMFG 12 0.000844 0.001329 0.054466 459 5 0.81349 2.039700e-05 3.425700e-05 0.005316 122 7 0.81349
1285 NIT1 12 0.008487 0.012619 0.185767 1286 5 -0.27529 1.596100e-05 2.641200e-05 0.004425 113 3 -0.27529
2226 FOXD1 12 0.024504 0.043741 0.369343 2227 2 0.77968 4.777300e-07 2.353500e-06 0.000755 55 7 0.77968
3989 SPPL2B 12 0.068257 0.111050 0.524627 3990 4 -0.12209 1.712500e-05 2.745800e-05 0.004560 114 3 -0.12209
4662 MVB12A 12 0.092113 0.145080 0.586112 4663 5 -0.05888 6.652500e-06 2.065900e-05 0.003989 86 5 -0.05888
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
18925 NLRC3 12 1.000000 1.000000 1.000000 18926 0 1.07850 2.023800e-12 2.615000e-07 0.000118 10 12 1.07850
18926 TNFRSF1A 12 1.000000 1.000000 1.000000 18927 0 2.22640 1.075200e-20 2.615000e-07 0.000118 5 12 2.22640
18927 VAV1 12 1.000000 1.000000 1.000000 18928 0 3.16930 6.190200e-36 2.615000e-07 0.000118 1 12 3.16930
18928 CD28 12 1.000000 1.000000 1.000000 18929 0 3.12380 8.135700e-30 2.615000e-07 0.000118 2 12 3.12380
18929 IFNG 12 1.000000 1.000000 1.000000 18930 0 3.38620 1.810300e-28 2.615000e-07 0.000118 3 12 3.38620

123 rows × 14 columns

Register the file of screen hits

# Validate the "gene_target" column against the TreatmentTarget registry's
# name field before registering the dataframe.
categorical_fields = {"gene_target": wl.TreatmentTarget.name}
curator = ln.Curator.from_df(hits_df, categoricals=categorical_fields)
curator.validate()
✓ "gene_target" is validated against TreatmentTarget.name
True
# Register the validated hits table as an artifact.
# NOTE(review): "CIRSPRa" looks like a typo for "CRISPRa"; it is kept as-is
# because this text is the description stored on the existing artifact.
hits_description = "hits of schmidt22 CIRSPRa GWS"
artifact_hits = curator.save_artifact(description=hits_description)
→ returning existing artifact with same hash: Artifact(uid='EkiyqIOLUh7E6QFw0000', is_latest=True, description='hits of schmidt22 CIRSPRa GWS', suffix='.parquet', type='dataset', size=19846, hash='Q8nLSnk8o5nqwX4B0fNQ7w', _hash_type='md5', _accessor='DataFrame', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=213, run_id=369, created_by_id=2, created_at=2024-12-03 15:49:30 UTC)
→ go to https://lamin.ai/laminlabs/lamindata/artifact/EkiyqIOLUh7E6QFw0000
# Copy the labels of the source artifact's external features onto the hits
# artifact, one feature at a time.
# NOTE(review): in the recorded output below, both "assay" and "readout" end
# up listing 'gRNA-seq' and 'interferon gamma', whereas the source artifact
# shows one value per feature. Presumably labels.get(feature) returns every
# label of the registry these two features share -- confirm against the
# lamindb documentation before relying on these annotations.
external_feature_names = ("assay", "biosample", "experiment", "readout")
for feature_name in external_feature_names:
    feature = ln.Feature.get(name=feature_name)
    source_labels = artifact.labels.get(feature)
    artifact_hits.labels.add(source_labels, feature=feature)
artifact_hits.describe()
Artifact .parquet/DataFrame
├── General
│   ├── .uid = 'EkiyqIOLUh7E6QFw0000'
│   ├── .size = 19846
│   ├── .hash = 'Q8nLSnk8o5nqwX4B0fNQ7w'
│   ├── .path = s3://lamindata/.lamindb/EkiyqIOLUh7E6QFw0000.parquet
│   ├── .created_by = sunnyosun (Sunny Sun)
│   ├── .created_at = 2024-12-03 16:49:30
│   └── .transform = 'Hit identification - genome-wide CRIPSRa IFNG screen of T cells'
├── Internal features/.feature_sets
│   └── columns • 14                [Feature]                                                           
│       gene_target                 cat[wetlab.TreatmentTarg…  AKAP12, APOBEC3A, APOBEC3C, APOBEC3D, AP…
│       num                         float                                                               
│       neg|score                   float                                                               
│       neg|p-value                 float                                                               
│       neg|fdr                     float                                                               
│       neg|rank                    float                                                               
│       neg|goodsgrna               float                                                               
│       neg|lfc                     float                                                               
│       pos|score                   float                                                               
│       pos|p-value                 float                                                               
│       pos|fdr                     float                                                               
│       pos|rank                    float                                                               
│       pos|goodsgrna               float                                                               
│       pos|lfc                     float                                                               
├── External features
│   └── assay                       cat[bionty.ExperimentalF…  gRNA-seq, interferon gamma               
│       biosample                   cat[wetlab.Biosample]      S001                                     
│       experiment                  cat[wetlab.Experiment]     EXP001                                   
│       readout                     cat[bionty.ExperimentalF…  gRNA-seq, interferon gamma               
└── Labels
    └── .experimental_factors       bionty.ExperimentalFactor  'interferon gamma', 'gRNA-seq'           
        .experiments                wetlab.Experiment          'EXP001'                                 
        .treatment_targets          wetlab.TreatmentTarget     'POU2F2', 'TNFRSF14', 'FOSL1', 'OTUD7A',…
        .biosamples                 wetlab.Biosample           'S001'                                   
        .ulabels                    ULabel                     'S001'                                   
# Display the lineage view of the hits artifact (rendered as an image in the
# notebook output).
artifact_hits.view_lineage()
No description has been provided for this image
# Tag this notebook's transform with its ULabels, then close the tracked run.
transform_labels = [ln.ULabel.get(name=label_name) for label_name in ("use-case", "schmidt22")]
ln.context.transform.ulabels.add(*transform_labels)
ln.finish()