import lamindb as ln
import wetlab as wl
# Start run tracking for this notebook under the transform with the given uid.
ln.track("PtTXoc0RbOIq0001")
→ connected lamindb: laminlabs/lamindata → loaded Transform('PtTXoc0R'), re-started Run('jrCu89c7') at 2024-12-04 12:55:59 UTC → notebook imports: lamindb==0.77.2 wetlab==0.36.0
# Fetch the source artifact by its uid and print its metadata summary.
# Web view: https://lamin.ai/laminlabs/lamindata/artifact/MDG7BbeFVPvEyyUb0000
artifact = ln.Artifact.get("MDG7BbeFVPvEyyUb0000")
artifact.describe()
Artifact .csv ├── General │ ├── .uid = 'MDG7BbeFVPvEyyUb0000' │ ├── .size = 1729694 │ ├── .hash = 'WhvwTUnFe29PopBqMp5UkQ' │ ├── .path = s3://lamindata/.lamindb/MDG7BbeFVPvEyyUb0000.csv │ ├── .created_by = sunnyosun (Sunny Sun) │ ├── .created_at = 2024-12-03 13:14:50 │ └── .transform = 'Upload GWS CRISPRa result' ├── Internal features/.feature_sets │ └── columns • 14 [Feature] │ gene_target cat[wetlab.TreatmentTarg… A1BG, A1CF, A2M, A2ML1, A3GALT2, A4GALT,… │ num float │ neg|score float │ neg|p-value float │ neg|fdr float │ neg|rank float │ neg|goodsgrna float │ neg|lfc float │ pos|score float │ pos|p-value float │ pos|fdr float │ pos|rank float │ pos|goodsgrna float │ pos|lfc float ├── External features │ └── assay cat[bionty.ExperimentalF… gRNA-seq │ biosample cat[wetlab.Biosample] S001 │ experiment cat[wetlab.Experiment] EXP001 │ readout cat[bionty.ExperimentalF… interferon gamma └── Labels └── .experimental_factors bionty.ExperimentalFactor 'interferon gamma', 'gRNA-seq' .experiments wetlab.Experiment 'EXP001' .treatment_targets wetlab.TreatmentTarget 'STAT1', 'CD86', 'BRD4', 'ATF2', 'SPI1',… .biosamples wetlab.Biosample 'S001'
# Load the artifact's content into memory; it is indexed like a DataFrame below.
df_raw = artifact.load()
Let's assume we performed an analysis on the raw data `df_raw` and generated a new data object, `hits_df`: a subset of the dataframe containing the hits of the screen.
# Keep only the rows whose "pos|fdr" value is below 0.01. The result is copied
# so that later changes to the hits table do not touch df_raw.
is_hit = df_raw["pos|fdr"] < 0.01
hits_df = df_raw.loc[is_hit].copy()
hits_df
gene_target | num | neg|score | neg|p-value | neg|fdr | neg|rank | neg|goodsgrna | neg|lfc | pos|score | pos|p-value | pos|fdr | pos|rank | pos|goodsgrna | pos|lfc | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
458 | GMFG | 12 | 0.000844 | 0.001329 | 0.054466 | 459 | 5 | 0.81349 | 2.039700e-05 | 3.425700e-05 | 0.005316 | 122 | 7 | 0.81349 |
1285 | NIT1 | 12 | 0.008487 | 0.012619 | 0.185767 | 1286 | 5 | -0.27529 | 1.596100e-05 | 2.641200e-05 | 0.004425 | 113 | 3 | -0.27529 |
2226 | FOXD1 | 12 | 0.024504 | 0.043741 | 0.369343 | 2227 | 2 | 0.77968 | 4.777300e-07 | 2.353500e-06 | 0.000755 | 55 | 7 | 0.77968 |
3989 | SPPL2B | 12 | 0.068257 | 0.111050 | 0.524627 | 3990 | 4 | -0.12209 | 1.712500e-05 | 2.745800e-05 | 0.004560 | 114 | 3 | -0.12209 |
4662 | MVB12A | 12 | 0.092113 | 0.145080 | 0.586112 | 4663 | 5 | -0.05888 | 6.652500e-06 | 2.065900e-05 | 0.003989 | 86 | 5 | -0.05888 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
18925 | NLRC3 | 12 | 1.000000 | 1.000000 | 1.000000 | 18926 | 0 | 1.07850 | 2.023800e-12 | 2.615000e-07 | 0.000118 | 10 | 12 | 1.07850 |
18926 | TNFRSF1A | 12 | 1.000000 | 1.000000 | 1.000000 | 18927 | 0 | 2.22640 | 1.075200e-20 | 2.615000e-07 | 0.000118 | 5 | 12 | 2.22640 |
18927 | VAV1 | 12 | 1.000000 | 1.000000 | 1.000000 | 18928 | 0 | 3.16930 | 6.190200e-36 | 2.615000e-07 | 0.000118 | 1 | 12 | 3.16930 |
18928 | CD28 | 12 | 1.000000 | 1.000000 | 1.000000 | 18929 | 0 | 3.12380 | 8.135700e-30 | 2.615000e-07 | 0.000118 | 2 | 12 | 3.12380 |
18929 | IFNG | 12 | 1.000000 | 1.000000 | 1.000000 | 18930 | 0 | 3.38620 | 1.810300e-28 | 2.615000e-07 | 0.000118 | 3 | 12 | 3.38620 |
123 rows × 14 columns
# Validate the "gene_target" column against the TreatmentTarget registry's name field.
categoricals = {"gene_target": wl.TreatmentTarget.name}
curator = ln.Curator.from_df(hits_df, categoricals=categoricals)
curator.validate()
✓ "gene_target" is validated against TreatmentTarget.name
True
# Save the curated dataframe as an artifact.
# NOTE(review): "CIRSPRa" in the description looks like a typo for "CRISPRa". It is
# left unchanged because the description is persisted metadata; confirm before fixing.
artifact_hits = curator.save_artifact(description="hits of schmidt22 CIRSPRa GWS")
→ returning existing artifact with same hash: Artifact(uid='EkiyqIOLUh7E6QFw0000', is_latest=True, description='hits of schmidt22 CIRSPRa GWS', suffix='.parquet', type='dataset', size=19846, hash='Q8nLSnk8o5nqwX4B0fNQ7w', _hash_type='md5', _accessor='DataFrame', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=213, run_id=369, created_by_id=2, created_at=2024-12-03 15:49:30 UTC) → go to https://lamin.ai/laminlabs/lamindata/artifact/EkiyqIOLUh7E6QFw0000
# Copy the labels of the four external features from the source artifact onto
# the hits artifact, one feature at a time, then print the resulting metadata.
#
# NOTE(review): in the recorded describe() output of the hits artifact, both
# `assay` and `readout` list 'gRNA-seq, interferon gamma', whereas the source
# artifact has one label per feature (assay: gRNA-seq, readout: interferon
# gamma). Presumably `labels.get(feature)` returns every label from the
# feature's registry rather than only those linked under that feature. Confirm
# against the lamindb version in use and restrict the labels per feature if so.
for name in ["assay", "biosample", "experiment", "readout"]:
    feature = ln.Feature.get(name=name)
    artifact_hits.labels.add(artifact.labels.get(feature), feature=feature)
artifact_hits.describe()
Artifact .parquet/DataFrame ├── General │ ├── .uid = 'EkiyqIOLUh7E6QFw0000' │ ├── .size = 19846 │ ├── .hash = 'Q8nLSnk8o5nqwX4B0fNQ7w' │ ├── .path = s3://lamindata/.lamindb/EkiyqIOLUh7E6QFw0000.parquet │ ├── .created_by = sunnyosun (Sunny Sun) │ ├── .created_at = 2024-12-03 16:49:30 │ └── .transform = 'Hit identification - genome-wide CRIPSRa IFNG screen of T cells' ├── Internal features/.feature_sets │ └── columns • 14 [Feature] │ gene_target cat[wetlab.TreatmentTarg… AKAP12, APOBEC3A, APOBEC3C, APOBEC3D, AP… │ num float │ neg|score float │ neg|p-value float │ neg|fdr float │ neg|rank float │ neg|goodsgrna float │ neg|lfc float │ pos|score float │ pos|p-value float │ pos|fdr float │ pos|rank float │ pos|goodsgrna float │ pos|lfc float ├── External features │ └── assay cat[bionty.ExperimentalF… gRNA-seq, interferon gamma │ biosample cat[wetlab.Biosample] S001 │ experiment cat[wetlab.Experiment] EXP001 │ readout cat[bionty.ExperimentalF… gRNA-seq, interferon gamma └── Labels └── .experimental_factors bionty.ExperimentalFactor 'interferon gamma', 'gRNA-seq' .experiments wetlab.Experiment 'EXP001' .treatment_targets wetlab.TreatmentTarget 'POU2F2', 'TNFRSF14', 'FOSL1', 'OTUD7A',… .biosamples wetlab.Biosample 'S001' .ulabels ULabel 'S001'
# Show the lineage of the hits artifact.
artifact_hits.view_lineage()
# Look up the two ULabels by name and attach them to this notebook's transform.
use_case_label = ln.ULabel.get(name="use-case")
schmidt22_label = ln.ULabel.get(name="schmidt22")
ln.context.transform.ulabels.add(use_case_label, schmidt22_label)
# Finish the tracked run.
ln.finish()