Query book#

import lamindb as ln
import lamindb.schema as lns

from sqlmodel import or_

ln.track()
ℹ️ Instance: testuser1/mydata
ℹ️ User: testuser2
ℹ️ Added notebook: Transform(id='2l46gxQHI6Ud', v='0', name='15-query-book', type=notebook, title='Query book', created_by='bKeW4T6E', created_at=datetime.datetime(2023, 3, 30, 23, 19, 44))
ℹ️ Added run: Run(id='raQ31pdoVbCUhfMVb3G2', transform_id='2l46gxQHI6Ud', transform_v='0', created_by='bKeW4T6E', created_at=datetime.datetime(2023, 3, 30, 23, 19, 44))

Basic select operations#

Output formats#

ln.select(ln.File).df()
name suffix size hash source_id storage_id created_at updated_at
id
sfqjeqshOu4n2OCrxGj4 iris_new .parquet 5629 jUTdERuqlGv_GyqFfIEb2Q aUvAkAwVxam9tDQcKfro 8Pj12JLb 2023-03-30 23:15:58 2023-03-30 23:16:49
oocXPPbvFX1fYuFt5i6D mini .csv 11 z1LdF2qN4cN0M2sXrcW8aw obsRwVGjOgOautAvjeiS 8Pj12JLb 2023-03-30 23:16:07 NaT
lM7fgzdhxYBCYVbQvt5V metrics_summary .csv 6 Qt326UFWQibtvzRP1mhnJw 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
ABx9QrIk3ZfKMKT1zZ54 raw_feature_bc_matrix .h5 6 YeBfpVEGZriQhUNira-qiA 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
6DuGMqan46UGUU0S7Ivp possorted_genome_bam .bam.bai 6 BJDp79QxGfDAds40LMLUHw 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
4kEgNOBUIpNORbubLdQu web_summary .html 6 P3VFbegx8Uvt70i82pN4kA 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
4spvfwRkeBXOXhdIEpaL cloupe .cloupe 6 kRJKl4U-rCLESg8i6Tk1QA 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
kId2tfidnfLUT9JatfJG possorted_genome_bam .bam 6 HguFGkYNsZBrkJ-7K3mLBw 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
pN3ap4esn0TBkJEsb4DB molecule_info .h5 6 YBZ-JAAuZwNT6mjdLqwtGA 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
THjan9RLY3JUotzHlYbq filtered_feature_bc_matrix .h5 6 UyuUA2YXfAJBEefQw_wVhA 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
jzC6yuaVbt7if2dSso9z features .tsv.gz 6 CPLWI4kM2TYtpVA1GP4B4g 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
DEN1tlUq2AtRJuk02iOZ barcodes .tsv.gz 6 HkZpiKOqDM14o_BdT-jdRg 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
AQ4Cs8pvSSZXLlffNXUZ matrix .mtx.gz 6 0A_HXnvE3DwCXRHYtSknJw 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
R8QdanNZelXhxQsfFP0n analysis .csv 6 dYsOqf4SMpTH5HqeAVgCHw 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
hwALCxnzc9TC1Q2pF9mM features .tsv.gz 6 amZgim1akXXVSHHxQdCrNA 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
CA3s57VaDTleG1Wd21wX barcodes .tsv.gz 6 odGzhNIQSkMOkaVhbkyU5w 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
NgLuRTl6WDHt8bhjZfhj matrix .mtx.gz 6 B9zq5Zvi4gJTGKSCRSn7zQ 4aFiVMrLvr2yiopRZ1Y6 8Pj12JLb 2023-03-30 23:16:33 NaT
E1hNK7YnrfYwc8F9vC3G input .fastq.gz 16 QDkCIyDtWe8tlrS9zG8gnw cUu9I6OzZJgZkyrK4D4h 8Pj12JLb 2023-03-30 23:16:57 NaT
0z1lsm4wUo2XamCsq2jJ output .bam 14 rGSwtSEKB65DaaQq740p6A M4vLFDKQmOA90LYs8fNL 8Pj12JLb 2023-03-30 23:16:57 NaT
iZ26MT0p56QN0flq69Pv Mouse Lymph Node scRNA-seq .h5ad 17341245 Qprqj0O23197Ko-VobaZiw H8XISAvITiHbM2Z0nxtn 8Pj12JLb 2023-03-30 23:17:45 NaT
tsjjl9Tl837wKMUmxkXC example .fcs 6785467 KCEXRahJ-Ui9Y6nksQ8z1A kXZegnhfZcQNtvYGY6C3 8Pj12JLb 2023-03-30 23:17:57 NaT
ln.select(ln.File).all()
[File(id='sfqjeqshOu4n2OCrxGj4', name='iris_new', suffix='.parquet', size=5629, hash='jUTdERuqlGv_GyqFfIEb2Q', source_id='aUvAkAwVxam9tDQcKfro', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 15, 58), updated_at=datetime.datetime(2023, 3, 30, 23, 16, 49)),
 File(id='oocXPPbvFX1fYuFt5i6D', name='mini', suffix='.csv', size=11, hash='z1LdF2qN4cN0M2sXrcW8aw', source_id='obsRwVGjOgOautAvjeiS', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 7)),
 File(id='lM7fgzdhxYBCYVbQvt5V', name='metrics_summary', suffix='.csv', size=6, hash='Qt326UFWQibtvzRP1mhnJw', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='ABx9QrIk3ZfKMKT1zZ54', name='raw_feature_bc_matrix', suffix='.h5', size=6, hash='YeBfpVEGZriQhUNira-qiA', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='6DuGMqan46UGUU0S7Ivp', name='possorted_genome_bam', suffix='.bam.bai', size=6, hash='BJDp79QxGfDAds40LMLUHw', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='4kEgNOBUIpNORbubLdQu', name='web_summary', suffix='.html', size=6, hash='P3VFbegx8Uvt70i82pN4kA', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='4spvfwRkeBXOXhdIEpaL', name='cloupe', suffix='.cloupe', size=6, hash='kRJKl4U-rCLESg8i6Tk1QA', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='kId2tfidnfLUT9JatfJG', name='possorted_genome_bam', suffix='.bam', size=6, hash='HguFGkYNsZBrkJ-7K3mLBw', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='pN3ap4esn0TBkJEsb4DB', name='molecule_info', suffix='.h5', size=6, hash='YBZ-JAAuZwNT6mjdLqwtGA', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='THjan9RLY3JUotzHlYbq', name='filtered_feature_bc_matrix', suffix='.h5', size=6, hash='UyuUA2YXfAJBEefQw_wVhA', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='jzC6yuaVbt7if2dSso9z', name='features', suffix='.tsv.gz', size=6, hash='CPLWI4kM2TYtpVA1GP4B4g', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='DEN1tlUq2AtRJuk02iOZ', name='barcodes', suffix='.tsv.gz', size=6, hash='HkZpiKOqDM14o_BdT-jdRg', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='AQ4Cs8pvSSZXLlffNXUZ', name='matrix', suffix='.mtx.gz', size=6, hash='0A_HXnvE3DwCXRHYtSknJw', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='R8QdanNZelXhxQsfFP0n', name='analysis', suffix='.csv', size=6, hash='dYsOqf4SMpTH5HqeAVgCHw', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='hwALCxnzc9TC1Q2pF9mM', name='features', suffix='.tsv.gz', size=6, hash='amZgim1akXXVSHHxQdCrNA', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='CA3s57VaDTleG1Wd21wX', name='barcodes', suffix='.tsv.gz', size=6, hash='odGzhNIQSkMOkaVhbkyU5w', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='NgLuRTl6WDHt8bhjZfhj', name='matrix', suffix='.mtx.gz', size=6, hash='B9zq5Zvi4gJTGKSCRSn7zQ', source_id='4aFiVMrLvr2yiopRZ1Y6', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 33)),
 File(id='E1hNK7YnrfYwc8F9vC3G', name='input', suffix='.fastq.gz', size=16, hash='QDkCIyDtWe8tlrS9zG8gnw', source_id='cUu9I6OzZJgZkyrK4D4h', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 57)),
 File(id='0z1lsm4wUo2XamCsq2jJ', name='output', suffix='.bam', size=14, hash='rGSwtSEKB65DaaQq740p6A', source_id='M4vLFDKQmOA90LYs8fNL', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 57)),
 File(id='iZ26MT0p56QN0flq69Pv', name='Mouse Lymph Node scRNA-seq', suffix='.h5ad', size=17341245, hash='Qprqj0O23197Ko-VobaZiw', source_id='H8XISAvITiHbM2Z0nxtn', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 17, 45)),
 File(id='tsjjl9Tl837wKMUmxkXC', name='example', suffix='.fcs', size=6785467, hash='KCEXRahJ-Ui9Y6nksQ8z1A', source_id='kXZegnhfZcQNtvYGY6C3', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 17, 57))]
ln.select(ln.File).first()
File(id='sfqjeqshOu4n2OCrxGj4', name='iris_new', suffix='.parquet', size=5629, hash='jUTdERuqlGv_GyqFfIEb2Q', source_id='aUvAkAwVxam9tDQcKfro', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 15, 58), updated_at=datetime.datetime(2023, 3, 30, 23, 16, 49))
ln.select(ln.File, name="Mouse Lymph Node scRNA-seq", suffix=".h5ad").one()
File(id='iZ26MT0p56QN0flq69Pv', name='Mouse Lymph Node scRNA-seq', suffix='.h5ad', size=17341245, hash='Qprqj0O23197Ko-VobaZiw', source_id='H8XISAvITiHbM2Z0nxtn', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 17, 45))

And#

ln.select(ln.File).where(
    ln.File.name == "Mouse Lymph Node scRNA-seq", ln.File.suffix == ".h5ad"
).one()
File(id='iZ26MT0p56QN0flq69Pv', name='Mouse Lymph Node scRNA-seq', suffix='.h5ad', size=17341245, hash='Qprqj0O23197Ko-VobaZiw', source_id='H8XISAvITiHbM2Z0nxtn', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 17, 45))

Or#

ln.select(ln.File).where(or_(ln.File.suffix == ".zarr", ln.File.suffix == ".h5ad")).df()
name suffix size hash source_id storage_id created_at updated_at
id
iZ26MT0p56QN0flq69Pv Mouse Lymph Node scRNA-seq .h5ad 17341245 Qprqj0O23197Ko-VobaZiw H8XISAvITiHbM2Z0nxtn 8Pj12JLb 2023-03-30 23:17:45 None

In#

ln.select(ln.File).where(ln.File.suffix.in_([".zarr", ".h5ad"])).df()
name suffix size hash source_id storage_id created_at updated_at
id
iZ26MT0p56QN0flq69Pv Mouse Lymph Node scRNA-seq .h5ad 17341245 Qprqj0O23197Ko-VobaZiw H8XISAvITiHbM2Z0nxtn 8Pj12JLb 2023-03-30 23:17:45 None

Order by#

ln.select(ln.Run).order_by(ln.Run.created_at.desc()).df()
name external_id transform_id transform_v created_by created_at
id
raQ31pdoVbCUhfMVb3G2 None None 2l46gxQHI6Ud 0 bKeW4T6E 2023-03-30 23:19:44
c42EAlE765NP4ibZbhEa None None zMCvXplQ8kTk 0 bKeW4T6E 2023-03-30 23:18:05
kXZegnhfZcQNtvYGY6C3 None None OWuTtS4SApon 0 bKeW4T6E 2023-03-30 23:17:53
H8XISAvITiHbM2Z0nxtn None None Nv48yAceNSh8 0 bKeW4T6E 2023-03-30 23:17:18
l1cjVOvwRlrb9n4OnMUQ None None 6ZBQKdB7Mvlh 0 DzTjkKse 2023-03-30 23:17:05
M4vLFDKQmOA90LYs8fNL None None BQNTTPRP 7 DzTjkKse 2023-03-30 23:16:57
cUu9I6OzZJgZkyrK4D4h None None g0mYW16n 1 DzTjkKse 2023-03-30 23:16:57
VbVAEqftAYKe1jl2QnS0 None None 1LCd8kco9lZU 0 DzTjkKse 2023-03-30 23:16:57
8FRls4T4CujAdW92XRT3 None None yowZSQXMmZkT 0 DzTjkKse 2023-03-30 23:16:49
OvcwilVppkGAnRY3YcH5 None None vldHzF3aTAiW 0 DzTjkKse 2023-03-30 23:16:41
4aFiVMrLvr2yiopRZ1Y6 None None QrRtGnxmM3Bo 0 DzTjkKse 2023-03-30 23:16:33
obsRwVGjOgOautAvjeiS None None OEbRXnepeCqE 0 DzTjkKse 2023-03-30 23:16:07
aUvAkAwVxam9tDQcKfro None None OdlFhFWW7qg3 0 DzTjkKse 2023-03-30 23:15:58
bpttr5hRLo73B4m2co3v None None NJvdsWWbJlZS 0 DzTjkKse 2023-03-30 23:15:49

Contains#

ln.select(ln.Transform).where(ln.Transform.title.contains("Link")).df()
name type title reference created_by created_at updated_at
id v
Nv48yAceNSh8 0 11-scrna TransformType.notebook Linking scRNA-seq data against `Gene` None bKeW4T6E 2023-03-30 23:17:18 None
OWuTtS4SApon 0 12-flow TransformType.notebook Linking flow cytometry data against `CellMarker` None bKeW4T6E 2023-03-30 23:17:53 None

Data lineage#

Track run, inputs & outputs#

Which run produced the file output.bam, and what are that run's inputs and outputs?

ss = ln.Session()

run = ss.select(ln.Run).join(ln.File, name="output", suffix=".bam").one()
run.inputs
[[session open] File(id='E1hNK7YnrfYwc8F9vC3G', name='input', suffix='.fastq.gz', size=16, hash='QDkCIyDtWe8tlrS9zG8gnw', source_id='cUu9I6OzZJgZkyrK4D4h', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 57))]
run.outputs
[[session open] File(id='0z1lsm4wUo2XamCsq2jJ', name='output', suffix='.bam', size=14, hash='rGSwtSEKB65DaaQq740p6A', source_id='M4vLFDKQmOA90LYs8fNL', storage_id='8Pj12JLb', created_at=datetime.datetime(2023, 3, 30, 23, 16, 57))]
ss.close()

Track notebook#

Which notebook ingested the dataset Mouse Lymph Node scRNA-seq.h5ad?

ln.select(ln.Transform).join(ln.Run).join(
    ln.File, name="Mouse Lymph Node scRNA-seq", suffix=".h5ad"
).one()
Transform(id='Nv48yAceNSh8', v='0', name='11-scrna', type=notebook, title='Linking scRNA-seq data against `Gene`', created_by='bKeW4T6E', created_at=datetime.datetime(2023, 3, 30, 23, 17, 18))

Which notebooks were created by testuser2? (The query selects every transform created by this user; here all of them happen to be notebooks.)

ln.select(ln.Transform).join(lns.User, handle="testuser2").df()
name type title reference created_by created_at updated_at
id v
Nv48yAceNSh8 0 11-scrna TransformType.notebook Linking scRNA-seq data against `Gene` None bKeW4T6E 2023-03-30 23:17:18 None
OWuTtS4SApon 0 12-flow TransformType.notebook Linking flow cytometry data against `CellMarker` None bKeW4T6E 2023-03-30 23:17:53 None
zMCvXplQ8kTk 0 14-link-samples TransformType.notebook Track sample-level metadata None bKeW4T6E 2023-03-30 23:18:05 None
2l46gxQHI6Ud 0 15-query-book TransformType.notebook Query book None bKeW4T6E 2023-03-30 23:19:44 None

Track pipeline#

Which pipeline produced the file input.fastq.gz?

with ln.Session() as ss:
    file = ss.select(ln.File, name="input", suffix=".fastq.gz").one()
    print(file.source.transform)
[session open] Transform(id='g0mYW16n', v='1', name='10x scRNA-seq nextseq', type=pipeline, created_by='DzTjkKse', created_at=datetime.datetime(2023, 3, 30, 23, 16, 57))

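Which transforms were registered by testuser1? (The query does not filter on the transform type, so the result lists this user's notebooks alongside the two pipelines.)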

ln.select(ln.Transform).join(lns.User, handle="testuser1").df()
name type title reference created_by created_at updated_at
id v
NJvdsWWbJlZS 0 03-files TransformType.notebook Track files None DzTjkKse 2023-03-30 23:15:49 None
OdlFhFWW7qg3 0 04-memory TransformType.notebook Track in-memory data objects None DzTjkKse 2023-03-30 23:15:58 None
OEbRXnepeCqE 0 05-existing TransformType.notebook Track data in existing storage locations None DzTjkKse 2023-03-30 23:16:07 None
QrRtGnxmM3Bo 0 06-folder TransformType.notebook Track folders None DzTjkKse 2023-03-30 23:16:33 None
vldHzF3aTAiW 0 07-select TransformType.notebook Query data None DzTjkKse 2023-03-30 23:16:41 None
yowZSQXMmZkT 0 08-add-delete TransformType.notebook Add & delete metadata None DzTjkKse 2023-03-30 23:16:49 None
1LCd8kco9lZU 0 08-run TransformType.notebook Track data lineage: `Transform` & `Run` None DzTjkKse 2023-03-30 23:16:57 None
g0mYW16n 1 10x scRNA-seq nextseq TransformType.pipeline None None DzTjkKse 2023-03-30 23:16:57 None
BQNTTPRP 7 Cell Ranger TransformType.pipeline None None DzTjkKse 2023-03-30 23:16:57 None
6ZBQKdB7Mvlh 0 09-schema TransformType.notebook Model data None DzTjkKse 2023-03-30 23:17:05 None

Track user#

Which users have interacted with the database?

ln.select(lns.User).df()
email handle name created_at updated_at
id
DzTjkKse testuser1@lamin.ai testuser1 Test User1 2023-03-30 23:15:36 None
bKeW4T6E testuser2@lamin.ai testuser2 Test User2 2023-03-30 23:17:12 None

Which user ingested the file input.fastq.gz?

ln.select(lns.User).join(ln.Run).join(ln.File, name="input", suffix=".fastq.gz").one()
User(id='DzTjkKse', email='testuser1@lamin.ai', handle='testuser1', name='Test User1', created_at=datetime.datetime(2023, 3, 30, 23, 15, 36))

Which users created notebooks with “lineage” in the title?

ln.select(lns.User).join(ln.Transform).where(
    ln.Transform.title.contains("lineage")
).df()
email handle name created_at updated_at
id
DzTjkKse testuser1@lamin.ai testuser1 Test User1 2023-03-30 23:15:36 None

Which user registered the pipeline Cell Ranger (version 7)?

ln.select(lns.User).join(ln.Transform, name="Cell Ranger", v="7").one()
User(id='DzTjkKse', email='testuser1@lamin.ai', handle='testuser1', name='Test User1', created_at=datetime.datetime(2023, 3, 30, 23, 15, 36))