From 73895258c89d2e524bac00c90cf66972ec62a77e Mon Sep 17 00:00:00 2001 From: "mintlify[bot]" <109931778+mintlify[bot]@users.noreply.github.com> Date: Wed, 20 May 2026 07:00:57 +0000 Subject: [PATCH] docs: document scalar indexing on nested struct fields --- docs/indexing/scalar-index.mdx | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/docs/indexing/scalar-index.mdx b/docs/indexing/scalar-index.mdx index e41980b..5301bb0 100644 --- a/docs/indexing/scalar-index.mdx +++ b/docs/indexing/scalar-index.mdx @@ -140,3 +140,65 @@ LanceDB supports scalar indexes on UUID columns (stored as `FixedSizeBinary(16)` +## Index nested fields + +You can build a scalar index on a field inside a struct column by passing the +canonical dot-separated path to `create_index`. This is useful when filters +target attributes nested under a `metadata`-style column, for example +`metadata.user_id` or `metadata.event.type`. + +If a literal segment of the path itself contains a dot (for example a field +named `user.id` nested inside `metadata`), wrap that segment in backticks so +LanceDB can tell the dot apart from the path separator: `` metadata.`user.id` ``. + +`list_indices()` echoes the same canonical path back, so the column you pass in +round-trips through index metadata regardless of nesting depth or escaping. + +```python Python icon="python" +import pyarrow as pa +from lancedb.index import BTree + +metadata_type = pa.struct( + [ + pa.field("user_id", pa.int32()), + pa.field("user.id", pa.int32()), + ] +) +data = pa.Table.from_arrays( + [ + pa.array([1, 2, 3], type=pa.int32()), + pa.array( + [ + {"user_id": 10, "user.id": 100}, + {"user_id": 20, "user.id": 200}, + {"user_id": 30, "user.id": 300}, + ], + type=metadata_type, + ), + ], + names=["user_id", "metadata"], +) +table = await db.create_table("nested_scalar_index", data) + +# Index a nested struct field. 
+await table.create_index( + "metadata.user_id", config=BTree(), name="nested_user_id_idx" +) + +# Escape literal dots inside a segment with backticks. +await table.create_index( + "metadata.`user.id`", config=BTree(), name="escaped_user_id_idx" +) + +# `columns` is returned as the canonical path you passed in. +for index in await table.list_indices(): + print(index.name, index.columns) +# nested_user_id_idx ['metadata.user_id'] +# escaped_user_id_idx ['metadata.`user.id`'] +``` + + +Composite indexes that cover multiple columns aren't supported yet. Each +`create_index` call must target a single (possibly nested) field path. + +