Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,7 @@ fn inner_create_index<'local>(
| IndexType::Inverted
| IndexType::NGram
| IndexType::ZoneMap
| IndexType::PartitionedZoneMap
| IndexType::BloomFilter
| IndexType::RTree => {
// For scalar indices, create a scalar IndexParams
Expand Down
2 changes: 2 additions & 0 deletions java/lance-jni/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ fn determine_index_type<'local>(
Some("INVERTED")
} else if lower.contains("ngram") {
Some("NGRAM")
} else if lower.contains("partitionedzonemap") {
Some("PARTITIONED_ZONEMAP")
} else if lower.contains("zonemap") {
Some("ZONEMAP")
} else if lower.contains("bloomfilter") {
Expand Down
1 change: 1 addition & 0 deletions java/src/main/java/org/lance/index/IndexType.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public enum IndexType {
MEM_WAL(7),
ZONEMAP(8),
BLOOM_FILTER(9),
PARTITIONED_ZONEMAP(11),
VECTOR(100),
IVF_FLAT(101),
IVF_SQ(102),
Expand Down
4 changes: 3 additions & 1 deletion protos/index.proto
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,6 @@ message JsonIndexDetails {
}
message BloomFilterIndexDetails {}

message RTreeIndexDetails {}
message PartitionedZoneMapIndexDetails {}

message RTreeIndexDetails {}
15 changes: 11 additions & 4 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2789,6 +2789,7 @@ def create_scalar_index(
Literal["FTS"],
Literal["NGRAM"],
Literal["ZONEMAP"],
Literal["PARTITIONED_ZONEMAP"],
Literal["BLOOMFILTER"],
Literal["RTREE"],
IndexConfig,
Expand Down Expand Up @@ -2834,7 +2835,7 @@ def create_scalar_index(
)


There are 5 types of scalar indices available today.
There are several types of scalar indices available today.

* ``BTREE``. The most common type is ``BTREE``. This index is inspired
by the btree data structure although only the first few layers of the btree
Expand All @@ -2858,6 +2859,9 @@ def create_scalar_index(
called zones and stores summary statistics for each zone (min, max,
null_count, nan_count, fragment_id, local_row_offset). It's very small but
only effective if the column is at least approximately in sorted order.
* ``PARTITIONED_ZONEMAP``. This is a distributed variant of zonemap that stores
one shard per fragment build. It is intended for executor-parallel index
creation and uses the same pruning semantics as ``ZONEMAP`` at query time.
* ``INVERTED`` (alias: ``FTS``). It is used to index document columns. This
index can conduct full-text searches. For example, a column that contains any
word
Expand All @@ -2879,7 +2883,8 @@ def create_scalar_index(
or string column.
index_type : str
The type of the index. One of ``"BTREE"``, ``"BITMAP"``,
``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"INVERTED"``,
``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``,
``"PARTITIONED_ZONEMAP"``, ``"INVERTED"``,
``"FTS"``, ``"BLOOMFILTER"``, ``"RTREE"``.
name : str, optional
The index name. If not provided, it will be generated from the
Expand Down Expand Up @@ -3012,6 +3017,7 @@ def create_scalar_index(
"BITMAP",
"NGRAM",
"ZONEMAP",
"PARTITIONED_ZONEMAP",
"LABEL_LIST",
"INVERTED",
"FTS",
Expand All @@ -3020,8 +3026,9 @@ def create_scalar_index(
]:
raise NotImplementedError(
(
'Only "BTREE", "BITMAP", "NGRAM", "ZONEMAP", "LABEL_LIST", '
'"INVERTED", "BLOOMFILTER" or "RTREE" are supported for '
'Only "BTREE", "BITMAP", "NGRAM", "ZONEMAP", '
'"PARTITIONED_ZONEMAP", "LABEL_LIST", "INVERTED", '
'"BLOOMFILTER" or "RTREE" are supported for '
f"scalar columns. Received {index_type}",
)
)
Expand Down
1 change: 1 addition & 0 deletions python/python/lance/indices/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class SupportedDistributedIndices(str, Enum):
# Scalar index types
BTREE = "BTREE"
INVERTED = "INVERTED"
PARTITIONED_ZONEMAP = "PARTITIONED_ZONEMAP"

# Precise vector index types supported by distributed merge
IVF_FLAT = "IVF_FLAT"
Expand Down
5 changes: 5 additions & 0 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1972,6 +1972,7 @@ impl Dataset {
"BITMAP" => IndexType::Bitmap,
"NGRAM" => IndexType::NGram,
"ZONEMAP" => IndexType::ZoneMap,
"PARTITIONED_ZONEMAP" => IndexType::PartitionedZoneMap,
"BLOOMFILTER" => IndexType::BloomFilter,
"LABEL_LIST" => IndexType::LabelList,
"RTREE" => IndexType::RTree,
Expand Down Expand Up @@ -2003,6 +2004,10 @@ impl Dataset {
index_type: "zonemap".to_string(),
params: None,
}),
"PARTITIONED_ZONEMAP" => Box::new(ScalarIndexParams {
index_type: "partitioned_zonemap".to_string(),
params: None,
}),
"LABEL_LIST" => Box::new(ScalarIndexParams {
index_type: "label_list".to_string(),
params: None,
Expand Down
11 changes: 10 additions & 1 deletion rust/lance-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ pub enum IndexType {

RTree = 10, // RTree

PartitionedZoneMap = 11, // Distributed zonemap stored as partition shards

// 100+ and up for vector index.
/// Flat vector index.
Vector = 100, // Legacy vector index, alias to IvfPq
Expand All @@ -150,6 +152,7 @@ impl std::fmt::Display for IndexType {
Self::ZoneMap => write!(f, "ZoneMap"),
Self::BloomFilter => write!(f, "BloomFilter"),
Self::RTree => write!(f, "RTree"),
Self::PartitionedZoneMap => write!(f, "PartitionedZoneMap"),
Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
Self::IvfFlat => write!(f, "IVF_FLAT"),
Self::IvfSq => write!(f, "IVF_SQ"),
Expand All @@ -176,6 +179,7 @@ impl TryFrom<i32> for IndexType {
v if v == Self::MemWal as i32 => Ok(Self::MemWal),
v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
v if v == Self::PartitionedZoneMap as i32 => Ok(Self::PartitionedZoneMap),
v if v == Self::Vector as i32 => Ok(Self::Vector),
v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
Expand All @@ -202,6 +206,9 @@ impl TryFrom<&str> for IndexType {
"Inverted" | "INVERTED" => Ok(Self::Inverted),
"NGram" | "NGRAM" => Ok(Self::NGram),
"ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
"PartitionedZoneMap" | "PARTITIONED_ZONEMAP" | "PARTITIONED_ZONE_MAP" => {
Ok(Self::PartitionedZoneMap)
}
"Vector" | "VECTOR" => Ok(Self::Vector),
"IVF_FLAT" => Ok(Self::IvfFlat),
"IVF_SQ" => Ok(Self::IvfSq),
Expand Down Expand Up @@ -232,7 +239,8 @@ impl IndexType {
| Self::NGram
| Self::ZoneMap
| Self::BloomFilter
| Self::RTree,
| Self::RTree
| Self::PartitionedZoneMap,
)
}

Expand Down Expand Up @@ -272,6 +280,7 @@ impl IndexType {
Self::ZoneMap => 0,
Self::BloomFilter => 0,
Self::RTree => 0,
Self::PartitionedZoneMap => 0,

// IMPORTANT: if any vector index subtype needs a format bump that is
// not backward compatible, its new version must be set to
Expand Down
22 changes: 19 additions & 3 deletions rust/lance-index/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@ use crate::scalar::rtree::RTreeIndexPlugin;
use crate::{
pb, pbold,
scalar::{
bitmap::BitmapIndexPlugin, bloomfilter::BloomFilterIndexPlugin, btree::BTreeIndexPlugin,
inverted::InvertedIndexPlugin, json::JsonIndexPlugin, label_list::LabelListIndexPlugin,
ngram::NGramIndexPlugin, registry::ScalarIndexPlugin, zonemap::ZoneMapIndexPlugin,
bitmap::BitmapIndexPlugin,
bloomfilter::BloomFilterIndexPlugin,
btree::BTreeIndexPlugin,
inverted::InvertedIndexPlugin,
json::JsonIndexPlugin,
label_list::LabelListIndexPlugin,
ngram::NGramIndexPlugin,
registry::ScalarIndexPlugin,
zonemap::{PartitionedZoneMapIndexPlugin, ZoneMapIndexPlugin},
},
};

Expand Down Expand Up @@ -49,6 +55,14 @@ impl IndexPluginRegistry {
.insert(plugin_name, Box::new(PluginType::default()));
}

/// Register a default-constructed `PluginType` under an additional name.
///
/// A boxed `PluginType::default()` is inserted into `self.plugins` using
/// `alias` (copied into an owned `String`) as the key.
///
/// NOTE(review): if `alias` is already present, the outcome depends on the
/// semantics of the `plugins` container's `insert` — presumably the previous
/// entry is replaced; confirm against the field's type.
pub fn add_alias<PluginType>(&mut self, alias: &str)
where
    PluginType: ScalarIndexPlugin + std::default::Default + 'static,
{
    let key = alias.to_string();
    let plugin = Box::new(PluginType::default());
    self.plugins.insert(key, plugin);
}

/// Create a registry with the default plugins
pub fn with_default_plugins() -> Arc<Self> {
let mut registry = Self {
Expand All @@ -59,6 +73,8 @@ impl IndexPluginRegistry {
registry.add_plugin::<pbold::LabelListIndexDetails, LabelListIndexPlugin>();
registry.add_plugin::<pbold::NGramIndexDetails, NGramIndexPlugin>();
registry.add_plugin::<pbold::ZoneMapIndexDetails, ZoneMapIndexPlugin>();
registry.add_plugin::<pb::PartitionedZoneMapIndexDetails, PartitionedZoneMapIndexPlugin>();
registry.add_alias::<PartitionedZoneMapIndexPlugin>("partitioned_zonemap");
registry.add_plugin::<pb::BloomFilterIndexDetails, BloomFilterIndexPlugin>();
registry.add_plugin::<pbold::InvertedIndexDetails, InvertedIndexPlugin>();
registry.add_plugin::<pb::JsonIndexDetails, JsonIndexPlugin>();
Expand Down
3 changes: 3 additions & 0 deletions rust/lance-index/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ pub enum BuiltinIndexType {
LabelList,
NGram,
ZoneMap,
PartitionedZoneMap,
BloomFilter,
RTree,
Inverted,
Expand All @@ -76,6 +77,7 @@ impl BuiltinIndexType {
Self::LabelList => "labellist",
Self::NGram => "ngram",
Self::ZoneMap => "zonemap",
Self::PartitionedZoneMap => "partitioned_zonemap",
Self::Inverted => "inverted",
Self::BloomFilter => "bloomfilter",
Self::RTree => "rtree",
Expand All @@ -93,6 +95,7 @@ impl TryFrom<IndexType> for BuiltinIndexType {
IndexType::LabelList => Ok(Self::LabelList),
IndexType::NGram => Ok(Self::NGram),
IndexType::ZoneMap => Ok(Self::ZoneMap),
IndexType::PartitionedZoneMap => Ok(Self::PartitionedZoneMap),
IndexType::Inverted => Ok(Self::Inverted),
IndexType::BloomFilter => Ok(Self::BloomFilter),
IndexType::RTree => Ok(Self::RTree),
Expand Down
Loading
Loading