diff --git a/docs/release notes/4.0.0-RC.9/pr685.feature.md b/docs/release notes/4.0.0-RC.9/pr685.feature.md new file mode 100644 index 000000000..806e19c9a --- /dev/null +++ b/docs/release notes/4.0.0-RC.9/pr685.feature.md @@ -0,0 +1,8 @@ +### Support storing and loading an empty graph + +**Description** +Allow an empty graph to be stored to disk and loaded back from disk + +**Purpose / Impact** +- Preserves graph metadata (similarity function, features, dimensions) even if the graph is empty + diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java index 8135bba25..d4432665e 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java @@ -951,7 +951,9 @@ private void loadV4(RandomAccessReader in) throws IOException { } graph.setDegrees(layerDegrees); - graph.updateEntryNode(new NodeAtLevel(graph.getMaxLevel(), entryNode)); + if (entryNode != ImmutableGraphIndex.OMITTED) { + graph.updateEntryNode(new NodeAtLevel(graph.getMaxLevel(), entryNode)); + } } @Deprecated @@ -984,7 +986,9 @@ private void loadV3(RandomAccessReader in, int size) throws IOException { graph.markComplete(new NodeAtLevel(0, nodeId)); } - graph.updateEntryNode(new NodeAtLevel(0, entryNode)); + if (entryNode != ImmutableGraphIndex.OMITTED) { + graph.updateEntryNode(new NodeAtLevel(0, entryNode)); + } graph.setDegrees(List.of(maxDegree)); } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/ImmutableGraphIndex.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/ImmutableGraphIndex.java index a4758c493..4bee5f73d 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/ImmutableGraphIndex.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/ImmutableGraphIndex.java @@ 
-24,6 +24,7 @@ package io.github.jbellis.jvector.graph; +import io.github.jbellis.jvector.graph.disk.OrdinalMapper; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.util.Accountable; import io.github.jbellis.jvector.util.Bits; @@ -35,7 +36,6 @@ import java.io.Closeable; import java.io.IOException; -import java.util.function.Function; /** * Represents a graph-based vector index. Nodes are represented as ints, and edges are @@ -48,6 +48,8 @@ * in a View that should be created per accessing thread. */ public interface ImmutableGraphIndex extends AutoCloseable, Accountable { + int OMITTED = OrdinalMapper.OMITTED; // same as OrdinalMapper, since OrdinalMapper::oldToNew may return it + /** Returns the number of nodes in the graph */ @Deprecated default int size() { diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java index 9ed1a92dd..be67b0c6a 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java @@ -49,7 +49,6 @@ import java.util.concurrent.atomic.AtomicIntegerArray; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.StampedLock; -import java.util.function.Function; import java.util.stream.IntStream; /** @@ -542,8 +541,12 @@ public void save(DataOutput out) throws IOException { } var entryNode = entryPoint.get(); - assert entryNode.level == getMaxLevel(); - out.writeInt(entryNode.node); + if (entryNode != null) { + assert entryNode.level == getMaxLevel(); + out.writeInt(entryNode.node); + } else { + out.writeInt(OMITTED); + } for (int level = 0; level < layers.size(); level++) { out.writeInt(size(level)); @@ -618,7 +621,9 @@ public static OnHeapGraphIndex load(RandomAccessReader in, int dimension, double } graph.setDegrees(layerDegrees); - 
graph.updateEntryNode(new NodeAtLevel(graph.getMaxLevel(), entryNode)); + if (entryNode != OMITTED) { + graph.updateEntryNode(new NodeAtLevel(graph.getMaxLevel(), entryNode)); + } return graph; } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java index a5ff739f3..fa6a8e378 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java @@ -175,7 +175,7 @@ void writeFooter(ImmutableGraphIndex.View view, long headerOffset) throws IOExce var layerInfo = CommonHeader.LayerInfo.fromGraph(graph, ordinalMapper); var commonHeader = new CommonHeader(version, dimension, - ordinalMapper.oldToNew(view.entryNode().node), + view.entryNode() == null ? OrdinalMapper.OMITTED : ordinalMapper.oldToNew(view.entryNode().node), layerInfo, ordinalMapper.maxOrdinal() + 1); var header = new Header(commonHeader, featureMap); @@ -198,7 +198,7 @@ protected synchronized void writeHeader(ImmutableGraphIndex.View view, long star var layerInfo = CommonHeader.LayerInfo.fromGraph(graph, ordinalMapper); var commonHeader = new CommonHeader(version, dimension, - ordinalMapper.oldToNew(view.entryNode().node), + view.entryNode() == null ? 
OrdinalMapper.OMITTED : ordinalMapper.oldToNew(view.entryNode().node), layerInfo, ordinalMapper.maxOrdinal() + 1); var header = new Header(commonHeader, featureMap); diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CommonHeader.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CommonHeader.java index 5d0a1aecb..90a812c09 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CommonHeader.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CommonHeader.java @@ -81,10 +81,10 @@ void write(IndexWriter out) throws IOException { out.writeInt(OnDiskGraphIndex.MAGIC); out.writeInt(version); } - out.writeInt(layerInfo.get(0).size); + out.writeInt(layerInfo.isEmpty() ? 0 : layerInfo.get(0).size); out.writeInt(dimension); out.writeInt(entryNode); - out.writeInt(layerInfo.get(0).degree); + out.writeInt(layerInfo.isEmpty() ? 0 : layerInfo.get(0).degree); if (version >= 4) { out.writeInt(idUpperBound); diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java index 3fb69d967..1bf6c704e 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java @@ -50,7 +50,6 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; -import java.util.function.Function; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,7 +94,11 @@ private OnDiskGraphIndex(ReaderSupplier readerSupplier, Header header, long neig this.version = header.common.version; this.layerInfo = header.common.layerInfo; this.dimension = header.common.dimension; - this.entryNode = new NodeAtLevel(header.common.layerInfo.size() - 1, header.common.entryNode); + if 
(header.common.entryNode == OMITTED) { + this.entryNode = null; + } else { + this.entryNode = new NodeAtLevel(header.common.layerInfo.size() - 1, header.common.entryNode); + } this.idUpperBound = header.common.idUpperBound; this.features = header.features; this.neighborsOffset = neighborsOffset; @@ -128,6 +131,9 @@ private List> getInMemoryLayers(RandomAccessReader in) private List> loadInMemoryLayers(RandomAccessReader in) throws IOException { var imn = new ArrayList>(layerInfo.size()); + if (layerInfo.isEmpty()) { + return imn; + } // For levels > 0, we load adjacency into memory imn.add(null); // L0 placeholder so we don't have to mangle indexing long L0size = idUpperBound * (inlineBlockSize + Integer.BYTES * (1L + 1L + layerInfo.get(0).degree)); @@ -333,12 +339,12 @@ public int getDimension() { @Override public int size(int level) { - return layerInfo.get(level).size; + return layerInfo.isEmpty() ? 0 : layerInfo.get(level).size; } @Override public int getDegree(int level) { - return layerInfo.get(level).degree; + return layerInfo.isEmpty() ? 0 : layerInfo.get(level).degree; } @Override @@ -435,7 +441,7 @@ public String toString() { @Override public int getMaxLevel() { - return entryNode.level; + return entryNode == null ? 
0 : entryNode.level; } @Override diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/GraphIndexBuilderTest.java b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/GraphIndexBuilderTest.java index 59b248584..659abecac 100644 --- a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/GraphIndexBuilderTest.java +++ b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/GraphIndexBuilderTest.java @@ -38,6 +38,7 @@ import static io.github.jbellis.jvector.TestUtil.assertGraphEquals; import static io.github.jbellis.jvector.graph.TestVectorGraph.createRandomFloatVectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @ThreadLeakScope(ThreadLeakScope.Scope.NONE) @@ -156,6 +157,34 @@ public void testSaveAndLoad() throws IOException { } assertGraphEquals(graph, builder.graph); } + + @Test + public void testSaveAndLoadEmptyGraph() throws IOException { + int dimension = randomIntBetween(2, 32); + var ravv = MockVectorValues.empty(dimension); + + Supplier newBuilder = () -> + new GraphIndexBuilder(ravv, VectorSimilarityFunction.COSINE, 2, 10, 1.0f, 1.0f, true); + + var indexDataPath = testDirectory.resolve("index_builder_empty.data"); + var builder = newBuilder.get(); + + var graph = TestUtil.buildSequentially(builder, ravv); + + try (var out = TestUtil.openDataOutputStream(indexDataPath)) { + ((OnHeapGraphIndex) graph).setAllMutationsCompleted(); + ((OnHeapGraphIndex) graph).save(out); + } + + builder = newBuilder.get(); + try(var readerSupplier = new SimpleMappedReader.Supplier(indexDataPath)) { + builder.load(readerSupplier.get()); + } + + assertEquals(ravv.size(), builder.graph.size(0)); + assertNull(builder.graph.entryNode()); + assertGraphEquals(graph, builder.graph); + } // Because RandomAccessVectorValues is exposed in such a way that it allows for subsequent additions to the // vector source, we need to ensure that GraphIndexBuilder can 
handle this. diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/MockVectorValues.java b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/MockVectorValues.java index a5a23b245..8bb2dfa01 100644 --- a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/MockVectorValues.java +++ b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/MockVectorValues.java @@ -39,6 +39,10 @@ public static MockVectorValues fromValues(VectorFloat[] values) { return new MockVectorValues(values[0].length(), values); } + public static MockVectorValues empty(int dimension) { + return new MockVectorValues(dimension, new VectorFloat[0]); + } + MockVectorValues(int dimension, VectorFloat[] denseValues) { this.dimension = dimension; this.denseValues = denseValues;