diff --git a/examples/prisma-next-demo/src/main.ts b/examples/prisma-next-demo/src/main.ts index 46ddd6f135..23a406c01a 100644 --- a/examples/prisma-next-demo/src/main.ts +++ b/examples/prisma-next-demo/src/main.ts @@ -37,6 +37,11 @@ * Vector similarity search via ORM client * - users-paginate [cursor] Cursor-based pagination * - similarity-search Vector similarity search (pgvector) + * - cross-author-similarity [limit] + * SQL DSL escape-hatch: closest post pairs across different + * authors via a self-join on a non-relation predicate, with + * cosineDistance over two column references — a shape the + * current ORM collection surface cannot directly express. * - budget-violation Demo budget enforcement error * - guardrail-delete Demo AST lint blocking DELETE without WHERE * @@ -63,6 +68,7 @@ import { ormClientGetUsersByIdCursor } from './orm-client/get-users-by-id-cursor import { ormClientSearchPostsByEmbedding } from './orm-client/search-posts-by-embedding'; import { ormClientUpsertUser } from './orm-client/upsert-user'; import { db } from './prisma/db'; +import { crossAuthorSimilarity } from './queries/cross-author-similarity'; import { deleteWithoutWhere } from './queries/delete-without-where'; import { getAllPostsUnbounded } from './queries/get-all-posts-unbounded'; import { getUserById } from './queries/get-user-by-id'; @@ -324,6 +330,12 @@ async function main() { const limit = limitStr ? Number.parseInt(limitStr, 10) : 10; const results = await similaritySearch(queryVector, limit); + console.log(JSON.stringify(results, null, 2)); + } else if (cmd === 'cross-author-similarity') { + const [limitStr] = args; + const limit = limitStr ? 
Number.parseInt(limitStr, 10) : 10; + const results = await crossAuthorSimilarity(limit); + console.log(JSON.stringify(results, null, 2)); } else if (cmd === 'budget-violation') { console.log('Running unbounded query to demonstrate budget violation...'); @@ -377,7 +389,8 @@ async function main() { 'repo-upsert-user | repo-create-user-address | ' + 'repo-similar-posts [limit] | repo-search-posts [limit] | ' + 'users-paginate [cursor] [limit] | users-paginate-back [limit] | ' + - 'similarity-search [limit] | budget-violation | guardrail-delete]', + 'similarity-search [limit] | cross-author-similarity [limit] | ' + + 'budget-violation | guardrail-delete]', ); process.exit(1); } diff --git a/examples/prisma-next-demo/src/queries/cross-author-similarity.ts b/examples/prisma-next-demo/src/queries/cross-author-similarity.ts new file mode 100644 index 0000000000..f4bdeda98d --- /dev/null +++ b/examples/prisma-next-demo/src/queries/cross-author-similarity.ts @@ -0,0 +1,64 @@ +import type { Runtime } from '@prisma-next/sql-runtime'; +import { db } from '../prisma/db'; + +/** + * "Cross-author similarity" — an SQL DSL escape-hatch query for a shape that the current ORM + * collection surface cannot directly express. + * + * Finds the closest pairs of posts written by *different* authors, ordered by cosine distance + * between their embeddings. For each pair, projects both posts' id/title/userId side-by-side + * along with the distance between their embeddings. + * + * Why this is an escape-hatch shape: + * 1. **Self-join on a non-relation predicate.** The ORM collection surface's join is + * relation-shaped — `include('posts', ...)` follows declared relations. Joining `Post` to + * itself on `p1.userId != p2.userId` is an arbitrary predicate join, not a relation, and + * cannot be expressed as a single collection query. + * 2. 
**Extension op taking two column references.** `cosineDistance(f.p1.embedding, + * f.p2.embedding)` compares two columns from two aliases within one query. The ORM's + * extension-op integration (TML-2042) is `column.method(boundValue)` — method-on-receiver + * form where the other argument must be a materialized value. `ormClientFindSimilarPosts` + * works around this by running a separate query to load the reference embedding first. + * The collection surface has no "column vs column within a single query" form. + * 3. **Flat peer-row projection.** A single collection query has a single root model and + * shapes its output row from that root plus its relations. Two sibling `Post` rows + * projected flat into one output row is not a shape the single-collection surface + * produces. + * + * Note: `@prisma-next/sql-orm-client` is a repository layer (ADR 164) and can orchestrate + * multiple plans for one logical operation, so a user could *simulate* this with client-side + * stitching — at the cost of extra round-trips and losing single-statement ordering/limit + * semantics. The point of the SQL DSL escape hatch is that this shape is a single SQL + * statement making one pass over the data. + * + * Features exercised: + * 1. Self-join via `.as()` aliasing of the same table (`post` aliased as `p1` and `p2`). + * 2. INNER JOIN with a non-equality predicate (`ne(p1.userId, p2.userId)`). + * 3. pgvector `cosineDistance` called with two column references from two aliases — in the + * SELECT projection and in the ORDER BY. + * 4. Typed result row inferred from the SELECT projection, mixing columns from both aliases. 
+ */ +export async function crossAuthorSimilarity(limit = 10, runtime?: Runtime) { + const plan = db.sql.post + .as('p1') + .innerJoin(db.sql.post.as('p2'), (f, fns) => fns.ne(f.p1.userId, f.p2.userId)) + .select((f, fns) => ({ + postAId: f.p1.id, + postATitle: f.p1.title, + postAUserId: f.p1.userId, + postBId: f.p2.id, + postBTitle: f.p2.title, + postBUserId: f.p2.userId, + distance: fns.cosineDistance(f.p1.embedding, f.p2.embedding), + })) + .where((f, fns) => fns.and(fns.ne(f.p1.embedding, null), fns.ne(f.p2.embedding, null))) + .orderBy((f, fns) => fns.cosineDistance(f.p1.embedding, f.p2.embedding), { + direction: 'asc', + }) + .orderBy((f) => f.p1.id, { direction: 'asc' }) + .orderBy((f) => f.p2.id, { direction: 'asc' }) + .limit(limit) + .build(); + + return (runtime ?? db.runtime()).execute(plan); +} diff --git a/examples/prisma-next-demo/test/sql-dsl.integration.test.ts b/examples/prisma-next-demo/test/sql-dsl.integration.test.ts new file mode 100644 index 0000000000..fa1ce3e660 --- /dev/null +++ b/examples/prisma-next-demo/test/sql-dsl.integration.test.ts @@ -0,0 +1,244 @@ +/** + * Integration tests for the SQL DSL as a standalone query surface. + * + * These tests validate that the SQL DSL can express and execute queries the ORM + * client cannot — fulfilling the "escape hatch" role described in TML-2160 and + * VP1 of the Runtime pipeline project. 
+ */ + +import { instantiateExecutionStack } from '@prisma-next/framework-components/execution'; +import { sql } from '@prisma-next/sql-builder/runtime'; +import type { SqlDriver } from '@prisma-next/sql-relational-core/ast'; +import { type CreateRuntimeOptions, createRuntime, type Runtime } from '@prisma-next/sql-runtime'; +import { timeouts, withDevDatabase } from '@prisma-next/test-utils'; +import { Pool } from 'pg'; +import { describe, expect, it } from 'vitest'; +import { db } from '../src/prisma/db'; +import { crossAuthorSimilarity } from '../src/queries/cross-author-similarity'; +import { initTestDatabase } from './utils/control-client'; + +const context = db.context; +const { contract } = context; +const executionStack = db.stack; + +async function createTestDriver(connectionString: string) { + const stackInstance = instantiateExecutionStack( + executionStack, + ) as CreateRuntimeOptions['stackInstance']; + const driver = stackInstance.driver as unknown as SqlDriver; + if (!driver) { + throw new Error('Driver descriptor missing from execution stack'); + } + const pool = new Pool({ connectionString }); + try { + await driver.connect({ kind: 'pgPool', pool }); + } catch (error) { + await pool.end(); + throw error; + } + return { stackInstance, driver }; +} + +async function getRuntime(connectionString: string): Promise<Runtime> { + const { stackInstance, driver } = await createTestDriver(connectionString); + return createRuntime({ + stackInstance, + context, + driver, + verify: { mode: 'onFirstUse', requireMarker: false }, + }); +} + +const seededUserIds = { + alice: '30000000-0000-0000-0000-000000000001', + bob: '30000000-0000-0000-0000-000000000002', + carol: '30000000-0000-0000-0000-000000000003', +} as const; + +const seededPostIds = { + aliceClose: '40000000-0000-0000-0000-000000000001', + aliceFar: '40000000-0000-0000-0000-000000000002', + bobClose: '40000000-0000-0000-0000-000000000003', + bobMid: '40000000-0000-0000-0000-000000000004', + bobFar: 
'40000000-0000-0000-0000-000000000005', + carolUnembedded: '40000000-0000-0000-0000-000000000006', +} as const; + +function makeVector(leadingValues: number[]): number[] { + const vec = new Array<number>(1536).fill(0); + for (let i = 0; i < leadingValues.length; i++) { + vec[i] = leadingValues[i]!; + } + return vec; +} + +function unorderedPairKey(a: string, b: string): string { + return a < b ? `${a}|${b}` : `${b}|${a}`; +} + +async function seedCrossAuthorSimilarity(runtime: Runtime): Promise<void> { + const builder = sql({ context }); + + const users = [ + { + id: seededUserIds.alice, + email: 'alice@example.com', + displayName: 'Alice', + createdAt: new Date('2024-03-01T00:00:00.000Z'), + kind: 'admin' as const, + }, + { + id: seededUserIds.bob, + email: 'bob@example.com', + displayName: 'Bob', + createdAt: new Date('2024-03-02T00:00:00.000Z'), + kind: 'user' as const, + }, + { + id: seededUserIds.carol, + email: 'carol@example.com', + displayName: 'Carol', + createdAt: new Date('2024-03-03T00:00:00.000Z'), + kind: 'user' as const, + }, + ]; + + for (const user of users) { + await runtime.execute(builder.user.insert(user).build()); + } + + // Alice's aliceFar [0.7,0.3,0] and Bob's bobClose [0.5,0.5,0] are the closest + // cross-author pair (cosine distance ≈ 0.0715). Alice's aliceClose [1,0,0] vs + // bobClose is second closest (≈ 0.2929). Carol has no embedded post and must + // never appear in a cross-author pair. 
+ const posts = [ + { + id: seededPostIds.aliceClose, + title: 'Alice close', + userId: seededUserIds.alice, + createdAt: new Date('2024-03-10T10:00:00.000Z'), + embedding: makeVector([1, 0, 0]), + }, + { + id: seededPostIds.aliceFar, + title: 'Alice far', + userId: seededUserIds.alice, + createdAt: new Date('2024-03-11T10:00:00.000Z'), + embedding: makeVector([0.7, 0.3, 0]), + }, + { + id: seededPostIds.bobClose, + title: 'Bob close', + userId: seededUserIds.bob, + createdAt: new Date('2024-03-12T10:00:00.000Z'), + embedding: makeVector([0.5, 0.5, 0]), + }, + { + id: seededPostIds.bobMid, + title: 'Bob mid', + userId: seededUserIds.bob, + createdAt: new Date('2024-03-13T10:00:00.000Z'), + embedding: makeVector([0, 1, 0]), + }, + { + id: seededPostIds.bobFar, + title: 'Bob far', + userId: seededUserIds.bob, + createdAt: new Date('2024-03-14T10:00:00.000Z'), + embedding: makeVector([-1, 0, 0]), + }, + { + id: seededPostIds.carolUnembedded, + title: 'Carol unembedded', + userId: seededUserIds.carol, + createdAt: new Date('2024-03-15T10:00:00.000Z'), + }, + ]; + + for (const post of posts) { + await runtime.execute(builder.post.insert(post).build()); + } +} + +describe('SQL DSL standalone query execution (TML-2160)', () => { + it( + 'crossAuthorSimilarity returns closest cross-author pairs ordered by cosine distance', + async () => { + await withDevDatabase(async ({ connectionString }) => { + await initTestDatabase({ connection: connectionString, contract }); + const runtime = await getRuntime(connectionString); + + try { + await seedCrossAuthorSimilarity(runtime); + + const results = await crossAuthorSimilarity(20, runtime); + + // Every returned row is a cross-author pair: authors must differ. + expect(results.every((row) => row.postAUserId !== row.postBUserId)).toBe(true); + + // Carol has no embedded post, so she must not appear on either side. 
+ expect( + results.every( + (row) => + row.postAUserId !== seededUserIds.carol && row.postBUserId !== seededUserIds.carol, + ), + ).toBe(true); + + // Distances are non-negative and non-decreasing (ORDER BY distance ASC). + for (const row of results) { + expect(row.distance).toBeGreaterThanOrEqual(0); + } + for (let i = 1; i < results.length; i++) { + expect(results[i]!.distance).toBeGreaterThanOrEqual(results[i - 1]!.distance); + } + + // 2 Alice posts × 3 Bob posts × 2 join orderings = 12 cross-author pairs. + expect(results).toHaveLength(12); + + // The closest unordered pair is aliceFar × bobClose. It appears in both + // orderings (p1=alice/p2=bob and p1=bob/p2=alice), so the top-2 rows + // should be those two permutations of the same unordered pair. + const topTwo = results.slice(0, 2); + expect(topTwo).toHaveLength(2); + const topPairKeys = new Set( + topTwo.map((row) => unorderedPairKey(row.postAId, row.postBId)), + ); + expect(topPairKeys.size).toBe(1); + expect( + topPairKeys.has(unorderedPairKey(seededPostIds.aliceFar, seededPostIds.bobClose)), + ).toBe(true); + expect(topTwo[0]!.distance).toBeLessThan(0.1); + } finally { + await runtime.close(); + } + }); + }, + timeouts.spinUpPpgDev, + ); + + it( + 'crossAuthorSimilarity respects the limit argument', + async () => { + await withDevDatabase(async ({ connectionString }) => { + await initTestDatabase({ connection: connectionString, contract }); + const runtime = await getRuntime(connectionString); + + try { + await seedCrossAuthorSimilarity(runtime); + + const limited = await crossAuthorSimilarity(1, runtime); + + expect(limited).toHaveLength(1); + const row = limited[0]!; + expect(row.postAUserId).not.toBe(row.postBUserId); + expect(unorderedPairKey(row.postAId, row.postBId)).toBe( + unorderedPairKey(seededPostIds.aliceFar, seededPostIds.bobClose), + ); + } finally { + await runtime.close(); + } + }); + }, + timeouts.spinUpPpgDev, + ); +});