From 1cfe5f057370e2806c7550bb2faaae99a261b0d6 Mon Sep 17 00:00:00 2001 From: ferhat elmas Date: Wed, 8 Apr 2026 17:36:04 +0200 Subject: [PATCH] fix: empty segment listing Signed-off-by: ferhat elmas --- .../0059-fix-common-prefix-empty-segments.sql | 351 +++++++++++ src/internal/database/migrations/types.ts | 1 + src/storage/object.ts | 36 +- src/test/object-list-v2.test.ts | 273 ++++++++- src/test/object.test.ts | 568 +++++++++++++++++- 5 files changed, 1212 insertions(+), 17 deletions(-) create mode 100644 migrations/tenant/0059-fix-common-prefix-empty-segments.sql diff --git a/migrations/tenant/0059-fix-common-prefix-empty-segments.sql b/migrations/tenant/0059-fix-common-prefix-empty-segments.sql new file mode 100644 index 000000000..7abb52916 --- /dev/null +++ b/migrations/tenant/0059-fix-common-prefix-empty-segments.sql @@ -0,0 +1,351 @@ +CREATE OR REPLACE FUNCTION storage.get_common_prefix( + p_key TEXT, + p_prefix TEXT, + p_delimiter TEXT +) RETURNS TEXT +IMMUTABLE +LANGUAGE plpgsql +AS $$ +DECLARE + v_prefix TEXT := coalesce(p_prefix, ''); + v_suffix TEXT; + v_scan_index INT := 1; + v_next_delimiter_index INT; +BEGIN + IF coalesce(p_delimiter, '') = '' THEN + RETURN NULL; + END IF; + + IF v_prefix <> '' AND lower(left(p_key, length(v_prefix))) <> lower(v_prefix) THEN + RETURN NULL; + END IF; + + v_suffix := substring(p_key FROM length(v_prefix) + 1); + + -- Only skip leading delimiters when the prefix already ends with the + -- delimiter (empty-segment case like 'prefix//file'). Otherwise the + -- first delimiter in the suffix is the real folder boundary. + IF right(v_prefix, length(p_delimiter)) = p_delimiter THEN + WHILE left(substring(v_suffix FROM v_scan_index), length(p_delimiter)) = p_delimiter LOOP + v_scan_index := v_scan_index + length(p_delimiter); + END LOOP; + END IF; + + v_next_delimiter_index := position(p_delimiter IN substring(v_suffix FROM v_scan_index)); + IF v_next_delimiter_index = 0 THEN + RETURN NULL; + END IF; + + RETURN left( + p_key, + length(v_prefix) + (v_scan_index - 1) + v_next_delimiter_index - 1 + length(p_delimiter) + ); +END; +$$; + +CREATE OR REPLACE FUNCTION storage.get_prefix_child_name( + p_key TEXT, + p_prefix TEXT, + p_delimiter TEXT +) RETURNS TEXT +IMMUTABLE +LANGUAGE plpgsql +AS $$ +DECLARE + v_prefix TEXT := coalesce(p_prefix, ''); + v_suffix TEXT; + v_scan_index INT := 1; + v_trimmed_suffix TEXT; +BEGIN + IF coalesce(p_delimiter, '') = '' THEN + RETURN NULL; + END IF; + + IF v_prefix <> '' AND lower(left(p_key, length(v_prefix))) <> lower(v_prefix) THEN + RETURN NULL; + END IF; + + v_suffix := substring(p_key FROM length(v_prefix) + 1); + + IF right(v_prefix, length(p_delimiter)) = p_delimiter THEN + WHILE left(substring(v_suffix FROM v_scan_index), length(p_delimiter)) = p_delimiter LOOP + v_scan_index := v_scan_index + length(p_delimiter); + END LOOP; + END IF; + + v_trimmed_suffix := substring(v_suffix FROM v_scan_index); + IF coalesce(v_trimmed_suffix, '') = '' THEN + RETURN NULL; + END IF; + + RETURN split_part(v_trimmed_suffix, p_delimiter, 1); +END; +$$; + +CREATE OR REPLACE FUNCTION storage.search( + prefix text, + bucketname text, + limits int DEFAULT 100, + levels int DEFAULT 1, + offsets int DEFAULT 0, + search text DEFAULT '', + sortcolumn text DEFAULT 'name', + sortorder text DEFAULT 'asc' +) +RETURNS TABLE ( + name text, + id uuid, + updated_at timestamptz, + created_at timestamptz, + last_accessed_at timestamptz, + metadata jsonb +) +SECURITY INVOKER +LANGUAGE plpgsql STABLE +AS $func$ +DECLARE + v_peek_name TEXT; + v_current RECORD; + v_common_prefix TEXT; + v_child_name TEXT; + v_delimiter CONSTANT TEXT := '/'; + + -- Configuration + v_limit INT; + v_prefix TEXT; + v_raw_prefix TEXT; + v_prefix_lower TEXT; + v_is_asc BOOLEAN; + v_order_by TEXT; + v_sort_order TEXT; + v_upper_bound TEXT; + v_file_batch_size INT; + + -- Dynamic SQL for batch query only + v_batch_query TEXT; + + -- Seek state + v_next_seek TEXT; + v_count INT := 0; + v_skipped INT := 0; + v_has_pending_peek BOOLEAN := FALSE; + v_emitted_folders TEXT[] := ARRAY[]::TEXT[]; +BEGIN + v_limit := LEAST(coalesce(limits, 100), 1500); + v_prefix := coalesce(prefix, '') || coalesce(search, ''); + -- The caller may have LIKE-escaped the prefix (e.g. \_ \%). + -- Keep the escaped version for ILIKE filters, but strip the + -- backslash escapes for exact-match helper functions. + v_raw_prefix := replace(replace(v_prefix, '\%', '%'), '\_', '_'); + v_prefix_lower := lower(v_prefix); + v_is_asc := lower(coalesce(sortorder, 'asc')) = 'asc'; + v_file_batch_size := LEAST(GREATEST(v_limit * 2, 100), 1000); + + CASE lower(coalesce(sortcolumn, 'name')) + WHEN 'name' THEN v_order_by := 'name'; + WHEN 'updated_at' THEN v_order_by := 'updated_at'; + WHEN 'created_at' THEN v_order_by := 'created_at'; + WHEN 'last_accessed_at' THEN v_order_by := 'last_accessed_at'; + ELSE v_order_by := 'name'; + END CASE; + + v_sort_order := CASE WHEN v_is_asc THEN 'asc' ELSE 'desc' END; + + IF v_order_by != 'name' THEN + RETURN QUERY EXECUTE format( + $sql$ + WITH folders AS ( + SELECT storage.get_prefix_child_name(objects.name, $5, '/') AS folder + FROM storage.objects + WHERE objects.name ILIKE $1 || '%%' + AND bucket_id = $2 + AND storage.get_common_prefix(objects.name, $5, '/') IS NOT NULL + GROUP BY folder + ), entries AS ( + SELECT folder AS "name", + NULL::uuid AS id, + NULL::timestamptz AS updated_at, + NULL::timestamptz AS created_at, + NULL::timestamptz AS last_accessed_at, + NULL::jsonb AS metadata, + 0 AS sort_group + FROM folders + WHERE folder IS NOT NULL + UNION ALL + SELECT storage.get_prefix_child_name(objects.name, $5, '/') AS "name", + id, updated_at, created_at, last_accessed_at, metadata, + 1 AS sort_group + FROM storage.objects + WHERE objects.name ILIKE $1 || '%%' + AND bucket_id = $2 + AND storage.get_common_prefix(objects.name, $5, '/') IS NULL + AND storage.get_prefix_child_name(objects.name, $5, '/') IS NOT NULL + ) + SELECT "name", id, updated_at, created_at, last_accessed_at, metadata + FROM entries + ORDER BY sort_group ASC, + CASE WHEN sort_group = 0 THEN "name" END %s, + CASE WHEN sort_group = 1 THEN %I END %s, + CASE WHEN sort_group = 1 THEN "name" END %s + LIMIT $3 OFFSET $4 + $sql$, v_sort_order, v_order_by, v_sort_order, v_sort_order + ) USING v_prefix, bucketname, v_limit, offsets, v_raw_prefix; + RETURN; + END IF; + + IF v_prefix_lower = '' THEN + v_upper_bound := NULL; + ELSIF right(v_prefix_lower, 1) = v_delimiter THEN + v_upper_bound := left(v_prefix_lower, -1) || chr(ascii(v_delimiter) + 1); + ELSE + v_upper_bound := left(v_prefix_lower, -1) || chr(ascii(right(v_prefix_lower, 1)) + 1); + END IF; + + IF v_is_asc THEN + IF v_upper_bound IS NOT NULL THEN + v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' || + 'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" >= $2 ' || + 'AND lower(o.name) COLLATE "C" < $3 ORDER BY lower(o.name) COLLATE "C" ASC LIMIT $4'; + ELSE + v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' || + 'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" >= $2 ' || + 'ORDER BY lower(o.name) COLLATE "C" ASC LIMIT $4'; + END IF; + ELSE + IF v_upper_bound IS NOT NULL THEN + v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' || + 'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" < $2 ' || + 'AND lower(o.name) COLLATE "C" >= $3 ORDER BY lower(o.name) COLLATE "C" DESC LIMIT $4'; + ELSE + v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' || + 'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" < $2 ' || + 'ORDER BY lower(o.name) COLLATE "C" DESC LIMIT $4'; + END IF; + END IF; + + IF v_is_asc THEN + v_next_seek := v_prefix_lower; + ELSE + IF v_upper_bound IS NOT NULL THEN + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_prefix_lower AND lower(o.name) COLLATE "C" < v_upper_bound + ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1; + ELSIF v_prefix_lower <> '' THEN + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_prefix_lower + ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1; + ELSE + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname + ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1; + END IF; + + IF v_peek_name IS NOT NULL THEN + v_next_seek := lower(v_peek_name) || v_delimiter; + ELSE + RETURN; + END IF; + END IF; + + LOOP + EXIT WHEN v_count >= v_limit; + + IF NOT v_has_pending_peek THEN + IF v_is_asc THEN + IF v_upper_bound IS NOT NULL THEN + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_next_seek AND lower(o.name) COLLATE "C" < v_upper_bound + ORDER BY lower(o.name) COLLATE "C" ASC LIMIT 1; + ELSE + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_next_seek + ORDER BY lower(o.name) COLLATE "C" ASC LIMIT 1; + END IF; + ELSE + IF v_upper_bound IS NOT NULL THEN + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" < v_next_seek AND lower(o.name) COLLATE "C" >= v_prefix_lower + ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1; + ELSIF v_prefix_lower <> '' THEN + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" < v_next_seek AND lower(o.name) COLLATE "C" >= v_prefix_lower + ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1; + ELSE + SELECT o.name INTO v_peek_name FROM storage.objects o + WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" < v_next_seek + ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1; + END IF; + END IF; + END IF; + + v_has_pending_peek := FALSE; + + EXIT WHEN v_peek_name IS NULL; + + v_common_prefix := storage.get_common_prefix(lower(v_peek_name), v_prefix_lower, v_delimiter); + + IF v_common_prefix IS NOT NULL THEN + v_child_name := storage.get_prefix_child_name(v_peek_name, v_prefix, v_delimiter); + + IF v_child_name IS NOT NULL AND array_position(v_emitted_folders, v_child_name) IS NULL THEN + v_emitted_folders := array_append(v_emitted_folders, v_child_name); + + IF v_skipped < offsets THEN + v_skipped := v_skipped + 1; + ELSE + name := v_child_name; + id := NULL; + updated_at := NULL; + created_at := NULL; + last_accessed_at := NULL; + metadata := NULL; + RETURN NEXT; + v_count := v_count + 1; + END IF; + END IF; + + IF v_is_asc THEN + v_next_seek := lower(left(v_common_prefix, -1)) || chr(ascii(v_delimiter) + 1); + ELSE + v_next_seek := lower(v_common_prefix); + END IF; + ELSE + FOR v_current IN EXECUTE v_batch_query + USING bucketname, v_next_seek, + CASE WHEN v_is_asc THEN COALESCE(v_upper_bound, v_prefix_lower) ELSE v_prefix_lower END, v_file_batch_size + LOOP + v_common_prefix := storage.get_common_prefix(lower(v_current.name), v_prefix_lower, v_delimiter); + + IF v_common_prefix IS NOT NULL THEN + v_peek_name := v_current.name; + v_has_pending_peek := TRUE; + EXIT; + END IF; + + IF v_skipped < offsets THEN + v_skipped := v_skipped + 1; + ELSE + name := storage.get_prefix_child_name(v_current.name, v_prefix, v_delimiter); + IF name IS NOT NULL THEN + id := v_current.id; + updated_at := v_current.updated_at; + created_at := v_current.created_at; + last_accessed_at := v_current.last_accessed_at; + metadata := v_current.metadata; + RETURN NEXT; + v_count := v_count + 1; + END IF; + END IF; + + IF v_is_asc THEN + v_next_seek := lower(v_current.name) || v_delimiter; + ELSE + v_next_seek := lower(v_current.name); + END IF; + + EXIT WHEN v_count >= v_limit; + END LOOP; + END IF; + END LOOP; +END; +$func$; diff --git a/src/internal/database/migrations/types.ts b/src/internal/database/migrations/types.ts index 651d4e091..673f02d49 100644 --- a/src/internal/database/migrations/types.ts +++ b/src/internal/database/migrations/types.ts @@ -58,4 +58,5 @@ export const DBMigration = { 'fix-optimized-search-function': 56, 's3-multipart-uploads-metadata': 57, 'operation-ergonomics': 58, + 'fix-common-prefix-empty-segments': 59, } as const diff --git a/src/storage/object.ts b/src/storage/object.ts index 1529919e5..a1b230aed 100644 --- a/src/storage/object.ts +++ b/src/storage/object.ts @@ -48,6 +48,36 @@ export interface ListObjectsV2Result { nextCursorKey?: string } +export function getNextCommonPrefix( + key: string, + prefix: string, + delimiter: string +): string | undefined { + if (!delimiter || !key.startsWith(prefix)) { + return undefined + } + + const suffix = key.slice(prefix.length) + let scanIndex = 0 + + // Ignore empty path segments immediately after the current prefix so + // repeated delimiters like `prefix//file` do not produce `prefix/` again. + // Only skip when the prefix already ends with the delimiter. Otherwise + // the first delimiter in the suffix is the real folder boundary. + if (prefix.endsWith(delimiter)) { + while (suffix.startsWith(delimiter, scanIndex)) { + scanIndex += delimiter.length + } + } + + const nextDelimiterIndex = suffix.indexOf(delimiter, scanIndex) + if (nextDelimiterIndex < 0) { + return undefined + } + + return key.substring(0, prefix.length + nextDelimiterIndex + delimiter.length) +} + /** * ObjectStorage * interact with remote objects and database state @@ -624,11 +654,9 @@ export class ObjectStorage { if (delimiter) { const delimitedResults: Obj[] = [] for (const object of searchResult) { - let idx = object.name.replace(prefix, '').indexOf(delimiter) + const currPrefix = getNextCommonPrefix(object.name, prefix, delimiter) - if (idx >= 0) { - idx = prefix.length + idx + delimiter.length - const currPrefix = object.name.substring(0, idx) + if (currPrefix) { if (currPrefix === prevPrefix) { continue } diff --git a/src/test/object-list-v2.test.ts b/src/test/object-list-v2.test.ts index 46ff4c3a8..4fa5e4119 100644 --- a/src/test/object-list-v2.test.ts +++ b/src/test/object-list-v2.test.ts @@ -1,7 +1,7 @@ 'use strict' import { randomUUID } from 'node:crypto' -import { ListObjectsV2Result } from '@storage/object' +import { getNextCommonPrefix, ListObjectsV2Result } from '@storage/object' import { FastifyInstance } from 'fastify' import { Knex } from 'knex' import app from '../app' @@ -579,6 +579,7 @@ describe('objects - list v2 sorting tests', () => { }) const LIST_V2_WILDCARD_BUCKET = `list-v2-wildcard-${randomUUID()}` +const LIST_V2_EMPTY_SEGMENT_BUCKET = `list-v2-empty-segment-${randomUUID()}` describe('objects - list v2 prefix wildcard handling', () => { beforeAll(async () => { @@ -701,3 +702,273 @@ describe('objects - list v2 prefix wildcard handling', () => { expect(data.objects.map((obj) => obj.name)).toEqual([literalMatch]) }) }) + +describe('objects - list v2 repeated delimiters', () => { + beforeAll(async () => { + appInstance = app() + + const createBucketResponse = await appInstance.inject({ + method: 'POST', + url: `/bucket`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + payload: { + name: LIST_V2_EMPTY_SEGMENT_BUCKET, + }, + }) + + expect(createBucketResponse.statusCode).toBe(200) + await appInstance.close() + }) + + afterAll(async () => { + appInstance = app() + + const emptyBucketResponse = await appInstance.inject({ + method: 'POST', + url: `/bucket/${LIST_V2_EMPTY_SEGMENT_BUCKET}/empty`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(emptyBucketResponse.statusCode).toBe(200) + + const deleteBucketResponse = await appInstance.inject({ + method: 'DELETE', + url: `/bucket/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(deleteBucketResponse.statusCode).toBe(200) + await appInstance.close() + }) + + test('skips empty path segments when grouping with a delimiter', async () => { + const pdfObject = 'service-bulletins/pdfs//whirlpool-washer.pdf' + const nestedPdfObject = 'service-bulletins/pdfs//nested/whirlpool-washer-nested.pdf' + const markdownObject = 'service-bulletins/markdown//whirlpool-washer.md' + + for (const objectName of [pdfObject, nestedPdfObject, markdownObject]) { + const uploadResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${LIST_V2_EMPTY_SEGMENT_BUCKET}/${encodeURIComponent(objectName)}`, + payload: createUpload(objectName, 'test content'), + headers: { + authorization: serviceKey, + }, + }) + + expect(uploadResponse.statusCode).toBe(200) + } + + const rootListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix: 'service-bulletins/', + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(rootListResponse.statusCode).toBe(200) + expect(rootListResponse.json()).toMatchObject({ + folders: [{ name: 'service-bulletins/markdown/' }, { name: 'service-bulletins/pdfs/' }], + objects: [], + }) + + const pdfFolderListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix: 'service-bulletins/pdfs/', + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(pdfFolderListResponse.statusCode).toBe(200) + expect(pdfFolderListResponse.json()).toMatchObject({ + folders: [{ name: 'service-bulletins/pdfs//nested/' }], + objects: [{ name: pdfObject }], + }) + + const emptySegmentListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix: 'service-bulletins/pdfs//', + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(emptySegmentListResponse.statusCode).toBe(200) + expect(emptySegmentListResponse.json()).toMatchObject({ + folders: [{ name: 'service-bulletins/pdfs//nested/' }], + objects: [{ name: pdfObject }], + }) + + const nestedFolderListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix: 'service-bulletins/pdfs//nested/', + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(nestedFolderListResponse.statusCode).toBe(200) + expect(nestedFolderListResponse.json()).toMatchObject({ + folders: [], + objects: [{ name: nestedPdfObject }], + }) + }) + + test('keeps repeated-delimiter folders distinct from normal folders in byte order', async () => { + const prefix = `mixed-ordering/${randomUUID()}/a/` + const repeatedBLeaf = `${prefix}//b/repeated-leaf.txt` + const repeatedBNestedLeaf = `${prefix}//b/nested/repeated-nested-leaf.txt` + const repeatedDLeaf = `${prefix}//d/repeated-second-leaf.txt` + const normalBLeaf = `${prefix}b/normal-leaf.txt` + const normalBNestedLeaf = `${prefix}b/nested/normal-nested-leaf.txt` + const normalCLeaf = `${prefix}c/normal-second-leaf.txt` + + for (const objectName of [ + repeatedBLeaf, + repeatedBNestedLeaf, + repeatedDLeaf, + normalBLeaf, + normalBNestedLeaf, + normalCLeaf, + ]) { + const uploadResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${LIST_V2_EMPTY_SEGMENT_BUCKET}/${encodeURIComponent(objectName)}`, + payload: createUpload(objectName, 'test content'), + headers: { + authorization: serviceKey, + }, + }) + + expect(uploadResponse.statusCode).toBe(200) + } + + const rootListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix, + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(rootListResponse.statusCode).toBe(200) + expect(rootListResponse.json()).toMatchObject({ + folders: [ + { name: `${prefix}//b/` }, + { name: `${prefix}//d/` }, + { name: `${prefix}b/` }, + { name: `${prefix}c/` }, + ], + objects: [], + }) + + const repeatedBListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix: `${prefix}//b/`, + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(repeatedBListResponse.statusCode).toBe(200) + expect(repeatedBListResponse.json()).toMatchObject({ + folders: [{ name: `${prefix}//b/nested/` }], + objects: [{ name: repeatedBLeaf }], + }) + + const normalBListResponse = await appInstance.inject({ + method: 'POST', + url: `/object/list-v2/${LIST_V2_EMPTY_SEGMENT_BUCKET}`, + payload: { + prefix: `${prefix}b/`, + with_delimiter: true, + limit: 100, + }, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + + expect(normalBListResponse.statusCode).toBe(200) + expect(normalBListResponse.json()).toMatchObject({ + folders: [{ name: `${prefix}b/nested/` }], + objects: [{ name: normalBLeaf }], + }) + }) +}) + +describe('getNextCommonPrefix', () => { + test('returns undefined when delimiter is empty', () => { + expect(getNextCommonPrefix('service-bulletins/pdfs/file.pdf', 'service-bulletins/', '')).toBe( + undefined + ) + }) + + test('returns undefined when key does not start with the provided prefix', () => { + expect( + getNextCommonPrefix('service-bulletins/pdfs/file.pdf', 'service-bulletins/markdown/', '/') + ).toBe(undefined) + }) + + test('skips empty path segments immediately after the prefix', () => { + expect( + getNextCommonPrefix( + 'service-bulletins/pdfs//nested/whirlpool-washer-nested.pdf', + 'service-bulletins/pdfs/', + '/' + ) + ).toBe('service-bulletins/pdfs//nested/') + }) + + test('skips multiple empty path segments immediately after the prefix', () => { + expect(getNextCommonPrefix('a///b/c', 'a/', '/')).toBe('a///b/') + }) + + test('treats repeated delimiters followed by a leaf as a file, not a folder', () => { + expect( + getNextCommonPrefix( + 'service-bulletins/pdfs//whirlpool-washer.pdf', + 'service-bulletins/pdfs/', + '/' + ) + ).toBe(undefined) + }) +}) diff --git a/src/test/object.test.ts b/src/test/object.test.ts index dcd5397df..e61ca0bb5 100644 --- a/src/test/object.test.ts +++ b/src/test/object.test.ts @@ -35,6 +35,14 @@ async function getSuperuserPostgrestClient() { return tnx } +async function commitSharedTestTransaction(tx: Knex.Transaction) { + if (tnx === tx) { + tnx = undefined + } + + await tx.commit() +} + useMockObject() useMockQueue() @@ -44,8 +52,11 @@ beforeEach(() => { }) afterEach(async () => { - if (tnx) { - await tnx.commit() + const currentTx = tnx + tnx = undefined + + if (currentTx) { + await currentTx.commit() } await appInstance.close() }) @@ -2338,8 +2349,7 @@ describe('testing move object', () => { version: `rollback-version-${runId}`, metadata: { mimetype: 'image/png', size: 1234 }, }) - await seedTx.commit() - tnx = undefined + await commitSharedTestTransaction(seedTx) jest .spyOn(S3Backend.prototype, 'headObject') @@ -2541,6 +2551,544 @@ describe('testing list objects', () => { expect(responseJSON).toHaveLength(0) }) + test('list-v1 should skip empty path segments for name and non-name sorting', async () => { + const runId = randomUUID() + const bucketName = `list-v1-empty-segment-${runId}` + const basePrefix = `service-bulletins-${runId}/pdfs/` + const objectNames = [ + `${basePrefix}/whirlpool-washer.pdf`, + `${basePrefix}/nested/whirlpool-washer-nested.pdf`, + ] + + try { + const createBucketResponse = await appInstance.inject({ + method: 'POST', + url: '/bucket', + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + payload: { + name: bucketName, + }, + }) + + expect(createBucketResponse.statusCode).toBe(200) + + for (const objectName of objectNames) { + const uploadResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${bucketName}/${encodeURIComponent(objectName)}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + expect(uploadResponse.statusCode).toBe(200) + } + + const listNames = async (options?: { + column: string + order: 'asc' | 'desc' + prefixOverride?: string + }) => { + const response = await appInstance.inject({ + method: 'POST', + url: `/object/list/${bucketName}`, + payload: { + prefix: options?.prefixOverride ?? basePrefix, + limit: 100, + offset: 0, + sortBy: options + ? { + column: options.column, + order: options.order, + } + : undefined, + }, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + expect(response.statusCode).toBe(200) + return response.json<{ name: string }[]>().map((obj) => obj.name) + } + + expect(await listNames()).toEqual(['nested', 'whirlpool-washer.pdf']) + expect(await listNames({ column: 'created_at', order: 'asc' })).toEqual([ + 'nested', + 'whirlpool-washer.pdf', + ]) + expect( + await listNames({ + column: 'created_at', + order: 'asc', + prefixOverride: basePrefix.toUpperCase(), + }) + ).toEqual(['nested', 'whirlpool-washer.pdf']) + expect(await listNames({ column: 'updated_at', order: 'asc' })).toEqual([ + 'nested', + 'whirlpool-washer.pdf', + ]) + } finally { + await appInstance.inject({ + method: 'POST', + url: `/bucket/${bucketName}/empty`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + await appInstance.inject({ + method: 'DELETE', + url: `/bucket/${bucketName}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + } + }) + + test('list-v1 should skip multiple empty path segments for name and non-name sorting', async () => { + const runId = randomUUID() + const bucketName = `list-v1-multi-empty-segment-${runId}` + const basePrefix = `service-bulletins-${runId}/pdfs/` + const objectNames = [ + `${basePrefix}//whirlpool-washer.pdf`, + `${basePrefix}//nested/whirlpool-washer-nested.pdf`, + ] + + try { + const createBucketResponse = await appInstance.inject({ + method: 'POST', + url: '/bucket', + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + payload: { + name: bucketName, + }, + }) + + expect(createBucketResponse.statusCode).toBe(200) + + for (const objectName of objectNames) { + const uploadResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${bucketName}/${encodeURIComponent(objectName)}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + expect(uploadResponse.statusCode).toBe(200) + } + + const listNames = async (options?: { column: string; order: 'asc' | 'desc' }) => { + const response = await appInstance.inject({ + method: 'POST', + url: `/object/list/${bucketName}`, + payload: { + prefix: basePrefix, + limit: 100, + offset: 0, + sortBy: options + ? { + column: options.column, + order: options.order, + } + : undefined, + }, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + expect(response.statusCode).toBe(200) + return response.json<{ name: string }[]>().map((obj) => obj.name) + } + + expect(await listNames()).toEqual(['nested', 'whirlpool-washer.pdf']) + expect(await listNames({ column: 'created_at', order: 'asc' })).toEqual([ + 'nested', + 'whirlpool-washer.pdf', + ]) + expect(await listNames({ column: 'updated_at', order: 'asc' })).toEqual([ + 'nested', + 'whirlpool-washer.pdf', + ]) + } finally { + await appInstance.inject({ + method: 'POST', + url: `/bucket/${bucketName}/empty`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + await appInstance.inject({ + method: 'DELETE', + url: `/bucket/${bucketName}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + } + }) + + test('list-v1 should keep mixed normal and repeated-delimiter folders stable when listed together', async () => { + const runId = randomUUID() + const bucketName = `list-v1-mixed-empty-segment-${runId}` + const prefix = `mixed-ordering-${runId}/a/` + const objectRows = [ + { + bucket_id: bucketName, + name: `${prefix}//b/repeated-leaf.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-repeated-b`, + created_at: '2024-01-01T00:00:00.000Z', + updated_at: '2024-01-06T00:00:00.000Z', + metadata: { + eTag: `${runId}-repeated-b`, + size: 1, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}//c/repeated-second-leaf.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-repeated-c`, + created_at: '2024-01-02T00:00:00.000Z', + updated_at: '2024-01-05T00:00:00.000Z', + metadata: { + eTag: `${runId}-repeated-c`, + size: 2, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}b/normal-leaf.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-normal-b`, + created_at: '2024-01-03T00:00:00.000Z', + updated_at: '2024-01-04T00:00:00.000Z', + metadata: { + eTag: `${runId}-normal-b`, + size: 3, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}c/normal-second-leaf.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-normal-c`, + created_at: '2024-01-04T00:00:00.000Z', + updated_at: '2024-01-03T00:00:00.000Z', + metadata: { + eTag: `${runId}-normal-c`, + size: 4, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}leaf-a.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-leaf-a`, + created_at: '2024-01-05T00:00:00.000Z', + updated_at: '2024-01-02T00:00:00.000Z', + metadata: { + eTag: `${runId}-leaf-a`, + size: 5, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}leaf-z.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-leaf-z`, + created_at: '2024-01-06T00:00:00.000Z', + updated_at: '2024-01-01T00:00:00.000Z', + metadata: { + eTag: `${runId}-leaf-z`, + size: 6, + mimetype: 'text/plain', + }, + }, + ] + + try { + const createBucketResponse = await appInstance.inject({ + method: 'POST', + url: '/bucket', + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + payload: { + name: bucketName, + }, + }) + + expect(createBucketResponse.statusCode).toBe(200) + + const seedTx = await getSuperuserPostgrestClient() + await seedTx.from('objects').insert(objectRows) + await commitSharedTestTransaction(seedTx) + + const listNames = async (options?: { + column: 'created_at' | 'updated_at' + order: 'asc' | 'desc' + }) => { + const response = await appInstance.inject({ + method: 'POST', + url: `/object/list/${bucketName}`, + payload: { + prefix, + limit: 100, + offset: 0, + sortBy: options + ? { + column: options.column, + order: options.order, + } + : undefined, + }, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + expect(response.statusCode).toBe(200) + return response.json<{ name: string }[]>().map((obj) => obj.name) + } + + expect(await listNames()).toEqual(['b', 'c', 'leaf-a.txt', 'leaf-z.txt']) + expect(await listNames({ column: 'created_at', order: 'asc' })).toEqual([ + 'b', + 'c', + 'leaf-a.txt', + 'leaf-z.txt', + ]) + expect(await listNames({ column: 'updated_at', order: 'desc' })).toEqual([ + 'c', + 'b', + 'leaf-a.txt', + 'leaf-z.txt', + ]) + } finally { + const cleanupTx = await getSuperuserPostgrestClient() + await withDeleteEnabled(cleanupTx, async (db) => { + await db + .from('objects') + .where({ bucket_id: bucketName }) + .whereIn( + 'name', + objectRows.map((row) => row.name) + ) + .delete() + }) + await commitSharedTestTransaction(cleanupTx) + + await appInstance.inject({ + method: 'DELETE', + url: `/bucket/${bucketName}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + } + }) + + test('sql prefix helpers should return null for non-matching prefixes', async () => { + const db = await getSuperuserPostgrestClient() + + const result = await db.raw<{ + rows: Array<{ + common_prefix: string | null + child_name: string | null + }> + }>( + ` + select + storage.get_common_prefix(?, ?, '/') as common_prefix, + storage.get_prefix_child_name(?, ?, '/') as child_name + `, + ['foo/bar/baz.txt', 'qux/', 'foo/bar/baz.txt', 'qux/'] + ) + + expect(result.rows[0]).toEqual({ + common_prefix: null, + child_name: null, + }) + + await commitSharedTestTransaction(db) + }) + + test('afterEach auto-commit should leave shared transaction state cleared for the next test', async () => { + const tx = await getSuperuserPostgrestClient() + const result = await tx.raw<{ rows: Array<{ value: number }> }>('select 1 as value') + + expect(result.rows[0].value).toBe(1) + }) + + test('afterEach auto-commit should clear shared transaction state between tests', () => { + expect(tnx).toBeUndefined() + }) + + test('list-v1 should keep folders first for non-name sorting with stable pagination', async () => { + const runId = randomUUID() + const bucketName = `list-v1-non-name-order-${runId}` + const prefix = `non-name-ordering-${runId}/` + const objectRows = [ + { + bucket_id: bucketName, + name: `${prefix}zeta-folder/file.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-folder-zeta`, + created_at: '2024-01-03T00:00:00.000Z', + updated_at: '2024-01-03T00:00:00.000Z', + metadata: { + eTag: `${runId}-folder-zeta`, + size: 1, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}alpha-folder/file.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-folder-alpha`, + created_at: '2024-01-02T00:00:00.000Z', + updated_at: '2024-01-02T00:00:00.000Z', + metadata: { + eTag: `${runId}-folder-alpha`, + size: 2, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}oldest-file.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-file-oldest`, + created_at: '2024-01-01T00:00:00.000Z', + updated_at: '2024-01-01T00:00:00.000Z', + metadata: { + eTag: `${runId}-file-oldest`, + size: 3, + mimetype: 'text/plain', + }, + }, + { + bucket_id: bucketName, + name: `${prefix}newest-file.txt`, + owner: '317eadce-631a-4429-a0bb-f19a7a517b4a', + version: `${runId}-file-newest`, + created_at: '2024-01-04T00:00:00.000Z', + updated_at: '2024-01-05T00:00:00.000Z', + metadata: { + eTag: `${runId}-file-newest`, + size: 4, + mimetype: 'text/plain', + }, + }, + ] + + try { + const createBucketResponse = await appInstance.inject({ + method: 'POST', + url: '/bucket', + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + payload: { + name: bucketName, + }, + }) + + expect(createBucketResponse.statusCode).toBe(200) + + const seedTx = await getSuperuserPostgrestClient() + await seedTx.from('objects').insert(objectRows) + await commitSharedTestTransaction(seedTx) + + const listNames = async (options: { + column: 'created_at' | 'updated_at' + order: 'asc' | 'desc' + offset?: number + limit?: number + }) => { + const response = await appInstance.inject({ + method: 'POST', + url: `/object/list/${bucketName}`, + payload: { + prefix, + limit: options.limit ?? 100, + offset: options.offset ?? 0, + sortBy: { + column: options.column, + order: options.order, + }, + }, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + + expect(response.statusCode).toBe(200) + return response.json<{ name: string }[]>().map((obj) => obj.name) + } + + expect(await listNames({ column: 'created_at', order: 'asc' })).toEqual([ + 'alpha-folder', + 'zeta-folder', + 'oldest-file.txt', + 'newest-file.txt', + ]) + expect(await listNames({ column: 'created_at', order: 'asc', offset: 1, limit: 2 })).toEqual([ + 'zeta-folder', + 'oldest-file.txt', + ]) + expect(await listNames({ column: 'updated_at', order: 'desc' })).toEqual([ + 'zeta-folder', + 'alpha-folder', + 'newest-file.txt', + 'oldest-file.txt', + ]) + } finally { + const cleanupTx = await getSuperuserPostgrestClient() + await withDeleteEnabled(cleanupTx, async (db) => { + await db + .from('objects') + .where({ bucket_id: bucketName }) + .whereIn( + 'name', + objectRows.map((row) => row.name) + ) + .delete() + }) + await commitSharedTestTransaction(cleanupTx) + + await appInstance.inject({ + method: 'DELETE', + url: `/bucket/${bucketName}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + } + }) + + test('manual superuser transaction cleanup should not leak shared transaction state between tests', () => { + expect(tnx).toBeUndefined() + }) + test('checking if limit works', async () => { const response = await appInstance.inject({ method: 'POST', @@ -2673,8 +3221,7 @@ describe('testing list objects', () => { }, })) ) - await seedTx.commit() - tnx = undefined + await commitSharedTestTransaction(seedTx) try { const response = await appInstance.inject({ @@ -2706,8 +3253,7 @@ describe('testing list objects', () => { .whereIn('name', objectNames) .delete() }) - await cleanupTx.commit() - tnx = undefined + await commitSharedTestTransaction(cleanupTx) } }) @@ -2742,8 +3288,7 @@ describe('testing list objects', () => { }, }, ]) - await seedTx.commit() - tnx = undefined + await commitSharedTestTransaction(seedTx) try { const response = await appInstance.inject({ @@ -2775,8 +3320,7 @@ describe('testing list objects', () => { .whereIn('name', [literalMatch, wildcardOnlyMatch]) .delete() }) - await cleanupTx.commit() - tnx = undefined + await commitSharedTestTransaction(cleanupTx) } }) })