diff --git a/README.md b/README.md index 6c655a7..03682b8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -> **VERSION**: GTS specification draft, version 0.8Beta2 +> **VERSION**: GTS specification draft, version 0.8 # Global Type System (GTS) Specification @@ -94,6 +94,7 @@ See the [Practical Benefits for Service and Platform Vendors](#51-practical-bene | 0.7 | BREAKING: require $ref value to start with 'gts://'; strict rules for schema/instance distinction; prohibiting well-known instances without left-hand type segments | | 0.8beta1 | Add OP#12 (schema vs schema validation), unified validation endpoint (/validate-entity), and clarify instance -> schema and schema -> schema validation semantics for chained GTS IDs | | 0.8beta2 | Introduce schema traits (`x-gts-traits-schema`, `x-gts-traits`) and OP#13 (schema traits validation) | +| 0.8 | Add alternate combined anonymous instance identifier format | ## 1. Motivation @@ -138,9 +139,11 @@ The GTS identifier is a string with total length of 1024 characters maximum. - `gts.<vendor>.<package>.<namespace>.<type>.v<MAJOR>[.<MINOR>]~` - Note the trailing `~` to denote a type (schema) identifier. - A single instance identifier (object of given type): - - `gts.<vendor>.<package>.<namespace>.<type>.v<MAJOR>[.<MINOR>]~<vendor>.<package>.<namespace>.<type>.v<MAJOR>[.<MINOR>]` - - Well-known instance identifiers MUST include a left-hand type segment in a chain (see 2.2 and 3.7). - - Note: no trailing `~` for instances. The identifier ends with an integer (the last version component). + - Well-known instance: `gts.<vendor>.<package>.<namespace>.<type>.v<MAJOR>[.<MINOR>]~<vendor>.<package>.<namespace>.<type>.v<MAJOR>[.<MINOR>]` + - Combined anonymous instance: `gts.<vendor>.<package>.<namespace>.<type>.v<MAJOR>[.<MINOR>]~<uuid>` + - Well-known and combined anonymous instance identifiers MUST include a left-hand type segment in a chain (see 2.2 and 3.7). + - Combined anonymous instance identifiers MUST include a UUID tail. + - Note: no trailing `~` for instances. The `<vendor>` refers to a string code that indicates the origin of a given schema or instance definition. 
This can be valuable in systems that support cross-vendor data exchange, such as events or configuration files, especially in environments with deployable applications or plugins. @@ -156,6 +159,8 @@ Versioning uses semantic versioning constrained to major and optional minor: `v< - `gts.x.core.events.type.v1~` - defines a base event type in the system - `gts.x.core.events.type.v1.2~` - defines a specific edition v1.2 of the base event type +The `<uuid>` is a 128-bit identifier (e.g., a UUID v5) that is used to identify a specific anonymous instance of a type. It is generated using a deterministic algorithm based on the type identifier and the instance data. + **Examples** - The GTS identifier can be used for instance or type identifiers: ```bash gts.x.idp.users.user.v1.0~ # defines ID of a schema of the user objects provided by vendor 'x' in scope of the package 'idp' @@ -203,7 +208,7 @@ The complete GTS identifier syntax in Extended Backus-Naur Form (EBNF): ```ebnf (* Top-level identifier *) -gts-identifier = "gts." , gts-segment , ( chain-suffix-type | chain-suffix-instance ) ; +gts-identifier = "gts." , gts-segment , ( chain-suffix-type | chain-suffix-instance | chain-suffix-anon-instance ) ; (* Chained type ID ends with ~ *) chain-suffix-type = { "~" , gts-segment } , "~" ; @@ -211,6 +216,9 @@ chain-suffix-type = { "~" , gts-segment } , "~" ; (* Chained instance ID MUST have at least one tilde separator and NO trailing tilde *) chain-suffix-instance = "~" , gts-segment , { "~" , gts-segment } ; +(* Combined anonymous instance ID ends with a UUID tail and NO trailing tilde *) +chain-suffix-anon-instance = { "~" , gts-segment } , "~" , uuid ; + (* Single GTS ID segment *) gts-segment = vendor , "." , package , "." , namespace , "." , type , "." 
, version ; @@ -234,6 +242,13 @@ letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "u" | "v" | "w" | "x" | "y" | "z" ; digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; non-zero-digit = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + +(* UUID tail for combined anonymous instance identifiers *) +hex-digit = digit | "a" | "b" | "c" | "d" | "e" | "f" ; +uuid = hex8 , "-" , hex4 , "-" , hex4 , "-" , hex4 , "-" , hex12 ; +hex8 = hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit ; +hex4 = hex-digit , hex-digit , hex-digit , hex-digit ; +hex12 = hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit , hex-digit ; ``` **Grammar notes:** @@ -242,11 +257,13 @@ non-zero-digit = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; 2. **Chain interpretation**: In a chained identifier `gts.<segment1>~<segment2>~<segment3>`, each `~` acts as a separator. All segments before the final segment MUST be types (conceptually ending with `~`). The final segment determines whether the entire identifier is a type or instance. -3. **Placeholder rule**: Use `_` (underscore) as a segment value when the namespace is not applicable. It is recommended to use the placeholder only for the `<namespace>` segment. +3. **Combined anonymous instance**: In a chained identifier of the form `gts.<segment1>~...~<uuid>`, the UUID is the instance identifier tail. All segments before the UUID MUST be types (conceptually ending with `~`). -4. **Normalization**: GTS identifiers must be lowercase. Leading/trailing whitespace is not permitted. Canonical form has no optional spacing. +4. **Placeholder rule**: Use `_` (underscore) as a segment value when the namespace is not applicable. It is recommended to use the placeholder only for the `<namespace>` segment. -5. **Reserved prefix**: The `gts.` prefix is mandatory and reserved. 
Future versions may introduce alternative prefixes but will maintain backward compatibility. +5. **Normalization**: GTS identifiers must be lowercase. Leading/trailing whitespace is not permitted. Canonical form has no optional spacing. + +6. **Reserved prefix**: The `gts.` prefix is mandatory and reserved. Future versions may introduce alternative prefixes but will maintain backward compatibility. ## 3. Semantics and Capabilities @@ -507,6 +524,10 @@ Example: - `id: "7a1d2f34-5678-49ab-9012-abcdef123456"`, `type: "gts.x.core.events.type.v1~x.commerce.orders.order_placed.v1.0~"` - Field naming: `type` (alternatives: `gtsType`, `gts_type`). + Some services may also support a **combined** anonymous instance representation: + - `id: "gts.x.core.events.type.v1~x.commerce.orders.order_placed.v1.0~7a1d2f34-5678-49ab-9012-abcdef123456"` + - In this case, the explicit `type` field MAY be omitted, since the schema/type can be derived from the `id` prefix up to the final `~`. + This split is common in event systems: **topics/streams** are often well-known instances, while individual **events** are anonymous. See `./examples/events` and the field-level recommendations in section **9.1**. Example: @@ -1127,27 +1148,33 @@ gts\. `is_type` captures the optional trailing `~` (present for type IDs, absent for instance IDs). ### 8.2 Chained identifier regex - -For chained identifiers, the pattern enforces that all segments except the last are type IDs (with `~` separators): - -```regex -^\s*gts\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.v(0|[1-9]\d*)(?:\.(0|[1-9]\d*))?(?:~[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.v(0|[1-9]\d*)(?:\.(0|[1-9]\d*))?)*~?\s*$ -``` - -**Pattern explanation:** -- Starts with a single absolute segment (`gts.` prefix) -- Followed by zero or more relative segments, each prefixed by `~` -- The final `~` is optional: present for a type, absent for an instance - -**Validation rules:** -1. 
Standalone type identifier: MUST end with `~` -2. In a chain, all segments except the rightmost MUST be types (end with `~` in the original string) -3. Only the first segment uses the `gts.` prefix; chained segments are relative (no `gts.`) - -**Parsing strategy:** -- Split on `~` to get raw segments; the first is absolute, the rest are relative -- Parse the first using the absolute pattern, the rest using the relative pattern -- Validate that all segments except possibly the last are types + + For chained identifiers, the pattern enforces that all segments except the final instance designator are type IDs (with `~` separators): + + ```regex + ^\s*gts\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.v(0|[1-9]\d*)(?:\.(0|[1-9]\d*))?(?:~[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.[a-z_][a-z0-9_]*\.v(0|[1-9]\d*)(?:\.(0|[1-9]\d*))?)*(?:~(?:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?)?\s*$ + ``` + + **Pattern explanation:** + - Starts with a single absolute segment (`gts.` prefix) + - Followed by zero or more relative segments, each prefixed by `~` + - The identifier may end with: + - `~` (type) + - a segment end (instance) + - `~<uuid>` (combined anonymous instance) + + **Validation rules:** + 1. Standalone type identifier: MUST end with `~` + 2. In a chain, all elements before the final instance designator MUST be types (end with `~` in the original string). + - For well-known instances, the final designator is the last `<vendor>.<package>.<namespace>.<type>.v...` segment (no trailing `~`). + - For combined anonymous instances, the final designator is the UUID tail after the last `~`. + 3. 
Only the first segment uses the `gts.` prefix; chained segments are relative (no `gts.`) + + **Parsing strategy:** + - Split on `~` to get raw segments; the first is absolute, the rest are relative + - If the final raw segment is a UUID, treat it as the combined-anonymous instance tail; otherwise treat the final raw segment as the well-known instance segment (or absent, if the identifier ends with `~`) + - Parse the non-UUID segments as GTS segments (first absolute, the rest relative) + - Validate that all segments except the final instance designator are types ## 9. Reference Implementation Recommendations diff --git a/examples/events/README.md b/examples/events/README.md index 6c70643..ddde30a 100644 --- a/examples/events/README.md +++ b/examples/events/README.md @@ -23,6 +23,12 @@ Individual events are commonly anonymous: they use a UUID `id` but still declare - `./instances/gts.x.core.events.type.v1~x.commerce.orders.order_placed.v1~.examples.json` +Alternative combined anonymous instance id form (type chain + UUID tail embedded into `id`): + +- Schema: `./schemas/gts.x.core.events.type_combined.v1~.schema.json` +- Derived schema: `./schemas/gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~.schema.json` +- Instance: `./instances/gts.x.core.events.type_combined_id.v1~x.commerce.orders.order_placed.v1.0~.examples.json` + ### Field name aliases (recommended) If a payload cannot use `id` / `type`, implementations may also support: diff --git a/examples/events/instances/gts.x.core.events.type_combined_id.v1~x.commerce.orders.order_placed.v1.0~.examples.json b/examples/events/instances/gts.x.core.events.type_combined_id.v1~x.commerce.orders.order_placed.v1.0~.examples.json new file mode 100644 index 0000000..af13700 --- /dev/null +++ b/examples/events/instances/gts.x.core.events.type_combined_id.v1~x.commerce.orders.order_placed.v1.0~.examples.json @@ -0,0 +1,24 @@ +[ + { + "id": 
"gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~7a1d2f34-5678-49ab-9012-abcdef123456", + "tenantId": "11111111-2222-3333-4444-555555555555", + "userId": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "clientId": "12345678-90ab-cdef-1234-567890abcdef", + "source": "orders-service@1.2.3", + "occurredAt": "2025-09-20T18:35:00Z", + "ingestedAt": "2025-09-20T18:35:01Z", + "sequenceNumber": 1, + "previousSequenceNumber": 0, + "subject": "af0e3c1b-8f1e-4a27-9a9b-b7b9b70c1f01", + "subjectType": "gts.x.commerce.orders.order.v1.0~", + "payload": { + "orderId": "af0e3c1b-8f1e-4a27-9a9b-b7b9b70c1f01", + "customerId": "0f2e4a9b-1c3d-4e5f-8a9b-0c1d2e3f4a5b", + "totalAmount": 149.99, + "items": [ + { "sku": "SKU-ABC-001", "name": "Wireless Mouse", "qty": 1, "price": 49.99 }, + { "sku": "SKU-XYZ-002", "name": "Mechanical Keyboard", "qty": 1, "price": 100.0 } + ] + } + } +] diff --git a/examples/events/schemas/gts.x.core.events.type.v1~.schema.json b/examples/events/schemas/gts.x.core.events.type.v1~.schema.json index 8a1c33c..bd4826a 100644 --- a/examples/events/schemas/gts.x.core.events.type.v1~.schema.json +++ b/examples/events/schemas/gts.x.core.events.type.v1~.schema.json @@ -70,7 +70,7 @@ "type": "string", "format": "date-time", "description": "The time the event was ingested into the event store.", - "$readonly": true + "readOnly": true }, "sequenceNumber": { "type": "number", @@ -91,7 +91,7 @@ "subjectType": { "description": "GTS type of the subject of the event.", "type": "string", - "x-gts-ref": "*", + "x-gts-ref": "gts.*", "$comment": "The value is mandatory if subject is present" } }, diff --git a/examples/events/schemas/gts.x.core.events.type_combined.v1~.schema.json b/examples/events/schemas/gts.x.core.events.type_combined.v1~.schema.json new file mode 100644 index 0000000..c528dc9 --- /dev/null +++ b/examples/events/schemas/gts.x.core.events.type_combined.v1~.schema.json @@ -0,0 +1,85 @@ +{ + "$id": "gts://gts.x.core.events.type_combined.v1~", + 
"$schema": "http://json-schema.org/draft-07/schema#", + "title": "Event Envelope (Combined Anonymous Instance ID)", + "type": "object", + "description": "Base event type definition (combined anonymous instance id)", + "required": [ + "id", + "tenantId", + "occurredAt" + ], + "x-event-type-settings": { + "additionalProperties": false, + "topicRef": { + "description": "ID of the topic where events of this type are stored.", + "type": "string", + "x-gts-ref":"gts.x.core.events.topic.v1~" + } + }, + "properties": { + "$schema": { + "description": "Link to the event type schema.", + "type": "string" + }, + "id": { + "description": "GTS instance identifier including the type chain and instance UUID.", + "type": "string", + "x-gts-ref": "gts.x.core.events.type_combined.v1~*" + }, + "tenantId": { + "type": "string", + "format": "uuid", + "description": "The id of the tenant that produced the event. Mandatory" + }, + "userId": { + "type": "string", + "format": "uuid", + "description": "The id of the user that produced the event. Optional" + }, + "clientId": { + "type": "string", + "format": "uuid", + "description": "The id of the API client that produced the event. Optional" + }, + "source": { + "description": "Full SemVer of the event type schema used to produce this event.", + "type": "string", + "maxLength": 256 + }, + "occurredAt": { + "type": "string", + "format": "date-time", + "description": "The time the event occurred. Fully controlled by the producer" + }, + "ingestedAt": { + "type": "string", + "format": "date-time", + "description": "The time the event was ingested into the event store.", + "readOnly": true + }, + "sequenceNumber": { + "type": "number", + "description": "The sequence number of the event in the stream." + }, + "previousSequenceNumber": { + "type": "number", + "description": "The sequence number of the previous event in the same stream." 
+ }, + "payload": { + "description": "Type-specific body; see event type schema.", + "type": "object" + }, + "subject": { + "description": "The subject of the event.", + "type": "string" + }, + "subjectType": { + "description": "GTS type of the subject of the event.", + "type": "string", + "x-gts-ref": "gts.*", + "$comment": "The value is mandatory if subject is present" + } + }, + "additionalProperties": false +} diff --git a/examples/events/schemas/gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~.schema.json b/examples/events/schemas/gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~.schema.json new file mode 100644 index 0000000..aeffedf --- /dev/null +++ b/examples/events/schemas/gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~.schema.json @@ -0,0 +1,33 @@ +{ + "$id": "gts://gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Event Instance Schema (Combined Anonymous Instance ID): order.placed", + "type": "object", + "allOf": [ + { "$ref": "gts://gts.x.core.events.type_combined.v1~" }, + { + "type": "object", + "required": ["payload", "subjectType"], + "properties": { + "id": { + "type": "string", + "x-gts-ref": "gts.x.core.events.type_combined.v1~x.commerce.orders.order_placed.v1.0~*" + }, + "payload": { + "type": "object", + "required": ["orderId", "customerId", "totalAmount", "items"], + "properties": { + "orderId": { "type": "string", "format": "uuid" }, + "customerId": { "type": "string", "format": "uuid" }, + "totalAmount": { "type": "number" }, + "items": { "type": "array", "items": { "type": "object" } } + } + }, + "subjectType": { + "type": "string", + "x-gts-ref": "gts.x.commerce.orders.order.v1.0~" + } + } + } + ] +} diff --git a/tests/test_op1_id_validation.py b/tests/test_op1_id_validation.py index 97eea3e..8d76f37 100644 --- a/tests/test_op1_id_validation.py +++ 
b/tests/test_op1_id_validation.py @@ -44,6 +44,11 @@ class TestCaseTestOp1IdValidationAllValid(HttpRunner): "gts.a.b.c.d.v0~", "gts._._._._.v1~", "gts.x.y.z.a.v999999.888888~", + ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ), ] } ), @@ -97,6 +102,11 @@ class TestCaseTestOp1IdValidationInvalid(HttpRunner): "gts.x.test1.events.v1~", "gts.x.test1.namespace.type.v1~a.b.c.v1", "gts.x.test1.events.type.v1.0.0~", + ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "not-a-uuid" + ), ] } ), diff --git a/tests/test_op2_id_extraction.py b/tests/test_op2_id_extraction.py index 2d0d311..b3a36a7 100644 --- a/tests/test_op2_id_extraction.py +++ b/tests/test_op2_id_extraction.py @@ -125,6 +125,50 @@ def test_start(self): ] +class TestCaseTestOp2IdExtraction_Case7_CombinedAnonymousInstance(HttpRunner): + config = Config("OP#2 - Extract ID (case 7: combined anonymous instance)").base_url( + get_gts_base_url() + ) + + def test_start(self): + super().test_start() + + teststeps = [ + Step( + RunRequest("extract id (combined anonymous instance)") + .post("/extract-id") + .with_json({ + "id": ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ), + "occurredAt": "2025-09-20T18:35:00Z", + }) + .validate() + .assert_equal("status_code", 200) + .assert_equal( + "body.id", + ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ), + ) + .assert_equal( + "body.schema_id", + ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + ), + ) + .assert_equal("body.selected_entity_field", "id") + .assert_equal("body.selected_schema_id_field", "id") + .assert_equal("body.is_schema", False) + ), + ] + + class TestCaseTestOp2IdExtraction_Case5_GtsBaseSchema(HttpRunner): """ Schemas MUST be detected by presence of $schema; GTS $id MUST be normalized diff --git 
a/tests/test_op2_schema_id_priority.py b/tests/test_op2_schema_id_priority.py index 415a94d..a183559 100644 --- a/tests/test_op2_schema_id_priority.py +++ b/tests/test_op2_schema_id_priority.py @@ -295,3 +295,31 @@ def test_op2_single_segment_gts_id_uses_explicit_type() -> None: # Type field is used for single-segment IDs (no chain to derive from) assert body["schema_id"] == "gts.acme.core.models.base.v1~" assert body["selected_schema_id_field"] == "type" + + +def test_op2_combined_anonymous_id_takes_priority_over_explicit_type() -> None: + """ + For combined anonymous instance identifiers (type-chain + UUID tail), + schema_id MUST be derived from the `id` prefix up to the last '~', and + any explicit `type` field is ignored. + """ + url = get_gts_base_url() + "/extract-id" + payload = { + "id": ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ), + "type": "gts.different.schema.type.v1~", + } + r = requests.post(url, json=payload, timeout=30) + assert r.status_code == 200 + body = r.json() + + assert body["is_schema"] is False + assert body["id"] == payload["id"] + assert body["schema_id"] == ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + ) + assert body["selected_schema_id_field"] == "id" diff --git a/tests/test_op3_id_parsing.py b/tests/test_op3_id_parsing.py index afe18b0..41132d9 100644 --- a/tests/test_op3_id_parsing.py +++ b/tests/test_op3_id_parsing.py @@ -93,6 +93,42 @@ def test_start(self, param): ] +class TestCaseTestOp3IdParsing_CombinedAnonymousInstance(HttpRunner): + config = Config("OP#3 - Parse ID (combined anonymous instance)").base_url( + get_gts_base_url() + ) + + @pytest.mark.parametrize( + "param", + Parameters( + { + "id": [ + ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ) + ] + } + ), + ) + def test_start(self, param): + super().test_start(param) + + teststeps = [ + Step( + 
RunRequest("parse id (combined anonymous instance)") + .get("/parse-id") + .with_params(**{"gts_id": "${id}"}) + .validate() + .assert_equal("status_code", 200) + .assert_equal("body.id", "${id}") + .assert_equal("body.ok", True) + .assert_equal("body.is_schema", False) + ), + ] + + class TestCaseTestOp3Parsing_ChainedIdentifiers(HttpRunner): """OP#3 Extended - Chained identifier parsing""" config = Config("OP#3 Extended - Chained ID Parsing").base_url( diff --git a/tests/test_op5_id_uuid.py b/tests/test_op5_id_uuid.py index 8a8379c..b76b860 100644 --- a/tests/test_op5_id_uuid.py +++ b/tests/test_op5_id_uuid.py @@ -61,6 +61,30 @@ def test_start(self): ) .assert_equal("body.uuid", "c7f8cca7-3af6-58af-b72b-3febfd93f1a8") ), + Step( + RunRequest("uuid mapping (combined anonymous instance)") + .get("/uuid") + .with_params( + **{ + "gts_id": ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ) + } + ) + .validate() + .assert_equal("status_code", 200) + .assert_equal( + "body.id", + ( + "gts.x.core.events.type.v1~" + "x.commerce.orders.order_placed.v1.0~" + "7a1d2f34-5678-49ab-9012-abcdef123456" + ), + ) + .assert_equal("body.uuid", "4a31b759-722b-5bb1-a1dc-2cf40963e81b") + ), Step( RunRequest("uuid mapping deterministic (instance)") .get("/uuid")