From bbc2cfff7b8441c51cc3395164f54f602367ced6 Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Mon, 4 May 2026 15:55:50 -0700 Subject: [PATCH 01/11] Start query RP topics doc --- modules/sql/pages/query/index.adoc | 3 + .../pages/query/query-redpanda-topics.adoc | 66 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 modules/sql/pages/query/index.adoc create mode 100644 modules/sql/pages/query/query-redpanda-topics.adoc diff --git a/modules/sql/pages/query/index.adoc b/modules/sql/pages/query/index.adoc new file mode 100644 index 000000000..a3ae56627 --- /dev/null +++ b/modules/sql/pages/query/index.adoc @@ -0,0 +1,3 @@ += Query data +:description: Query streaming data in Redpanda topics and lakehouse data in Iceberg tables using standard PostgreSQL syntax. +:page-layout: index diff --git a/modules/sql/pages/query/query-redpanda-topics.adoc b/modules/sql/pages/query/query-redpanda-topics.adoc new file mode 100644 index 000000000..5173f96df --- /dev/null +++ b/modules/sql/pages/query/query-redpanda-topics.adoc @@ -0,0 +1,66 @@ += Query Redpanda topics +:description: Map a Redpanda topic to a SQL table and run analytical queries directly against streaming data. +:page-topic-type: how-to +:personas: app_developer, data_engineer +:learning-objective-1: Map a Redpanda topic to a SQL table using a Redpanda catalog +:learning-objective-2: Run analytical SQL queries against Redpanda topic data + +Map a Redpanda topic to a SQL table to run analytical queries directly against streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's Protobuf schema in Schema Registry. + +After completing these steps, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} + +== Prerequisites + +Before you query a topic with SQL: + +* Enable the Redpanda SQL engine on your Redpanda Cloud Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/sql-quickstart.adoc[Quickstart]. 
+* Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. +* Confirm that the Redpanda topic you want to query has a Protobuf schema registered in Schema Registry. + +== Map the topic to a SQL table + +Each Redpanda topic appears as a SQL table inside a Redpanda catalog. When you enable the SQL engine, Redpanda SQL automatically creates a catalog named `default_redpanda_connection` that points at your cluster. + +Define a table against the topic with `CREATE TABLE`: + +[source,sql] +---- +CREATE TABLE default_redpanda_connection=>orders WITH ( + topic = 'orders', + schema_subject = 'orders-value' +); +---- + +Replace `orders` with your topic name and `orders-value` with the Schema Registry subject that holds the topic's value schema. + +The table inherits its column definitions from the registered Protobuf schema. Each top-level Protobuf field becomes a SQL column. + +== Run queries + +Query the table with standard `SELECT` syntax. The following query returns the first 10 records: + +[source,sql] +---- +SELECT * FROM default_redpanda_connection=>orders LIMIT 10; +---- + +Aggregate and filter records using familiar PostgreSQL constructs: + +[source,sql] +---- +SELECT customer_id, SUM(amount) AS total +FROM default_redpanda_connection=>orders +WHERE status = 'completed' +GROUP BY customer_id +ORDER BY total DESC +LIMIT 10; +---- + +== Next steps + +* xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]: full reference for the table-against-topic syntax, including all options. +* xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG]: define additional Redpanda catalogs with custom connection settings. +* xref:reference:sql/index.adoc[Redpanda SQL Reference]: supported SQL statements, clauses, data types, and functions. 
From 545f2caf798cc10750a4b82f8f344d61a5e7295e Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Thu, 7 May 2026 19:27:48 -0700 Subject: [PATCH 02/11] DOC-1990: Move query directory and rewrite streaming-topics how-to Renames modules/sql/pages/query/ to modules/sql/pages/query-data/ and renames the streaming-topic how-to from query-redpanda-topics.adoc to query-streaming-topics.adoc to match the SQL GA IA. Retitles the page "Query streaming topics" and reframes the description and learning objectives around live streaming data; bridge-query and Iceberg content stays out of this page (DOC-2006 owns the Iceberg-topics how-to). Adds a pointer to the Iceberg topics how-to under the intro and lists it under Next steps. Updates the enable-prereq xref to point to the Enable Redpanda SQL page. Drops the CREATE REDPANDA CATALOG link from Next steps to align with the v1 framing that users do not typically create their own Redpanda catalog. Reframes the Query data index page description for v1 Iceberg scope (live and historical data in Redpanda topics; no external Iceberg lakehouse). Co-Authored-By: Claude Opus 4.7 (1M context) --- modules/sql/pages/query-data/index.adoc | 3 +++ .../query-streaming-topics.adoc} | 16 +++++++++------- modules/sql/pages/query/index.adoc | 3 --- 3 files changed, 12 insertions(+), 10 deletions(-) create mode 100644 modules/sql/pages/query-data/index.adoc rename modules/sql/pages/{query/query-redpanda-topics.adoc => query-data/query-streaming-topics.adoc} (71%) delete mode 100644 modules/sql/pages/query/index.adoc diff --git a/modules/sql/pages/query-data/index.adoc b/modules/sql/pages/query-data/index.adoc new file mode 100644 index 000000000..c9e39d9eb --- /dev/null +++ b/modules/sql/pages/query-data/index.adoc @@ -0,0 +1,3 @@ += Query data +:description: Query live and historical data in your Redpanda topics using standard PostgreSQL syntax. 
+:page-layout: index diff --git a/modules/sql/pages/query/query-redpanda-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc similarity index 71% rename from modules/sql/pages/query/query-redpanda-topics.adoc rename to modules/sql/pages/query-data/query-streaming-topics.adoc index 5173f96df..8f6115f18 100644 --- a/modules/sql/pages/query/query-redpanda-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -1,11 +1,13 @@ -= Query Redpanda topics -:description: Map a Redpanda topic to a SQL table and run analytical queries directly against streaming data. += Query streaming topics +:description: Map a Redpanda topic to a SQL table and run analytical queries directly against live streaming data. :page-topic-type: how-to :personas: app_developer, data_engineer -:learning-objective-1: Map a Redpanda topic to a SQL table using a Redpanda catalog -:learning-objective-2: Run analytical SQL queries against Redpanda topic data +:learning-objective-1: Map a streaming Redpanda topic to a SQL table using the default Redpanda catalog +:learning-objective-2: Run analytical SQL queries against live topic data -Map a Redpanda topic to a SQL table to run analytical queries directly against streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's Protobuf schema in Schema Registry. +Map a Redpanda topic to a SQL table to run analytical queries directly against live streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's Protobuf schema in Schema Registry. + +To query the Iceberg-translated history of a Redpanda topic, see xref:sql:query-data/query-iceberg-topics.adoc[]. After completing these steps, you will be able to: @@ -16,7 +18,7 @@ After completing these steps, you will be able to: Before you query a topic with SQL: -* Enable the Redpanda SQL engine on your Redpanda Cloud Bring Your Own Cloud (BYOC) cluster. 
See xref:sql:get-started/sql-quickstart.adoc[Quickstart]. +* Enable the Redpanda SQL engine on your Redpanda Cloud Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. * Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. * Confirm that the Redpanda topic you want to query has a Protobuf schema registered in Schema Registry. @@ -61,6 +63,6 @@ LIMIT 10; == Next steps +* xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg topics]: query the Iceberg-translated history of an Iceberg-enabled Redpanda topic, and run a single query that spans live and historical records. * xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]: full reference for the table-against-topic syntax, including all options. -* xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG]: define additional Redpanda catalogs with custom connection settings. * xref:reference:sql/index.adoc[Redpanda SQL Reference]: supported SQL statements, clauses, data types, and functions. diff --git a/modules/sql/pages/query/index.adoc b/modules/sql/pages/query/index.adoc deleted file mode 100644 index a3ae56627..000000000 --- a/modules/sql/pages/query/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= Query data -:description: Query streaming data in Redpanda topics and lakehouse data in Iceberg tables using standard PostgreSQL syntax. 
-:page-layout: index From 48a30ca1db9f337caee0c2107bb49db77411de8a Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Wed, 13 May 2026 19:00:19 -0700 Subject: [PATCH 03/11] Change rp connection to catalog --- .../query-data/query-streaming-topics.adoc | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index 8f6115f18..262632f14 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -5,7 +5,7 @@ :learning-objective-1: Map a streaming Redpanda topic to a SQL table using the default Redpanda catalog :learning-objective-2: Run analytical SQL queries against live topic data -Map a Redpanda topic to a SQL table to run analytical queries directly against live streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's Protobuf schema in Schema Registry. +Map a Redpanda topic to a SQL table to run analytical queries directly against live streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's schema in Schema Registry. To query the Iceberg-translated history of a Redpanda topic, see xref:sql:query-data/query-iceberg-topics.adoc[]. @@ -18,9 +18,14 @@ After completing these steps, you will be able to: Before you query a topic with SQL: -* Enable the Redpanda SQL engine on your Redpanda Cloud Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. +* Enable the Redpanda SQL engine on your Redpanda Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. * Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. 
-* Confirm that the Redpanda topic you want to query has a Protobuf schema registered in Schema Registry. +* Confirm that the Redpanda topic you want to query has a schema registered in Schema Registry. + +// TODO: Confirm permissions/roles/ACLs required +// Is it possible to use a topic without a registered schema? +// Any specific limitations on Protobuf vs JSON vs Avro formats? +// Any requirements related to wire format and subject naming strategy? == Map the topic to a SQL table @@ -30,7 +35,7 @@ Define a table against the topic with `CREATE TABLE`: [source,sql] ---- -CREATE TABLE default_redpanda_connection=>orders WITH ( +CREATE TABLE default_redpanda_catalog=>orders WITH ( topic = 'orders', schema_subject = 'orders-value' ); @@ -38,7 +43,8 @@ CREATE TABLE default_redpanda_connection=>orders WITH ( Replace `orders` with your topic name and `orders-value` with the Schema Registry subject that holds the topic's value schema. -The table inherits its column definitions from the registered Protobuf schema. Each top-level Protobuf field becomes a SQL column. +// TODO: Nested fields? +The table inherits its column definitions from the registered schema. For Protobuf schemas, Redpanda SQL maps each top-level field to a SQL column. == Run queries @@ -46,7 +52,7 @@ Query the table with standard `SELECT` syntax. 
The following query returns the f [source,sql] ---- -SELECT * FROM default_redpanda_connection=>orders LIMIT 10; +SELECT * FROM default_redpanda_catalog=>orders LIMIT 10; ---- Aggregate and filter records using familiar PostgreSQL constructs: @@ -54,7 +60,7 @@ Aggregate and filter records using familiar PostgreSQL constructs: [source,sql] ---- SELECT customer_id, SUM(amount) AS total -FROM default_redpanda_connection=>orders +FROM default_redpanda_catalog=>orders WHERE status = 'completed' GROUP BY customer_id ORDER BY total DESC From 96b0ff180fb569078316fc7f0c2d55aafa769461 Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Thu, 14 May 2026 10:28:27 -0700 Subject: [PATCH 04/11] Add wire protocol option --- .../pages/sql/sql-statements/create-table.adoc | 18 +++++++++++++----- .../query-data/query-streaming-topics.adoc | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc index 027ea6681..291b2efa4 100644 --- a/modules/reference/pages/sql/sql-statements/create-table.adoc +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -51,17 +51,25 @@ a|How to handle records that fail deserialization. |`struct_mapping_policy` |STRING |No -a|How to map nested structures to SQL columns. +a|How to map nested structures from the topic schema to SQL columns. -* `JSON` (default): Stores nested data as JSON. -* `FLATTEN`: Expands nested fields into top-level columns. -* `COMPOUND`: Maps to ROW types. -* `VARIANT`: Stores as a variant type. +* `COMPOUND` (default): Maps each nested structure to a SQL xref:reference:sql/sql-data-types/row.adoc[ROW] value with named fields, queryable using `(column).field_name` syntax. Cyclic types are not supported in `COMPOUND` mode — use `JSON` for recursive schemas. +* `JSON`: Stores each nested structure as a JSON value. Required for recursive (cyclic) types. 
|`output_schema_message_full_name` |STRING |No |Full Protobuf message name. Required when the schema contains multiple message definitions. + +|`confluent_wire_protocol` +|STRING +|No +a|Whether records on the topic are encoded with the https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format[Confluent Schema Registry wire format^] (a magic byte followed by a 4-byte schema ID before the payload). + +* `'true'` (default): Records carry the Confluent wire-format prefix. Use this for topics whose values were produced by a Schema-Registry-aware client. +* `'false'`: Records are raw Protobuf or Avro without the wire-format prefix. + +Only valid when `schema_lookup_policy = 'LATEST'`. |=== == Examples diff --git a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index 262632f14..92df65119 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -29,7 +29,7 @@ Before you query a topic with SQL: == Map the topic to a SQL table -Each Redpanda topic appears as a SQL table inside a Redpanda catalog. When you enable the SQL engine, Redpanda SQL automatically creates a catalog named `default_redpanda_connection` that points at your cluster. +Each Redpanda topic appears as a SQL table inside a Redpanda catalog. When you enable the SQL engine, Redpanda SQL automatically creates a catalog named `default_redpanda_catalog` that points at your cluster. 
Define a table against the topic with `CREATE TABLE`: From 0c9d7677686afb41471a87fa75de4edc4a5f41dd Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Tue, 19 May 2026 16:08:42 -0700 Subject: [PATCH 05/11] schema_subject required --- .../sql/sql-statements/create-table.adoc | 21 +++++++++-------- .../query-data/query-streaming-topics.adoc | 23 ++++++++++++++----- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc index 291b2efa4..666bf402f 100644 --- a/modules/reference/pages/sql/sql-statements/create-table.adoc +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -31,8 +31,8 @@ WITH (option = 'value' [, ...]); |`schema_subject` |STRING -|No -|Schema Registry subject name to use for deserializing topic data. +|Yes +|Schema Registry subject name to use for deserializing topic data. Redpanda SQL requires a schema to query a topic. |`schema_lookup_policy` |STRING @@ -81,20 +81,23 @@ Map the `transactions` topic to a table through `default_redpanda_catalog`: [source,sql] ---- CREATE TABLE default_redpanda_catalog=>transactions -WITH (topic = 'transactions'); +WITH ( + topic = 'transactions', + schema_subject = 'transactions-value' +); ---- -=== Specify a Schema Registry subject +=== Create a table from a multi-message Protobuf schema -Map a topic and specify the Schema Registry subject: +When the Protobuf schema for the topic defines more than one message, specify the message to use with `output_schema_message_full_name`: [source,sql] ---- -CREATE TABLE default_redpanda_catalog=>user_events +CREATE TABLE default_redpanda_catalog=>orders WITH ( - topic = 'user-events', - schema_subject = 'user-events-value', - schema_lookup_policy = 'LATEST' + topic = 'orders', + schema_subject = 'orders-value', + output_schema_message_full_name = 'com.example.orders.Order' ); ---- diff --git 
a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index 92df65119..fa99d660d 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -20,11 +20,10 @@ Before you query a topic with SQL: * Enable the Redpanda SQL engine on your Redpanda Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. * Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. -* Confirm that the Redpanda topic you want to query has a schema registered in Schema Registry. +* Confirm that the Redpanda topic you want to query has a schema registered in Schema Registry. Redpanda SQL supports Protobuf, JSON, and Avro schemas. // TODO: Confirm permissions/roles/ACLs required -// Is it possible to use a topic without a registered schema? -// Any specific limitations on Protobuf vs JSON vs Avro formats? +// Is it possible to use a topic without a registered schema? // Any requirements related to wire format and subject naming strategy? == Map the topic to a SQL table @@ -41,10 +40,22 @@ CREATE TABLE default_redpanda_catalog=>orders WITH ( ); ---- -Replace `orders` with your topic name and `orders-value` with the Schema Registry subject that holds the topic's value schema. +Replace `orders` with your topic name and `orders-value` with the Schema Registry subject that holds the topic's value schema. `schema_subject` is required: Redpanda SQL needs a schema to deserialize and query the topic's records. -// TODO: Nested fields? -The table inherits its column definitions from the registered schema. For Protobuf schemas, Redpanda SQL maps each top-level field to a SQL column. 
+If the topic uses a Protobuf schema that defines more than one message, also set `output_schema_message_full_name` to the fully-qualified name of the message to use: + +[source,sql] +---- +CREATE TABLE default_redpanda_catalog=>orders WITH ( + topic = 'orders', + schema_subject = 'orders-value', + output_schema_message_full_name = 'com.example.orders.Order' +); +---- + +The table inherits its column definitions from the registered schema. Each top-level field in the schema becomes a SQL column. + +// TODO: Add xref to query-nested-fields.adoc (DOC-2019) once it lands for nested-field handling guidance. == Run queries From dc4954347ff1b6b3463cd7fe853f0056cd1183ea Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Tue, 19 May 2026 16:37:58 -0700 Subject: [PATCH 06/11] Review pass --- .../query-data/query-streaming-topics.adoc | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index fa99d660d..e394a51bd 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -2,12 +2,12 @@ :description: Map a Redpanda topic to a SQL table and run analytical queries directly against live streaming data. :page-topic-type: how-to :personas: app_developer, data_engineer -:learning-objective-1: Map a streaming Redpanda topic to a SQL table using the default Redpanda catalog +:learning-objective-1: Map a Redpanda topic to a SQL table using the default Redpanda catalog :learning-objective-2: Run analytical SQL queries against live topic data -Map a Redpanda topic to a SQL table to run analytical queries directly against live streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's schema in Schema Registry. 
+Map a Redpanda topic to a SQL table to run analytical queries directly against live streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's registered schema. -To query the Iceberg-translated history of a Redpanda topic, see xref:sql:query-data/query-iceberg-topics.adoc[]. +To extend queries past your Redpanda retention window by reading from Iceberg-translated history, see xref:sql:query-data/query-iceberg-topics.adoc[]. After completing these steps, you will be able to: @@ -19,16 +19,13 @@ After completing these steps, you will be able to: Before you query a topic with SQL: * Enable the Redpanda SQL engine on your Redpanda Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. +* A Redpanda Cloud user with the *SQL: Access* (or *SQL: Manage*) data-plane RBAC permission. For a *SQL: Access* user to query a topic, a *SQL: Manage* user must first `GRANT SELECT` on the topic to that user. See xref:sql:manage/manage-access.adoc[Manage access to Redpanda SQL]. * Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. * Confirm that the Redpanda topic you want to query has a schema registered in Schema Registry. Redpanda SQL supports Protobuf, JSON, and Avro schemas. -// TODO: Confirm permissions/roles/ACLs required -// Is it possible to use a topic without a registered schema? -// Any requirements related to wire format and subject naming strategy? - == Map the topic to a SQL table -Each Redpanda topic appears as a SQL table inside a Redpanda catalog. When you enable the SQL engine, Redpanda SQL automatically creates a catalog named `default_redpanda_catalog` that points at your cluster. +Each Redpanda topic appears as a SQL table inside a Redpanda catalog. When Redpanda SQL is enabled, a catalog named `default_redpanda_catalog` is created automatically and points at your cluster. 
Define a table against the topic with `CREATE TABLE`: @@ -53,9 +50,7 @@ CREATE TABLE default_redpanda_catalog=>orders WITH ( ); ---- -The table inherits its column definitions from the registered schema. Each top-level field in the schema becomes a SQL column. - -// TODO: Add xref to query-nested-fields.adoc (DOC-2019) once it lands for nested-field handling guidance. +The table inherits its column definitions from the registered schema. Each top-level field in the schema becomes a SQL column. For querying nested fields in struct types, see xref:sql:query-data/query-nested-fields.adoc[]. == Run queries @@ -80,6 +75,6 @@ LIMIT 10; == Next steps -* xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg topics]: query the Iceberg-translated history of an Iceberg-enabled Redpanda topic, and run a single query that spans live and historical records. +* xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg-enabled topics]: run queries against historical data retained beyond your Redpanda retention window. * xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]: full reference for the table-against-topic syntax, including all options. * xref:reference:sql/index.adoc[Redpanda SQL Reference]: supported SQL statements, clauses, data types, and functions. 
From 05e88bca38fb5739e7f1a8129a969515c13fd795 Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Thu, 21 May 2026 11:34:27 -0700 Subject: [PATCH 07/11] Review pass --- .../sql/pages/query-data/query-streaming-topics.adoc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index e394a51bd..c6dc8a7e8 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -1,4 +1,4 @@ -= Query streaming topics += Query Streaming Topics :description: Map a Redpanda topic to a SQL table and run analytical queries directly against live streaming data. :page-topic-type: how-to :personas: app_developer, data_engineer @@ -7,9 +7,9 @@ Map a Redpanda topic to a SQL table to run analytical queries directly against live streaming data without building ETL pipelines. Redpanda SQL reads each record's fields from the topic's registered schema. -To extend queries past your Redpanda retention window by reading from Iceberg-translated history, see xref:sql:query-data/query-iceberg-topics.adoc[]. +To extend queries past your Redpanda retention window by reading the Iceberg history of Iceberg-enabled topics, see xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg-enabled Topics]. -After completing these steps, you will be able to: +Use this page to: * [ ] {learning-objective-1} * [ ] {learning-objective-2} @@ -19,8 +19,8 @@ After completing these steps, you will be able to: Before you query a topic with SQL: * Enable the Redpanda SQL engine on your Redpanda Bring Your Own Cloud (BYOC) cluster. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. -* A Redpanda Cloud user with the *SQL: Access* (or *SQL: Manage*) data-plane RBAC permission. For a *SQL: Access* user to query a topic, a *SQL: Manage* user must first `GRANT SELECT` on the topic to that user. 
See xref:sql:manage/manage-access.adoc[Manage access to Redpanda SQL]. -* Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. +* Have a Redpanda Cloud user with the *SQL: Access* (or *SQL: Manage*) data-plane RBAC permission. For a *SQL: Access* user to query a topic, a *SQL: Manage* user must first `GRANT SELECT` on the topic to that user. See xref:sql:manage/manage-access.adoc[Manage access to Redpanda SQL]. +* Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:get-started/sql-quickstart.adoc[] for a `psql` example, or xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. * Confirm that the Redpanda topic you want to query has a schema registered in Schema Registry. Redpanda SQL supports Protobuf, JSON, and Avro schemas. == Map the topic to a SQL table @@ -75,6 +75,6 @@ LIMIT 10; == Next steps -* xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg-enabled topics]: run queries against historical data retained beyond your Redpanda retention window. +* xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg-enabled Topics]: run queries against historical data retained beyond your Redpanda retention window. * xref:reference:sql/sql-statements/create-table.adoc[CREATE TABLE]: full reference for the table-against-topic syntax, including all options. * xref:reference:sql/index.adoc[Redpanda SQL Reference]: supported SQL statements, clauses, data types, and functions. 
From 32d483315dd6f923ade9f0852dbc6c8f21c384cc Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Fri, 22 May 2026 11:12:35 -0700 Subject: [PATCH 08/11] Address review comments --- .../reference/pages/sql/sql-statements/create-table.adoc | 8 ++++---- modules/sql/pages/query-data/index.adoc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc index 666bf402f..c1c531282 100644 --- a/modules/reference/pages/sql/sql-statements/create-table.adoc +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -4,7 +4,7 @@ The `CREATE TABLE` statement maps a Redpanda topic to a SQL table through a catalog. After creating the table, you can query topic data using standard SQL. -NOTE: You must first xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[create a Redpanda catalog connection] before creating tables. `CREATE TABLE` in Redpanda SQL maps Redpanda topics to SQL tables — it does not create standalone tables with user-defined schemas. +NOTE: You must first xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[create a Redpanda catalog connection] before creating tables. `CREATE TABLE` in Redpanda SQL maps Redpanda topics to SQL tables. It does not create standalone tables with user-defined schemas. == Syntax @@ -31,8 +31,8 @@ WITH (option = 'value' [, ...]); |`schema_subject` |STRING -|Yes -|Schema Registry subject name to use for deserializing topic data. Redpanda SQL requires a schema to query a topic. +|No +|Schema Registry subject name to use for deserializing topic data. Defaults to the topic-name strategy (`<topic-name>-value`). |`schema_lookup_policy` |STRING @@ -53,7 +53,7 @@ a|How to handle records that fail deserialization. |No a|How to map nested structures from the topic schema to SQL columns. 
-* `COMPOUND` (default): Maps each nested structure to a SQL xref:reference:sql/sql-data-types/row.adoc[ROW] value with named fields, queryable using `(column).field_name` syntax. Cyclic types are not supported in `COMPOUND` mode — use `JSON` for recursive schemas. +* `COMPOUND` (default): Maps each nested structure to a SQL xref:reference:sql/sql-data-types/row.adoc[ROW] value with named fields, queryable using `(column).field_name` syntax. Cyclic types are not supported in `COMPOUND` mode. Use `JSON` for recursive schemas. * `JSON`: Stores each nested structure as a JSON value. Required for recursive (cyclic) types. |`output_schema_message_full_name` diff --git a/modules/sql/pages/query-data/index.adoc b/modules/sql/pages/query-data/index.adoc index c9e39d9eb..2daba009d 100644 --- a/modules/sql/pages/query-data/index.adoc +++ b/modules/sql/pages/query-data/index.adoc @@ -1,3 +1,3 @@ -= Query data += Query Data :description: Query live and historical data in your Redpanda topics using standard PostgreSQL syntax. :page-layout: index From 65233144eff402ba4a36de8a37c76fcce0a3f0ec Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Fri, 22 May 2026 11:32:30 -0700 Subject: [PATCH 09/11] Add info on redpanda and redpanda_raw structs --- .../sql/sql-statements/create-table.adoc | 65 +++++++++++++++++++ .../query-data/query-streaming-topics.adoc | 9 ++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc index c1c531282..1c78e31e5 100644 --- a/modules/reference/pages/sql/sql-statements/create-table.adoc +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -72,6 +72,71 @@ a|Whether records on the topic are encoded with the https://docs.confluent.io/pl Only valid when `schema_lookup_policy = 'LATEST'`. 
|=== +[#auto-added-columns] +== Auto-added columns + +Every catalog-mapped table includes two struct columns in addition to the columns derived from the topic's schema. Redpanda SQL adds these columns to both Kafka-backed and Iceberg-backed tables. The names `redpanda` and `redpanda_raw` are reserved. A topic schema cannot define columns with these names. + +=== `redpanda` + +Contains Kafka record metadata. Always present on every row. + +[cols="<22%,<28%,<10%,<40%",options="header"] +|=== +|Field |Type |Nullable |Description + +|`partition` +|`INT` +|No +|Partition the record was read from. + +|`offset` +|`BIGINT` +|No +|Offset of the record within its partition. + +|`timestamp` +|`TIMESTAMP WITH TIME ZONE` +|Yes +|Record timestamp. + +|`headers` +|Array of struct `{key TEXT, value BYTEA}` +|Yes +|Record headers, as an array where each element is a struct of header name and value bytes. + +|`key` +|`BYTEA` +|Yes +|Record key bytes. + +|`timestamp_type` +|`INT` +|Yes +|Kafka timestamp type (for example, `CreateTime` or `LogAppendTime`). +|=== + +=== `redpanda_raw` + +Populated only when `error_handling_policy = 'FILL_NULL'` and a record fails to decode. In all other cases, `redpanda_raw` is `NULL`. + +Use `redpanda_raw` as a dead-letter pattern. Rows whose value fails schema deserialization remain queryable, with the malformed payload preserved for inspection or reprocessing. + +[cols="<22%,<28%,<10%,<40%",options="header"] +|=== +|Field |Type |Nullable |Description + +|`key` +|`BYTEA` +|Yes +|Raw record key bytes. + +|`value` +|`BYTEA` +|Yes +|Raw record value bytes that failed to decode. 
+|=== + == Examples === Create a basic table diff --git a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index c6dc8a7e8..97a71a463 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -37,7 +37,7 @@ CREATE TABLE default_redpanda_catalog=>orders WITH ( ); ---- -Replace `orders` with your topic name and `orders-value` with the Schema Registry subject that holds the topic's value schema. `schema_subject` is required: Redpanda SQL needs a schema to deserialize and query the topic's records. +Replace `orders` with your topic name and `orders-value` with the Schema Registry subject that holds the topic's value schema. `schema_subject` is optional. If omitted, Redpanda SQL uses the topic-name strategy default (`<topic>-value`). If the topic uses a Protobuf schema that defines more than one message, also set `output_schema_message_full_name` to the fully-qualified name of the message to use: @@ -52,6 +52,13 @@ CREATE TABLE default_redpanda_catalog=>orders WITH ( The table inherits its column definitions from the registered schema. Each top-level field in the schema becomes a SQL column. For querying nested fields in struct types, see xref:sql:query-data/query-nested-fields.adoc[]. +In addition to the columns derived from your topic's schema, Redpanda SQL adds two struct columns to every catalog-mapped table: + +* `redpanda`: Kafka record metadata such as partition, offset, and timestamp. +* `redpanda_raw`: populated only when `error_handling_policy = 'FILL_NULL'` and a record fails to decode. + +For details, see xref:reference:sql/sql-statements/create-table.adoc#auto-added-columns[Auto-added columns]. + == Run queries Query the table with standard `SELECT` syntax. 
The following query returns the first 10 records: From 9cf7608c36bd687a8bfe519ab467125e538828af Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Fri, 22 May 2026 11:45:34 -0700 Subject: [PATCH 10/11] Review pass --- .../reference/pages/sql/sql-statements/create-table.adoc | 8 ++++---- modules/sql/pages/query-data/query-streaming-topics.adoc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc index 1c78e31e5..fb9bc9830 100644 --- a/modules/reference/pages/sql/sql-statements/create-table.adoc +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -4,7 +4,7 @@ The `CREATE TABLE` statement maps a Redpanda topic to a SQL table through a catalog. After creating the table, you can query topic data using standard SQL. -NOTE: You must first xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[create a Redpanda catalog connection] before creating tables. `CREATE TABLE` in Redpanda SQL maps Redpanda topics to SQL tables. It does not create standalone tables with user-defined schemas. +NOTE: You must first xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[create a Redpanda catalog connection] before creating tables. `CREATE TABLE` in Redpanda SQL maps Redpanda topics to SQL tables and does not create standalone tables with user-defined schemas. == Syntax @@ -37,7 +37,7 @@ WITH (option = 'value' [, ...]); |`schema_lookup_policy` |STRING |No -|How to resolve the schema version. Only `LATEST` is supported. +|How to resolve the schema version. `LATEST` is the only supported value. |`error_handling_policy` |STRING @@ -113,7 +113,7 @@ Contains Kafka record metadata. Always present on every row. |`timestamp_type` |`INT` |Yes -|Kafka timestamp type (for example, `CreateTime` or `LogAppendTime`). +|Kafka timestamp type code. `0` for `CreateTime`, `1` for `LogAppendTime`. `NULL` when not available. 
|=== === `redpanda_raw` @@ -139,7 +139,7 @@ Use `redpanda_raw` as a dead-letter pattern. Rows whose value fails schema deser == Examples -=== Create a basic table +=== Map a topic to a table Map the `transactions` topic to a table through `default_redpanda_catalog`: diff --git a/modules/sql/pages/query-data/query-streaming-topics.adoc b/modules/sql/pages/query-data/query-streaming-topics.adoc index 97a71a463..c26f8b917 100644 --- a/modules/sql/pages/query-data/query-streaming-topics.adoc +++ b/modules/sql/pages/query-data/query-streaming-topics.adoc @@ -55,7 +55,7 @@ The table inherits its column definitions from the registered schema. Each top-l In addition to the columns derived from your topic's schema, Redpanda SQL adds two struct columns to every catalog-mapped table: * `redpanda`: Kafka record metadata such as partition, offset, and timestamp. -* `redpanda_raw`: populated only when `error_handling_policy = 'FILL_NULL'` and a record fails to decode. +* `redpanda_raw`: Populated only when `error_handling_policy = 'FILL_NULL'` and a record fails to decode. For details, see xref:reference:sql/sql-statements/create-table.adoc#auto-added-columns[Auto-added columns]. From 0f5aba14c330df7ebb934fe2982cf6ad4e1f0640 Mon Sep 17 00:00:00 2001 From: Kat Batuigas Date: Fri, 22 May 2026 17:33:05 -0700 Subject: [PATCH 11/11] Adjust column widths --- modules/reference/pages/sql/sql-statements/create-table.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/reference/pages/sql/sql-statements/create-table.adoc b/modules/reference/pages/sql/sql-statements/create-table.adoc index fb9bc9830..f92fc4d32 100644 --- a/modules/reference/pages/sql/sql-statements/create-table.adoc +++ b/modules/reference/pages/sql/sql-statements/create-table.adoc @@ -20,7 +20,7 @@ WITH (option = 'value' [, ...]); == Options -[cols="<30%,<15%,<10%,<45%",options="header"] +[cols="<30%,<10%,<15%,<45%",options="header"] |=== |Option |Type |Required |Description