From 3e59e642f3ecc6f41aa3902889d8e43902621c88 Mon Sep 17 00:00:00 2001 From: Lukasz Antoniak Date: Tue, 10 Feb 2026 15:29:49 +0100 Subject: [PATCH 1/5] CASSANALYTICS-6: User documentation --- build.gradle | 2 + docs/build.gradle | 28 +++++ docs/src/user.adoc | 308 +++++++++++++++++++++++++++++++++++++++++++++ settings.gradle | 3 +- 4 files changed, 340 insertions(+), 1 deletion(-) create mode 100644 docs/build.gradle create mode 100644 docs/src/user.adoc diff --git a/build.gradle b/build.gradle index 396806a8c..c556f9948 100644 --- a/build.gradle +++ b/build.gradle @@ -32,6 +32,8 @@ plugins { // Release Audit Tool (RAT) plugin for checking project licenses id("org.nosphere.apache.rat") version "0.8.1" + + id 'org.asciidoctor.jvm.convert' version '3.3.2' } repositories { diff --git a/docs/build.gradle b/docs/build.gradle new file mode 100644 index 000000000..e11f37030 --- /dev/null +++ b/docs/build.gradle @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +apply plugin: 'org.asciidoctor.jvm.convert' + +asciidoctor { + sourceDir = file("src") + outputDir = file("build") + attributes( + 'project-version': project.version + ) +} diff --git a/docs/src/user.adoc b/docs/src/user.adoc new file mode 100644 index 000000000..e7db9ba8c --- /dev/null +++ b/docs/src/user.adoc @@ -0,0 +1,308 @@ += Overview + +This document describes the configuration options available for the bulk reader and bulk writer components. + +== Cassandra Sidecar Configuration + +Analytics library uses Sidecar to interact with Cassandra cluster. Bulk reader and writer components share common +Sidecar configuration properties. + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|_sidecar_contact_points_ +| +|Comma-separated list of Cassandra Sidecar contact points. IP addresses and FQDN domain names are supported, +with an optional port number (e.g. 
`lcoalhost1,localhost2`, `127.0.0.1,127.0.0.2`, `127.0.0.1:9043,127.0.0.2:9043`) + +|_sidecar_port_ +|`9043` +|Default port on which Cassandra Sidecar listens + +|_keystore_path_ +| +|Path to keystore used to establish TLS connection with Cassandra Sidecar + +|_keystore_base64_encoded_ +| +|Base64-encoded keystore used to establish TLS connection with Cassandra Sidecar + +|_keystore_password_ +| +|Keystore password + +|_keystore_type_ +|`PKCS12` +|Keystore type, `PKCS12` or `JKS` + +|_truststore_path_ +| +|Path to truststore used to establish TLS connection with Cassandra Sidecar + +|_truststore_base64_encoded_ +| +|Base64-encoded truststore used to establish TLS connection with Cassandra Sidecar + +|_truststore_password_ +| +|Truststore password + +|_truststore_type_ +|`PKCS12` +|Truststore type, `PKCS12` or `JKS` + +|_cassandra_role_ +| +|Specific role that Sidecar shall use to authorize the request. For further details consult Sidecar documentation +for `cassandra-auth-role` HTTP header + +|=== + +== Bulk Reader + +This section describes configuration properties specific to the bulk reader. + +=== Cassandra Sidecar Configuration + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|_defaultMillisToSleep_ +|`500` +|Number of milliseconds to wait between retry attempts + +|_maxMillisToSleep_ +|`60000` +|Maximum number of milliseconds to sleep between retries + +|_maxPoolSize_ +|`64` +|Size of the Vert.x worker thread pool + +|_timeoutSeconds_ +|`600` +|Request timeout, expressed in seconds + +|=== + +=== Spark Reader Configuration + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|_keyspace_ +| +|Keyspace of a table to read + +|_table_ +| +|Table to be read + +|_dc_ +| +|Data center used when `LOCAL_*` consistency level is specified + +|_consistencyLevel_ +|`LOCAL_QUORUM` +|Read consistency level + +|_snapshotName_ +|`sbr_\{uuid\}` +|Name of a snapshot to use (for data consistency). By default, unique name is always generated + +|_createSnapshot_ +|`true` +|Indicates whether a new snapshot should be created prior to performing the read operation + +|_clearSnapshotStrategy_ +|`OnCompletionOrTTL 2d` +|Strategy of removing snapshot once read operation completes. This option is enabled always when _createSnapshot_ +flag is set to `true`. Value of _clearSnapshotStrategy_ must follow the format: `[strategy] [snapshotTTL]`. Supported +strategies: `NoOp`, `OnCompletion`, `OnCompletionOrTTL`, `TTL`. Example configurations: `OnCompletionOrTTL 2d`, +`TTL 2d`, `NoOp`, `OnCompletion`. TTL value has to match pattern: `\d+(d\|h\|m\|s)` + +|_bigNumberConfig_ +| +a|Defines the output scale and precision of `decimal` and `varint` columns. Parameter value is a JSON string +with the following structure: + +[source,json] +---- +{ + "columnName1" : {"bigDecimalPrecision": 10, "bigDecimalScale": 5}, + "columnName2" : {"bigIntegerPrecision": 10, "bigIntegerScale": 5} +} +---- + +|_lastModifiedColumnName_ +| +|Name of the field to be appended to Spark RDD that represents last modification timestamp of each row + +|=== + +=== Other Properties + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|_defaultParallelism_ +|`1` +|Value of Spark property `spark.default.parallelism` + +|_numCores_ +|`1` +|Total number of cores used by all Spark executors + +|_maxBufferSizeBytes_ +|`6291456` +a|Maximum amount of bytes per sstable file that may be downloaded and buffered in-memory. This parameter is +global default and can be overridden per sstable file type. 
Effective defaults are: + +- `Data.db`: 6291456 +- `Index.db`: 131072 +- `Summary.db`: 262144 +- `Statistics.db`: 131072 +- `CompressionInfo.db`: 131072 +- `.log` (commit log): 65536 +- `Partitions.db`: 131072 +- `Rows.db`: 131072 + +To override size for `Data.db`, use property `_maxBufferSizeBytes_Data.db_`. + +|_chunkBufferSizeBytes_ +|`4194304` +a|Default chunk size (in bytes) that will be requested when fetching next portion of sstable file. This parameter is +global default and can be overridden per sstable file type. Effective defaults are: + +- `Data.db`: 4194304 +- `Index.db`: 32768 +- `Summary.db`: 131072 +- `Statistics.db`: 65536 +- `CompressionInfo.db`: 65536 +- `.log` (commit log): 65536 +- `Partitions.db`: 4096 +- `Rows.db`: 4096 + +To override size for `Data.db`, use property `_chunkBufferSizeBytes_Data.db_`. + +|_sizing_ +|`default` +a|Determines how the number of CPU cores is selected during the read operation. Supported options: + +* `default`: static number of cores defined by _numCores_ parameter +* `dynamic`: calculates number of cores dynamically based on table size. Improves cost efficiency for processing small +tables (few GBs). Consult JavaDoc of `org.apache.cassandra.spark.data.DynamicSizing` for implementation details. +Relevant configuration properties: + ** _maxPartitionSize_: maximum Spark partition size (in GiB) + +|_quote_identifiers_ +|`false` +|When `true`, keyspace, table and column names are quoted + +|_sstable_start_timestamp_micros_ and _sstable_end_timestamp_micros_ +| +|Define an inclusive time-range filter for sstable selection. Both timestamps are expressed in microseconds + +|=== + +== Bulk Writer + +This section describes configuration properties specific to the bulk writer. + +=== Spark Writer Configuration + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|_keyspace_ +| +|Keyspace of a table to write + +|_table_ +| +|Table to which rows are written or from which rows are removed depending on _write_mode_ + +|_local_dc_ +| +|Data center used when `LOCAL_*` consistency level is specified + +|_bulk_writer_cl_ +|`EACH_QUORUM` +|Write consistency level + +|_write_mode_ +|`INSERT` +|Determines write mode: `INSERT` or `DELETE_PARTITION` + +|_ttl_ +| +|Time-to-live value applied to created records + +|_timestamp_ +|`NOW` +|Mutation timestamp assigned to generated rows, expressed in microseconds + +|_skip_extended_verify_ +|`false` +|Every imported sstable is verified for corruption during import process. This property allows to enable extended +checking of all values in the new sstables + +|_quote_identifiers_ +|`false` +|Option that specifies whether the identifiers (i.e. keyspace, table name, column names) should be quoted to +support mixed case and reserved keyword names for these fields + +|_data_transport_ +|`DIRECT` +a|Specifies data transport mode. Supported implementations: + +* `DIRECT`: Upload of generated sstables directly to Cassandra cluster via Sidecar +* `S3_COMPAT`: Upload of generated sstables to remote S3-compliant storage + +|=== + +=== S3 Upload Properties + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|=== + +=== Other Properties + +[cols="1,1,2"] +|=== +|Property name|Default|Description + +|_number_splits_ +|`-1` +|User defined number of token range splits. 
By default, library will dynamically calculate number of splits based +on Spark properties `spark.default.parallelism`, `spark.executor.cores` and `spark.executor.instances` + +|_sstable_data_size_in_mib_ +|`160` +|Maximum sstable size (in MiB) + +|_digest_ +|`XXHash32` +|Digest algorithm used to compute when uploading sstables for checksum validation. Supported values: `XXHash32`, `MD5` + +|_job_timeout_seconds_ +|`-1` +a|Specifies a timeout in seconds for bulk write jobs. Disabled by default. When configured, job exceeding +the timeout is: + +* successful when the desired consistency level is achieved +* failed otherwise + +|_job_id_ +| +|User-defined identifier for the bulk write job + +|=== \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 5d7698870..86e917c95 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,4 +50,5 @@ include 'cassandra-analytics-cdc-codec' include 'analytics-sidecar-vertx-client-shaded' include 'analytics-sidecar-vertx-client' include 'analytics-sidecar-client' -include 'analytics-sidecar-client-common' \ No newline at end of file +include 'analytics-sidecar-client-common' +include 'docs' \ No newline at end of file From 4bc3e8ccd7a0ea07f9cc5169933f370f6c7de776 Mon Sep 17 00:00:00 2001 From: Lukasz Antoniak Date: Tue, 10 Feb 2026 16:29:59 +0100 Subject: [PATCH 2/5] ninjafix --- docs/src/user.adoc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/src/user.adoc b/docs/src/user.adoc index e7db9ba8c..59f789d70 100644 --- a/docs/src/user.adoc +++ b/docs/src/user.adoc @@ -14,7 +14,7 @@ Sidecar configuration properties. |_sidecar_contact_points_ | |Comma-separated list of Cassandra Sidecar contact points. IP addresses and FQDN domain names are supported, -with an optional port number (e.g. `lcoalhost1,localhost2`, `127.0.0.1,127.0.0.2`, `127.0.0.1:9043,127.0.0.2:9043`) +with an optional port number (e.g. `localhost1,localhost2`, `127.0.0.1,127.0.0.2`, `127.0.0.1:9043,127.0.0.2:9043`) |_sidecar_port_ |`9043` @@ -237,15 +237,25 @@ This section describes configuration properties specific to the bulk writer. |_write_mode_ |`INSERT` -|Determines write mode: `INSERT` or `DELETE_PARTITION` +a|Determines write mode: + +* `INSERT`: Writes new rows to the table. Generated sstables contain the data to be inserted +* `DELETE_PARTITION`: Removes entire partitions from the table. Only partition key columns are required in the input data + |_ttl_ | -|Time-to-live value applied to created records +|Time-to-live value (in seconds) applied to created records. When specified, all inserted rows will expire after +given duration. Only applicable in `INSERT` mode. Example: `86400` for 1 day TTL |_timestamp_ |`NOW` -|Mutation timestamp assigned to generated rows, expressed in microseconds +a|Mutation timestamp assigned to generated rows, expressed in microseconds. 
Options: + +* `NOW`: Uses current system time at write execution +* Custom value: Specify exact timestamp in microseconds (e.g., `1609459200000000` for 2021-01-01 00:00:00 UTC) + +Custom timestamps affect conflict resolution in Cassandra (last-write-wins) |_skip_extended_verify_ |`false` From 2e045740d7b5a2ce65fb43dd8e306ab85650b611 Mon Sep 17 00:00:00 2001 From: Lukasz Antoniak Date: Wed, 11 Feb 2026 13:54:08 +0100 Subject: [PATCH 3/5] Apply review comments --- docs/src/user.adoc | 94 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 22 deletions(-) diff --git a/docs/src/user.adoc b/docs/src/user.adoc index 59f789d70..146a66d5c 100644 --- a/docs/src/user.adoc +++ b/docs/src/user.adoc @@ -4,55 +4,66 @@ This document describes the configuration options available for the bulk reader == Cassandra Sidecar Configuration -Analytics library uses Sidecar to interact with Cassandra cluster. Bulk reader and writer components share common -Sidecar configuration properties. +Cassandra Analytics library uses https://github.com/apache/cassandra-sidecar[Apache Cassandra Sidecar] to interact +with target cluster. Bulk reader and writer components share common Sidecar configuration properties. -[cols="1,1,2"] +[cols="2,1,1,3"] |=== -|Property name|Default|Description +|Property name|Required|Default|Description |_sidecar_contact_points_ +|yes | |Comma-separated list of Cassandra Sidecar contact points. IP addresses and FQDN domain names are supported, with an optional port number (e.g. `localhost1,localhost2`, `127.0.0.1,127.0.0.2`, `127.0.0.1:9043,127.0.0.2:9043`) |_sidecar_port_ +|no |`9043` |Default port on which Cassandra Sidecar listens |_keystore_path_ +|no | |Path to keystore used to establish TLS connection with Cassandra Sidecar |_keystore_base64_encoded_ +|no | |Base64-encoded keystore used to establish TLS connection with Cassandra Sidecar |_keystore_password_ +|no | |Keystore password |_keystore_type_ +|no |`PKCS12` |Keystore type, `PKCS12` or `JKS` |_truststore_path_ +|no | |Path to truststore used to establish TLS connection with Cassandra Sidecar |_truststore_base64_encoded_ +|no | |Base64-encoded truststore used to establish TLS connection with Cassandra Sidecar |_truststore_password_ +|no | |Truststore password |_truststore_type_ +|no |`PKCS12` |Truststore type, `PKCS12` or `JKS` |_cassandra_role_ +|no | |Specific role that Sidecar shall use to authorize the request. For further details consult Sidecar documentation for `cassandra-auth-role` HTTP header @@ -65,23 +76,27 @@ This section describes configuration properties specific to the bulk reader. === Cassandra Sidecar Configuration -[cols="1,1,2"] +[cols="2,1,1,3"] |=== -|Property name|Default|Description +|Property name|Required|Default|Description |_defaultMillisToSleep_ +|no |`500` |Number of milliseconds to wait between retry attempts |_maxMillisToSleep_ +|no |`60000` |Maximum number of milliseconds to sleep between retries |_maxPoolSize_ +|no |`64` |Size of the Vert.x worker thread pool |_timeoutSeconds_ +|no |`600` |Request timeout, expressed in seconds @@ -89,42 +104,54 @@ This section describes configuration properties specific to the bulk reader. 
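The snippet below is a minimal sketch of how the shared Sidecar connection properties and the reader-side client
settings above could be passed to a Spark job. The data source class name, contact points, keyspace and table used
here are illustrative assumptions, not values mandated by this document; check the project README for the exact
data source name shipped with your release.

[source,scala]
----
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("cassandra-bulk-read-example")
  .getOrCreate()

// Assumed data source class name -- verify against the release you are using.
val ordersDf = spark.read
  .format("org.apache.cassandra.spark.sparksql.CassandraDataSource")
  .option("sidecar_contact_points", "127.0.0.1:9043") // shared Sidecar property
  .option("keyspace", "store")                        // illustrative keyspace
  .option("table", "orders")                          // illustrative table
  .option("consistencyLevel", "LOCAL_QUORUM")
  .option("dc", "datacenter1")
  .option("timeoutSeconds", "600")                    // Sidecar client request timeout
  .load()

ordersDf.show(10)
----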
=== Spark Reader Configuration -[cols="1,1,2"] +[cols="2,1,1,3"] |=== -|Property name|Default|Description +|Property name|Required|Default|Description |_keyspace_ +|yes | |Keyspace of a table to read |_table_ +|yes | |Table to be read |_dc_ +|no | |Data center used when `LOCAL_*` consistency level is specified |_consistencyLevel_ +|no |`LOCAL_QUORUM` |Read consistency level |_snapshotName_ +|no |`sbr_\{uuid\}` |Name of a snapshot to use (for data consistency). By default, unique name is always generated |_createSnapshot_ +|no |`true` |Indicates whether a new snapshot should be created prior to performing the read operation |_clearSnapshotStrategy_ +|no |`OnCompletionOrTTL 2d` -|Strategy of removing snapshot once read operation completes. This option is enabled always when _createSnapshot_ -flag is set to `true`. Value of _clearSnapshotStrategy_ must follow the format: `[strategy] [snapshotTTL]`. Supported -strategies: `NoOp`, `OnCompletion`, `OnCompletionOrTTL`, `TTL`. Example configurations: `OnCompletionOrTTL 2d`, -`TTL 2d`, `NoOp`, `OnCompletion`. TTL value has to match pattern: `\d+(d\|h\|m\|s)` +a|Strategy of removing snapshot once read operation completes. This option is enabled always when _createSnapshot_ +flag is set to `true`. Value of _clearSnapshotStrategy_ must follow the format: `[strategy] [snapshotTTL]`. + +Supported strategies: `NoOp`, `OnCompletion`, `OnCompletionOrTTL`, `TTL`. + +TTL value has to match pattern: `\d+(d\|h\|m\|s)` + +Example configurations: `OnCompletionOrTTL 2d`, `TTL 2d`, `NoOp`, `OnCompletion`. |_bigNumberConfig_ +|no | a|Defines the output scale and precision of `decimal` and `varint` columns. Parameter value is a JSON string with the following structure: @@ -132,12 +159,13 @@ with the following structure: [source,json] ---- { - "columnName1" : {"bigDecimalPrecision": 10, "bigDecimalScale": 5}, - "columnName2" : {"bigIntegerPrecision": 10, "bigIntegerScale": 5} + "column_name_1" : {"bigDecimalPrecision": 10, "bigDecimalScale": 5}, + "column_name_2" : {"bigIntegerPrecision": 10, "bigIntegerScale": 5} } ---- |_lastModifiedColumnName_ +|no | |Name of the field to be appended to Spark RDD that represents last modification timestamp of each row @@ -145,19 +173,22 @@ with the following structure: === Other Properties -[cols="1,1,2"] +[cols="2,1,1,3"] |=== -|Property name|Default|Description +|Property name|Required|Default|Description |_defaultParallelism_ +|recommended |`1` |Value of Spark property `spark.default.parallelism` |_numCores_ +|recommended |`1` |Total number of cores used by all Spark executors |_maxBufferSizeBytes_ +|no |`6291456` a|Maximum amount of bytes per sstable file that may be downloaded and buffered in-memory. This parameter is global default and can be overridden per sstable file type. Effective defaults are: @@ -174,6 +205,7 @@ global default and can be overridden per sstable file type. Effective defaults a To override size for `Data.db`, use property `_maxBufferSizeBytes_Data.db_`. |_chunkBufferSizeBytes_ +|no |`4194304` a|Default chunk size (in bytes) that will be requested when fetching next portion of sstable file. This parameter is global default and can be overridden per sstable file type. Effective defaults are: @@ -190,6 +222,7 @@ global default and can be overridden per sstable file type. Effective defaults a To override size for `Data.db`, use property `_chunkBufferSizeBytes_Data.db_`. |_sizing_ +|no |`default` a|Determines how the number of CPU cores is selected during the read operation. 
Supported options:

* `default`: static number of cores defined by the _numCores_ parameter
* `dynamic`: calculates the number of cores dynamically based on table size. Improves cost efficiency when processing
small tables (a few GBs). Consult the JavaDoc of `org.apache.cassandra.spark.data.DynamicSizing` for implementation
details. Relevant configuration properties:
 ** _maxPartitionSize_: maximum Spark partition size (in GiB)

|_quote_identifiers_
|no
|`false`
|When `true`, keyspace, table and column names are quoted

|_sstable_start_timestamp_micros_ and _sstable_end_timestamp_micros_
|no
|
|Define an inclusive time-range filter for sstable selection. Both timestamps are expressed in microseconds

|===

== Bulk Writer

This section describes configuration properties specific to the bulk writer.

=== Spark Writer Configuration

[cols="2,1,1,3"]
|===
|Property name|Required|Default|Description

|_keyspace_
|yes
|
|Keyspace of the table to write to

|_table_
|yes
|
|Table to which rows are written, or from which rows are removed, depending on _write_mode_

|_local_dc_
|no
|
|Data center used when a `LOCAL_*` consistency level is specified

|_bulk_writer_cl_
|no
|`EACH_QUORUM`
|Write consistency level

|_write_mode_
|no
|`INSERT`
a|Determines the write mode:

* `INSERT`: Writes new rows to the table. Generated sstables contain the data to be inserted
* `DELETE_PARTITION`: Removes entire partitions from the table. Only partition key columns are required in the input data

|_ttl_
|no
|
|Time-to-live value (in seconds) applied to created records. When specified, all inserted rows expire after the
given duration. Only applicable in `INSERT` mode. Example: `86400` for a 1-day TTL

|_timestamp_
|no
|`NOW`
a|Mutation timestamp assigned to generated rows, expressed in microseconds. Options:

* `NOW`: Uses the current system time at write execution
* Custom value: Specify an exact timestamp in microseconds (e.g., `1609459200000000` for 2021-01-01 00:00:00 UTC)

Custom timestamps affect conflict resolution in Cassandra (last-write-wins)

|_skip_extended_verify_
|no
|`false`
|Every imported sstable is verified for corruption during the import process. Setting this property to `true` skips
the extended verification of all values in the new sstables

|_quote_identifiers_
|no
|`false`
|Specifies whether identifiers (keyspace, table name, column names) should be quoted to support mixed-case and
reserved-keyword names for these fields

|_data_transport_
|no
|`DIRECT`
a|Specifies data transport mode. Supported implementations:

* `DIRECT`: Upload of generated sstables directly to the Cassandra cluster via Sidecar
* `S3_COMPAT`: Upload of generated sstables to remote S3-compliant storage

|===

=== S3 Upload Properties

[cols="2,1,1,3"]
|===
|Property name|Required|Default|Description

|===

=== Other Properties

[cols="2,1,1,3"]
|===
|Property name|Required|Default|Description

|_number_splits_
|no
|`-1`
|User-defined number of token range splits. By default, the library dynamically calculates the number of splits based
on the Spark properties `spark.default.parallelism`, `spark.executor.cores` and `spark.executor.instances`

|_sstable_data_size_in_mib_
|no
|`160`
|Maximum sstable size (in MiB)

|_digest_
|no
|`XXHash32`
|Digest algorithm used to compute checksums when uploading sstables for validation. Supported values: `XXHash32`, `MD5`

|_job_timeout_seconds_
|no
|`-1`
a|Specifies a timeout in seconds for bulk write jobs. Disabled by default.
When configured, a job that exceeds the timeout is marked:

* successful when the desired consistency level is achieved
* failed otherwise

|_job_id_
|no
|
|User-defined identifier for the bulk write job

From 92611448baf057fd73e9c65dddbac0efec916fe6 Mon Sep 17 00:00:00 2001
From: Lukasz Antoniak
Date: Wed, 11 Feb 2026 15:01:25 +0100
Subject: [PATCH 4/5] Added Multi-cluster Upload Properties section

---
 docs/src/user.adoc | 88 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 3 deletions(-)

diff --git a/docs/src/user.adoc b/docs/src/user.adoc
index 146a66d5c..d6d47412d 100644
--- a/docs/src/user.adoc
+++ b/docs/src/user.adoc
@@ -316,17 +316,99 @@ support mixed case and reserved keyword names for these fields
|`DIRECT`
a|Specifies data transport mode. Supported implementations:

* `DIRECT`: Uploads generated sstables directly to the Cassandra cluster via Sidecar
* `S3_COMPAT`: Uploads generated sstables to multiple remote Cassandra clusters with intermediate S3 storage
(see <<_multi_cluster_upload_properties,Multi-cluster Upload Properties>>)

|===

=== Multi-cluster Upload Properties

Cassandra Analytics can import the same set of generated sstables into multiple Cassandra clusters running in remote
locations. The library uploads the generated sstables to a common S3 storage; the S3 service replicates the data
across regions, and the import into each Cassandra cluster is then triggered through its local Sidecar instances.

[cols="2,1,1,3"]
|===
|Property name|Required|Default|Description

|_coordinated_write_config_
|yes
|
a|
Configuration of the coordinated write operation in JSON format. Lists all remote Cassandra clusters to write to,
together with the local Sidecar instances of each cluster.

Example:

[source,json]
----
{
  "cluster1": {
    "sidecarContactPoints": [
      "instance-1:9999",
      "instance-2:9999",
      "instance-3:9999"
    ],
    "localDc": "dc1",
    "writeToLocalDcOnly": false
  },
  "cluster2": {
    "sidecarContactPoints": [
      "instance-4:8888"
    ],
    "localDc": "dc2",
    "writeToLocalDcOnly": false
  }
}
----

|_data_transport_extension_class_
|yes
|
|Fully qualified name of a class that implements the `StorageTransportExtension` interface.
Consult its JavaDoc for implementation details

|_storage_client_endpoint_override_
|no
|
|Overrides the default S3 endpoint used by the storage client

|_storage_client_https_proxy_
|no
|
|HTTPS proxy used by the S3 client

|_max_size_per_sstable_bundle_in_bytes_s3_transport_
|no
|`5368709120`
|Limits the maximum size (in bytes) of a single sstable bundle uploaded to S3

|_storage_client_max_chunk_size_in_bytes_
|no
|`104857600`
|Maximum chunk size (in bytes) used for multipart S3 uploads

|_storage_client_concurrency_
|no
|`cores * 2`
|Maximum parallelism of the thread pool used by the S3 client

|_storage_client_thread_keep_alive_seconds_
|no
|`60`
|Keep-alive time (in seconds) for idle threads in the S3 client thread pool

|_storage_client_nio_http_client_connection_acquisition_timeout_seconds_
|no
|`300`
|Connection acquisition timeout (in seconds) of the NIO HTTP component employed by the S3 client

|_storage_client_nio_http_client_max_concurrency_
|no
|`50`
|Maximum concurrency of the NIO HTTP component employed by the S3 client

|===

From 2c1a27dde8c8baf41ef53f8b5614d3e7742dd73e Mon Sep 17 00:00:00 2001
From: Lukasz Antoniak
Date: Wed, 11 Feb 2026 15:06:07 +0100
Subject: [PATCH 5/5] ninjafix

---
 docs/src/user.adoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/user.adoc b/docs/src/user.adoc
index d6d47412d..820b6431a 100644
--- a/docs/src/user.adoc
+++ b/docs/src/user.adoc
@@ -391,7 +391,7 @@ implementation details
|_storage_client_concurrency_
|no
|`CPU cores * 2`
|Maximum parallelism of the thread pool used by the S3 client

|_storage_client_thread_keep_alive_seconds_
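As a closing illustration, the sketch below shows how the bulk writer options described in this document could be
wired into a Spark job. The sink class name, contact points, keyspace, table and job identifier are assumptions made
for the example only; check the project README for the exact sink name shipped with your release.

[source,scala]
----
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("cassandra-bulk-write-example")
  .getOrCreate()

// Illustrative input data; the column names must match the target table schema.
val ordersDf = spark.read.parquet("/tmp/orders-to-load")

// Assumed sink class name -- verify against the release you are using.
ordersDf.write
  .format("org.apache.cassandra.spark.sparksql.CassandraDataSink")
  .option("sidecar_contact_points", "127.0.0.1:9043") // shared Sidecar property
  .option("keyspace", "store")                        // illustrative keyspace
  .option("table", "orders")                          // illustrative table
  .option("bulk_writer_cl", "LOCAL_QUORUM")
  .option("write_mode", "INSERT")
  .option("job_id", "orders-backfill-2026-02-11")     // optional user-defined job identifier
  .mode("append")
  .save()
----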