From 45e26f66aee88357894decc72313170600d31d6a Mon Sep 17 00:00:00 2001
From: arvindksi274-ksolves <arvind.kandpal@ksolves.com>
Date: Thu, 19 Feb 2026 16:36:20 +0530
Subject: [PATCH] CASSANDRA-19985: Enhance CQLSH to support machine-readable
 output formatting (csv, json)

---
 conf/cqlshrc.sample                           |   3 +
 .../cassandra/pages/managing/tools/cqlsh.adoc |   2 +
 pylib/cqlshlib/cqlshmain.py                   |  68 +++++---
 pylib/cqlshlib/displaying.py                  |  98 +++++++++++
 pylib/cqlshlib/test/test_cqlsh_output.py      | 152 ++++++++++++++++++
 5 files changed, 300 insertions(+), 23 deletions(-)

diff --git a/conf/cqlshrc.sample b/conf/cqlshrc.sample
index 3c957a79a2a5..21138d5b63d6 100644
--- a/conf/cqlshrc.sample
+++ b/conf/cqlshrc.sample
@@ -37,6 +37,9 @@
 ; version = None
 
 [ui]
+;; The format of the output. Valid values are tabular, csv, and json.
+; mode = tabular
+
 ;; Whether or not to display query results with colors
 ; color = on
 
diff --git a/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc b/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc
index 5918d2f3ffae..92d3fe984c28 100644
--- a/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc
+++ b/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc
@@ -98,6 +98,8 @@ Options:
   Collect coverage data
 `--encoding=ENCODING`::
   Specify a non-default encoding for output. (Default: utf-8)
+`--mode=MODE`::
+  Specify the output display format. Valid values are `tabular` (default), `csv`, and `json`.
 `--cqlshrc=CQLSHRC`::
   Specify an alternative cqlshrc file location.
 `--credentials=CREDENTIALS`::
diff --git a/pylib/cqlshlib/cqlshmain.py b/pylib/cqlshlib/cqlshmain.py
index c13256dd4013..4b724f9f3ea7 100755
--- a/pylib/cqlshlib/cqlshmain.py
+++ b/pylib/cqlshlib/cqlshmain.py
@@ -46,7 +46,8 @@
 from cqlshlib import cql3handling, pylexotron, sslhandling, cqlshhandling, authproviderhandling
 from cqlshlib.copyutil import ExportTask, ImportTask
 from cqlshlib.displaying import (ANSI_RESET, BLUE, COLUMN_NAME_COLORS, CYAN,
-                                 RED, WHITE, FormattedValue, colorme)
+                                 RED, WHITE, FormattedValue, colorme,
+                                 TablePrinter, TabularTablePrinter, CsvTablePrinter, JsonTablePrinter)
 from cqlshlib.formatting import (DEFAULT_DATE_FORMAT, DEFAULT_NANOTIME_FORMAT,
                                  DEFAULT_TIMESTAMP_FORMAT, CqlType, DateTimeFormat,
                                  format_by_type)
@@ -284,13 +285,15 @@ def __init__(self, hostname, port, config_file, color=False,
                  connect_timeout=DEFAULT_CONNECT_TIMEOUT_SECONDS,
                  is_subshell=False,
                  auth_provider=None,
-                 disable_history=False):
+                 disable_history=False,
+                 mode='tabular'):
         cmd.Cmd.__init__(self, completekey=completekey)
         self.hostname = hostname
         self.port = port
         self.auth_provider = auth_provider
         self.username = username
         self.config_file = config_file
+        self.mode = mode.lower()
 
         if isinstance(auth_provider, PlainTextAuthProvider):
             self.username = auth_provider.username
@@ -329,6 +332,8 @@ def __init__(self, hostname, port, config_file, color=False,
         self.browser = browser
         self.docspath = docspath
         self.color = color
+        if self.mode in ('csv', 'json'):
+            self.color = False
 
         self.display_nanotime_format = display_nanotime_format
         self.display_timestamp_format = display_timestamp_format
@@ -946,42 +951,55 @@ def perform_simple_statement(self, statement):
             self.print_result(result, self.get_table_meta('system_auth', 'generated_values'))
         elif result:
             # CAS INSERT/UPDATE
-            self.writeresult("")
-            self.print_static_result(result, self.parse_for_update_meta(statement.query_string), with_header=True, tty=self.tty)
+            if self.mode not in ('csv', 'json'):
+                self.writeresult("")
+            cas_printer = TablePrinter.factory(self.mode, self)
+            self.print_static_result(result, self.parse_for_update_meta(statement.query_string),
+                                     with_header=True, tty=self.tty,
+                                     printer=cas_printer)
+            cas_printer.finish()
         if self.elapsed_enabled:
-            self.writeresult("(%dms elapsed)" % elapsed)
+            elapsed_msg = "(%dms elapsed)" % elapsed
+            if self.mode in ('csv', 'json'):
+                self.printerr(elapsed_msg)
+            else:
+                self.writeresult(elapsed_msg)
         self.flush_output()
         return True, future
 
     def print_result(self, result, table_meta):
         self.decoding_errors = []
 
-        self.writeresult("")
+        if self.mode not in ('csv', 'json'):
+            self.writeresult("")
+        printer = TablePrinter.factory(self.mode, self)
 
-        def print_all(result, table_meta, tty):
-            # Return the number of rows in total
+        def print_all(result, table_meta, tty, printer):
+            machine_mode = self.mode in ('csv', 'json')
+            effective_tty = tty and not machine_mode
             num_rows = 0
             is_first = True
             while True:
-                # Always print for the first page even it is empty
                 if result.current_rows or is_first:
-                    with_header = is_first or tty
-                    self.print_static_result(result, table_meta, with_header, tty, num_rows)
+                    with_header = is_first or effective_tty
+                    self.print_static_result(result, table_meta, with_header, effective_tty,
+                                             num_rows, printer)
                     num_rows += len(result.current_rows)
                 if result.has_more_pages:
-                    if self.shunted_query_out is None and tty:
-                        # Only pause when not capturing.
+                    if self.shunted_query_out is None and effective_tty:
                         input("---MORE---")
                     result.fetch_next_page()
                 else:
-                    if not tty:
+                    if not effective_tty and not machine_mode:
                         self.writeresult("")
                     break
                 is_first = False
             return num_rows
 
-        num_rows = print_all(result, table_meta, self.tty)
-        self.writeresult("(%d rows)" % num_rows)
+        num_rows = print_all(result, table_meta, self.tty, printer)
+        printer.finish()
+        if self.mode not in ('csv', 'json'):
+            self.writeresult("(%d rows)" % num_rows)
 
         if self.decoding_errors:
             for err in self.decoding_errors[:2]:
@@ -990,15 +1008,16 @@ def print_all(result, table_meta, tty):
                 self.writeresult('%d more decoding errors suppressed.'
                                  % (len(self.decoding_errors) - 2), color=RED)
 
-    def print_static_result(self, result, table_meta, with_header, tty, row_count_offset=0):
+    def print_static_result(self, result, table_meta, with_header, tty, row_count_offset=0, printer=None):
         if not result.column_names and not table_meta:
             return
 
         column_names = result.column_names or list(table_meta.columns.keys())
         formatted_names = [self.myformat_colname(name, table_meta) for name in column_names]
+
         if not result.current_rows:
-            # print header only
-            self.print_formatted_result(formatted_names, None, with_header=True, tty=tty)
+            if with_header:
+                printer.print_header(formatted_names)
             return
 
         cql_types = []
@@ -1009,10 +1028,9 @@ def print_static_result(self, result, table_meta, with_header, tty, row_count_of
 
         formatted_values = [list(map(self.myformat_value, [row[c] for c in column_names], cql_types)) for row in result.current_rows]
 
-        if self.expand_enabled:
-            self.print_formatted_result_vertically(formatted_names, formatted_values, row_count_offset)
-        else:
-            self.print_formatted_result(formatted_names, formatted_values, with_header, tty)
+        if with_header:
+            printer.print_header(formatted_names)
+        printer.print_rows(formatted_names, formatted_values)
 
     def print_formatted_result(self, formatted_names, formatted_values, with_header, tty):
         # determine column widths
@@ -2026,6 +2044,7 @@ def read_options(cmdlineargs, parser, config_file, cql_dir, environment=os.envir
     argvalues.completekey = option_with_default(configs.get, 'ui', 'completekey',
                                                 DEFAULT_COMPLETEKEY)
     argvalues.color = option_with_default(configs.getboolean, 'ui', 'color')
+    argvalues.mode = option_with_default(configs.get, 'ui', 'mode', 'tabular')
     argvalues.time_format = raw_option_with_default(configs, 'ui', 'time_format',
                                                     DEFAULT_TIMESTAMP_FORMAT)
     argvalues.nanotime_format = raw_option_with_default(configs, 'ui', 'nanotime_format',
@@ -2230,6 +2249,8 @@ def main(cmdline, pkgpath):
                         help='Force tty mode (command prompt).')
     parser.add_argument('--disable-history', default=False, action='store_true',
                         help='Disable saving of history (existing history will still be loaded)')
+    parser.add_argument('--mode', choices=['tabular', 'csv', 'json'],
+                        help='Specify the output format (tabular, csv, json). Default is tabular.')
 
     # This is a hidden option to suppress the warning when the -p/--password command line option is used.
     # Power users may use this option if they know no other people has access to the system where cqlsh is run or don't care about security.
@@ -2357,6 +2378,7 @@ def main(cmdline, pkgpath):
                       display_double_precision=options.double_precision,
                       display_timezone=timezone,
                       max_trace_wait=options.max_trace_wait,
+                      mode=options.mode,
                       ssl=options.ssl,
                       single_statement=options.execute,
                       request_timeout=options.request_timeout,
diff --git a/pylib/cqlshlib/displaying.py b/pylib/cqlshlib/displaying.py
index 424d6334b696..2377dec3bf53 100644
--- a/pylib/cqlshlib/displaying.py
+++ b/pylib/cqlshlib/displaying.py
@@ -126,3 +126,147 @@ def color_ljust(self, width, fill=' '):
                                  )
 
 NO_COLOR_MAP = dict()
+
+
+class TablePrinter:
+    """Strategy interface for rendering one result set in a given output mode.
+
+    The shell creates a printer per result set, calls print_header() and
+    print_rows() while paging through it, and calls finish() once at the end.
+    """
+
+    def print_header(self, formatted_names):
+        """Receive the formatted column names for the rows that follow."""
+        raise NotImplementedError
+
+    def print_rows(self, formatted_names, formatted_values):
+        """Render one page of formatted rows (a list of per-row value lists)."""
+        raise NotImplementedError
+
+    def finish(self):
+        """Emit whatever is still pending after the last page; no-op by default."""
+        pass
+
+    @staticmethod
+    def factory(format_type, shell):
+        """Return the printer for format_type ('tabular', 'csv' or 'json').
+
+        Any other value falls back to the tabular printer; the [ui] mode
+        setting in cqlshrc is not restricted to the three valid names the way
+        the --mode command line option is.
+        """
+        machine_printers = {'csv': CsvTablePrinter, 'json': JsonTablePrinter}
+        printer_cls = machine_printers.get(format_type.lower())
+        if printer_cls is None:
+            # The tabular printer is the only one that also needs the tty flag.
+            return TabularTablePrinter(shell, shell.tty)
+        return printer_cls(shell)
+
+
+class TabularTablePrinter(TablePrinter):
+    """Human-readable output, delegating to the shell's own table renderers."""
+
+    def __init__(self, shell, tty, row_count_offset=0):
+        self._shell = shell
+        self._tty = tty
+        self._row_count_offset = row_count_offset
+        self._pending_header = None
+
+    def print_header(self, formatted_names):
+        # Store only: column widths depend on the data values, so the header
+        # is rendered together with the rows in print_rows(). A header that is
+        # never followed by rows (empty result) is rendered by finish().
+        self._pending_header = formatted_names
+
+    def print_rows(self, formatted_names, formatted_values):
+        # A header accompanies this page only if print_header() announced one.
+        with_header = self._pending_header is not None
+        self._pending_header = None
+        if self._shell.expand_enabled:
+            self._shell.print_formatted_result_vertically(
+                formatted_names, formatted_values, self._row_count_offset)
+        else:
+            self._shell.print_formatted_result(
+                formatted_names, formatted_values, with_header, self._tty)
+        if formatted_values:
+            # Advance the offset handed to the vertical renderer on the next page.
+            self._row_count_offset += len(formatted_values)
+
+    def finish(self):
+        if self._pending_header is not None:
+            self._shell.print_formatted_result(
+                self._pending_header, None, with_header=True, tty=self._tty)
+            self._pending_header = None
+
+
+class CsvTablePrinter(TablePrinter):
+    """CSV output: one header row, then one record per result row."""
+
+    def __init__(self, shell):
+        import csv
+        self._writer = csv.writer(shell.query_out)
+        self._header_written = False
+        self._colnames = None
+
+    def _write_header_once(self):
+        if not self._header_written:
+            self._writer.writerow(self._colnames)
+            self._header_written = True
+
+    def print_header(self, formatted_names):
+        # Deferred: the header row is written with the first page or by finish().
+        self._colnames = [n.strval for n in formatted_names]
+
+    def print_rows(self, formatted_names, formatted_values):
+        if self._colnames is None:
+            # No print_header() call preceded these rows; csv.writer would
+            # raise on writerow(None), so take the names passed with the rows.
+            self._colnames = [n.strval for n in formatted_names]
+        self._write_header_once()
+        for row in formatted_values or ():
+            self._writer.writerow([col.strval for col in row])
+
+    def finish(self):
+        # An empty result still gets its header row.
+        if self._colnames is not None:
+            self._write_header_once()
+
+
+class JsonTablePrinter(TablePrinter):
+    """JSON output: an array of objects mapping column name to display string."""
+
+    def __init__(self, shell):
+        self._shell = shell
+        self._colnames = None
+        self._first_row = True
+        self._array_open = False
+
+    def print_header(self, formatted_names):
+        self._colnames = [n.strval for n in formatted_names]
+        # Open the array only once, however many pages announce a header.
+        if not self._array_open:
+            self._shell.writeresult('[')
+            self._array_open = True
+
+    def print_rows(self, formatted_names, formatted_values):
+        import json
+        if formatted_values is None:
+            return
+        if not self._array_open:
+            # Rows without a preceding header: open the array from their names.
+            self.print_header(formatted_names)
+        for row in formatted_values:
+            row_dict = {self._colnames[i]: col.strval for i, col in enumerate(row)}
+            serialized = json.dumps(row_dict)
+            if self._first_row:
+                self._shell.writeresult('  ' + serialized, newline=False)
+                self._first_row = False
+            else:
+                self._shell.writeresult(',\n  ' + serialized, newline=False)
+
+    def finish(self):
+        # Close only an array that was opened; a result with no columns never
+        # reaches print_header() and must not produce a stray ']'.
+        if self._array_open:
+            self._shell.writeresult('\n]')
+            self._array_open = False
diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py
index c32690b42496..0c89d06f2736 100644
--- a/pylib/cqlshlib/test/test_cqlsh_output.py
+++ b/pylib/cqlshlib/test/test_cqlsh_output.py
@@ -1017,3 +1017,155 @@ def test_quoted_output_text_in_udts(self):
                                         tty=False, input=query)
         self.assertEqual(0, result)
         self.assertEqual(output.splitlines()[3].strip(), "{data: 'I''m newb'}")
+
+    def test_csv_output(self):
+        """CSV mode prints a header row and keeps every value in a single field."""
+        import csv
+        import io
+        query = "SELECT a, b FROM twenty_rows_table WHERE a IN ('1', '2');"
+        output, result = cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env,
+                                        tty=False, input=query + '\n')
+        self.assertEqual(0, result)
+
+        lines = [line.strip() for line in output.strip().splitlines()]
+        self.assertEqual(lines[0], 'a,b')
+        self.assertIn('1,1', lines)
+        self.assertIn('2,2', lines)
+        # Collection values contain commas and must stay inside one quoted field.
+        query2 = "SELECT num, setcol FROM has_all_types WHERE num = 0;"
+        output2, result2 = cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env,
+                                          tty=False, input=query2 + '\n')
+        self.assertEqual(0, result2)
+        reader = csv.reader(io.StringIO(output2.strip()))
+        rows = list(reader)
+        self.assertEqual(rows[0], ['num', 'setcol'])
+        for row in rows[1:]:
+            self.assertEqual(len(row), 2,
+                             msg='CSV row has wrong field count (commas inside setcol not quoted?): %r' % row)
+        # Large integers must not pick up thousands separators.
+        query3 = "SELECT num, varintcol FROM has_all_types WHERE num = 0;"
+        output3, result3 = cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env,
+                                          tty=False, input=query3 + '\n')
+        self.assertEqual(0, result3)
+        reader3 = csv.reader(io.StringIO(output3.strip()))
+        rows3 = list(reader3)
+        varint_val = rows3[1][1]
+        self.assertNotIn(',', varint_val,
+                         msg='Large varint should not contain thousands separator in CSV: %r' % varint_val)
+        # A comma inside a text value must be quoted, not split into two fields.
+        ks = get_keyspace()
+        setup_q = ("INSERT INTO %s.has_all_types (num, textcol) VALUES (9998, 'Smith, Joe');" % ks)
+        cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env,
+                       tty=False, input=setup_q + '\n')
+        try:
+            q4 = "SELECT num, textcol FROM %s.has_all_types WHERE num = 9998;" % ks
+            output4, result4 = cqlsh_testcall(args=('--mode', 'csv'), prompt=None,
+                                              env=self.default_env, tty=False, input=q4 + '\n')
+            self.assertEqual(0, result4)
+            reader4 = csv.reader(io.StringIO(output4.strip()))
+            rows4 = list(reader4)
+            self.assertEqual(rows4[0], ['num', 'textcol'])
+            for row in rows4[1:]:
+                self.assertEqual(len(row), 2,
+                                 msg='Comma inside textcol must be quoted in CSV: %r' % row)
+            data_rows4 = [r for r in rows4[1:] if r[0] == '9998']
+            self.assertEqual(len(data_rows4), 1)
+            self.assertEqual(data_rows4[0][1], 'Smith, Joe')
+        finally:
+            cleanup_q = "DELETE FROM %s.has_all_types WHERE num = 9998;" % ks
+            cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env,
+                           tty=False, input=cleanup_q + '\n')
+
+    def test_json_output(self):
+        """JSON mode prints a single parseable array with every value as a string."""
+        query = "SELECT a, b FROM twenty_rows_table WHERE a IN ('1', '2');"
+
+        output, result = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env,
+                                        tty=False, input=query + '\n')
+        self.assertEqual(0, result)
+
+        import json
+        try:
+            parsed_json = json.loads(output)
+            self.assertEqual(len(parsed_json), 2)
+
+            results = {(item['a'], item['b']) for item in parsed_json}
+            self.assertIn(('1', '1'), results)
+            self.assertIn(('2', '2'), results)
+        except ValueError as e:
+            self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output))
+        # Collections are emitted as their CQL display strings, not JSON arrays.
+        query2 = "SELECT num, setcol, listcol, mapcol FROM has_all_types WHERE num = 0;"
+        output2, result2 = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env,
+                                          tty=False, input=query2 + '\n')
+        self.assertEqual(0, result2)
+        try:
+            rows2 = json.loads(output2)
+            self.assertEqual(len(rows2), 1)
+            row = rows2[0]
+            self.assertIsInstance(row['setcol'], str,
+                                  msg='setcol should be a JSON string, got: %r' % type(row['setcol']))
+            self.assertIsInstance(row['listcol'], str,
+                                  msg='listcol should be a JSON string, got: %r' % type(row['listcol']))
+            self.assertIsInstance(row['mapcol'], str,
+                                  msg='mapcol should be a JSON string, got: %r' % type(row['mapcol']))
+        except ValueError as e:
+            self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output2))
+        # Large integers are emitted as undecorated digit strings.
+        query3 = "SELECT num, varintcol FROM has_all_types WHERE num = 0;"
+        output3, result3 = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env,
+                                          tty=False, input=query3 + '\n')
+        self.assertEqual(0, result3)
+        try:
+            rows3 = json.loads(output3)
+            self.assertEqual(rows3[0]['varintcol'], '10000000000000000000000000')
+        except ValueError as e:
+            self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output3))
+        # UUID, decimal and timestamp values are emitted as strings.
+        q4 = "SELECT num, uuidcol, decimalcol, timestampcol FROM has_all_types WHERE num = 0;"
+        output4, result4 = cqlsh_testcall(args=('--mode', 'json'), prompt=None,
+                                          env=self.default_env, tty=False, input=q4 + '\n')
+        self.assertEqual(0, result4)
+        try:
+            rows4 = json.loads(output4)
+            self.assertEqual(len(rows4), 1)
+            row4 = rows4[0]
+            # uuidcol must be rendered as a canonical 8-4-4-4-12 hex string.
+            uuid_val = row4.get('uuidcol', '')
+            self.assertRegex(uuid_val,
+                             r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$',
+                             msg='uuidcol must be a UUID-formatted string: %r' % uuid_val)
+            from decimal import Decimal as PyDecimal, InvalidOperation
+            decimal_val = row4.get('decimalcol', '')
+            self.assertIsInstance(decimal_val, str, msg='decimalcol must be a string in JSON')
+            try:
+                PyDecimal(decimal_val)
+            except InvalidOperation:
+                self.fail('decimalcol value %r is not a valid decimal string' % decimal_val)
+            ts_val = row4.get('timestampcol', '')
+            self.assertIsInstance(ts_val, str)
+            self.assertTrue(len(ts_val) > 0, msg='timestampcol must be a non-empty string')
+        except ValueError as e:
+            self.fail("UUID/Decimal/Timestamp JSON output invalid: %s\nOutput: %s" % (e, output4))
+        # Quotes and backslashes in text must survive JSON escaping.
+        ks = get_keyspace()
+        setup_q2 = r"INSERT INTO " + ks + r".has_all_types (num, textcol) VALUES (9999, 'say \"hello\" \\ world');"
+        cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env,
+                       tty=False, input=setup_q2 + '\n')
+        try:
+            q5 = "SELECT num, textcol FROM %s.has_all_types WHERE num = 9999;" % ks
+            output5, result5 = cqlsh_testcall(args=('--mode', 'json'), prompt=None,
+                                              env=self.default_env, tty=False, input=q5 + '\n')
+            self.assertEqual(0, result5)
+            try:
+                rows5 = json.loads(output5)
+                self.assertEqual(len(rows5), 1)
+                text_val = rows5[0]['textcol']
+                self.assertIsInstance(text_val, str)
+                self.assertIn('"', text_val, msg='Double-quote must survive JSON round-trip')
+            except ValueError as e:
+                self.fail("Special-char JSON output is not valid JSON: %s\nOutput: %s" % (e, output5))
+        finally:
+            cleanup_q2 = "DELETE FROM %s.has_all_types WHERE num = 9999;" % ks
+            cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env,
+                           tty=False, input=cleanup_q2 + '\n')
\ No newline at end of file