From 45e26f66aee88357894decc72313170600d31d6a Mon Sep 17 00:00:00 2001 From: arvindksi274-ksolves Date: Thu, 19 Feb 2026 16:36:20 +0530 Subject: [PATCH] CASSANDRA-19985: Enhance CQLSH to support machine-readable output formatting (csv, json) --- conf/cqlshrc.sample | 3 + .../cassandra/pages/managing/tools/cqlsh.adoc | 2 + pylib/cqlshlib/cqlshmain.py | 68 +++++--- pylib/cqlshlib/displaying.py | 98 +++++++++++ pylib/cqlshlib/test/test_cqlsh_output.py | 152 ++++++++++++++++++ 5 files changed, 300 insertions(+), 23 deletions(-) diff --git a/conf/cqlshrc.sample b/conf/cqlshrc.sample index 3c957a79a2a5..21138d5b63d6 100644 --- a/conf/cqlshrc.sample +++ b/conf/cqlshrc.sample @@ -37,6 +37,9 @@ ; version = None [ui] +;; The format of the output. Valid values are tabular, csv, and json. +; mode = tabular + ;; Whether or not to display query results with colors ; color = on diff --git a/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc b/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc index 5918d2f3ffae..92d3fe984c28 100644 --- a/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc +++ b/doc/modules/cassandra/pages/managing/tools/cqlsh.adoc @@ -98,6 +98,8 @@ Options: Collect coverage data `--encoding=ENCODING`:: Specify a non-default encoding for output. (Default: utf-8) +`--mode=MODE`:: + Specify the output display format. Valid values are `tabular` (default), `csv`, and `json`. `--cqlshrc=CQLSHRC`:: Specify an alternative cqlshrc file location. 
`--credentials=CREDENTIALS`:: diff --git a/pylib/cqlshlib/cqlshmain.py b/pylib/cqlshlib/cqlshmain.py index c13256dd4013..4b724f9f3ea7 100755 --- a/pylib/cqlshlib/cqlshmain.py +++ b/pylib/cqlshlib/cqlshmain.py @@ -46,7 +46,8 @@ from cqlshlib import cql3handling, pylexotron, sslhandling, cqlshhandling, authproviderhandling from cqlshlib.copyutil import ExportTask, ImportTask from cqlshlib.displaying import (ANSI_RESET, BLUE, COLUMN_NAME_COLORS, CYAN, - RED, WHITE, FormattedValue, colorme) + RED, WHITE, FormattedValue, colorme, + TablePrinter, TabularTablePrinter, CsvTablePrinter, JsonTablePrinter) from cqlshlib.formatting import (DEFAULT_DATE_FORMAT, DEFAULT_NANOTIME_FORMAT, DEFAULT_TIMESTAMP_FORMAT, CqlType, DateTimeFormat, format_by_type) @@ -284,13 +285,15 @@ def __init__(self, hostname, port, config_file, color=False, connect_timeout=DEFAULT_CONNECT_TIMEOUT_SECONDS, is_subshell=False, auth_provider=None, - disable_history=False): + disable_history=False, + mode='tabular'): cmd.Cmd.__init__(self, completekey=completekey) self.hostname = hostname self.port = port self.auth_provider = auth_provider self.username = username self.config_file = config_file + self.mode = mode.lower() if isinstance(auth_provider, PlainTextAuthProvider): self.username = auth_provider.username @@ -329,6 +332,8 @@ def __init__(self, hostname, port, config_file, color=False, self.browser = browser self.docspath = docspath self.color = color + if self.mode in ('csv', 'json'): + self.color = False self.display_nanotime_format = display_nanotime_format self.display_timestamp_format = display_timestamp_format @@ -946,42 +951,55 @@ def perform_simple_statement(self, statement): self.print_result(result, self.get_table_meta('system_auth', 'generated_values')) elif result: # CAS INSERT/UPDATE - self.writeresult("") - self.print_static_result(result, self.parse_for_update_meta(statement.query_string), with_header=True, tty=self.tty) + if self.mode not in ('csv', 'json'): + self.writeresult("") + 
cas_printer = TablePrinter.factory(self.mode, self) + self.print_static_result(result, self.parse_for_update_meta(statement.query_string), + with_header=True, tty=self.tty, + printer=cas_printer) + cas_printer.finish() if self.elapsed_enabled: - self.writeresult("(%dms elapsed)" % elapsed) + elapsed_msg = "(%dms elapsed)" % elapsed + if self.mode in ('csv', 'json'): + self.printerr(elapsed_msg) + else: + self.writeresult(elapsed_msg) self.flush_output() return True, future def print_result(self, result, table_meta): self.decoding_errors = [] - self.writeresult("") + if self.mode not in ('csv', 'json'): + self.writeresult("") + printer = TablePrinter.factory(self.mode, self) - def print_all(result, table_meta, tty): - # Return the number of rows in total + def print_all(result, table_meta, tty, printer): + machine_mode = self.mode in ('csv', 'json') + effective_tty = tty and not machine_mode num_rows = 0 is_first = True while True: - # Always print for the first page even it is empty if result.current_rows or is_first: - with_header = is_first or tty - self.print_static_result(result, table_meta, with_header, tty, num_rows) + with_header = is_first or effective_tty + self.print_static_result(result, table_meta, with_header, effective_tty, + num_rows, printer) num_rows += len(result.current_rows) if result.has_more_pages: - if self.shunted_query_out is None and tty: - # Only pause when not capturing. 
+ if self.shunted_query_out is None and effective_tty: input("---MORE---") result.fetch_next_page() else: - if not tty: + if not effective_tty and not machine_mode: self.writeresult("") break is_first = False return num_rows - num_rows = print_all(result, table_meta, self.tty) - self.writeresult("(%d rows)" % num_rows) + num_rows = print_all(result, table_meta, self.tty, printer) + printer.finish() + if self.mode not in ('csv', 'json'): + self.writeresult("(%d rows)" % num_rows) if self.decoding_errors: for err in self.decoding_errors[:2]: @@ -990,15 +1008,16 @@ def print_all(result, table_meta, tty): self.writeresult('%d more decoding errors suppressed.' % (len(self.decoding_errors) - 2), color=RED) - def print_static_result(self, result, table_meta, with_header, tty, row_count_offset=0): + def print_static_result(self, result, table_meta, with_header, tty, row_count_offset=0, printer=None): if not result.column_names and not table_meta: return column_names = result.column_names or list(table_meta.columns.keys()) formatted_names = [self.myformat_colname(name, table_meta) for name in column_names] + if not result.current_rows: - # print header only - self.print_formatted_result(formatted_names, None, with_header=True, tty=tty) + if with_header: + printer.print_header(formatted_names) return cql_types = [] @@ -1009,10 +1028,9 @@ def print_static_result(self, result, table_meta, with_header, tty, row_count_of formatted_values = [list(map(self.myformat_value, [row[c] for c in column_names], cql_types)) for row in result.current_rows] - if self.expand_enabled: - self.print_formatted_result_vertically(formatted_names, formatted_values, row_count_offset) - else: - self.print_formatted_result(formatted_names, formatted_values, with_header, tty) + if with_header: + printer.print_header(formatted_names) + printer.print_rows(formatted_names, formatted_values) def print_formatted_result(self, formatted_names, formatted_values, with_header, tty): # determine column widths @@ 
-2026,6 +2044,7 @@ def read_options(cmdlineargs, parser, config_file, cql_dir, environment=os.envir argvalues.completekey = option_with_default(configs.get, 'ui', 'completekey', DEFAULT_COMPLETEKEY) argvalues.color = option_with_default(configs.getboolean, 'ui', 'color') + argvalues.mode = option_with_default(configs.get, 'ui', 'mode', 'tabular') argvalues.time_format = raw_option_with_default(configs, 'ui', 'time_format', DEFAULT_TIMESTAMP_FORMAT) argvalues.nanotime_format = raw_option_with_default(configs, 'ui', 'nanotime_format', @@ -2230,6 +2249,8 @@ def main(cmdline, pkgpath): help='Force tty mode (command prompt).') parser.add_argument('--disable-history', default=False, action='store_true', help='Disable saving of history (existing history will still be loaded)') + parser.add_argument('--mode', choices=['tabular', 'csv', 'json'], + help='Specify the output format (tabular, csv, json). Default is tabular.') # This is a hidden option to suppress the warning when the -p/--password command line option is used. # Power users may use this option if they know no other people has access to the system where cqlsh is run or don't care about security. 
class TablePrinter:
    """Interface for rendering one query result in a particular output mode.

    A printer is created per result via :meth:`factory`, receives
    ``print_header`` / ``print_rows`` calls (possibly once per page) and is
    closed with ``finish`` so it can emit any trailer it needs.
    """

    def print_header(self, formatted_names):
        """Announce the column names of the result about to be printed."""
        raise NotImplementedError

    def print_rows(self, formatted_names, formatted_values):
        """Render one page of rows; ``formatted_values`` may be None or empty."""
        raise NotImplementedError

    def finish(self):
        """Flush whatever is still pending once the last page was printed."""
        pass

    @staticmethod
    def factory(format_type, shell):
        """Return the printer matching ``format_type`` (case-insensitive).

        Anything that is not ``csv`` or ``json`` -- including ``None`` and
        unknown values such as a typo in cqlshrc -- yields the tabular
        printer, which is the only one that needs ``shell.tty``.
        """
        mode = (format_type or 'tabular').lower()
        if mode == 'csv':
            return CsvTablePrinter(shell)
        if mode == 'json':
            return JsonTablePrinter(shell)
        # Previously an unknown mode selected TabularTablePrinter but called
        # it without the required ``tty`` argument, raising TypeError.
        return TabularTablePrinter(shell, shell.tty)


class TabularTablePrinter(TablePrinter):
    """Human-readable output, delegating the layout work to the shell."""

    def __init__(self, shell, tty, row_count_offset=0):
        self._shell = shell
        self._tty = tty
        # Running row number handed to the vertical (expand) renderer.
        self._row_count_offset = row_count_offset
        self._pending_header = None

    def print_header(self, formatted_names):
        # Store only: column widths depend on the data values, so the header
        # is rendered together with the rows in print_rows(). A header that
        # never gets rows (empty result) is rendered by finish().
        self._pending_header = formatted_names

    def print_rows(self, formatted_names, formatted_values):
        # A header is printed only if print_header() was called for this page.
        with_header = self._pending_header is not None
        self._pending_header = None
        if self._shell.expand_enabled:
            self._shell.print_formatted_result_vertically(
                formatted_names, formatted_values, self._row_count_offset)
        else:
            self._shell.print_formatted_result(
                formatted_names, formatted_values, with_header, self._tty)
        if formatted_values:
            self._row_count_offset += len(formatted_values)

    def finish(self):
        # Header-only output for a result that produced no rows.
        if self._pending_header is not None:
            self._shell.print_formatted_result(
                self._pending_header, None, with_header=True, tty=self._tty)
            self._pending_header = None


class CsvTablePrinter(TablePrinter):
    """CSV output written straight to ``shell.query_out`` via ``csv.writer``.

    The header row is written exactly once, before the first data row, or
    by ``finish`` when the result turned out to be empty.
    """

    def __init__(self, shell):
        import csv
        self._writer = csv.writer(shell.query_out)
        self._header_written = False
        self._colnames = None

    def _write_header_once(self):
        if not self._header_written:
            self._writer.writerow(self._colnames)
            self._header_written = True

    def print_header(self, formatted_names):
        self._colnames = [n.strval for n in formatted_names]

    def print_rows(self, formatted_names, formatted_values):
        if self._colnames is None:
            # print_header() was skipped; take the names passed alongside the
            # rows instead of handing None to csv.writer (which raises).
            self._colnames = [n.strval for n in formatted_names]
        self._write_header_once()
        for row in formatted_values or ():
            self._writer.writerow([col.strval for col in row])

    def finish(self):
        # Nothing to emit if no column names were ever provided.
        if self._colnames is not None:
            self._write_header_once()


class JsonTablePrinter(TablePrinter):
    """JSON output: one array of objects mapping column name to value.

    Every value is the column's ``strval``, so all JSON values are strings.
    The array is opened at most once and closed only if it was opened, so
    the emitted text is always either empty or a complete JSON document.
    """

    def __init__(self, shell):
        self._shell = shell
        self._colnames = None
        self._first_row = True
        self._opened = False

    def _open(self, formatted_names):
        self._colnames = [n.strval for n in formatted_names]
        if not self._opened:
            # Guard against a second '[' when a header is requested per page.
            self._shell.writeresult('[')
            self._opened = True

    def print_header(self, formatted_names):
        self._open(formatted_names)

    def print_rows(self, formatted_names, formatted_values):
        import json
        if formatted_values is None:
            return
        if not self._opened:
            # Rows arrived without a header call: open the array using the
            # names supplied with the rows rather than failing on None.
            self._open(formatted_names)
        for row in formatted_values:
            row_dict = {name: col.strval for name, col in zip(self._colnames, row)}
            serialized = json.dumps(row_dict)
            if self._first_row:
                self._shell.writeresult(' ' + serialized, newline=False)
                self._first_row = False
            else:
                self._shell.writeresult(',\n ' + serialized, newline=False)

    def finish(self):
        # Only close what was opened; an unmatched ']' is not valid JSON.
        if self._opened:
            self._shell.writeresult('\n]')
            self._opened = False
(num, textcol) VALUES (9998, 'Smith, Joe');" % ks) + cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env, + tty=False, input=setup_q + '\n') + try: + q4 = "SELECT num, textcol FROM %s.has_all_types WHERE num = 9998;" % ks + output4, result4 = cqlsh_testcall(args=('--mode', 'csv'), prompt=None, + env=self.default_env, tty=False, input=q4 + '\n') + self.assertEqual(0, result4) + reader4 = csv.reader(io.StringIO(output4.strip())) + rows4 = list(reader4) + self.assertEqual(rows4[0], ['num', 'textcol']) + for row in rows4[1:]: + self.assertEqual(len(row), 2, + msg='Comma inside textcol must be quoted in CSV: %r' % row) + data_rows4 = [r for r in rows4[1:] if r[0] == '9998'] + self.assertEqual(len(data_rows4), 1) + self.assertEqual(data_rows4[0][1], 'Smith, Joe') + finally: + cleanup_q = "DELETE FROM %s.has_all_types WHERE num = 9998;" % ks + cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env, + tty=False, input=cleanup_q + '\n') + + def test_json_output(self): + ks = get_keyspace() + query = "SELECT a, b FROM twenty_rows_table WHERE a IN ('1', '2');" + + output, result = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env, + tty=False, input=query + '\n') + self.assertEqual(0, result) + + import json + try: + parsed_json = json.loads(output) + self.assertEqual(len(parsed_json), 2) + + results = { (item['a'], item['b']) for item in parsed_json } + self.assertIn(('1', '1'), results) + self.assertIn(('2', '2'), results) + except ValueError as e: + self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output)) + + query2 = "SELECT num, setcol, listcol, mapcol FROM has_all_types WHERE num = 0;" + output2, result2 = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env, + tty=False, input=query2 + '\n') + self.assertEqual(0, result2) + try: + rows2 = json.loads(output2) + self.assertEqual(len(rows2), 1) + row = rows2[0] + self.assertIsInstance(row['setcol'], str, + msg='setcol 
should be a JSON string, got: %r' % type(row['setcol'])) + self.assertIsInstance(row['listcol'], str, + msg='listcol should be a JSON string, got: %r' % type(row['listcol'])) + self.assertIsInstance(row['mapcol'], str, + msg='mapcol should be a JSON string, got: %r' % type(row['mapcol'])) + except ValueError as e: + self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output2)) + + query3 = "SELECT num, varintcol FROM has_all_types WHERE num = 0;" + output3, result3 = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env, + tty=False, input=query3 + '\n') + self.assertEqual(0, result3) + try: + rows3 = json.loads(output3) + self.assertEqual(rows3[0]['varintcol'], '10000000000000000000000000') + except ValueError as e: + self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output3)) + + q4 = "SELECT num, uuidcol, decimalcol, timestampcol FROM has_all_types WHERE num = 0;" + output4, result4 = cqlsh_testcall(args=('--mode', 'json'), prompt=None, + env=self.default_env, tty=False, input=q4 + '\n') + self.assertEqual(0, result4) + try: + rows4 = json.loads(output4) + self.assertEqual(len(rows4), 1) + row4 = rows4[0] + import re + uuid_val = row4.get('uuidcol', '') + self.assertRegex(uuid_val, + r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$', + msg='uuidcol must be a UUID-formatted string: %r' % uuid_val) + from decimal import Decimal as PyDecimal, InvalidOperation + decimal_val = row4.get('decimalcol', '') + self.assertIsInstance(decimal_val, str, msg='decimalcol must be a string in JSON') + try: + PyDecimal(decimal_val) + except InvalidOperation: + self.fail('decimalcol value %r is not a valid decimal string' % decimal_val) + ts_val = row4.get('timestampcol', '') + self.assertIsInstance(ts_val, str) + self.assertTrue(len(ts_val) > 0, msg='timestampcol must be a non-empty string') + except ValueError as e: + self.fail("UUID/Decimal/Timestamp JSON output invalid: %s\nOutput: %s" % (e, output4)) 
+ + ks = get_keyspace() + setup_q2 = r"INSERT INTO " + ks + r".has_all_types (num, textcol) VALUES (9999, 'say \"hello\" \\ world');" + cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env, + tty=False, input=setup_q2 + '\n') + try: + q5 = "SELECT num, textcol FROM %s.has_all_types WHERE num = 9999;" % ks + output5, result5 = cqlsh_testcall(args=('--mode', 'json'), prompt=None, + env=self.default_env, tty=False, input=q5 + '\n') + self.assertEqual(0, result5) + try: + rows5 = json.loads(output5) + self.assertEqual(len(rows5), 1) + text_val = rows5[0]['textcol'] + self.assertIsInstance(text_val, str) + self.assertIn('"', text_val, msg='Double-quote must survive JSON round-trip') + except ValueError as e: + self.fail("Special-char JSON output is not valid JSON: %s\nOutput: %s" % (e, output5)) + finally: + cleanup_q2 = "DELETE FROM %s.has_all_types WHERE num = 9999;" % ks + cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env, + tty=False, input=cleanup_q2 + '\n') \ No newline at end of file