From 1da33465c20bdcdf1cf053c44493bed44c94c8e2 Mon Sep 17 00:00:00 2001 From: Johannes Misch Date: Wed, 20 May 2026 15:13:35 +0200 Subject: [PATCH 1/2] Fix parsing of quoted identifiers Previously a type such as Tuple(`a.b` Int8) would fail to parse. This would cause `Client::Impl::ReadBlock` to throw an "unsupported column type" exception, even though this is a valid ClickHouse type and supported by the library. This fix introduces a new token type for quoted identifiers and uses it for tuples. A dedicated token type seems a semantically cleaner choice, because formally there could be cases where `Name` would be accepted, but a quoted identifier would not be allowed. --- clickhouse/types/type_parser.cpp | 56 +++++++++++++++++++++++++ clickhouse/types/type_parser.h | 6 +++ ut/columns_ut.cpp | 9 ++++ ut/type_parser_ut.cpp | 71 ++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+) diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp index 82492412..5256d685 100644 --- a/clickhouse/types/type_parser.cpp +++ b/clickhouse/types/type_parser.cpp @@ -173,6 +173,7 @@ bool TypeParser::Parse(TypeAst* type) { type_->code = Type::String; break; } + case Token::QuotedIdentifier: case Token::Name: if (!type_->name.empty()) { // A second Name token on the same element means the @@ -260,6 +261,61 @@ TypeParser::Token TypeParser::NextToken() { } return Token{Token::QuotedString, StringView(cur_++, 1)}; } + case '"': + case '`': + { + const auto quote = *cur_; + ++cur_; + const auto start = cur_; + + // Fast path: scan for the closing quote with no escape + // sequences. Returns a StringView directly into the input + // buffer. Switches to the slow path on the first escape hit. + // + // Two escape forms are recognised, both quote-specific (e.g. 
+ // inside a backtick-quoted identifier only backtick escapes + // apply; a doubled double-quote is treated as two literals): + // \q – backslash followed by the opening quote character + // qq – two consecutive opening quote characters + for (; cur_ < end_; ++cur_) { + if (*cur_ == '\\' && cur_ + 1 < end_ && *(cur_ + 1) == quote) { + break; // backslash-escape found, switch to slow path + } + if (*cur_ == quote) { + if (cur_ + 1 < end_ && *(cur_ + 1) == quote) { + break; // doubled-quote escape, switch to slow path + } + const StringView result{start, static_cast(cur_ - start)}; + ++cur_; + return Token{Token::QuotedIdentifier, result}; + } + } + + if (cur_ >= end_) { + return Token{Token::Invalid, StringView()}; + } + + // Slow path: copy content seen so far into scratch_, then + // continue scanning and unescaping into it. + scratch_.assign(start, cur_); + for (; cur_ < end_; ++cur_) { + if (*cur_ == '\\' && cur_ + 1 < end_ && *(cur_ + 1) == quote) { + scratch_ += quote; + ++cur_; + } else if (*cur_ == quote) { + if (cur_ + 1 < end_ && *(cur_ + 1) == quote) { + scratch_ += quote; + ++cur_; + } else { + ++cur_; + return Token{Token::QuotedIdentifier, StringView{scratch_}}; + } + } else { + scratch_ += *cur_; + } + } + return Token{Token::Invalid, StringView()}; + } default: { const char* st = cur_; diff --git a/clickhouse/types/type_parser.h b/clickhouse/types/type_parser.h index 9cc29512..fb58ec46 100644 --- a/clickhouse/types/type_parser.h +++ b/clickhouse/types/type_parser.h @@ -62,6 +62,7 @@ class TypeParser { RPar, Comma, QuotedString, // string with quotation marks included + QuotedIdentifier, EOS, }; @@ -84,6 +85,11 @@ class TypeParser { TypeAst* type_; std::stack open_elements_; + // Backing storage for unescaped QuotedIdentifier token values. When a + // quoted identifier contains escape sequences the unescaped content is + // written here and the returned StringView points into this string. + // Valid only until the next NextToken() call. 
+ std::string scratch_; }; diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 3e931132..13a1731c 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -298,6 +298,15 @@ TEST(ColumnsCase, TupleSlice){ ASSERT_EQ((*tuple2)[1]->As()->At(0), "3"); } +TEST(ColumnsCase, TupleWithQuotedFieldNames) { + auto col = CreateColumnByType("Tuple(`a.b` Int8, `c.d` String)"); + ASSERT_NE(col, nullptr); + const auto& names = col->AsStrict()->Type()->As()->GetItemNames(); + ASSERT_EQ(names.size(), 2u); + EXPECT_EQ(names[0], "a.b"); + EXPECT_EQ(names[1], "c.d"); +} + TEST(ColumnsCase, TimeAppend) { auto col = std::make_shared(); col->Append(1); diff --git a/ut/type_parser_ut.cpp b/ut/type_parser_ut.cpp index f593de2c..561b1c9d 100644 --- a/ut/type_parser_ut.cpp +++ b/ut/type_parser_ut.cpp @@ -133,6 +133,77 @@ TEST(TypeParserCase, ParseNamedTuple) { ASSERT_EQ(ast.elements[1].code, Type::String); } +TEST(TypeParserCase, ParseNamedTuple_BacktickQuotedFieldNames) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(`a.b` Int8, `c.d` String)").Parse(&ast)); + ASSERT_EQ(ast.meta, TypeAst::Tuple); + ASSERT_EQ(ast.elements.size(), 2u); + + ASSERT_EQ(ast.elements[0].element_name, "a.b"); + ASSERT_EQ(ast.elements[0].name, "Int8"); + ASSERT_EQ(ast.elements[0].code, Type::Int8); + + ASSERT_EQ(ast.elements[1].element_name, "c.d"); + ASSERT_EQ(ast.elements[1].name, "String"); + ASSERT_EQ(ast.elements[1].code, Type::String); +} + +TEST(TypeParserCase, ParseNamedTuple_DoubleQuotedFieldNames) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(\"a.b\" Int8, \"c.d\" String)").Parse(&ast)); + ASSERT_EQ(ast.meta, TypeAst::Tuple); + ASSERT_EQ(ast.elements.size(), 2u); + + ASSERT_EQ(ast.elements[0].element_name, "a.b"); + ASSERT_EQ(ast.elements[0].name, "Int8"); + ASSERT_EQ(ast.elements[0].code, Type::Int8); + + ASSERT_EQ(ast.elements[1].element_name, "c.d"); + ASSERT_EQ(ast.elements[1].name, "String"); + ASSERT_EQ(ast.elements[1].code, Type::String); +} + +TEST(TypeParserCase, 
ParseNamedTuple_UnterminatedQuote) { + TypeAst ast; + EXPECT_FALSE(TypeParser("Tuple(`a.b Int8)").Parse(&ast)); + EXPECT_FALSE(TypeParser("Tuple(a.b` Int8)").Parse(&ast)); +} + +TEST(TypeParserCase, ParseNamedTuple_DoubledBacktickEscape) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(`a``b` UInt8)").Parse(&ast)); + ASSERT_EQ(ast.elements[0].element_name, "a`b"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); +} + +TEST(TypeParserCase, ParseNamedTuple_BackslashBacktickEscape) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(`a\\`b` UInt8)").Parse(&ast)); + ASSERT_EQ(ast.elements[0].element_name, "a`b"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); +} + +TEST(TypeParserCase, ParseNamedTuple_DoubleQuoteNotEscape) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(`a\"\"b` UInt8)").Parse(&ast)); + ASSERT_EQ(ast.elements[0].element_name, "a\"\"b"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); +} + +TEST(TypeParserCase, ParseNamedTuple_DoubledDoubleQuoteEscape) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(\"a\"\"b\" UInt8)").Parse(&ast)); + ASSERT_EQ(ast.elements[0].element_name, "a\"b"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); +} + +TEST(TypeParserCase, ParseNamedTuple_BacktickNotEscape) { + TypeAst ast; + ASSERT_TRUE(TypeParser("Tuple(\"a``b\" UInt8)").Parse(&ast)); + ASSERT_EQ(ast.elements[0].element_name, "a``b"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); +} + TEST(TypeParserCase, ParseDecimal) { TypeAst ast; TypeParser("Decimal(12, 5)").Parse(&ast); From 2117c0181ce2cae62bb487f2a4d9221fba3ef07c Mon Sep 17 00:00:00 2001 From: Johannes Misch Date: Thu, 21 May 2026 13:14:53 +0200 Subject: [PATCH 2/2] Quote field names in `TupleType::GetName()` --- clickhouse/types/types.cpp | 46 ++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index e12342c8..74c14249 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -473,24 
+473,46 @@ LowCardinalityType::LowCardinalityType(TypeRef nested_type) : Type(LowCardinalit LowCardinalityType::~LowCardinalityType() { } +// Checks if `name` is a valid plain identifier (must not be quoted). +// The condition for this is a match against `^[a-zA-Z_][0-9a-zA-Z_]*$` +static bool IsPlainIdentifier(const std::string& name) { + if (name.empty()) return false; + auto is_alpha_or_under = [](char c) { return std::isalpha(c) || c == '_'; }; + auto is_alnum_or_under = [](char c) { return std::isalnum(c) || c == '_'; }; + if (!is_alpha_or_under(name[0])) return false; + for (size_t i = 1; i < name.size(); ++i) + if (!is_alnum_or_under(name[i])) return false; + return true; +} + +// Appends a fieldname, potentially quoting it and escaping backticks. +static void AppendFieldname(const std::string& name, std::string& out) { + if (IsPlainIdentifier(name)) { + out += name; + return; + } + out += '`'; + for (char c : name) { + if (c == '`') + out += "``"; + else + out += c; + } + out += '`'; +} + std::string TupleType::GetName() const { std::string result("Tuple("); bool has_complete_names = !item_names_.empty(); - if (!item_types_.empty()) { - if (has_complete_names) { - result += item_names_[0] + " " + item_types_[0]->GetName(); - } else { - result += item_types_[0]->GetName(); - } - } - - for (size_t i = 1; i < item_types_.size(); ++i) { + for (size_t i = 0; i < item_types_.size(); ++i) { + if (i > 0) + result += ", "; if (has_complete_names) { - result += ", " + item_names_[i] + " " + item_types_[i]->GetName(); - } else { - result += ", " + item_types_[i]->GetName(); + AppendFieldname(item_names_[i], result); + result += ' '; } + result += item_types_[i]->GetName(); } result += ")";