Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions clickhouse/types/type_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ bool TypeParser::Parse(TypeAst* type) {
type_->code = Type::String;
break;
}
case Token::QuotedIdentifier:
case Token::Name:
if (!type_->name.empty()) {
// A second Name token on the same element means the
Expand Down Expand Up @@ -260,6 +261,61 @@ TypeParser::Token TypeParser::NextToken() {
}
return Token{Token::QuotedString, StringView(cur_++, 1)};
}
// Quoted identifier: a name delimited by double quotes or backticks.
// Yields a QuotedIdentifier token whose value is the content between the
// quotes with escapes resolved (the quotes themselves are not included),
// or an Invalid token when no closing quote is found before end of input.
case '"':
case '`':
{
// Remember which character opened the identifier; only that character
// can close it or take part in an escape sequence.
const auto quote = *cur_;
++cur_;
const auto start = cur_;

// Fast path: scan for the closing quote with no escape
// sequences. Returns a StringView directly into the input
// buffer. Switches to the slow path on the first escape hit.
//
// Two escape forms are recognised, both quote-specific (e.g.
// inside a backtick-quoted identifier only backtick escapes
// apply; a doubled double-quote is treated as two literals):
// \q – backslash followed by the opening quote character
// qq – two consecutive opening quote characters
//
// A backslash that is not immediately followed by the opening quote
// is kept as a literal character.
// NOTE(review): this means `\\` is not collapsed to a single
// backslash - confirm that matches how the producer of these type
// names escapes identifiers.
for (; cur_ < end_; ++cur_) {
if (*cur_ == '\\' && cur_ + 1 < end_ && *(cur_ + 1) == quote) {
break; // backslash-escape found, switch to slow path
}
if (*cur_ == quote) {
if (cur_ + 1 < end_ && *(cur_ + 1) == quote) {
break; // doubled-quote escape, switch to slow path
}
// Unescaped closing quote: the token is the raw input slice.
const StringView result{start, static_cast<size_t>(cur_ - start)};
++cur_;
return Token{Token::QuotedIdentifier, result};
}
}

// The scan reached the end of input without seeing either a closing
// quote or an escape sequence: the identifier is unterminated.
if (cur_ >= end_) {
return Token{Token::Invalid, StringView()};
}

// Slow path: copy content seen so far into scratch_, then
// continue scanning and unescaping into it.
scratch_.assign(start, cur_);
for (; cur_ < end_; ++cur_) {
if (*cur_ == '\\' && cur_ + 1 < end_ && *(cur_ + 1) == quote) {
scratch_ += quote;
// Step over the backslash; the loop increment then steps over
// the escaped quote.
++cur_;
} else if (*cur_ == quote) {
if (cur_ + 1 < end_ && *(cur_ + 1) == quote) {
scratch_ += quote;
// Step over the first quote of the pair; the loop increment
// steps over the second.
++cur_;
} else {
++cur_;
// The returned view refers to scratch_, so it is usable only
// while scratch_ is left unmodified.
return Token{Token::QuotedIdentifier, StringView{scratch_}};
}
} else {
scratch_ += *cur_;
}
}
// End of input reached inside the slow path: unterminated identifier.
return Token{Token::Invalid, StringView()};
}

default: {
const char* st = cur_;
Expand Down
6 changes: 6 additions & 0 deletions clickhouse/types/type_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class TypeParser {
RPar,
Comma,
QuotedString, // string with quotation marks included
QuotedIdentifier,
EOS,
};

Expand All @@ -84,6 +85,11 @@ class TypeParser {

TypeAst* type_;
std::stack<TypeAst*> open_elements_;
// Backing storage for unescaped QuotedIdentifier token values. When a
// quoted identifier contains escape sequences the unescaped content is
// written here and the returned StringView points into this string.
// Valid only until the next NextToken() call.
std::string scratch_;
};


Expand Down
46 changes: 34 additions & 12 deletions clickhouse/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,24 +473,46 @@ LowCardinalityType::LowCardinalityType(TypeRef nested_type) : Type(LowCardinalit
LowCardinalityType::~LowCardinalityType() {
}

// Checks whether `name` is a valid plain identifier, i.e. one that can be
// written out without quoting. This is the case exactly when `name` matches
// `^[a-zA-Z_][0-9a-zA-Z_]*$`.
//
// Returns false for the empty string and for any name containing a byte
// outside the ASCII letters, digits and underscore (including all non-ASCII
// bytes such as UTF-8 sequences).
static bool IsPlainIdentifier(const std::string& name) {
    // Explicit ASCII range checks are used instead of std::isalpha /
    // std::isalnum: those functions depend on the current C locale and have
    // undefined behaviour when handed a negative `char`, which is what any
    // byte >= 0x80 becomes on platforms where `char` is signed.
    const auto is_alpha_or_under = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    };
    const auto is_alnum_or_under = [&is_alpha_or_under](char c) {
        return is_alpha_or_under(c) || (c >= '0' && c <= '9');
    };

    // The first character has the stricter rule: no leading digit.
    if (name.empty() || !is_alpha_or_under(name.front())) {
        return false;
    }
    for (size_t i = 1; i < name.size(); ++i) {
        if (!is_alnum_or_under(name[i])) {
            return false;
        }
    }
    return true;
}

// Writes the field name `name` to the end of `out`.
// A plain identifier is appended verbatim. Any other name is wrapped in
// backticks, with every backtick inside the name emitted twice.
static void AppendFieldname(const std::string& name, std::string& out) {
    if (!IsPlainIdentifier(name)) {
        out.push_back('`');
        for (const char ch : name) {
            out.push_back(ch);
            if (ch == '`') {
                // Double the backtick so it is not read as the closing quote.
                out.push_back('`');
            }
        }
        out.push_back('`');
    } else {
        out.append(name);
    }
}

std::string TupleType::GetName() const {
std::string result("Tuple(");
bool has_complete_names = !item_names_.empty();

if (!item_types_.empty()) {
if (has_complete_names) {
result += item_names_[0] + " " + item_types_[0]->GetName();
} else {
result += item_types_[0]->GetName();
}
}

for (size_t i = 1; i < item_types_.size(); ++i) {
for (size_t i = 0; i < item_types_.size(); ++i) {
if (i > 0)
result += ", ";
if (has_complete_names) {
result += ", " + item_names_[i] + " " + item_types_[i]->GetName();
} else {
result += ", " + item_types_[i]->GetName();
AppendFieldname(item_names_[i], result);
result += ' ';
}
result += item_types_[i]->GetName();
}

result += ")";
Expand Down
9 changes: 9 additions & 0 deletions ut/columns_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,15 @@ TEST(ColumnsCase, TupleSlice){
ASSERT_EQ((*tuple2)[1]->As<ColumnString>()->At(0), "3");
}

// A tuple type string with backtick-quoted field names must produce a tuple
// column whose type reports those names without the quotes.
TEST(ColumnsCase, TupleWithQuotedFieldNames) {
    const auto column = CreateColumnByType("Tuple(`a.b` Int8, `c.d` String)");
    ASSERT_NE(column, nullptr);

    // Walk from the column to its tuple type step by step.
    const auto tuple_column = column->AsStrict<ColumnTuple>();
    const auto tuple_type = tuple_column->Type();
    const auto& field_names = tuple_type->As<TupleType>()->GetItemNames();

    ASSERT_EQ(field_names.size(), 2u);
    EXPECT_EQ(field_names[0], "a.b");
    EXPECT_EQ(field_names[1], "c.d");
}

TEST(ColumnsCase, TimeAppend) {
auto col = std::make_shared<ColumnTime>();
col->Append(1);
Expand Down
71 changes: 71 additions & 0 deletions ut/type_parser_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,77 @@ TEST(TypeParserCase, ParseNamedTuple) {
ASSERT_EQ(ast.elements[1].code, Type::String);
}

// Backtick-quoted field names are accepted and stored without the quotes.
TEST(TypeParserCase, ParseNamedTuple_BacktickQuotedFieldNames) {
    TypeAst parsed;
    const bool ok = TypeParser("Tuple(`a.b` Int8, `c.d` String)").Parse(&parsed);
    ASSERT_TRUE(ok);
    ASSERT_EQ(parsed.meta, TypeAst::Tuple);
    ASSERT_EQ(parsed.elements.size(), 2u);

    // First field: `a.b` Int8
    const auto& first = parsed.elements[0];
    ASSERT_EQ(first.element_name, "a.b");
    ASSERT_EQ(first.name, "Int8");
    ASSERT_EQ(first.code, Type::Int8);

    // Second field: `c.d` String
    const auto& second = parsed.elements[1];
    ASSERT_EQ(second.element_name, "c.d");
    ASSERT_EQ(second.name, "String");
    ASSERT_EQ(second.code, Type::String);
}

// Double-quoted field names are accepted and stored without the quotes.
TEST(TypeParserCase, ParseNamedTuple_DoubleQuotedFieldNames) {
    TypeAst parsed;
    const bool ok = TypeParser("Tuple(\"a.b\" Int8, \"c.d\" String)").Parse(&parsed);
    ASSERT_TRUE(ok);
    ASSERT_EQ(parsed.meta, TypeAst::Tuple);
    ASSERT_EQ(parsed.elements.size(), 2u);

    // First field: "a.b" Int8
    const auto& first = parsed.elements[0];
    ASSERT_EQ(first.element_name, "a.b");
    ASSERT_EQ(first.name, "Int8");
    ASSERT_EQ(first.code, Type::Int8);

    // Second field: "c.d" String
    const auto& second = parsed.elements[1];
    ASSERT_EQ(second.element_name, "c.d");
    ASSERT_EQ(second.name, "String");
    ASSERT_EQ(second.code, Type::String);
}

// A quoted field name with a missing opening or closing backtick must be
// rejected by the parser.
TEST(TypeParserCase, ParseNamedTuple_UnterminatedQuote) {
    TypeAst parsed;

    // Opening backtick without a matching closing one.
    EXPECT_FALSE(TypeParser("Tuple(`a.b Int8)").Parse(&parsed));

    // Stray backtick after the name, never closed.
    EXPECT_FALSE(TypeParser("Tuple(a.b` Int8)").Parse(&parsed));
}

// Inside a backtick-quoted name, two consecutive backticks stand for one
// literal backtick.
TEST(TypeParserCase, ParseNamedTuple_DoubledBacktickEscape) {
    TypeAst ast;
    ASSERT_TRUE(TypeParser("Tuple(`a``b` UInt8)").Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    // Guard the indexing below: without this a regression that produces no
    // elements would read out of bounds instead of failing cleanly.
    ASSERT_EQ(ast.elements.size(), 1u);

    ASSERT_EQ(ast.elements[0].element_name, "a`b");
    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
}

// Inside a backtick-quoted name, a backslash followed by a backtick stands
// for one literal backtick.
TEST(TypeParserCase, ParseNamedTuple_BackslashBacktickEscape) {
    TypeAst ast;
    ASSERT_TRUE(TypeParser("Tuple(`a\\`b` UInt8)").Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    // Guard the indexing below: without this a regression that produces no
    // elements would read out of bounds instead of failing cleanly.
    ASSERT_EQ(ast.elements.size(), 1u);

    ASSERT_EQ(ast.elements[0].element_name, "a`b");
    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
}

// Inside a backtick-quoted name, doubled double-quotes are not an escape:
// both characters are kept as-is.
TEST(TypeParserCase, ParseNamedTuple_DoubleQuoteNotEscape) {
    TypeAst ast;
    ASSERT_TRUE(TypeParser("Tuple(`a\"\"b` UInt8)").Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    // Guard the indexing below: without this a regression that produces no
    // elements would read out of bounds instead of failing cleanly.
    ASSERT_EQ(ast.elements.size(), 1u);

    ASSERT_EQ(ast.elements[0].element_name, "a\"\"b");
    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
}

// Inside a double-quoted name, two consecutive double quotes stand for one
// literal double quote.
TEST(TypeParserCase, ParseNamedTuple_DoubledDoubleQuoteEscape) {
    TypeAst ast;
    ASSERT_TRUE(TypeParser("Tuple(\"a\"\"b\" UInt8)").Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    // Guard the indexing below: without this a regression that produces no
    // elements would read out of bounds instead of failing cleanly.
    ASSERT_EQ(ast.elements.size(), 1u);

    ASSERT_EQ(ast.elements[0].element_name, "a\"b");
    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
}

// Inside a double-quoted name, doubled backticks are not an escape: both
// characters are kept as-is.
TEST(TypeParserCase, ParseNamedTuple_BacktickNotEscape) {
    TypeAst ast;
    ASSERT_TRUE(TypeParser("Tuple(\"a``b\" UInt8)").Parse(&ast));
    ASSERT_EQ(ast.meta, TypeAst::Tuple);
    // Guard the indexing below: without this a regression that produces no
    // elements would read out of bounds instead of failing cleanly.
    ASSERT_EQ(ast.elements.size(), 1u);

    ASSERT_EQ(ast.elements[0].element_name, "a``b");
    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
}

TEST(TypeParserCase, ParseDecimal) {
TypeAst ast;
TypeParser("Decimal(12, 5)").Parse(&ast);
Expand Down
Loading