Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 189 additions & 8 deletions benchmarks/utf8_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,205 @@
#include <benchmark/benchmark.h>

#include <string>
#include <vector>

#include "utf/utf_strings.hpp"

#ifdef HAVE_GPERFTOOLS
#include <gperftools/profiler.h>
#endif

static void BM_Length_Mixed(benchmark::State& state) {
std::u8string s;
for (int i = 0; i < 1000; ++i) s += u8"Héllø 🌍";
// Shared benchmark input: Unicode scalar values whose UTF-8 encodings span
// every encoded length (1, 2, 3 and 4 bytes).
static const uint32_t test_scalars[] = {
    0x000048,  // 'H'  - ASCII, 1 byte
    0x0000E9,  // 'é'  - 2 bytes
    0x0000F8,  // 'ø'  - 2 bytes
    0x0020AC,  // '€'  - 3 bytes
    0x01F30D,  // '🌍' - 4 bytes
    0x01F680,  // '🚀' - 4 bytes
    0x01F4A9,  // '💩' - 4 bytes
    0x000065,  // 'e'  - ASCII, 1 byte
};

// Benchmarks utf::Utf8CodePoint::from_scalar.
//
// Every iteration builds one code point from the next entry of test_scalars,
// wrapping around at the end of the table, and one item is reported per
// iteration.
static void BM_UTF8_CodePoint_Creation(benchmark::State& state) {
  const auto scalar_count = sizeof(test_scalars) / sizeof(test_scalars[0]);
  std::size_t idx = 0;

  // `_` only drives the benchmark loop; [[maybe_unused]] records that and
  // resolves the CodeQL "unused local variable" notice raised on this line.
  for ([[maybe_unused]] auto _ : state) {
    auto cp = utf::Utf8CodePoint::from_scalar(test_scalars[idx % scalar_count]);
    // Keep the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(cp);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF8_CodePoint_Creation);

// Benchmarks utf::Utf16BECodePoint::from_scalar.
//
// Every iteration builds one code point from the next entry of test_scalars,
// wrapping around at the end of the table, and one item is reported per
// iteration.
static void BM_UTF16BE_CodePoint_Creation(benchmark::State& state) {
  const auto scalar_count = sizeof(test_scalars) / sizeof(test_scalars[0]);
  std::size_t idx = 0;

  // `_` only drives the benchmark loop; [[maybe_unused]] records that and
  // resolves the CodeQL "unused local variable" notice raised on this line.
  for ([[maybe_unused]] auto _ : state) {
    auto cp = utf::Utf16BECodePoint::from_scalar(test_scalars[idx % scalar_count]);
    // Keep the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(cp);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF16BE_CodePoint_Creation);

// Benchmarks utf::Utf32LECodePoint::from_scalar.
//
// Every iteration builds one code point from the next entry of test_scalars,
// wrapping around at the end of the table, and one item is reported per
// iteration.
//
// The statements that measured utf::length over a local string `s` belonged
// to the removed BM_Length_Mixed benchmark; `s` is not declared here, so they
// are dropped.
static void BM_UTF32LE_CodePoint_Creation(benchmark::State& state) {
  const auto scalar_count = sizeof(test_scalars) / sizeof(test_scalars[0]);
  std::size_t idx = 0;

  // `_` only drives the benchmark loop; [[maybe_unused]] records that.
  for ([[maybe_unused]] auto _ : state) {
    auto cp = utf::Utf32LECodePoint::from_scalar(test_scalars[idx % scalar_count]);
    // Keep the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(cp);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF32LE_CodePoint_Creation);

// Benchmarks utf::Utf8CodePoint::to_scalar.
//
// Code points are built once, outside the timed loop, from test_scalars;
// entries for which from_scalar yields no value are left out. Every iteration
// then converts the next stored code point back to a scalar, wrapping around
// at the end of the list. One item is reported per iteration.
static void BM_UTF8_Scalar_Conversion(benchmark::State& state) {
  std::vector<utf::Utf8CodePoint> codepoints;
  codepoints.reserve(sizeof(test_scalars) / sizeof(test_scalars[0]));

  // Pre-create code points so construction cost is not part of the timing.
  for (const auto scalar : test_scalars) {
    auto cp = utf::Utf8CodePoint::from_scalar(scalar);
    if (cp.has_value()) {
      codepoints.push_back(*cp);
    }
  }

  // Checked once up front instead of on every iteration: with no input there
  // is nothing to measure, so report an error rather than timing an empty
  // loop body (and never evaluate `idx % 0`).
  if (codepoints.empty()) {
    state.SkipWithError("no UTF-8 code points could be created from test_scalars");
    return;
  }

  std::size_t idx = 0;
  // `_` only drives the benchmark loop; [[maybe_unused]] records that and
  // resolves the CodeQL "unused local variable" notice raised on this line.
  for ([[maybe_unused]] auto _ : state) {
    auto scalar = codepoints[idx % codepoints.size()].to_scalar();
    benchmark::DoNotOptimize(scalar);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF8_Scalar_Conversion);

// Benchmarks utf::Utf8CodePoint::is_valid.
//
// Code points are built once, outside the timed loop, from test_scalars;
// entries for which from_scalar yields no value are left out. Every iteration
// then validates the next stored code point, wrapping around at the end of
// the list. One item is reported per iteration.
static void BM_UTF8_Validation(benchmark::State& state) {
  std::vector<utf::Utf8CodePoint> codepoints;
  codepoints.reserve(sizeof(test_scalars) / sizeof(test_scalars[0]));

  // Pre-create code points so construction cost is not part of the timing.
  for (const auto scalar : test_scalars) {
    auto cp = utf::Utf8CodePoint::from_scalar(scalar);
    if (cp.has_value()) {
      codepoints.push_back(*cp);
    }
  }

  // Checked once up front instead of on every iteration: with no input there
  // is nothing to measure, so report an error rather than timing an empty
  // loop body (and never evaluate `idx % 0`).
  if (codepoints.empty()) {
    state.SkipWithError("no UTF-8 code points could be created from test_scalars");
    return;
  }

  std::size_t idx = 0;
  // `_` only drives the benchmark loop; [[maybe_unused]] records that and
  // resolves the CodeQL "unused local variable" notice raised on this line.
  for ([[maybe_unused]] auto _ : state) {
    bool valid = codepoints[idx % codepoints.size()].is_valid();
    benchmark::DoNotOptimize(valid);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF8_Validation);

// Benchmarks utf::convert<utf::Utf16BECodePoint> applied to UTF-8 code points.
//
// The UTF-8 inputs are built once, outside the timed loop, from test_scalars;
// entries for which from_scalar yields no value are left out. Every iteration
// then converts the next stored code point, wrapping around at the end of the
// list. One item is reported per iteration.
static void BM_UTF8_to_UTF16BE_Conversion(benchmark::State& state) {
  std::vector<utf::Utf8CodePoint> utf8_codepoints;
  utf8_codepoints.reserve(sizeof(test_scalars) / sizeof(test_scalars[0]));

  // Pre-create UTF-8 code points so construction cost is not part of the timing.
  for (const auto scalar : test_scalars) {
    auto cp = utf::Utf8CodePoint::from_scalar(scalar);
    if (cp.has_value()) {
      utf8_codepoints.push_back(*cp);
    }
  }

  // Checked once up front instead of on every iteration: with no input there
  // is nothing to measure, so report an error rather than timing an empty
  // loop body (and never evaluate `idx % 0`).
  if (utf8_codepoints.empty()) {
    state.SkipWithError("no UTF-8 code points could be created from test_scalars");
    return;
  }

  std::size_t idx = 0;
  // CodeQL reported `_` as an unused local variable here. The variable only
  // drives the benchmark loop, so it is marked [[maybe_unused]]; an explicit
  // `(void)_;` as the first statement of the body is the pre-C++17 equivalent.
  for ([[maybe_unused]] auto _ : state) {
    auto utf16be_cp =
        utf::convert<utf::Utf16BECodePoint>(utf8_codepoints[idx % utf8_codepoints.size()]);
    benchmark::DoNotOptimize(utf16be_cp);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF8_to_UTF16BE_Conversion);

// Benchmarks utf::convert<utf::Utf32LECodePoint> applied to UTF-16 BE code
// points.
//
// The UTF-16 BE inputs are built once, outside the timed loop, from
// test_scalars; entries for which from_scalar yields no value are left out.
// Every iteration then converts the next stored code point, wrapping around
// at the end of the list. One item is reported per iteration.
static void BM_UTF16BE_to_UTF32LE_Conversion(benchmark::State& state) {
  std::vector<utf::Utf16BECodePoint> utf16be_codepoints;
  utf16be_codepoints.reserve(sizeof(test_scalars) / sizeof(test_scalars[0]));

  // Pre-create UTF-16 BE code points so construction cost is not part of the
  // timing.
  for (const auto scalar : test_scalars) {
    auto cp = utf::Utf16BECodePoint::from_scalar(scalar);
    if (cp.has_value()) {
      utf16be_codepoints.push_back(*cp);
    }
  }

  // Checked once up front instead of on every iteration: with no input there
  // is nothing to measure, so report an error rather than timing an empty
  // loop body (and never evaluate `idx % 0`).
  if (utf16be_codepoints.empty()) {
    state.SkipWithError("no UTF-16 BE code points could be created from test_scalars");
    return;
  }

  std::size_t idx = 0;
  // CodeQL reported `_` as an unused local variable here. The variable only
  // drives the benchmark loop, so it is marked [[maybe_unused]] (C++17) to
  // state that intent.
  for ([[maybe_unused]] auto _ : state) {
    auto utf32le_cp =
        utf::convert<utf::Utf32LECodePoint>(utf16be_codepoints[idx % utf16be_codepoints.size()]);
    benchmark::DoNotOptimize(utf32le_cp);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_UTF16BE_to_UTF32LE_Conversion);

// Benchmarks the utf::Utf8CodePoint accessors units() and count().
//
// Code points are built once, outside the timed loop, from test_scalars;
// entries for which from_scalar yields no value are left out. Every iteration
// then calls both accessors on the next stored code point, wrapping around at
// the end of the list. One item is reported per iteration.
static void BM_UTF8_Units_Access(benchmark::State& state) {
  std::vector<utf::Utf8CodePoint> codepoints;
  codepoints.reserve(sizeof(test_scalars) / sizeof(test_scalars[0]));

  // Pre-create code points so construction cost is not part of the timing.
  for (const auto scalar : test_scalars) {
    auto cp = utf::Utf8CodePoint::from_scalar(scalar);
    if (cp.has_value()) {
      codepoints.push_back(*cp);
    }
  }

  // Checked once up front instead of on every iteration: with no input there
  // is nothing to measure, so report an error rather than timing an empty
  // loop body (and never evaluate `idx % 0`).
  if (codepoints.empty()) {
    state.SkipWithError("no UTF-8 code points could be created from test_scalars");
    return;
  }

  std::size_t idx = 0;
  // `_` only drives the benchmark loop; [[maybe_unused]] records that and
  // resolves the CodeQL "unused local variable" notice raised on this line.
  for ([[maybe_unused]] auto _ : state) {
    const auto& cp = codepoints[idx % codepoints.size()];
    auto units = cp.units();
    auto count = cp.count();
    // Both results must stay observable so neither call is optimized away.
    benchmark::DoNotOptimize(units);
    benchmark::DoNotOptimize(count);
    ++idx;
  }

  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_Length_Mixed)->Complexity();
BENCHMARK(BM_UTF8_Units_Access);

int main(int argc, char** argv) {
#ifdef HAVE_GPERFTOOLS
Expand Down
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class UtfStrings(ConanFile):
name = "utf_strings"
version = "0.1.0"
version = "0.0.2"
settings = "os", "arch", "compiler", "build_type"
package_type = "application"
exports = "LICENSE"
Expand Down
Loading
Loading