From bd557f6c3f0989eb213fa2e7adc6aa9c57ad0768 Mon Sep 17 00:00:00 2001
From: Antal van den Bosch
Date: Fri, 19 Jun 2026 16:56:46 +0200
Subject: [PATCH] Test output: buffer the result stream and drop the per-line flush

Writing test results was dominated by write() syscalls: the default
std::ofstream buffer is tiny, so one result per test instance turned
into a flood of small writes, and show_results() additionally flushed
the stream on every line via std::endl.

Give outStream a 1 MB buffer (a per-experiment member, set with
rdbuf()->pubsetbuf() before open()) and write '\n' instead of std::endl
in show_results(). The stream is flushed when it is closed at the end
of testing, so output is unchanged.

Measured (IGTree, 512k test instances written to a file; reused saved
base):
- wall time: ~31 s -> ~4.6 s (~17k -> ~111k instances/s, ~6.5x)
- instructions: ~430 B -> ~36 B (~12x)

Output is byte-identical (verified IGTree 512k and TRIBL2, plain and
+v db). pubsetbuf must precede open(); honoured by both libc++ and
libstdc++.

Co-Authored-By: Claude Opus 4.8 --- include/timbl/TimblExperiment.h | 1 + src/TimblExperiment.cxx | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/timbl/TimblExperiment.h b/include/timbl/TimblExperiment.h index a848643..d4698c0 100644 --- a/include/timbl/TimblExperiment.h +++ b/include/timbl/TimblExperiment.h @@ -288,6 +288,7 @@ namespace Timbl { std::string outStreamName; std::ifstream testStream; std::ofstream outStream; + std::vector outStreamBuf; // large write buffer for outStream unsigned long ibCount; ConfusionMatrix *confusionInfo; std::vector instances; diff --git a/src/TimblExperiment.cxx b/src/TimblExperiment.cxx index 6dd551d..e37d9ff 100644 --- a/src/TimblExperiment.cxx +++ b/src/TimblExperiment.cxx @@ -1106,7 +1106,12 @@ namespace Timbl { if ( Verbosity(MATCH_DEPTH) ){ outfile << " " << matchDepth() << ":" << (matchedAtLeaf()?"L":"N"); } - outfile << endl; + // Use '\n' rather than std::endl: endl flushes the output stream on every + // classified line. Combined with the larger output buffer set in + // initTestFiles(), letting lines accumulate turns ~one write() per line + // into a handful of large writes, which dominates testing time on large + // files. The stream is flushed when it is closed at the end of testing. + outfile << '\n'; showBestNeighbors( outfile ); } @@ -1404,6 +1409,15 @@ namespace Timbl { if ( checkTestFile() ){ outStream.close(); outStream.clear(); // just to be shure. old G++ libraries are in error here + // Give the output stream a large buffer before opening it. The + // default buffer is tiny, so writing one result per test instance + // turns into a great many small write() syscalls that dominate + // testing time on large files; a 1 MB buffer batches them into a + // handful of large writes. (pubsetbuf must precede open() to apply.) 
+ if ( outStreamBuf.empty() ){ + outStreamBuf.resize( 1u << 20 ); + } + outStream.rdbuf()->pubsetbuf( outStreamBuf.data(), outStreamBuf.size() ); outStream.open( OutFileName, ios::out | ios::trunc ); return true; }