diff --git a/CMakeLists.txt b/CMakeLists.txt index dd0579871..aea33db8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -306,6 +306,7 @@ endif() option(AUTO_UPDATE_GRAMMAR "Automatically regenerate C++ grammar files on change." TRUE) option(BUILD_BENCHMARK "Build benchmarks." FALSE) +option(BUILD_WAL_DUMP "Build WAL dump tool." FALSE) option(BUILD_EXTENSIONS "Semicolon-separated list of extensions to build." "") option(BUILD_EXAMPLES "Build examples." FALSE) option(BUILD_JAVA "Build Java API." FALSE) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 524e44ecc..af8614116 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -13,6 +13,9 @@ endif() if(${BUILD_BENCHMARK}) add_subdirectory(benchmark) endif() +if(${BUILD_WAL_DUMP}) + add_subdirectory(wal_dump) +endif() if(${BUILD_WASM}) add_subdirectory(wasm) endif() \ No newline at end of file diff --git a/tools/wal_dump/CMakeLists.txt b/tools/wal_dump/CMakeLists.txt new file mode 100644 index 000000000..e117ad160 --- /dev/null +++ b/tools/wal_dump/CMakeLists.txt @@ -0,0 +1,5 @@ +add_executable(wal_dump main.cpp) + +target_link_libraries(wal_dump lbug) + +install(TARGETS wal_dump) diff --git a/tools/wal_dump/main.cpp b/tools/wal_dump/main.cpp new file mode 100644 index 000000000..3febe2bff --- /dev/null +++ b/tools/wal_dump/main.cpp @@ -0,0 +1,134 @@ +#include +#include +#include + +#include "common/exception/storage.h" +#include "common/file_system/file_system.h" +#include "common/file_system/local_file_system.h" +#include "common/serializer/buffered_file.h" +#include "common/serializer/deserializer.h" +#include "storage/buffer_manager/buffer_manager.h" +#include "storage/buffer_manager/memory_manager.h" +#include "storage/storage_utils.h" +#include "storage/wal/checksum_reader.h" +#include "storage/wal/wal_record.h" + +using namespace lbug::common; +using namespace lbug::storage; + +static constexpr std::string_view checksumMismatchMessage = + "Checksum verification failed, the WAL file is corrupted."; + +int main(int argc, char** argv) { + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " \n"; + return 1; + } + + std::string databasePath = argv[1]; + std::string walPath = StorageUtils::getWALFilePath(databasePath); + + std::cout << "WAL File: " << walPath << "\n\n"; + + if (!std::filesystem::exists(walPath)) { + std::cout << "WAL file does not exist. Database was cleanly shutdown or no modifications " + "were made.\n"; + return 0; + } + + try { + LocalFileSystem lfs(""); + auto fileInfo = lfs.openFile(walPath, FileOpenFlags(FileFlags::READ_ONLY)); + auto fileSize = fileInfo->getFileSize(); + if (fileSize == 0) { + std::cout << "WAL file is empty. Database was cleanly shutdown.\n"; + return 0; + } + + Deserializer headerDeserializer(std::make_unique(*fileInfo)); + + headerDeserializer.getReader()->onObjectBegin(); + WALHeader walHeader{}; + headerDeserializer.deserializeValue(walHeader.databaseID); + uint8_t enableChecksumsBytes = 0; + headerDeserializer.deserializeValue(enableChecksumsBytes); + walHeader.enableChecksums = enableChecksumsBytes != 0; + headerDeserializer.getReader()->onObjectEnd(); + + std::cout << "WAL Header:\n"; + std::cout << " Database ID: " << UUID::toString(walHeader.databaseID) << "\n"; + std::cout << " Checksums Enabled: " << (walHeader.enableChecksums ? "true" : "false") + << "\n"; + std::cout << " File Size: " << fileSize << " bytes\n\n"; + + std::cout << "Record offsets:\n"; + + uint64_t recordCount = 0; + uint64_t lastOffset = 0; + uint64_t errorCount = 0; + + if (walHeader.enableChecksums) { + auto bm = std::make_unique(databasePath, "", 32 * 1024 * 1024, 8388608, + nullptr, true); + auto mm = std::make_unique(bm.get(), nullptr); + fileInfo->seek(17, SEEK_SET); + auto checksumReader = + std::make_unique(*fileInfo, *mm, checksumMismatchMessage); + Deserializer deserializer(std::move(checksumReader)); + + while (!deserializer.finished()) { + lastOffset = deserializer.getReader()->cast()->getReadOffset(); + std::cout << " " << lastOffset << "\n"; + recordCount++; + + try { + deserializer.getReader()->onObjectBegin(); + deserializer.getReader()->onObjectEnd(); + } catch (const StorageException& e) { + std::cerr << "\nError: WAL file is corrupted - checksum verification failed.\n"; + std::cerr << "This WAL file cannot be read.\n"; + return 1; + } catch (const std::exception& e) { + std::cerr << "\nError at offset " << lastOffset << ": " << e.what() << "\n"; + errorCount++; + if (errorCount > 10) { + std::cerr << "\nToo many errors, stopping.\n"; + break; + } + } + } + } else { + Deserializer deserializer(std::make_unique(*fileInfo)); + + while (!deserializer.finished()) { + lastOffset = deserializer.getReader()->cast()->getReadOffset(); + std::cout << " " << lastOffset << "\n"; + recordCount++; + + try { + deserializer.getReader()->onObjectBegin(); + deserializer.getReader()->onObjectEnd(); + } catch (const std::exception& e) { + std::cerr << "\nError at offset " << lastOffset << ": " << e.what() << "\n"; + errorCount++; + if (errorCount > 10) { + std::cerr << "\nToo many errors, stopping.\n"; + break; + } + } + } + } + + std::cout << "\nTotal records found: " << recordCount << "\n"; + if (errorCount > 0) { + std::cout << "Records with errors: " << errorCount << "\n"; + } + std::cout << "Last offset: " << lastOffset << "\n"; + + } catch (const std::exception& e) { + std::cerr << "Error reading WAL file: " << e.what() << "\n"; + return 1; + } + + return 0; +}