From 38ceefc6ce63fb4667cd207424b1277c3eed5f8d Mon Sep 17 00:00:00 2001 From: pknowles Date: Fri, 31 Jan 2025 15:27:31 -0800 Subject: [PATCH] gdb pretty printers --- CMakeLists.txt | 20 ++- README.md | 119 +++++++++++++++--- .../offset_ptr.natvis | 0 debugging/offset_ptr_pretty_printer.py | 42 +++++++ .../offset_span.natvis | 0 debugging/offset_span_pretty_printer.py | 65 ++++++++++ debugging/pretty_printer_embed.cmake | 32 +++++ debugging/pretty_printer_embed.cpp | 39 ++++++ test/CMakeLists.txt | 2 - 9 files changed, 300 insertions(+), 19 deletions(-) rename {include/decodeless => debugging}/offset_ptr.natvis (100%) create mode 100644 debugging/offset_ptr_pretty_printer.py rename {include/decodeless => debugging}/offset_span.natvis (100%) create mode 100644 debugging/offset_span_pretty_printer.py create mode 100644 debugging/pretty_printer_embed.cmake create mode 100644 debugging/pretty_printer_embed.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c45421..1803190 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2024 Pyarelal Knowles, MIT License +# Copyright (c) 2024-2025 Pyarelal Knowles, MIT License cmake_minimum_required(VERSION 3.20) @@ -10,8 +10,22 @@ add_library(decodeless_offset_ptr INTERFACE) target_include_directories(decodeless_offset_ptr INTERFACE include) if(MSVC_IDE) - target_sources(decodeless_offset_ptr INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include/decodeless/offset_ptr.natvis - ${CMAKE_CURRENT_SOURCE_DIR}/include/decodeless/offset_span.natvis) + # Natvis for debugging in visual studio + target_sources( + decodeless_offset_ptr + INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/debugging/offset_ptr.natvis" + "${CMAKE_CURRENT_SOURCE_DIR}/debugging/offset_span.natvis") +else() + # Python pretty printers for linux debugging + option(DECODELESS_EMBED_PRETTY_PRINTERS + "Embed python pretty printers in .debug_gdb_scripts sections" ON) + if(DECODELESS_EMBED_PRETTY_PRINTERS) + include(debugging/pretty_printer_embed.cmake) + 
decodeless_embed_pretty_printers( + decodeless_offset_ptr + "${CMAKE_CURRENT_SOURCE_DIR}/debugging/offset_ptr_pretty_printer.py" + "${CMAKE_CURRENT_SOURCE_DIR}/debugging/offset_span_pretty_printer.py") + endif() endif() add_library(decodeless::offset_ptr ALIAS decodeless_offset_ptr) diff --git a/README.md b/README.md index 7b51f8c..900f3cd 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,31 @@ # decodeless::offset_ptr -Tiny header-only library for an address space independent data structure. The -humble offset pointer is a relative address from its own address in memory -rather than an absolute address. As long as the thing it points to is in the -same address space it will still work. Quite useful if you're... +`offset_ptr` is a self-relative pointer, encapsulated with C++: +```cpp +T* operator->() const { return (T*)((char*)this + m_offset); } +``` + +This is a tiny header-only C++ library. The humble offset pointer is a relative +address from its own address in memory rather than an absolute address. It is +address space independent. As long as the thing it points to is mapped or copied +within the same address range it will still work. Quite useful if you're... + +- Saving a data structure to disk +- Better yet, memory mapping a file from disk - Sharing memory between processes -- Memory mapping a file from disk, or even just reading big chunks - Copying complex data structures to the GPU - Duplicating the raw memory of a data structure, e.g. a low overhead deep copy A tiny offset span class is provided too, which is just an offset pointer and -size. It `offset_span` is very similar to `std::span` but with a relative address. +size. `offset_span` is very similar to `std::span` but with a relative address. Part of the [`decodeless`](https://github.com/decodeless) collection of utility libraries for conveniently reading and writing files via memory mapping. 
**Example: offset_ptr** -``` +```cpp #include using namespace decodeless; @@ -51,7 +58,7 @@ EXPECT_TRUE(file.data == 42); **Example: offset_span** -``` +```cpp #include using namespace decodeless; @@ -72,19 +79,103 @@ EXPECT_TRUE(file.values.size() == std::size(file.data)); ## Notes -- The offset pointer behaves like a regular C pointer -- Initializes to `nullptr` (perhaps controversial - the `glm` library does not) +- The offset pointer behaves like a regular C pointer - you can assign to it, + dereference etc., and it'll just work +- Initializes to `nullptr` (perhaps controversial - the `glm` library does not zero-initialize) - The difference is internally it stores an address offset from itself/`this` - `nullptr` is encoded as the value `1` to allow pointing to itself - Does not [propagate_const](https://en.cppreference.com/w/cpp/experimental/propagate_const) and arguably should, for something designed for file mapping -## Natvis +## CMake Integration + +This is a header-only library with no dependencies other than C++20. CMake +integration is provided. You don't have to use CMake, but debug visualizers are +integrated if you do. You can use one of the following: + +**add_subdirectory** + +Add the library as a submodule (`git submodule add +https://github.com/decodeless/offset_ptr.git`), then in cmake: + +```cmake +add_subdirectory(path/to/offset_ptr) +target_link_libraries(myproject PRIVATE decodeless::offset_ptr) +``` + +**FetchContent** + +CMake will download the library to the build directory at configure time. 
+ +```cmake +include(FetchContent) +FetchContent_Declare( + decodeless_offset_ptr + GIT_REPOSITORY https://github.com/decodeless/offset_ptr.git + GIT_TAG release_tag + GIT_SHALLOW TRUE +) +FetchContent_MakeAvailable(decodeless_offset_ptr) + +target_link_libraries(myproject PRIVATE decodeless::offset_ptr) +``` + +**find_package** -For visual studio users (and somewhat vscode -https://github.com/microsoft/vscode-cpptools/issues/10917), natvis files are -included to allow debug inspection of pointers and spans. +If using in a library, a config file is provided for +`find_package(... CONFIG ...)`, which trivially +includes `CMakeLists.txt`. See +[decodeless_writer](https://github.com/decodeless/writer/blob/main/CMakeLists.txt) +for an example. + +```cmake +find_package(decodeless_offset_ptr REQUIRED CONFIG PATHS ...) + +target_link_libraries(myproject PRIVATE decodeless::offset_ptr) +``` + +## Debug Visualization + +If you're using CMake, debugging should be seamless on windows without any setup +and near-seamless on linux (see `auto-load safe-path` below). + +Without debug visualizers you'd just see `m_offset` in bytes when hovering over +an `offset_ptr`. You *could* add a watch or print +`*(MyType*)((char*)&offsetPtr + offsetPtr.m_offset)` ... lol. Instead, it'd +be nicer if a debugger showed `offset_ptr` like it was a regular pointer and +`offset_span` like a regular array. That's what the visualizers in +[./debugging](debugging/) do. + +**Natvis** + +For visual studio users (and somewhat +[vscode](https://github.com/microsoft/vscode-cpptools/issues/10917)), natvis +files are included to allow debug inspection of pointers and spans. + +**Pretty Printing** + +For gdb, python pretty printers are embedded in +[`.debug_gdb_scripts`](https://www.heuristic42.com/blog/64) sections during +compilation. 
+ +You may need to set an [auto-load +safe-path](https://sourceware.org/gdb/current/onlinedocs/gdb.html/dotdebug_005fgdb_005fscripts-section.html) +before gdb will read the inlined scripts. Add this to `~/.gdbinit`: + +```set auto-load safe-path ``` + +Or add the following to vscode's `launch.json`: + +```json +"setupCommands": [ + { + "text": "set auto-load safe-path .", + "description": "enable loading pretty printers", + "ignoreFailures": false + } +], +``` ## Contributing diff --git a/include/decodeless/offset_ptr.natvis b/debugging/offset_ptr.natvis similarity index 100% rename from include/decodeless/offset_ptr.natvis rename to debugging/offset_ptr.natvis diff --git a/debugging/offset_ptr_pretty_printer.py b/debugging/offset_ptr_pretty_printer.py new file mode 100644 index 0000000..00a3464 --- /dev/null +++ b/debugging/offset_ptr_pretty_printer.py @@ -0,0 +1,42 @@ +import gdb +import re + + +class OffsetPtrPrinter: + """Pretty printer for decodeless::offset_ptr, shown as the absolute typed pointer it resolves to""" + def __init__(self, val): + self.val = val + self.T = self.val.type.strip_typedefs().unqualified().template_argument(0) + + def pointer(self): + base, offset = self.val.address, self.val["m_offset"] + + # Handle nullptr magic offset value of '1' + if offset == 1: + base, offset = gdb.Value(0), gdb.Value(0) + + return (base.cast(gdb.lookup_type("char").pointer()) + offset).cast(self.T.pointer()) + + def children(self): + # TODO: rather than a virtual '*' member, can this be made to behave + # more like a real pointer? 
+ return [("operator*()", self.pointer().dereference())] + + def to_string(self): + return f'({self.T} *) {self.pointer()}' + + +offset_ptr_lookup_pattern = re.compile("^decodeless::offset_ptr<.*>$") + + +def offset_ptr_lookup(val): + global offset_ptr_lookup_pattern + lookup_tag = val.type.strip_typedefs().unqualified().tag + if lookup_tag is None: + return None + if offset_ptr_lookup_pattern.match(lookup_tag): + return OffsetPtrPrinter(val) + return None + + +gdb.pretty_printers.append(offset_ptr_lookup) diff --git a/include/decodeless/offset_span.natvis b/debugging/offset_span.natvis similarity index 100% rename from include/decodeless/offset_span.natvis rename to debugging/offset_span.natvis diff --git a/debugging/offset_span_pretty_printer.py b/debugging/offset_span_pretty_printer.py new file mode 100644 index 0000000..114af7e --- /dev/null +++ b/debugging/offset_span_pretty_printer.py @@ -0,0 +1,65 @@ +import gdb +import re + + +# Encapsulates a list of children. Calls .child() on the pretty printer only +# when requested. This is a workaround for poor performance when inspecting +# large arrays in the debugger. 
+class LazyChildren: + def __init__(self, obj, length): + self._obj = obj + self._length = length + + def __getitem__(self, index): + if not 0 <= index < self._length: + raise IndexError("Index out of range") + return self._obj.child(index) + + def __len__(self): + return self._length + + +class OffsetSpanPrinter: + """Pretty printer for decodeless::offset_span, shown as an array of m_size elements""" + def __init__(self, val): + self.T = val.type.strip_typedefs().unqualified().template_argument(0) + self.length = val["m_size"] + + ptr = val["m_data"] + base = ptr.address + offset = ptr["m_offset"] + if offset == 1: + base = gdb.Value(0) + offset = gdb.Value(0) + self.pointer = (base.cast(gdb.lookup_type("char").pointer()) + offset).cast(self.T.pointer()) + + def child(self, i): + return (f"[{i}]", (self.pointer + i).dereference()) + + def children(self): + return LazyChildren(self, self.length) + + def __len__(self): + return self.length + + def to_string(self): + return f'offset_span<{self.T}> of length {self.length}' + + def display_hint(self): + return 'array' + + +offset_span_lookup_pattern = re.compile("^decodeless::offset_span<.*>$") + + +def offset_span_lookup(val): + global offset_span_lookup_pattern + lookup_tag = val.type.strip_typedefs().unqualified().tag + if lookup_tag is None: + return None + if offset_span_lookup_pattern.match(lookup_tag): + return OffsetSpanPrinter(val) + return None + + +gdb.pretty_printers.append(offset_span_lookup) diff --git a/debugging/pretty_printer_embed.cmake b/debugging/pretty_printer_embed.cmake new file mode 100644 index 0000000..81304d1 --- /dev/null +++ b/debugging/pretty_printer_embed.cmake @@ -0,0 +1,32 @@ +# Copyright (c) 2025 Pyarelal Knowles, MIT License + +# Function to embed a python pretty printer in an object (.o) file and add it as +# a dependency of the given target. See https://www.heuristic42.com/blog/64/. 
+# Usage: decodeless_embed_pretty_printers(MyTarget printer_a.py printer_b.py) +function(decodeless_embed_pretty_printers target) + foreach(source IN LISTS ARGN) + # Generate a *.o filename + get_filename_component(source_name "${source}" NAME_WLE) + set(object "${CMAKE_CURRENT_BINARY_DIR}/${source_name}.o") + + # Create a .bin file with a .debug_gdb_scripts section and generate a .o file from it + add_custom_command( + OUTPUT "${object}" + COMMAND + "$<TARGET_FILE:decodeless_pretty_printer_embed>" "${source}" "${object}.bin" && + ${CMAKE_OBJCOPY} -I binary -O elf64-x86-64 --rename-section .data=.debug_gdb_scripts "${object}.bin" "${object}" + DEPENDS decodeless_pretty_printer_embed "${source}" + COMMENT "Embedding GDB pretty-printer ${source}") + + # Add the object file as a target dependency + # TODO: avoid the custom target dance + target_link_libraries(${target} INTERFACE "${object}") + add_custom_target("generate_${source_name}" DEPENDS "${object}") + add_dependencies(${target} "generate_${source_name}") + endforeach() +endfunction() + +# Adding the necessary headers to the .debug_gdb_scripts section is not easy +# with cmake. Much nicer to build a little C++ executable to do the generation +# with no other dependencies. +add_executable(decodeless_pretty_printer_embed "${CMAKE_CURRENT_LIST_DIR}/pretty_printer_embed.cpp") diff --git a/debugging/pretty_printer_embed.cpp b/debugging/pretty_printer_embed.cpp new file mode 100644 index 0000000..4b275da --- /dev/null +++ b/debugging/pretty_printer_embed.cpp @@ -0,0 +1,39 @@ +#include <filesystem> +#include <fstream> +#include <iostream> +#include <string> + +// Tiny program to dump python source code for a .debug_gdb_scripts section into +// a binary file that can then be consumed by objcopy. It just adds some header +// bits and a null terminator. 
+// Usage: ./exe +// See https://sourceware.org/gdb/current/onlinedocs/gdb.html/dotdebug_005fgdb_005fscripts-section.html +// SECTION_SCRIPT_ID_PYTHON_FILE = 1 +// SECTION_SCRIPT_ID_SCHEME_FILE = 3 +// SECTION_SCRIPT_ID_PYTHON_TEXT = 4 <--- source code +// SECTION_SCRIPT_ID_SCHEME_TEXT = 6 +int main(int argc, char** argv) { + if (argc != 3) { + std::cerr << "Usage: ./embed \n"; + return EXIT_FAILURE; + } + + std::ifstream src(argv[1]); + if (!src.good()) { + std::cerr << "Error: could not open input file '" << argv[1] << "'\n"; + return EXIT_FAILURE; + } + + std::ofstream dst(argv[2], std::ios::binary); + if (!dst.good()) { + std::cerr << "Error: could not open output file '" << argv[2] << "'\n"; + return EXIT_FAILURE; + } + + std::string name = std::filesystem::path(argv[1]).filename().string(); + dst.write("\x04", 1); // "python text" identifier byte + dst << name << "\n"; // first line is the name of the script + dst << src.rdbuf(); // inline the script + dst.write("\0", 1); // null terminated + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c83935f..e42bd98 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -20,8 +20,6 @@ target_link_libraries( decodeless::offset_ptr gtest_main) -# TODO: presets? -# https://stackoverflow.com/questions/45955272/modern-way-to-set-compiler-flags-in-cross-platform-cmake-project if(MSVC) target_compile_options(${PROJECT_NAME}_tests PRIVATE /W4 /WX) else()