From 923612919ca2dbf6d6d90f787e1f76a3a379d9e7 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 01:51:18 +0100 Subject: [PATCH 1/9] feat(docs): Start on code documentation Documenting public functions. --- README.md | 17 ++++++++++- docs/CMakeLists.txt | 53 +++++++++++++++++++++++++++++++++++ docs/Doxyfile.in | 14 +++++++++ docs/_static/.gitkeep | 0 docs/api.rst | 10 +++++++ docs/conf.py | 16 +++++++++++ docs/index.rst | 10 +++++++ docs/requirements.txt | 2 ++ {doc => docs}/screenshot.png | Bin src/interp.h | 3 ++ src/ir.c | 1 + src/ir.h | 8 ++++++ src/llvm.h | 4 +-- src/main_bfi.c | 1 - src/read.c | 1 - src/read.h | 10 +++++-- 16 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 docs/CMakeLists.txt create mode 100644 docs/Doxyfile.in create mode 100644 docs/_static/.gitkeep create mode 100644 docs/api.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/requirements.txt rename {doc => docs}/screenshot.png (100%) diff --git a/README.md b/README.md index b96b256..6d138e4 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ $ docker compose up The input validation and parsing is mathematically proven correct for `bf` programs up to eight commands long using the [C Bounded Model Checker](https://github.com/diffblue/cbmc) (CBMC). Details are [here](#model-check-memory-safety). -![alt](doc/screenshot.png) +![alt](docs/screenshot.png) ## Dependencies @@ -23,6 +23,13 @@ $ sudo apt-get install cmake llvm-dev check expect clang-format cpplint $ brew install cmake llvm check expect clang-format cpplint ``` +#### Documentation +To build the docs locally, install Doxygen and the Sphinx Python packages: +```bash +$ brew install doxygen +$ python3 -m pip install -r docs/requirements.txt +``` + ## Building ```bash @@ -56,6 +63,14 @@ Hello, World! $ cmake --build build --target fmt lint ``` +### Documentation + +```bash +$ cmake --build build --target docs +``` + +The generated HTML site is written to `build/docs/html/index.html`. + ### Tests ```bash diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 0000000..13157b9 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,53 @@ +find_package(Doxygen QUIET) +find_package(Python3 COMPONENTS Interpreter QUIET) + +set(DOCS_VENV_DIR ${CMAKE_BINARY_DIR}/docs/.venv) +if(WIN32) + set(DOCS_VENV_PYTHON ${DOCS_VENV_DIR}/Scripts/python.exe) +else() + set(DOCS_VENV_PYTHON ${DOCS_VENV_DIR}/bin/python3) +endif() + +if(Python3_Interpreter_FOUND) + add_custom_target(docs-setup + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/docs + COMMAND ${Python3_EXECUTABLE} -m venv ${DOCS_VENV_DIR} + COMMAND ${DOCS_VENV_PYTHON} -m pip install --disable-pip-version-check --quiet --upgrade pip + COMMAND ${DOCS_VENV_PYTHON} -m pip install --disable-pip-version-check --quiet -r ${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt + COMMENT "Creating docs virtualenv and installing Sphinx dependencies" + ) +else() + add_custom_target(docs-setup + COMMAND ${CMAKE_COMMAND} -E echo "docs-setup requires Python 3 with venv support." + COMMENT "Documentation setup unavailable" + ) +endif() + +if(DOXYGEN_FOUND AND Python3_Interpreter_FOUND) + set(DOXYGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/docs/doxygen) + set(DOXYGEN_INPUT_DIR ${CMAKE_SOURCE_DIR}/src) + + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in + ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile + @ONLY + ) + + add_custom_target(docs + COMMAND ${CMAKE_COMMAND} -E make_directory ${DOXYGEN_OUTPUT_DIR} + COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile + COMMAND ${CMAKE_COMMAND} -E env SPHINX_DOXYGEN_XML_DIR=${DOXYGEN_OUTPUT_DIR}/xml ${DOCS_VENV_PYTHON} -m sphinx -b html ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/docs/html + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Building Doxygen XML and Sphinx HTML documentation" + ) + add_dependencies(docs docs-setup) +else() + message(WARNING "Docs target disabled: install Doxygen, Python 3, and the Sphinx/Breathe packages to build documentation.") + add_custom_target(docs + COMMAND ${CMAKE_COMMAND} -E echo "Docs require Doxygen, Python 3, and the Sphinx/Breathe Python packages." + COMMENT "Documentation tooling not available" + ) + if(Python3_Interpreter_FOUND) + add_dependencies(docs docs-setup) + endif() +endif() \ No newline at end of file diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in new file mode 100644 index 0000000..df61dd3 --- /dev/null +++ b/docs/Doxyfile.in @@ -0,0 +1,14 @@ +PROJECT_NAME = "bf" +PROJECT_BRIEF = "Brainf*ck to LLVM IR compiler frontend" +OUTPUT_DIRECTORY = @DOXYGEN_OUTPUT_DIR@ +INPUT = @DOXYGEN_INPUT_DIR@ +FILE_PATTERNS = *.h *.c +RECURSIVE = YES +GENERATE_HTML = NO +GENERATE_XML = YES +EXTRACT_ALL = YES +EXTRACT_STATIC = NO +OPTIMIZE_OUTPUT_FOR_C = YES +QUIET = YES +WARN_IF_UNDOCUMENTED = NO +STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@ \ No newline at end of file diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..6eb27fb --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,10 @@ +API Reference +============= + +The following pages are generated from the public headers under ``src/``. + +.. doxygenfile:: common.h +.. doxygenfile:: ir.h +.. doxygenfile:: read.h +.. doxygenfile:: interp.h +.. doxygenfile:: llvm.h \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..deda1ed --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,16 @@ +from pathlib import Path +import os + +project = "bf" +author = "Ben Mandrew" +extensions = ["breathe"] +templates_path = ["_templates"] +exclude_patterns = [] +html_theme = "alabaster" +html_static_path = ["_static"] + +default_xml_dir = Path(__file__).resolve().parent.parent / "build" / "docs" / "doxygen" / "xml" +breathe_projects = { + "bf": os.environ.get("SPHINX_DOXYGEN_XML_DIR", str(default_xml_dir)) +} +breathe_default_project = "bf" \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..8ac5a4d --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,10 @@ +bf Documentation +================= + +This site is generated by Sphinx, with the API reference pulled from Doxygen XML through Breathe. + +.. toctree:: + :maxdepth: 2 + :caption: Contents + + api \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..895be47 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx>=7.0 +breathe>=4.35 \ No newline at end of file diff --git a/doc/screenshot.png b/docs/screenshot.png similarity index 100% rename from doc/screenshot.png rename to docs/screenshot.png diff --git a/src/interp.h b/src/interp.h index dd79317..ad99b85 100644 --- a/src/interp.h +++ b/src/interp.h @@ -17,8 +17,11 @@ struct context_t { size_t max_dp; }; +/** Initialize an interpreter context with the program loaded at pc zero. */ struct context_t init_context(struct program p); +/** Execute the command at the current program counter and advance execution. */ int interp(struct context_t *ctx, int, int, bool); +/** Render the current interpreter state as a human-readable trace string. */ char *context_to_string(struct context_t *ctx); #endif diff --git a/src/ir.c b/src/ir.c index 5002c0a..f58ec24 100644 --- a/src/ir.c +++ b/src/ir.c @@ -24,6 +24,7 @@ static struct jump_stack jump_stack_new() { }; } +/** Push a command and its source index onto the jump stack. */ static void jump_stack_push(struct jump_stack *js, struct cmd *c, size_t index) { assert(js->head < JUMP_STACK_MAX_SIZE - 1); diff --git a/src/ir.h b/src/ir.h index bd25e30..ca6dc31 100644 --- a/src/ir.h +++ b/src/ir.h @@ -27,15 +27,23 @@ struct program { size_t length; }; +/** Compute the length of the flattened Brainfuck source string. */ size_t program_str_length(struct program *p); +/** Parse a cleaned Brainfuck string into the internal program form. */ struct program string_to_program(char *s); +/** Release a program's heap-allocated command buffer. */ void free_program(struct program *p); +/** Map a command type back to its Brainfuck character. */ char cmd_type_to_char(enum cmd_type t); +/** Expand a compressed program back into a Brainfuck source string. */ char *program_to_string(struct program *program); +/** Return whether a program contains any output commands. */ char program_contains_output(struct program *p); +/** Return whether a program contains any input commands. */ char program_contains_input(struct program *p); +/** Validate that a source string contains only balanced Brainfuck commands. */ char program_is_valid(char *s); #endif diff --git a/src/llvm.h b/src/llvm.h index a205b6e..03be281 100644 --- a/src/llvm.h +++ b/src/llvm.h @@ -5,9 +5,9 @@ #include "ir.h" -/* Generate LLVM IR for a parsed Brainf*ck program. */ +/** Generate LLVM IR for a parsed Brainfuck program. */ LLVMModuleRef generate(struct program *p); -/* Release an LLVM module created by generate(). */ +/** Release an LLVM module created by generate(). */ void dispose_module(LLVMModuleRef module); #endif diff --git a/src/main_bfi.c b/src/main_bfi.c index f13d35d..5f6a422 100644 --- a/src/main_bfi.c +++ b/src/main_bfi.c @@ -7,7 +7,6 @@ #include "interp.h" #include "ir.h" -#include "llvm.h" #include "read.h" void print_usage(const char *program_name) { diff --git a/src/read.c b/src/read.c index 6b2c937..e54ae68 100644 --- a/src/read.c +++ b/src/read.c @@ -9,7 +9,6 @@ ((c) == '+' || (c) == '-' || (c) == '>' || (c) == '<' || (c) == '.' || \ (c) == ',' || (c) == '[' || (c) == ']') -// Clean whitespace and other extraneous characters from a BF program void clean_whitespace(char *s) { int64_t j = 0, i = 0; while (s[i] != '\0') { diff --git a/src/read.h b/src/read.h index 44b5ea4..10d30f1 100644 --- a/src/read.h +++ b/src/read.h @@ -16,8 +16,12 @@ struct ReadReturn { } value; }; -void clean_whitespace(char *); -struct ReadReturn read_file(char *); -struct ReadReturn validate(char *, size_t len); +/** Remove non-Brainfuck characters from a mutable source buffer. */ +void clean_whitespace(char *s); +/** Read a program file, validate it, and return the normalized source string. + */ +struct ReadReturn read_file(char *fname); +/** Validate and normalize a raw program buffer before parsing it. */ +struct ReadReturn validate(char *program, size_t len); #endif From 3a77b4ec95020f29cf4638c566c2d33904cc2af7 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:00:07 +0100 Subject: [PATCH 2/9] feat(doc): Document structs and members --- src/common.h | 1 + src/interp.h | 14 +++++++++----- src/ir.h | 48 ++++++++++++++++++++++++++++++++---------------- src/read.h | 13 +++++++++---- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/src/common.h b/src/common.h index 75d5800..f88700a 100644 --- a/src/common.h +++ b/src/common.h @@ -1,6 +1,7 @@ #ifndef COMMON_H #define COMMON_H +/** Number of cells in the Brainfuck data tape. */ #define DATA_SIZE (65536) #endif diff --git a/src/interp.h b/src/interp.h index ad99b85..5470af4 100644 --- a/src/interp.h +++ b/src/interp.h @@ -7,21 +7,25 @@ #include "common.h" #include "ir.h" +/// Mutable execution state for the Brainfuck interpreter. struct context_t { + /// Current command index in `p.cmds`. size_t pc; + /// Parsed program being executed. struct program p; + /// Current data pointer position. size_t dp; + /// Interpreter data tape. unsigned char data[DATA_SIZE]; - // Keep track of the largest data pointer seen so far, - // for pretty-printing the context + /// Largest data pointer reached for pretty-printing. size_t max_dp; }; -/** Initialize an interpreter context with the program loaded at pc zero. */ +/// Initialize an interpreter context with the program loaded at pc zero. struct context_t init_context(struct program p); -/** Execute the command at the current program counter and advance execution. */ +/// Execute the command at the current program counter and advance execution. int interp(struct context_t *ctx, int, int, bool); -/** Render the current interpreter state as a human-readable trace string. */ +/// Render the current interpreter state as a human-readable trace string. char *context_to_string(struct context_t *ctx); #endif diff --git a/src/ir.h b/src/ir.h index ca6dc31..2987054 100644 --- a/src/ir.h +++ b/src/ir.h @@ -3,47 +3,63 @@ #include +/// Brainfuck command categories used by the internal IR. enum cmd_type { - CMD_SIMPLE_INC, // '+' - CMD_SIMPLE_DEC, // '-' - CMD_SIMPLE_RIGHT, // '>' - CMD_SIMPLE_LEFT, // '<' - CMD_SIMPLE_OUTPUT, // '.' - CMD_SIMPLE_INPUT, // ',' - CMD_JUMP_FORWARD, // '[' - CMD_JUMP_BACK, // ']' + /// `'+'`: increment current cell value. + CMD_SIMPLE_INC, + /// `'-'`: decrement current cell value. + CMD_SIMPLE_DEC, + /// `'>'`: move data pointer right. + CMD_SIMPLE_RIGHT, + /// `'<'`: move data pointer left. + CMD_SIMPLE_LEFT, + /// `'.'`: write current cell as output. + CMD_SIMPLE_OUTPUT, + /// `','`: read input into current cell. + CMD_SIMPLE_INPUT, + /// `'['`: jump forward if current cell is zero. + CMD_JUMP_FORWARD, + /// `']'`: jump back if current cell is non-zero. + CMD_JUMP_BACK, }; +/// One compressed instruction in the internal Brainfuck IR. struct cmd { + /// Command opcode. enum cmd_type type; union { + /// Repeat count for simple commands. size_t simple_count; + /// Matching bracket command index. size_t jump_index; }; }; +/// Parsed Brainfuck program represented as an array of commands. struct program { + /// Heap-allocated command array. struct cmd *cmds; + /// Number of entries in `cmds`. size_t length; }; -/** Compute the length of the flattened Brainfuck source string. */ +/// Compute the length of the flattened Brainfuck source string. size_t program_str_length(struct program *p); -/** Parse a cleaned Brainfuck string into the internal program form. */ +/// Parse a cleaned Brainfuck string into the internal program form. struct program string_to_program(char *s); -/** Release a program's heap-allocated command buffer. */ +/// Release a program's heap-allocated command buffer. void free_program(struct program *p); -/** Map a command type back to its Brainfuck character. */ +/// Map a command type back to its Brainfuck character. char cmd_type_to_char(enum cmd_type t); -/** Expand a compressed program back into a Brainfuck source string. */ +/// Expand a compressed program back into a Brainfuck source string. char *program_to_string(struct program *program); -/** Return whether a program contains any output commands. */ +/// Return whether a program contains any output commands. char program_contains_output(struct program *p); -/** Return whether a program contains any input commands. */ +/// Return whether a program contains any input commands. char program_contains_input(struct program *p); -/** Validate that a source string contains only balanced Brainfuck commands. */ +/// Validate that a source string contains only balanced Brainfuck commands. char program_is_valid(char *s); #endif diff --git a/src/read.h b/src/read.h index 10d30f1..66ca8ba 100644 --- a/src/read.h +++ b/src/read.h @@ -3,25 +3,30 @@ #include +/// Validation error payload returned by file/parse helpers. struct Error { + /// Human-readable error message. char *message; }; +/// Tagged return type for reading and validating source input. struct ReadReturn { + /// Result discriminator. enum { OK, ERROR } type; union { + /// Normalized source on success. char *program_str; + /// Error payload on failure. struct Error error; } value; }; -/** Remove non-Brainfuck characters from a mutable source buffer. */ +/// Remove non-Brainfuck characters from a mutable source buffer. void clean_whitespace(char *s); -/** Read a program file, validate it, and return the normalized source string. - */ +/// Read a file, validate it, and return normalized source. struct ReadReturn read_file(char *fname); -/** Validate and normalize a raw program buffer before parsing it. */ +/// Validate and normalize a raw program buffer before parsing it. struct ReadReturn validate(char *program, size_t len); #endif From 715c52c052197f35534724af703e6acf8b032e82 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:06:05 +0100 Subject: [PATCH 3/9] feat(doc): Split docs by file --- docs/api.rst | 15 +++++++++------ docs/api_common.rst | 4 ++++ docs/api_interp.rst | 4 ++++ docs/api_ir.rst | 4 ++++ docs/api_llvm.rst | 4 ++++ docs/api_read.rst | 4 ++++ src/read.h | 26 ++++++++++++++++++-------- 7 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 docs/api_common.rst create mode 100644 docs/api_interp.rst create mode 100644 docs/api_ir.rst create mode 100644 docs/api_llvm.rst create mode 100644 docs/api_read.rst diff --git a/docs/api.rst b/docs/api.rst index 6eb27fb..7c79458 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,10 +1,13 @@ API Reference ============= -The following pages are generated from the public headers under ``src/``. +The API reference is split by public header for easier navigation. -.. doxygenfile:: common.h -.. doxygenfile:: ir.h -.. doxygenfile:: read.h -.. doxygenfile:: interp.h -.. doxygenfile:: llvm.h \ No newline at end of file +.. toctree:: + :maxdepth: 1 + + api_common + api_ir + api_read + api_interp + api_llvm diff --git a/docs/api_common.rst b/docs/api_common.rst new file mode 100644 index 0000000..5154c0c --- /dev/null +++ b/docs/api_common.rst @@ -0,0 +1,4 @@ +common.h +======== + +.. doxygenfile:: common.h diff --git a/docs/api_interp.rst b/docs/api_interp.rst new file mode 100644 index 0000000..918b2d1 --- /dev/null +++ b/docs/api_interp.rst @@ -0,0 +1,4 @@ +interp.h +======== + +.. doxygenfile:: interp.h diff --git a/docs/api_ir.rst b/docs/api_ir.rst new file mode 100644 index 0000000..8edcbbd --- /dev/null +++ b/docs/api_ir.rst @@ -0,0 +1,4 @@ +ir.h +==== + +.. doxygenfile:: ir.h diff --git a/docs/api_llvm.rst b/docs/api_llvm.rst new file mode 100644 index 0000000..99f7a4b --- /dev/null +++ b/docs/api_llvm.rst @@ -0,0 +1,4 @@ +llvm.h +====== + +.. doxygenfile:: llvm.h diff --git a/docs/api_read.rst b/docs/api_read.rst new file mode 100644 index 0000000..3819317 --- /dev/null +++ b/docs/api_read.rst @@ -0,0 +1,4 @@ +read.h +====== + +.. doxygenfile:: read.h diff --git a/src/read.h b/src/read.h index 66ca8ba..b7cc31e 100644 --- a/src/read.h +++ b/src/read.h @@ -9,17 +9,27 @@ struct Error { char *message; }; +union ProgramOrError { + /// Normalized source on success. + char *program_str; + /// Error payload on failure. + struct Error error; +}; + +/// Discriminator values for ReadReturn. +enum ReadResultType { + /// Read and validation succeeded. + OK, + /// Read and validation failed. + ERROR, +}; + /// Tagged return type for reading and validating source input. struct ReadReturn { /// Result discriminator. - enum { OK, ERROR } type; - - union { - /// Normalized source on success. - char *program_str; - /// Error payload on failure. - struct Error error; - } value; + enum ReadResultType type; + /// Result payload. + union ProgramOrError value; }; /// Remove non-Brainfuck characters from a mutable source buffer. From 6be9bc63c7df43f48ff622b80da1c4fe67f42428 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:16:35 +0100 Subject: [PATCH 4/9] feat(doc): Add @param and @return to functions --- src/common.h | 2 +- src/interp.h | 9 +++++++++ src/ir.c | 1 - src/ir.h | 15 +++++++++++++++ src/llvm.h | 7 +++++-- src/read.h | 6 ++++++ 6 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/common.h b/src/common.h index f88700a..4db1c76 100644 --- a/src/common.h +++ b/src/common.h @@ -1,7 +1,7 @@ #ifndef COMMON_H #define COMMON_H -/** Number of cells in the Brainfuck data tape. */ +/// Number of cells in the Brainfuck data tape. #define DATA_SIZE (65536) #endif diff --git a/src/interp.h b/src/interp.h index 5470af4..7c8db08 100644 --- a/src/interp.h +++ b/src/interp.h @@ -22,10 +22,19 @@ struct context_t { }; /// Initialize an interpreter context with the program loaded at pc zero. +/// @param p Parsed Brainfuck program to execute. +/// @return Initialized interpreter context with zeroed tape. struct context_t init_context(struct program p); /// Execute the command at the current program counter and advance execution. +/// @param ctx Interpreter context. +/// @param out_fd File descriptor used for output. +/// @param in_fd File descriptor used for input. +/// @param byte_output If true, emit numeric byte values. +/// @return 1 when program execution has completed; otherwise 0. int interp(struct context_t *ctx, int, int, bool); /// Render the current interpreter state as a human-readable trace string. +/// @param ctx Interpreter context to render. +/// @return Heap-allocated formatted string; caller must free it. char *context_to_string(struct context_t *ctx); #endif diff --git a/src/ir.c b/src/ir.c index f58ec24..5002c0a 100644 --- a/src/ir.c +++ b/src/ir.c @@ -24,7 +24,6 @@ static struct jump_stack jump_stack_new() { }; } -/** Push a command and its source index onto the jump stack. */ static void jump_stack_push(struct jump_stack *js, struct cmd *c, size_t index) { assert(js->head < JUMP_STACK_MAX_SIZE - 1); diff --git a/src/ir.h b/src/ir.h index 2987054..2d637a2 100644 --- a/src/ir.h +++ b/src/ir.h @@ -44,22 +44,37 @@ struct program { }; /// Compute the length of the flattened Brainfuck source string. +/// @param p Parsed program. +/// @return Length of expanded Brainfuck source string. size_t program_str_length(struct program *p); /// Parse a cleaned Brainfuck string into the internal program form. +/// @param s Cleaned Brainfuck source string. +/// @return Parsed program with heap-allocated command array. struct program string_to_program(char *s); /// Release a program's heap-allocated command buffer. +/// @param p Program whose command array should be released. void free_program(struct program *p); /// Map a command type back to its Brainfuck character. +/// @param t Command type. +/// @return Corresponding Brainfuck symbol. char cmd_type_to_char(enum cmd_type t); /// Expand a compressed program back into a Brainfuck source string. +/// @param program Parsed program. +/// @return Heap-allocated source string; caller must free it. char *program_to_string(struct program *program); /// Return whether a program contains any output commands. +/// @param p Parsed program. +/// @return 1 if output exists; otherwise 0. char program_contains_output(struct program *p); /// Return whether a program contains any input commands. +/// @param p Parsed program. +/// @return 1 if input exists; otherwise 0. char program_contains_input(struct program *p); /// Validate that a source string contains only balanced Brainfuck commands. +/// @param s Source string to validate. +/// @return 1 if valid; otherwise 0. char program_is_valid(char *s); #endif diff --git a/src/llvm.h b/src/llvm.h index 03be281..f58498c 100644 --- a/src/llvm.h +++ b/src/llvm.h @@ -5,9 +5,12 @@ #include "ir.h" -/** Generate LLVM IR for a parsed Brainfuck program. */ +/// Generate LLVM IR for a parsed Brainfuck program. +/// @param p Parsed Brainfuck program. +/// @return Generated LLVM module. LLVMModuleRef generate(struct program *p); -/** Release an LLVM module created by generate(). */ +/// Release an LLVM module created by generate(). +/// @param module LLVM module created by `generate`. void dispose_module(LLVMModuleRef module); #endif diff --git a/src/read.h b/src/read.h index b7cc31e..dab02d6 100644 --- a/src/read.h +++ b/src/read.h @@ -33,10 +33,16 @@ struct ReadReturn { }; /// Remove non-Brainfuck characters from a mutable source buffer. +/// @param s Null-terminated source buffer to clean in place. void clean_whitespace(char *s); /// Read a file, validate it, and return normalized source. +/// @param fname Path to the source file. +/// @return Tagged result containing normalized source or an error. struct ReadReturn read_file(char *fname); /// Validate and normalize a raw program buffer before parsing it. +/// @param program Mutable source buffer. +/// @param len Number of bytes to validate from `program`. +/// @return Tagged result containing normalized source or an error. struct ReadReturn validate(char *program, size_t len); #endif From 7ffb3c86ac8d089d7b6c39de7320a6ba09a1d521 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:16:57 +0100 Subject: [PATCH 5/9] chore(doc): Use Furo theme --- docs/conf.py | 7 +++++-- docs/requirements.txt | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index deda1ed..6041efe 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,14 +3,17 @@ project = "bf" author = "Ben Mandrew" +language = "en_GB" +# Sphinx HTML search uses the English stemmer code `en` for all English variants. +html_search_language = "en" extensions = ["breathe"] templates_path = ["_templates"] exclude_patterns = [] -html_theme = "alabaster" +html_theme = "furo" html_static_path = ["_static"] default_xml_dir = Path(__file__).resolve().parent.parent / "build" / "docs" / "doxygen" / "xml" breathe_projects = { "bf": os.environ.get("SPHINX_DOXYGEN_XML_DIR", str(default_xml_dir)) } -breathe_default_project = "bf" \ No newline at end of file +breathe_default_project = "bf" diff --git a/docs/requirements.txt b/docs/requirements.txt index 895be47..f201aed 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,3 @@ sphinx>=7.0 -breathe>=4.35 \ No newline at end of file +breathe>=4.35 +furo>=2024.8.6 \ No newline at end of file From 43c3e6b49de415f3b32f9cbb048c970c4e34bbde Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:31:48 +0100 Subject: [PATCH 6/9] feat(docs): Add usage to docs --- docs/api.rst | 2 -- docs/conf.py | 4 +++- docs/index.rst | 10 ++++++--- docs/usage.rst | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 docs/usage.rst diff --git a/docs/api.rst b/docs/api.rst index 7c79458..6059827 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,8 +1,6 @@ API Reference ============= -The API reference is split by public header for easier navigation. - .. toctree:: :maxdepth: 1 diff --git a/docs/conf.py b/docs/conf.py index 6041efe..26d3046 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -2,7 +2,8 @@ import os project = "bf" -author = "Ben Mandrew" +author = "Ben M. Andrew" +copyright = f"2026, {author}" language = "en_GB" # Sphinx HTML search uses the English stemmer code `en` for all English variants. html_search_language = "en" @@ -11,6 +12,7 @@ exclude_patterns = [] html_theme = "furo" html_static_path = ["_static"] +html_title = "bf Documentation" default_xml_dir = Path(__file__).resolve().parent.parent / "build" / "docs" / "doxygen" / "xml" breathe_projects = { diff --git a/docs/index.rst b/docs/index.rst index 8ac5a4d..3af4e01 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,10 +1,14 @@ -bf Documentation +``bf`` Documentation ================= -This site is generated by Sphinx, with the API reference pulled from Doxygen XML through Breathe. +This documentation covers usage and API reference for the ``bf`` project. + +- Repository: https://github.com/benmandrew/bf. +- Live instance: https://benmandrew.com/articles/compiler-frontend. .. toctree:: :maxdepth: 2 :caption: Contents - api \ No newline at end of file + usage + api diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000..f515097 --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,61 @@ +Usage +===== + +Live Instance +------------- + +You can try the project online at: + +https://benmandrew.com/articles/compiler-frontend + +Run Locally with Docker Compose +-------------------------------- + +To run the web interface locally: + +.. code-block:: bash + + $ docker compose up + +Then open: + +http://localhost:8080 + +Build the CLI Tools +------------------- + +Build the project with CMake: + +.. code-block:: bash + + $ cmake -B build + $ cmake --build build + +After building, both executables are available in the ``build`` directory: + +- ``bfc``: Brainfuck frontend that emits LLVM IR +- ``bfi``: Brainfuck interpreter + +Use ``bfc`` (compile to LLVM IR) +-------------------------------- + +Generate LLVM IR from a Brainfuck program and compile it with ``clang``: + +.. code-block:: bash + + # Generate LLVM IR + $ ./build/bfc test/res/helloworld.b > main.ll + # Compile IR to binary + $ clang main.ll -o main + $ ./main + Hello, World! + +Use ``bfi`` (interpret directly) +-------------------------------- + +Run a Brainfuck program directly with the interpreter: + +.. code-block:: bash + + $ ./build/bfi test/res/helloworld.b + Hello, World! From 4667692189a7d8ae5bb16b0ad38ac4e40ac7e7b0 Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:34:27 +0100 Subject: [PATCH 7/9] chore: Rename scripts to verification --- CMakeLists.txt | 6 +++--- {scripts => verification}/CMakeLists.txt | 0 {scripts => verification}/cbmc_run.sh | 0 {scripts => verification}/fuzz.sh | 0 {scripts => verification}/run_afl_parallel.sh | 0 5 files changed, 3 insertions(+), 3 deletions(-) rename {scripts => verification}/CMakeLists.txt (100%) rename {scripts => verification}/cbmc_run.sh (100%) rename {scripts => verification}/fuzz.sh (100%) rename {scripts => verification}/run_afl_parallel.sh (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45e2f16..f649410 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,11 +127,11 @@ set_target_properties(bfc bf_lib ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} ) -option(DOCKER_BUILD "Docker build mode - disables tests and scripts" OFF) +option(DOCKER_BUILD "Docker build mode - disables tests and verification" OFF) if(NOT DOCKER_BUILD) enable_testing() add_subdirectory(test) - add_subdirectory(scripts) + add_subdirectory(verification) endif() add_custom_target(debug @@ -260,7 +260,7 @@ if(AFL_CC) ) add_custom_command(TARGET fuzz POST_BUILD - COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_afl_parallel.sh ${CMAKE_BINARY_DIR}/bfc_fuzz + COMMAND ${CMAKE_SOURCE_DIR}/verification/run_afl_parallel.sh ${CMAKE_BINARY_DIR}/bfc_fuzz ) endif() diff --git a/scripts/CMakeLists.txt b/verification/CMakeLists.txt similarity index 100% rename from scripts/CMakeLists.txt rename to verification/CMakeLists.txt diff --git a/scripts/cbmc_run.sh b/verification/cbmc_run.sh similarity index 100% rename from scripts/cbmc_run.sh rename to verification/cbmc_run.sh diff --git a/scripts/fuzz.sh b/verification/fuzz.sh similarity index 100% rename from scripts/fuzz.sh rename to verification/fuzz.sh diff --git a/scripts/run_afl_parallel.sh b/verification/run_afl_parallel.sh similarity index 100% rename from scripts/run_afl_parallel.sh rename to verification/run_afl_parallel.sh From 376b9ce880918ddf42bf7c7e1b7627026374c9ba Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:38:01 +0100 Subject: [PATCH 8/9] feat(ci): Build docs in CI --- .github/workflows/ci.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0bcff78..ba3401c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,12 +34,28 @@ jobs: run: | cmake --build build --target lint + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup dependencies + uses: ./.github/actions/setup-deps + - name: Install documentation dependencies + run: | + sudo apt-get update + sudo apt-get install -y doxygen python3-venv + - name: Build documentation + run: | + cmake -B build -S . + cmake --build build --target docs + test: strategy: matrix: build_type: [Release, Debug] runs-on: ubuntu-latest - needs: [lint, fmt] + needs: [lint, fmt, docs] steps: - name: Checkout repository uses: actions/checkout@v4 From ba740aa40fc030feac873856fa2c9d42a80e64fa Mon Sep 17 00:00:00 2001 From: benmandrew Date: Mon, 6 Apr 2026 02:45:39 +0100 Subject: [PATCH 9/9] fix(docs): Doxygen warnings --- .github/workflows/ci.yml | 2 +- docs/CMakeLists.txt | 2 +- docs/Doxyfile.in | 5 ++++- docs/index.rst | 2 +- src/interp.c | 22 +++++++++++----------- src/ir.c | 36 +++++++++++++++++++++--------------- src/ir.h | 3 ++- src/llvm.c | 12 ++++++------ test/test_ir.c | 26 +++++++++++++------------- 9 files changed, 60 insertions(+), 50 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba3401c..3740869 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: - name: Install documentation dependencies run: | sudo apt-get update - sudo apt-get install -y doxygen python3-venv + sudo apt-get install -y doxygen graphviz python3-venv - name: Build documentation run: | cmake -B build -S . diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 13157b9..de35c66 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -36,7 +36,7 @@ if(DOXYGEN_FOUND AND Python3_Interpreter_FOUND) add_custom_target(docs COMMAND ${CMAKE_COMMAND} -E make_directory ${DOXYGEN_OUTPUT_DIR} COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile - COMMAND ${CMAKE_COMMAND} -E env SPHINX_DOXYGEN_XML_DIR=${DOXYGEN_OUTPUT_DIR}/xml ${DOCS_VENV_PYTHON} -m sphinx -b html ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/docs/html + COMMAND ${CMAKE_COMMAND} -E env SPHINX_DOXYGEN_XML_DIR=${DOXYGEN_OUTPUT_DIR}/xml ${DOCS_VENV_PYTHON} -m sphinx -W --keep-going -b html ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/docs/html WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMENT "Building Doxygen XML and Sphinx HTML documentation" ) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index df61dd3..39cfdfa 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -6,9 +6,12 @@ FILE_PATTERNS = *.h *.c RECURSIVE = YES GENERATE_HTML = NO GENERATE_XML = YES +GENERATE_LATEX = NO EXTRACT_ALL = YES EXTRACT_STATIC = NO OPTIMIZE_OUTPUT_FOR_C = YES QUIET = YES WARN_IF_UNDOCUMENTED = NO -STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@ \ No newline at end of file +WARN_AS_ERROR = YES +HAVE_DOT = YES +STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@ diff --git a/docs/index.rst b/docs/index.rst index 3af4e01..7c7023c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ ``bf`` Documentation -================= +==================== This documentation covers usage and API reference for the ``bf`` project. diff --git a/src/interp.c b/src/interp.c index 87930ec..40fc38b 100644 --- a/src/interp.c +++ b/src/interp.c @@ -24,7 +24,7 @@ size_t abstract_to_concrete_pc(size_t pc, struct program *p) { case CMD_SIMPLE_LEFT: case CMD_SIMPLE_OUTPUT: case CMD_SIMPLE_INPUT: - concrete_pc += p->cmds[i].simple_count; + concrete_pc += p->cmds[i].value.simple_count; break; case CMD_JUMP_FORWARD: case CMD_JUMP_BACK: @@ -129,40 +129,40 @@ int interp(struct context_t *ctx, int out_fd, int in_fd, bool byte_output) { struct cmd c = ctx->p.cmds[ctx->pc]; switch (c.type) { case CMD_SIMPLE_INC: - ctx->data[ctx->dp] += c.simple_count; + ctx->data[ctx->dp] += c.value.simple_count; break; case CMD_SIMPLE_DEC: - ctx->data[ctx->dp] -= c.simple_count; + ctx->data[ctx->dp] -= c.value.simple_count; break; case CMD_SIMPLE_RIGHT: - assert(ctx->dp < DATA_SIZE - c.simple_count); - ctx->dp += c.simple_count; + assert(ctx->dp < DATA_SIZE - c.value.simple_count); + ctx->dp += c.value.simple_count; if (ctx->dp > ctx->max_dp) { ctx->max_dp = ctx->dp; } break; case CMD_SIMPLE_LEFT: - assert(ctx->dp > c.simple_count - 1); - ctx->dp -= c.simple_count; + assert(ctx->dp > c.value.simple_count - 1); + ctx->dp -= c.value.simple_count; break; case CMD_SIMPLE_OUTPUT: - for (size_t i = 0; i < c.simple_count; i++) { + for (size_t i = 0; i < c.value.simple_count; i++) { interp_dot(ctx, out_fd, byte_output); } break; case CMD_SIMPLE_INPUT: - for (size_t i = 0; i < c.simple_count; i++) { + for (size_t i = 0; i < c.value.simple_count; i++) { interp_comma(ctx, in_fd); } break; case CMD_JUMP_FORWARD: if (ctx->data[ctx->dp] == 0) { - ctx->pc = c.jump_index; + ctx->pc = c.value.jump_index; } break; case CMD_JUMP_BACK: if (ctx->data[ctx->dp] > 0) { - ctx->pc = c.jump_index; + ctx->pc = c.value.jump_index; } break; default: diff --git a/src/ir.c b/src/ir.c index 5002c0a..e0c2833 100644 --- a/src/ir.c +++ b/src/ir.c @@ -50,7 +50,7 @@ size_t program_str_length(struct program *p) { case CMD_SIMPLE_LEFT: case CMD_SIMPLE_OUTPUT: case CMD_SIMPLE_INPUT: - length += p->cmds[i].simple_count; + length += p->cmds[i].value.simple_count; break; case CMD_JUMP_FORWARD: case CMD_JUMP_BACK: @@ -70,22 +70,28 @@ size_t n_simple_consecutive(char *s, size_t start, struct cmd *c) { char first = s[start]; switch (first) { case '+': - *c = (struct cmd){.type = CMD_SIMPLE_INC, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_INC, + .value.simple_count = 1}; break; case '-': - *c = (struct cmd){.type = CMD_SIMPLE_DEC, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_DEC, + .value.simple_count = 1}; break; case '>': - *c = (struct cmd){.type = CMD_SIMPLE_RIGHT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_RIGHT, + .value.simple_count = 1}; break; case '<': - *c = (struct cmd){.type = CMD_SIMPLE_LEFT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_LEFT, + .value.simple_count = 1}; break; case '.': - *c = (struct cmd){.type = CMD_SIMPLE_OUTPUT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_OUTPUT, + .value.simple_count = 1}; break; case ',': - *c = (struct cmd){.type = CMD_SIMPLE_INPUT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_INPUT, + .value.simple_count = 1}; break; default: fprintf(stderr, "Invalid character '%c'\n", s[i]); @@ -94,9 +100,9 @@ size_t n_simple_consecutive(char *s, size_t start, struct cmd *c) { size_t len = strlen(s); while (s[start + i + 1] == first && start + i + 1 < len) { i++; - c->simple_count++; + c->value.simple_count++; } - return c->simple_count - 1; + return c->value.simple_count - 1; } struct program string_to_program(char *s) { @@ -128,10 +134,10 @@ struct program string_to_program(char *s) { break; case ']': back_jump_frame = jump_stack_pop(&js); - cmd_arena[arena_i] = - (struct cmd){.type = CMD_JUMP_BACK, - .jump_index = back_jump_frame.index}; - back_jump_frame.c->jump_index = arena_i; + cmd_arena[arena_i] = (struct cmd){ + .type = CMD_JUMP_BACK, + .value.jump_index = back_jump_frame.index}; + back_jump_frame.c->value.jump_index = arena_i; break; default: fprintf(stderr, "Invalid character '%c'\n", s[str_i]); @@ -179,8 +185,8 @@ char *program_to_string(struct program *program) { case CMD_SIMPLE_LEFT: case CMD_SIMPLE_OUTPUT: case CMD_SIMPLE_INPUT: - for (size_t j = 0; j < program->cmds[i].simple_count; - j++) { + for (size_t j = 0; + j < program->cmds[i].value.simple_count; j++) { out[str_i++] = cmd_type_to_char(program->cmds[i].type); } diff --git a/src/ir.h b/src/ir.h index 2d637a2..c24c31c 100644 --- a/src/ir.h +++ b/src/ir.h @@ -27,12 +27,13 @@ enum cmd_type { struct cmd { /// Command opcode. enum cmd_type type; + /// Command payload data. union { /// Repeat count for simple commands. size_t simple_count; /// Matching bracket command index. size_t jump_index; - }; + } value; }; /// Parsed Brainfuck program represented as an array of commands. diff --git a/src/llvm.c b/src/llvm.c index cbc809f..01e1715 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -219,24 +219,24 @@ LLVMModuleRef generate(struct program *p) { struct cmd c = p->cmds[i]; switch (c.type) { case CMD_SIMPLE_INC: - add(&ctx, c.simple_count); + add(&ctx, c.value.simple_count); break; case CMD_SIMPLE_DEC: - sub(&ctx, c.simple_count); + sub(&ctx, c.value.simple_count); break; case CMD_SIMPLE_RIGHT: - right(&ctx, c.simple_count); + right(&ctx, c.value.simple_count); break; case CMD_SIMPLE_LEFT: - left(&ctx, c.simple_count); + left(&ctx, c.value.simple_count); break; case CMD_SIMPLE_OUTPUT: - for (size_t j = 0; j < c.simple_count; j++) { + for (size_t j = 0; j < c.value.simple_count; j++) { dot(&ctx); } break; case CMD_SIMPLE_INPUT: - for (size_t j = 0; j < c.simple_count; j++) { + for (size_t j = 0; j < c.value.simple_count; j++) { comma(&ctx); } break; diff --git a/test/test_ir.c b/test/test_ir.c index 6189fee..99e68ae 100644 --- a/test/test_ir.c +++ b/test/test_ir.c @@ -9,13 +9,13 @@ START_TEST(test_to_and_from_string_no_loops) { char *program_string = "+.>++-.<"; struct program p = string_to_program(program_string); - ck_assert_uint_eq(p.cmds[0].simple_count, 1); - ck_assert_uint_eq(p.cmds[1].simple_count, 1); - ck_assert_uint_eq(p.cmds[2].simple_count, 1); - ck_assert_uint_eq(p.cmds[3].simple_count, 2); - ck_assert_uint_eq(p.cmds[4].simple_count, 1); - ck_assert_uint_eq(p.cmds[5].simple_count, 1); - ck_assert_uint_eq(p.cmds[6].simple_count, 1); + ck_assert_uint_eq(p.cmds[0].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[1].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[2].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[3].value.simple_count, 2); + ck_assert_uint_eq(p.cmds[4].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[5].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[6].value.simple_count, 1); char *ret = program_to_string(&p); ck_assert_str_eq(ret, program_string); free(ret); @@ -32,12 +32,12 @@ START_TEST(test_to_and_from_string_with_loops) { free(p.cmds); program_string = "+++[>+++++<-]>"; p = string_to_program(program_string); - ck_assert_uint_eq(p.cmds[0].simple_count, 3); - ck_assert_uint_eq(p.cmds[2].simple_count, 1); - ck_assert_uint_eq(p.cmds[3].simple_count, 5); - ck_assert_uint_eq(p.cmds[4].simple_count, 1); - ck_assert_uint_eq(p.cmds[5].simple_count, 1); - ck_assert_uint_eq(p.cmds[7].simple_count, 1); + ck_assert_uint_eq(p.cmds[0].value.simple_count, 3); + ck_assert_uint_eq(p.cmds[2].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[3].value.simple_count, 5); + ck_assert_uint_eq(p.cmds[4].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[5].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[7].value.simple_count, 1); ret = program_to_string(&p); ck_assert_str_eq(ret, program_string); free(ret);