diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0bcff78..3740869 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,12 +34,28 @@ jobs:
         run: |
           cmake --build build --target lint
 
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Setup dependencies
+        uses: ./.github/actions/setup-deps
+      - name: Install documentation dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y doxygen graphviz python3-venv
+      - name: Build documentation
+        run: |
+          cmake -B build -S .
+          cmake --build build --target docs
+
   test:
     strategy:
       matrix:
         build_type: [Release, Debug]
     runs-on: ubuntu-latest
-    needs: [lint, fmt]
+    needs: [lint, fmt, docs]
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45e2f16..f649410 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,11 +127,13 @@ set_target_properties(bfc bf_lib
     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
 )
 
-option(DOCKER_BUILD "Docker build mode - disables tests and scripts" OFF)
+option(DOCKER_BUILD "Docker build mode - disables tests, verification and docs" OFF)
 if(NOT DOCKER_BUILD)
     enable_testing()
     add_subdirectory(test)
-    add_subdirectory(scripts)
+    add_subdirectory(verification)
+    # Defines the `docs` and `docs-setup` targets that the CI docs job builds.
+    add_subdirectory(docs)
 endif()
 
 add_custom_target(debug
@@ -260,7 +262,7 @@ if(AFL_CC)
     )
 
     add_custom_command(TARGET fuzz POST_BUILD
-        COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_afl_parallel.sh ${CMAKE_BINARY_DIR}/bfc_fuzz
+        COMMAND ${CMAKE_SOURCE_DIR}/verification/run_afl_parallel.sh ${CMAKE_BINARY_DIR}/bfc_fuzz
     )
 endif()
 
diff --git a/README.md b/README.md
index b96b256..6d138e4 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ $ docker compose up
 
 The input validation and parsing is mathematically proven correct for `bf` programs up to eight commands long using the [C Bounded Model Checker](https://github.com/diffblue/cbmc) (CBMC). Details are [here](#model-check-memory-safety).
 
-![alt](doc/screenshot.png)
+![alt](docs/screenshot.png)
 
 ## Dependencies
 
@@ -23,6 +23,17 @@ $ sudo apt-get install cmake llvm-dev check expect clang-format cpplint
 $ brew install cmake llvm check expect clang-format cpplint
 ```
 
+#### Documentation
+To build the docs locally, install Doxygen and Graphviz. The `docs` target
+creates its own virtual environment and installs the Sphinx packages listed
+in `docs/requirements.txt`, so no manual `pip install` is required:
+```bash
+# Ubuntu
+$ sudo apt-get install doxygen graphviz python3-venv
+# macOS
+$ brew install doxygen graphviz
+```
+
 ## Building
 
 ```bash
@@ -56,6 +67,14 @@ Hello, World!
 $ cmake --build build --target fmt lint
 ```
 
+### Documentation
+
+```bash
+$ cmake --build build --target docs
+```
+
+The generated HTML site is written to `build/docs/html/index.html`.
+
 ### Tests
 
 ```bash
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
new file mode 100644
index 0000000..de35c66
--- /dev/null
+++ b/docs/CMakeLists.txt
@@ -0,0 +1,60 @@
+# Documentation build: Doxygen emits XML for src/, then Sphinx (via Breathe)
+# renders the HTML site using a virtualenv private to the build tree.
+find_package(Doxygen QUIET)
+find_package(Python3 COMPONENTS Interpreter QUIET)
+
+# Interpreter inside the docs virtualenv; the layout differs on Windows.
+set(DOCS_VENV_DIR ${CMAKE_BINARY_DIR}/docs/.venv)
+if(WIN32)
+  set(DOCS_VENV_PYTHON ${DOCS_VENV_DIR}/Scripts/python.exe)
+else()
+  set(DOCS_VENV_PYTHON ${DOCS_VENV_DIR}/bin/python3)
+endif()
+
+# docs-setup: create the virtualenv and install docs/requirements.txt into it.
+if(Python3_Interpreter_FOUND)
+  add_custom_target(docs-setup
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/docs
+    COMMAND ${Python3_EXECUTABLE} -m venv ${DOCS_VENV_DIR}
+    COMMAND ${DOCS_VENV_PYTHON} -m pip install --disable-pip-version-check --quiet --upgrade pip
+    COMMAND ${DOCS_VENV_PYTHON} -m pip install --disable-pip-version-check --quiet -r ${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt
+    COMMENT "Creating docs virtualenv and installing Sphinx dependencies"
+    VERBATIM
+  )
+else()
+  add_custom_target(docs-setup
+    COMMAND ${CMAKE_COMMAND} -E echo "docs-setup requires Python 3 with venv support."
+    COMMENT "Documentation setup unavailable"
+    VERBATIM
+  )
+endif()
+
+# docs: Doxygen XML followed by Sphinx HTML; -W turns Sphinx warnings into errors.
+if(DOXYGEN_FOUND AND Python3_Interpreter_FOUND)
+  set(DOXYGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/docs/doxygen)
+  set(DOXYGEN_INPUT_DIR ${CMAKE_SOURCE_DIR}/src)
+
+  configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
+    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
+    @ONLY
+  )
+
+  add_custom_target(docs
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${DOXYGEN_OUTPUT_DIR}
+    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
+    COMMAND ${CMAKE_COMMAND} -E env SPHINX_DOXYGEN_XML_DIR=${DOXYGEN_OUTPUT_DIR}/xml ${DOCS_VENV_PYTHON} -m sphinx -W --keep-going -b html ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/docs/html
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+    COMMENT "Building Doxygen XML and Sphinx HTML documentation"
+    VERBATIM
+  )
+  add_dependencies(docs docs-setup)
+else()
+  # Placeholder so `--target docs` explains what is missing. It deliberately
+  # does not depend on docs-setup: installing Sphinx is pointless without Doxygen.
+  message(WARNING "Docs target disabled: install Doxygen, Python 3, and the Sphinx/Breathe packages to build documentation.")
+  add_custom_target(docs
+    COMMAND ${CMAKE_COMMAND} -E echo "Docs require Doxygen, Python 3, and the Sphinx/Breathe Python packages."
+    COMMENT "Documentation tooling not available"
+    VERBATIM
+  )
+endif()
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
new file mode 100644
index 0000000..39cfdfa
--- /dev/null
+++ b/docs/Doxyfile.in
@@ -0,0 +1,17 @@
+PROJECT_NAME = "bf"
+PROJECT_BRIEF = "Brainf*ck to LLVM IR compiler frontend"
+OUTPUT_DIRECTORY = @DOXYGEN_OUTPUT_DIR@
+INPUT = @DOXYGEN_INPUT_DIR@
+FILE_PATTERNS = *.h *.c
+RECURSIVE = YES
+GENERATE_HTML = NO
+GENERATE_XML = YES
+GENERATE_LATEX = NO
+EXTRACT_ALL = YES
+EXTRACT_STATIC = NO
+OPTIMIZE_OUTPUT_FOR_C = YES
+QUIET = YES
+WARN_IF_UNDOCUMENTED = NO
+WARN_AS_ERROR = YES
+HAVE_DOT = YES
+STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@
diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 0000000..6059827
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,11 @@
+API Reference
+=============
+
+.. toctree::
+   :maxdepth: 1
+
+   api_common
+   api_ir
+   api_read
+   api_interp
+   api_llvm
diff --git a/docs/api_common.rst b/docs/api_common.rst
new file mode 100644
index 0000000..5154c0c
--- /dev/null
+++ b/docs/api_common.rst
@@ -0,0 +1,4 @@
+common.h
+========
+
+.. doxygenfile:: common.h
diff --git a/docs/api_interp.rst b/docs/api_interp.rst
new file mode 100644
index 0000000..918b2d1
--- /dev/null
+++ b/docs/api_interp.rst
@@ -0,0 +1,4 @@
+interp.h
+========
+
+.. doxygenfile:: interp.h
diff --git a/docs/api_ir.rst b/docs/api_ir.rst
new file mode 100644
index 0000000..8edcbbd
--- /dev/null
+++ b/docs/api_ir.rst
@@ -0,0 +1,4 @@
+ir.h
+====
+
+.. doxygenfile:: ir.h
diff --git a/docs/api_llvm.rst b/docs/api_llvm.rst
new file mode 100644
index 0000000..99f7a4b
--- /dev/null
+++ b/docs/api_llvm.rst
@@ -0,0 +1,4 @@
+llvm.h
+======
+
+.. doxygenfile:: llvm.h
diff --git a/docs/api_read.rst b/docs/api_read.rst
new file mode 100644
index 0000000..3819317
--- /dev/null
+++ b/docs/api_read.rst
@@ -0,0 +1,4 @@
+read.h
+======
+
+.. doxygenfile:: read.h
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..26d3046
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,23 @@
+from pathlib import Path
+import os
+
+project = "bf"
+author = "Ben M. Andrew"
+copyright = f"2026, {author}"
+language = "en_GB"
+# Sphinx HTML search uses the English stemmer code `en` for all English variants.
+html_search_language = "en"
+extensions = ["breathe"]
+templates_path = ["_templates"]
+exclude_patterns = []
+html_theme = "furo"
+html_static_path = ["_static"]
+html_title = "bf Documentation"
+
+# Fallback Doxygen XML location for running Sphinx by hand; the CMake `docs`
+# target overrides it through the SPHINX_DOXYGEN_XML_DIR environment variable.
+default_xml_dir = Path(__file__).resolve().parent.parent / "build" / "docs" / "doxygen" / "xml"
+breathe_projects = {
+    "bf": os.environ.get("SPHINX_DOXYGEN_XML_DIR", str(default_xml_dir))
+}
+breathe_default_project = "bf"
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..7c7023c
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,14 @@
+``bf`` Documentation
+====================
+
+This documentation covers usage and API reference for the ``bf`` project.
+
+- Repository: https://github.com/benmandrew/bf.
+- Live instance: https://benmandrew.com/articles/compiler-frontend.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents
+
+   usage
+   api
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..f201aed
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+sphinx>=7.0
+breathe>=4.35
+furo>=2024.8.6
diff --git a/doc/screenshot.png b/docs/screenshot.png
similarity index 100%
rename from doc/screenshot.png
rename to docs/screenshot.png
diff --git a/docs/usage.rst b/docs/usage.rst
new file mode 100644
index 0000000..f515097
--- /dev/null
+++ b/docs/usage.rst
@@ -0,0 +1,61 @@
+Usage
+=====
+
+Live Instance
+-------------
+
+You can try the project online at:
+
+https://benmandrew.com/articles/compiler-frontend
+
+Run Locally with Docker Compose
+--------------------------------
+
+To run the web interface locally:
+
+.. code-block:: bash
+
+   $ docker compose up
+
+Then open:
+
+http://localhost:8080
+
+Build the CLI Tools
+-------------------
+
+Build the project with CMake:
+
+.. code-block:: bash
+
+   $ cmake -B build
+   $ cmake --build build
+
+After building, both executables are available in the ``build`` directory:
+
+- ``bfc``: Brainfuck frontend that emits LLVM IR
+- ``bfi``: Brainfuck interpreter
+
+Use ``bfc`` (compile to LLVM IR)
+--------------------------------
+
+Generate LLVM IR from a Brainfuck program and compile it with ``clang``:
+
+.. code-block:: bash
+
+   # Generate LLVM IR
+   $ ./build/bfc test/res/helloworld.b > main.ll
+   # Compile IR to binary
+   $ clang main.ll -o main
+   $ ./main
+   Hello, World!
+
+Use ``bfi`` (interpret directly)
+--------------------------------
+
+Run a Brainfuck program directly with the interpreter:
+
+.. code-block:: bash
+
+   $ ./build/bfi test/res/helloworld.b
+   Hello, World!
diff --git a/src/common.h b/src/common.h
index 75d5800..4db1c76 100644
--- a/src/common.h
+++ b/src/common.h
@@ -1,6 +1,7 @@
 #ifndef COMMON_H
 #define COMMON_H
 
+/// Number of cells in the Brainfuck data tape.
#define DATA_SIZE (65536) #endif diff --git a/src/interp.c b/src/interp.c index 87930ec..40fc38b 100644 --- a/src/interp.c +++ b/src/interp.c @@ -24,7 +24,7 @@ size_t abstract_to_concrete_pc(size_t pc, struct program *p) { case CMD_SIMPLE_LEFT: case CMD_SIMPLE_OUTPUT: case CMD_SIMPLE_INPUT: - concrete_pc += p->cmds[i].simple_count; + concrete_pc += p->cmds[i].value.simple_count; break; case CMD_JUMP_FORWARD: case CMD_JUMP_BACK: @@ -129,40 +129,40 @@ int interp(struct context_t *ctx, int out_fd, int in_fd, bool byte_output) { struct cmd c = ctx->p.cmds[ctx->pc]; switch (c.type) { case CMD_SIMPLE_INC: - ctx->data[ctx->dp] += c.simple_count; + ctx->data[ctx->dp] += c.value.simple_count; break; case CMD_SIMPLE_DEC: - ctx->data[ctx->dp] -= c.simple_count; + ctx->data[ctx->dp] -= c.value.simple_count; break; case CMD_SIMPLE_RIGHT: - assert(ctx->dp < DATA_SIZE - c.simple_count); - ctx->dp += c.simple_count; + assert(ctx->dp < DATA_SIZE - c.value.simple_count); + ctx->dp += c.value.simple_count; if (ctx->dp > ctx->max_dp) { ctx->max_dp = ctx->dp; } break; case CMD_SIMPLE_LEFT: - assert(ctx->dp > c.simple_count - 1); - ctx->dp -= c.simple_count; + assert(ctx->dp > c.value.simple_count - 1); + ctx->dp -= c.value.simple_count; break; case CMD_SIMPLE_OUTPUT: - for (size_t i = 0; i < c.simple_count; i++) { + for (size_t i = 0; i < c.value.simple_count; i++) { interp_dot(ctx, out_fd, byte_output); } break; case CMD_SIMPLE_INPUT: - for (size_t i = 0; i < c.simple_count; i++) { + for (size_t i = 0; i < c.value.simple_count; i++) { interp_comma(ctx, in_fd); } break; case CMD_JUMP_FORWARD: if (ctx->data[ctx->dp] == 0) { - ctx->pc = c.jump_index; + ctx->pc = c.value.jump_index; } break; case CMD_JUMP_BACK: if (ctx->data[ctx->dp] > 0) { - ctx->pc = c.jump_index; + ctx->pc = c.value.jump_index; } break; default: diff --git a/src/interp.h b/src/interp.h index dd79317..7c8db08 100644 --- a/src/interp.h +++ b/src/interp.h @@ -7,18 +7,34 @@ #include "common.h" #include "ir.h" +/// Mutable 
execution state for the Brainfuck interpreter. struct context_t { + /// Current command index in `p.cmds`. size_t pc; + /// Parsed program being executed. struct program p; + /// Current data pointer position. size_t dp; + /// Interpreter data tape. unsigned char data[DATA_SIZE]; - // Keep track of the largest data pointer seen so far, - // for pretty-printing the context + /// Largest data pointer reached for pretty-printing. size_t max_dp; }; +/// Initialize an interpreter context with the program loaded at pc zero. +/// @param p Parsed Brainfuck program to execute. +/// @return Initialized interpreter context with zeroed tape. struct context_t init_context(struct program p); +/// Execute the command at the current program counter and advance execution. +/// @param ctx Interpreter context. +/// @param out_fd File descriptor used for output. +/// @param in_fd File descriptor used for input. +/// @param byte_output If true, emit numeric byte values. +/// @return 1 when program execution has completed; otherwise 0. int interp(struct context_t *ctx, int, int, bool); +/// Render the current interpreter state as a human-readable trace string. +/// @param ctx Interpreter context to render. +/// @return Heap-allocated formatted string; caller must free it. 
char *context_to_string(struct context_t *ctx); #endif diff --git a/src/ir.c b/src/ir.c index 5002c0a..e0c2833 100644 --- a/src/ir.c +++ b/src/ir.c @@ -50,7 +50,7 @@ size_t program_str_length(struct program *p) { case CMD_SIMPLE_LEFT: case CMD_SIMPLE_OUTPUT: case CMD_SIMPLE_INPUT: - length += p->cmds[i].simple_count; + length += p->cmds[i].value.simple_count; break; case CMD_JUMP_FORWARD: case CMD_JUMP_BACK: @@ -70,22 +70,28 @@ size_t n_simple_consecutive(char *s, size_t start, struct cmd *c) { char first = s[start]; switch (first) { case '+': - *c = (struct cmd){.type = CMD_SIMPLE_INC, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_INC, + .value.simple_count = 1}; break; case '-': - *c = (struct cmd){.type = CMD_SIMPLE_DEC, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_DEC, + .value.simple_count = 1}; break; case '>': - *c = (struct cmd){.type = CMD_SIMPLE_RIGHT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_RIGHT, + .value.simple_count = 1}; break; case '<': - *c = (struct cmd){.type = CMD_SIMPLE_LEFT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_LEFT, + .value.simple_count = 1}; break; case '.': - *c = (struct cmd){.type = CMD_SIMPLE_OUTPUT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_OUTPUT, + .value.simple_count = 1}; break; case ',': - *c = (struct cmd){.type = CMD_SIMPLE_INPUT, .simple_count = 1}; + *c = (struct cmd){.type = CMD_SIMPLE_INPUT, + .value.simple_count = 1}; break; default: fprintf(stderr, "Invalid character '%c'\n", s[i]); @@ -94,9 +100,9 @@ size_t n_simple_consecutive(char *s, size_t start, struct cmd *c) { size_t len = strlen(s); while (s[start + i + 1] == first && start + i + 1 < len) { i++; - c->simple_count++; + c->value.simple_count++; } - return c->simple_count - 1; + return c->value.simple_count - 1; } struct program string_to_program(char *s) { @@ -128,10 +134,10 @@ struct program string_to_program(char *s) { break; case ']': back_jump_frame = jump_stack_pop(&js); - 
cmd_arena[arena_i] = - (struct cmd){.type = CMD_JUMP_BACK, - .jump_index = back_jump_frame.index}; - back_jump_frame.c->jump_index = arena_i; + cmd_arena[arena_i] = (struct cmd){ + .type = CMD_JUMP_BACK, + .value.jump_index = back_jump_frame.index}; + back_jump_frame.c->value.jump_index = arena_i; break; default: fprintf(stderr, "Invalid character '%c'\n", s[str_i]); @@ -179,8 +185,8 @@ char *program_to_string(struct program *program) { case CMD_SIMPLE_LEFT: case CMD_SIMPLE_OUTPUT: case CMD_SIMPLE_INPUT: - for (size_t j = 0; j < program->cmds[i].simple_count; - j++) { + for (size_t j = 0; + j < program->cmds[i].value.simple_count; j++) { out[str_i++] = cmd_type_to_char(program->cmds[i].type); } diff --git a/src/ir.h b/src/ir.h index bd25e30..c24c31c 100644 --- a/src/ir.h +++ b/src/ir.h @@ -3,39 +3,79 @@ #include +/// Brainfuck command categories used by the internal IR. enum cmd_type { - CMD_SIMPLE_INC, // '+' - CMD_SIMPLE_DEC, // '-' - CMD_SIMPLE_RIGHT, // '>' - CMD_SIMPLE_LEFT, // '<' - CMD_SIMPLE_OUTPUT, // '.' - CMD_SIMPLE_INPUT, // ',' - CMD_JUMP_FORWARD, // '[' - CMD_JUMP_BACK, // ']' + /// `'+'`: increment current cell value. + CMD_SIMPLE_INC, + /// `'-'`: decrement current cell value. + CMD_SIMPLE_DEC, + /// `'>'`: move data pointer right. + CMD_SIMPLE_RIGHT, + /// `'<'`: move data pointer left. + CMD_SIMPLE_LEFT, + /// `'.'`: write current cell as output. + CMD_SIMPLE_OUTPUT, + /// `','`: read input into current cell. + CMD_SIMPLE_INPUT, + /// `'['`: jump forward if current cell is zero. + CMD_JUMP_FORWARD, + /// `']'`: jump back if current cell is non-zero. + CMD_JUMP_BACK, }; +/// One compressed instruction in the internal Brainfuck IR. struct cmd { + /// Command opcode. enum cmd_type type; + /// Command payload data. union { + /// Repeat count for simple commands. size_t simple_count; + /// Matching bracket command index. size_t jump_index; - }; + } value; }; +/// Parsed Brainfuck program represented as an array of commands. 
struct program { + /// Heap-allocated command array. struct cmd *cmds; + /// Number of entries in `cmds`. size_t length; }; +/// Compute the length of the flattened Brainfuck source string. +/// @param p Parsed program. +/// @return Length of expanded Brainfuck source string. size_t program_str_length(struct program *p); +/// Parse a cleaned Brainfuck string into the internal program form. +/// @param s Cleaned Brainfuck source string. +/// @return Parsed program with heap-allocated command array. struct program string_to_program(char *s); +/// Release a program's heap-allocated command buffer. +/// @param p Program whose command array should be released. void free_program(struct program *p); +/// Map a command type back to its Brainfuck character. +/// @param t Command type. +/// @return Corresponding Brainfuck symbol. char cmd_type_to_char(enum cmd_type t); +/// Expand a compressed program back into a Brainfuck source string. +/// @param program Parsed program. +/// @return Heap-allocated source string; caller must free it. char *program_to_string(struct program *program); +/// Return whether a program contains any output commands. +/// @param p Parsed program. +/// @return 1 if output exists; otherwise 0. char program_contains_output(struct program *p); +/// Return whether a program contains any input commands. +/// @param p Parsed program. +/// @return 1 if input exists; otherwise 0. char program_contains_input(struct program *p); +/// Validate that a source string contains only balanced Brainfuck commands. +/// @param s Source string to validate. +/// @return 1 if valid; otherwise 0. 
char program_is_valid(char *s); #endif diff --git a/src/llvm.c b/src/llvm.c index cbc809f..01e1715 100644 --- a/src/llvm.c +++ b/src/llvm.c @@ -219,24 +219,24 @@ LLVMModuleRef generate(struct program *p) { struct cmd c = p->cmds[i]; switch (c.type) { case CMD_SIMPLE_INC: - add(&ctx, c.simple_count); + add(&ctx, c.value.simple_count); break; case CMD_SIMPLE_DEC: - sub(&ctx, c.simple_count); + sub(&ctx, c.value.simple_count); break; case CMD_SIMPLE_RIGHT: - right(&ctx, c.simple_count); + right(&ctx, c.value.simple_count); break; case CMD_SIMPLE_LEFT: - left(&ctx, c.simple_count); + left(&ctx, c.value.simple_count); break; case CMD_SIMPLE_OUTPUT: - for (size_t j = 0; j < c.simple_count; j++) { + for (size_t j = 0; j < c.value.simple_count; j++) { dot(&ctx); } break; case CMD_SIMPLE_INPUT: - for (size_t j = 0; j < c.simple_count; j++) { + for (size_t j = 0; j < c.value.simple_count; j++) { comma(&ctx); } break; diff --git a/src/llvm.h b/src/llvm.h index a205b6e..f58498c 100644 --- a/src/llvm.h +++ b/src/llvm.h @@ -5,9 +5,12 @@ #include "ir.h" -/* Generate LLVM IR for a parsed Brainf*ck program. */ +/// Generate LLVM IR for a parsed Brainfuck program. +/// @param p Parsed Brainfuck program. +/// @return Generated LLVM module. LLVMModuleRef generate(struct program *p); -/* Release an LLVM module created by generate(). */ +/// Release an LLVM module created by generate(). +/// @param module LLVM module created by `generate`. void dispose_module(LLVMModuleRef module); #endif diff --git a/src/main_bfi.c b/src/main_bfi.c index f13d35d..5f6a422 100644 --- a/src/main_bfi.c +++ b/src/main_bfi.c @@ -7,7 +7,6 @@ #include "interp.h" #include "ir.h" -#include "llvm.h" #include "read.h" void print_usage(const char *program_name) { diff --git a/src/read.c b/src/read.c index 6b2c937..e54ae68 100644 --- a/src/read.c +++ b/src/read.c @@ -9,7 +9,6 @@ ((c) == '+' || (c) == '-' || (c) == '>' || (c) == '<' || (c) == '.' 
|| \ (c) == ',' || (c) == '[' || (c) == ']') -// Clean whitespace and other extraneous characters from a BF program void clean_whitespace(char *s) { int64_t j = 0, i = 0; while (s[i] != '\0') { diff --git a/src/read.h b/src/read.h index 44b5ea4..dab02d6 100644 --- a/src/read.h +++ b/src/read.h @@ -3,21 +3,46 @@ #include +/// Validation error payload returned by file/parse helpers. struct Error { + /// Human-readable error message. char *message; }; -struct ReadReturn { - enum { OK, ERROR } type; +union ProgramOrError { + /// Normalized source on success. + char *program_str; + /// Error payload on failure. + struct Error error; +}; - union { - char *program_str; - struct Error error; - } value; +/// Discriminator values for ReadReturn. +enum ReadResultType { + /// Read and validation succeeded. + OK, + /// Read and validation failed. + ERROR, +}; + +/// Tagged return type for reading and validating source input. +struct ReadReturn { + /// Result discriminator. + enum ReadResultType type; + /// Result payload. + union ProgramOrError value; }; -void clean_whitespace(char *); -struct ReadReturn read_file(char *); -struct ReadReturn validate(char *, size_t len); +/// Remove non-Brainfuck characters from a mutable source buffer. +/// @param s Null-terminated source buffer to clean in place. +void clean_whitespace(char *s); +/// Read a file, validate it, and return normalized source. +/// @param fname Path to the source file. +/// @return Tagged result containing normalized source or an error. +struct ReadReturn read_file(char *fname); +/// Validate and normalize a raw program buffer before parsing it. +/// @param program Mutable source buffer. +/// @param len Number of bytes to validate from `program`. +/// @return Tagged result containing normalized source or an error. 
+struct ReadReturn validate(char *program, size_t len); #endif diff --git a/test/test_ir.c b/test/test_ir.c index 6189fee..99e68ae 100644 --- a/test/test_ir.c +++ b/test/test_ir.c @@ -9,13 +9,13 @@ START_TEST(test_to_and_from_string_no_loops) { char *program_string = "+.>++-.<"; struct program p = string_to_program(program_string); - ck_assert_uint_eq(p.cmds[0].simple_count, 1); - ck_assert_uint_eq(p.cmds[1].simple_count, 1); - ck_assert_uint_eq(p.cmds[2].simple_count, 1); - ck_assert_uint_eq(p.cmds[3].simple_count, 2); - ck_assert_uint_eq(p.cmds[4].simple_count, 1); - ck_assert_uint_eq(p.cmds[5].simple_count, 1); - ck_assert_uint_eq(p.cmds[6].simple_count, 1); + ck_assert_uint_eq(p.cmds[0].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[1].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[2].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[3].value.simple_count, 2); + ck_assert_uint_eq(p.cmds[4].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[5].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[6].value.simple_count, 1); char *ret = program_to_string(&p); ck_assert_str_eq(ret, program_string); free(ret); @@ -32,12 +32,12 @@ START_TEST(test_to_and_from_string_with_loops) { free(p.cmds); program_string = "+++[>+++++<-]>"; p = string_to_program(program_string); - ck_assert_uint_eq(p.cmds[0].simple_count, 3); - ck_assert_uint_eq(p.cmds[2].simple_count, 1); - ck_assert_uint_eq(p.cmds[3].simple_count, 5); - ck_assert_uint_eq(p.cmds[4].simple_count, 1); - ck_assert_uint_eq(p.cmds[5].simple_count, 1); - ck_assert_uint_eq(p.cmds[7].simple_count, 1); + ck_assert_uint_eq(p.cmds[0].value.simple_count, 3); + ck_assert_uint_eq(p.cmds[2].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[3].value.simple_count, 5); + ck_assert_uint_eq(p.cmds[4].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[5].value.simple_count, 1); + ck_assert_uint_eq(p.cmds[7].value.simple_count, 1); ret = program_to_string(&p); ck_assert_str_eq(ret, program_string); free(ret); diff --git 
a/scripts/CMakeLists.txt b/verification/CMakeLists.txt similarity index 100% rename from scripts/CMakeLists.txt rename to verification/CMakeLists.txt diff --git a/scripts/cbmc_run.sh b/verification/cbmc_run.sh similarity index 100% rename from scripts/cbmc_run.sh rename to verification/cbmc_run.sh diff --git a/scripts/fuzz.sh b/verification/fuzz.sh similarity index 100% rename from scripts/fuzz.sh rename to verification/fuzz.sh diff --git a/scripts/run_afl_parallel.sh b/verification/run_afl_parallel.sh similarity index 100% rename from scripts/run_afl_parallel.sh rename to verification/run_afl_parallel.sh