From dea1e6aa86fd3b91c7edb7570a26fd86a8d23463 Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Mon, 22 Jun 2026 09:52:27 +0200 Subject: [PATCH 1/5] Documentation --- include/IOWrapper.hpp | 22 +++- include/OsPath.hpp | 68 ++++++++++- include/PyCapioException.hpp | 15 ++- include/ScandirIteratorWrapper.hpp | 39 ++++++- include/libcapio.hpp | 100 +++++++++++++++- pycapio/__init__.py | 180 ++++++++++++++++++++++++++++- pycapio/__main__.py | 41 +++++++ pycapio/internals/__init__.py | 40 +++++++ 8 files changed, 491 insertions(+), 14 deletions(-) diff --git a/include/IOWrapper.hpp b/include/IOWrapper.hpp index 6f982a0..0b761fa 100644 --- a/include/IOWrapper.hpp +++ b/include/IOWrapper.hpp @@ -7,8 +7,26 @@ #define LIBCAPIO_IOWRAPPER_HPP #include -enum class IOMode { Text, Binary }; - +/** + * @brief Stream interpretation used by @ref IOWrapper. + */ +enum class IOMode { Text /**< Decode reads/writes as @c str. */, + Binary /**< Treat reads/writes as raw @c bytes. */ }; + +/** + * @brief File-like wrapper around a CAPIO stream. + * + * Implements the Python file-object protocol (@c read, @c readline, + * @c readlines, @c write, @c writelines, @c seek, @c flush, @c close, + * iteration and context-manager support) on top of a CAPIO file descriptor. + * + * The @ref IOMode template parameter selects the element type: @c Text wrappers + * return @c std::string (Python @c str), while @c Binary wrappers return + * @c pybind11::bytes. The two instantiations are exposed to Python as + * @c PyCapioTextIOWrapper and @c PyCapioBinaryIOWrapper respectively. + * + * @tparam Mode Stream interpretation, see @ref IOMode. + */ template class IOWrapper { const int _file_descriptor = -1; diff --git a/include/OsPath.hpp b/include/OsPath.hpp index 346e227..9a7a641 100644 --- a/include/OsPath.hpp +++ b/include/OsPath.hpp @@ -4,30 +4,88 @@ #include #include +/** + * @brief CAPIO-aware replacement for Python's @c os.path module. 
+ * + * Each static method mirrors the semantics of the corresponding + * @c os.path function. Methods in the *libcapio* group answer their query + * through the CAPIO server (so they work on files that live only inside the + * CAPIO directory); the remaining methods do not go through CAPIO and + * match the behaviour of the standard library. + * + * On the Python side this class is exposed as @c PyCapioPath and installed in + * place of @c os.path while a @c CapioContext is active. + */ class OsPath { public: /** - * LIBCAPIO METHODS - **/ + * @name CAPIO-backed queries + * These consult the CAPIO server and therefore see files managed by CAPIO. + * @{ + */ + + /// @brief Return @c true if @p path exists. + /// @param path Path to test. static bool exists(const std::string &path); + + /// @brief Return @c true if @p path exists and is a regular file. + /// @param path Path to test. static bool isfile(const std::string &path); + + /// @brief Return @c true if @p path exists and is a directory. + /// @param path Path to test. static bool isdir(const std::string &path); + + /// @brief Return the size of @p path in bytes. + /// @param path File whose size is requested. static uintmax_t getsize(const std::string &path); + /** @} */ /** - * NON LIBCAPIO METHODS - **/ + * @name Pure path manipulation + * Non-CAPIO helpers, mostly string-only (getmtime, samefile, realpath query the OS). + * @{ + */ + + /// @brief Return the final component of @p path. static std::string basename(const std::string &path); + + /// @brief Return everything in @p path except the final component. static std::string dirname(const std::string &path); + + /// @brief Return the absolute, normalised form of @p path. static std::string abspath(const std::string &path); + + /// @brief Split @p path into a @c (root, extension) pair. static std::pair splitext(const std::string &path); + + /// @brief Return @c true if @p p is an absolute path.
static bool isabs(const std::string &p); + + /// @brief Collapse redundant separators and up-level references in @p p. static std::string normpath(const std::string &p); + + /// @brief Return @p path relative to @p start. + /// @param path Target path. + /// @param start Base directory the result is computed against (default "."). static std::string relpath(const std::string &path, const std::string &start = "."); + + /// @brief Return the last-modification time of @p path as a POSIX timestamp. static double getmtime(const std::string &path); + + /// @brief Return @c true if @p p1 and @p p2 refer to the same file. static bool samefile(const std::string &p1, const std::string &p2); + + /// @brief Split @p p into a @c (head, tail) pair at the last separator. static std::pair split(const std::string &p); + + /// @brief Normalise the case of @p p (platform dependent). static std::string normcase(const std::string &p); + + /// @brief Resolve symbolic links in @p path to a canonical path. static std::string realpath(const std::string &path); + + /// @brief Join @p path1 and @p path2 with the path separator. static std::string join(const std::string &path1, const std::string &path2); -}; \ No newline at end of file + /** @} */ +}; diff --git a/include/PyCapioException.hpp b/include/PyCapioException.hpp index 60083b8..999cb4a 100644 --- a/include/PyCapioException.hpp +++ b/include/PyCapioException.hpp @@ -4,15 +4,26 @@ #include #include +/** + * @brief Exception type raised by the PyCAPIO native layer. + * + * Carries an error message and, when constructed, captures the current Python + * call stack to make failures that originate inside intercepted I/O easier to + * diagnose. Surfaced to Python as @c PyCAPIOException. + */ class PyCapioException : public std::exception { std::string _message; + /// @brief Capture a textual snapshot of the active Python call stack. 
static std::string capture_python_stack(); - public: +public: + /// @brief Construct with a human-readable error message. + /// @param error_message Description of what went wrong. explicit PyCapioException(const std::string &error_message); + /// @brief Return the stored error message. [[nodiscard]] const char *what() const noexcept override; }; -#endif // PYCAPIO_EXCEPTION_HPP \ No newline at end of file +#endif // PYCAPIO_EXCEPTION_HPP diff --git a/include/ScandirIteratorWrapper.hpp b/include/ScandirIteratorWrapper.hpp index c22ccbf..aa75bc9 100644 --- a/include/ScandirIteratorWrapper.hpp +++ b/include/ScandirIteratorWrapper.hpp @@ -6,6 +6,12 @@ #include #include +/** + * @brief A single entry produced while scanning a CAPIO directory. + * + * Mirrors the relevant parts of Python's @c os.DirEntry. Exposed to Python as + * @c DirEntry and yielded by @c PyCapioScandirWrapper. + */ class CapioDirEntry { std::filesystem::path base_path; std::string name_; @@ -13,28 +19,59 @@ class CapioDirEntry { unsigned char type_; public: + /// @brief Construct from a directory base path and a raw @c dirent64 record. + /// @param base Directory that contains the entry. + /// @param ent Raw directory record returned by CAPIO. CapioDirEntry(const std::filesystem::path &base, const dirent64 &ent); + /// @brief Entry name (final path component). [[nodiscard]] std::string name() const; + + /// @brief Full path of the entry (base path joined with the name). [[nodiscard]] std::string path() const; + + /// @brief Inode number of the entry. [[nodiscard]] uint64_t inode() const; + + /// @brief Return @c true if the entry is a directory. + /// @param follow_symlinks Follow symbolic links when resolving the type. [[nodiscard]] bool is_dir(bool follow_symlinks = true) const; + + /// @brief Return @c true if the entry is a regular file. + /// @param follow_symlinks Follow symbolic links when resolving the type. 
[[nodiscard]] bool is_file(bool follow_symlinks = true) const; + + /// @brief Return @c true if the entry is a symbolic link. [[nodiscard]] bool is_symlink() const; }; +/** + * @brief Iterator over the contents of a CAPIO directory. + * + * Implements the Python iterator and context-manager protocols, yielding + * @ref CapioDirEntry objects. Exposed to Python as @c PyCapioScandirWrapper and + * used by the @c scandir / @c listdir proxies. + */ class ScandirIteratorWrapper { std::filesystem::path path; int file_descriptor = -1; bool finished = false; public: + /// @brief Open @p path for scanning. explicit ScandirIteratorWrapper(const std::filesystem::path &path); + + /// @brief Close the underlying directory descriptor. ~ScandirIteratorWrapper(); + /// @brief Return the next entry, or raise @c StopIteration when exhausted. CapioDirEntry next(); + + /// @brief Return @c *this so the object is its own iterator. ScandirIteratorWrapper &iter(); + + /// @brief Release the directory descriptor early. void close(); }; -#endif // LIBCAPIO_SCANDIR_ITERATOR_WRAPPER_HPP \ No newline at end of file +#endif // LIBCAPIO_SCANDIR_ITERATOR_WRAPPER_HPP diff --git a/include/libcapio.hpp b/include/libcapio.hpp index 572b00a..469166a 100644 --- a/include/libcapio.hpp +++ b/include/libcapio.hpp @@ -5,36 +5,132 @@ #include "common/constants.hpp" +/** + * @brief RAII guard that serialises CAPIO server startup across processes. + * + * On construction it acquires a per-workflow lock so that only one process + * bootstraps the CAPIO server for a given workflow; the lock is released on + * destruction. Evaluating the object in a boolean context reports whether this + * process is the one responsible for starting the server. + */ class StartupSemaphore final { - int fp; - const std::string lock_to_check; + int fp; ///< File descriptor backing the lock. + const std::string lock_to_check; ///< Path of the workflow lock file. public: + /// @brief Acquire the startup lock for @p workflow_name. 
+ /// @param workflow_name Workflow whose startup is being guarded. explicit StartupSemaphore(const std::string &workflow_name); + + /// @brief Release the startup lock. ~StartupSemaphore(); + + /// @brief Report whether this process should start the CAPIO server. + /// @return @c true if this process holds the right to bootstrap the server. explicit operator bool() const; }; +/** + * @brief Start the CAPIO server process for a workflow and wait until it is ready. + * @param CAPIO_DIR Root directory managed by CAPIO. + * @param CAPIO_WORKFLOW_NAME Logical workflow name reported to CAPIO. + * @param capio_server_exec_path Path or name of the CAPIO server executable. + * @param capio_cl_config Optional path to a CAPIO-CL configuration file. + * @param await_server_timeout_seconds Seconds to wait for the server to be ready. + * @return Status code (0 on success). + */ int bootstrap_capio_server(const std::filesystem::path &CAPIO_DIR, const std::string &CAPIO_WORKFLOW_NAME, const std::string &capio_server_exec_path, const std::string &capio_cl_config, int await_server_timeout_seconds); +/** + * @brief Initialise CAPIO and start or attach to the CAPIO server. + * @param CAPIO_DIR Root directory managed by CAPIO. + * @param CAPIO_APP_NAME Logical application name reported to CAPIO. + * @param CAPIO_WORKFLOW_NAME Logical workflow name reported to CAPIO. + * @param capio_server_exec_path Path or name of the CAPIO server executable. + * @param capio_cl_config Optional path to a CAPIO-CL configuration file. + * @param await_server_timeout_seconds Seconds to wait for the server to be ready. + * @return Status code (0 on success). 
+ */ int libcapio_init(const std::filesystem::path &CAPIO_DIR = ".", const std::string &CAPIO_APP_NAME = CAPIO_DEFAULT_APP_NAME, const std::string &CAPIO_WORKFLOW_NAME = CAPIO_DEFAULT_WORKFLOW_NAME, const std::string &capio_server_exec_path = "capio_server", const std::string &capio_cl_config = "", int await_server_timeout_seconds = 2); +/** + * @brief Tear down CAPIO, optionally stopping the server process. + * @param teardown_server When @c true, also shut down the CAPIO server. + */ void libcapio_teardown(bool teardown_server = false); +/** + * @brief Open a CAPIO-managed file. + * @param path File path inside the CAPIO directory. + * @param flags POSIX open flags (see @c FILE_MODES on the Python side). + * @param mode Permission bits applied when the file is created. + * @return A file descriptor on success. + */ int libcapio_open(const char *path, int flags, mode_t mode = 0); + +/** + * @brief Read up to @p size bytes from a CAPIO file descriptor. + * @param fd Open CAPIO file descriptor. + * @param buf Destination buffer of at least @p size bytes. + * @param size Maximum number of bytes to read. + * @return The number of bytes read, @c 0 at EOF, or a negative value on error. + */ long libcapio_read(int fd, char *buf, size_t size); + +/** + * @brief Write @p size bytes to a CAPIO file descriptor. + * @param fd Open CAPIO file descriptor. + * @param buf Source buffer holding the data to write. + * @param size Number of bytes to write. + * @return The number of bytes written, or a negative value on error. + */ long libcapio_write(int fd, const char *buf, size_t size); + +/** + * @brief Close a CAPIO file descriptor. + * @param fd File descriptor to close. + * @return @c 0 on success or a negative value on error. + */ long libcapio_close(int fd); + +/** + * @brief Read the next directory entry from a CAPIO directory descriptor. + * @param fd Open CAPIO directory descriptor. + * @param entry Output parameter that receives the next entry. 
+ * @return A positive value when an entry was read, @c 0 at the end of the + * directory, or a negative value on error. + */ long libcapio_readdir(int fd, dirent64 *entry); + +/** + * @brief Create a directory inside the CAPIO directory. + * @param path Directory path to create. + * @param mode Permission bits for the new directory. + */ long libcapio_mkdir(const char *path, int mode); + +/** + * @brief Reposition the offset of a CAPIO file descriptor. + * @param fd Open CAPIO file descriptor. + * @param offset Byte offset relative to @p whence. + * @param whence Reference point (@c SEEK_SET, @c SEEK_CUR or @c SEEK_END). + * @return The resulting absolute offset, or a negative value on error. + */ long libcapio_lseek(int fd, long offset, int whence); + +/** + * @brief Retrieve metadata for a CAPIO-managed path. + * @param path Path to stat. + * @param statbuf Output parameter that receives the file metadata. + * @return @c 0 on success or a negative value on error. + */ long libcapio_stat(const char *path, struct stat *statbuf); #endif // PYCAPIO_LIBCAPIO_HPP diff --git a/pycapio/__init__.py b/pycapio/__init__.py index df0ff1a..9bbb88d 100644 --- a/pycapio/__init__.py +++ b/pycapio/__init__.py @@ -1,3 +1,36 @@ +"""Top-level package for PyCAPIO. + +PyCAPIO brings transparent data streaming to file-based Python workflows by +*monkey patching* Python's built-in I/O entry points (:func:`open`, +:func:`os.mkdir`, :func:`os.scandir`, ...) and routing any access that targets +the configured CAPIO directory through the native CAPIO server instead of the +operating system. + +The public surface of this module is intentionally small: + +* :func:`CapioContext` -- a decorator that initialises CAPIO and patches the + built-in I/O functions for the duration of the decorated call. +* the ``*_proxy`` callables (:func:`open_proxy`, :func:`mkdir_proxy`, ...) 
-- + drop-in replacements for the corresponding built-ins that dispatch to CAPIO + when a path lives inside the CAPIO directory and fall back to the original + implementation otherwise. + +Everything else re-exported here (``pycapio_open``, ``PyCapioPath``, +``FILE_MODES``, the IO wrappers, ...) comes from the compiled extension +:mod:`pycapio._pycapio` and is documented in the *Native API* section. + +Example: + Intercept only the I/O performed inside a single function:: + + from pycapio import CapioContext + + @CapioContext(capio_dir=".", app_name="reader", + workflow_name="example_workflow") + def read(path): + with open(path, "r") as f: + return f.read() +""" + import io import os from functools import wraps @@ -13,6 +46,13 @@ def _dump_context() -> dict[str, Any]: + """Capture the currently installed built-in I/O callables. + + Returns: + A mapping from dotted built-in name (for example ``"builtins.open"``) + to the callable currently bound to it. This snapshot is used to + restore the originals once a :func:`CapioContext` region exits. + """ return { "builtins.open": builtins.open, "os.mkdir": os.mkdir, @@ -25,6 +65,11 @@ def _dump_context() -> dict[str, Any]: def _restore_context(context: dict[str, Any]): + """Reinstate the original built-in I/O callables. + + Args: + context: A snapshot previously produced by :func:`_dump_context`. + """ builtins.open = context["builtins.open"] os.mkdir = context["os.mkdir"] os.makedirs = context["os.makedirs"] @@ -34,6 +79,13 @@ def _restore_context(context: dict[str, Any]): os.path = context["os.path"] def _patch_context(): + """Replace the built-in I/O callables with their CAPIO-aware proxies. + + After this call, :func:`open`, :func:`os.mkdir`, :func:`os.makedirs`, + :func:`os.scandir`, :func:`io.open` and :func:`os.listdir` route through the + matching ``*_proxy`` function, and :data:`os.path` is swapped for the native + :class:`PyCapioPath` implementation. 
+ """ builtins.open = open_proxy os.mkdir = mkdir_proxy os.makedirs = makedirs_proxy @@ -43,6 +95,12 @@ def _patch_context(): os.path = PyCapioPath py_capio_initialized = False +"""bool: ``True`` once :func:`pycapio_init` has run in this process. + +CAPIO is initialised at most once per process; the first +:func:`CapioContext` invocation flips this flag so later ones reuse the running +server. +""" _BUILTIN_STACK = _dump_context() @@ -50,6 +108,16 @@ def _patch_context(): def scandir_proxy(path: str): + """CAPIO-aware replacement for :func:`os.scandir`. + + Args: + path: Directory to scan. + + Returns: + A :class:`PyCapioScandirWrapper` when ``path`` resolves inside the + active CAPIO directory, otherwise the result of the original + :func:`os.scandir`. + """ global _CAPIO_DIR target_path = _BUILTIN_STACK["os.path"].abspath(path) if _CAPIO_DIR and target_path.startswith(_CAPIO_DIR): @@ -57,12 +125,34 @@ def scandir_proxy(path: str): return _BUILTIN_STACK["os.scandir"](path) def open_proxy(*args, **kwargs): + """CAPIO-aware replacement for the built-in :func:`open`. + + The first positional argument is treated as the file path. The Python + ``mode`` string (``"r"``, ``"w+"``, ``"ab"``, ...) is translated into the + corresponding CAPIO open flags (see :data:`FILE_MODES`) before delegating to + :func:`pycapio_open`. + + The original :func:`open` is used unchanged when any of the following hold: + + * the first argument is not a path-like object; + * no CAPIO directory is active; + * the target path lies outside the CAPIO directory; + * the target path is the interactive ``.python_history`` file. + + Args: + *args: Positional arguments accepted by :func:`open`; ``args[0]`` is the + path and the optional ``args[1]`` is the mode. + **kwargs: Keyword arguments accepted by :func:`open` (notably ``mode``). 
+ + Returns: + A :class:`PyCapioBinaryIOWrapper` for binary modes (``"b"``) or a + :class:`PyCapioTextIOWrapper` for text modes when the path is handled by + CAPIO; otherwise a standard file object from the built-in :func:`open`. + """ global _CAPIO_DIR arg0 = args[0] if args else None if not args or not isinstance(arg0, (str, bytes, os.PathLike)): - print(f"DEBUG: Type of arg0 is {type(arg0)}") - print(f"NOT A CAPIO PATH -> {arg0}") return _BUILTIN_STACK["builtins.open"](*args, **kwargs) target_path = _BUILTIN_STACK["os.path"].abspath(args[0]) @@ -88,6 +178,21 @@ def open_proxy(*args, **kwargs): def mkdir_proxy(path_val, mode=0o777, *args, **kwargs): + """CAPIO-aware replacement for :func:`os.mkdir`. + + Args: + path_val: Directory to create. + mode: Permission bits applied to the new directory. + *args: Extra positional arguments forwarded to the original + :func:`os.mkdir` on the fallback path. + **kwargs: Extra keyword arguments forwarded to the original + :func:`os.mkdir` on the fallback path. + + Returns: + The result of :func:`pycapio_mkdir` when ``path_val`` is inside the + active CAPIO directory, otherwise the result of the original + :func:`os.mkdir`. + """ global _CAPIO_DIR target_path = _BUILTIN_STACK["os.path"].abspath(path_val) @@ -98,6 +203,26 @@ def mkdir_proxy(path_val, mode=0o777, *args, **kwargs): def makedirs_proxy(path_val, mode=0o777, *args, **kwargs): + """CAPIO-aware replacement for :func:`os.makedirs`. + + Note: + Inside the CAPIO directory the creation is delegated to + :func:`pycapio_mkdir`; intermediate directories are handled by the CAPIO + server rather than being created recursively in Python. + + Args: + path_val: Directory path to create. + mode: Permission bits applied to the new directory. + *args: Extra positional arguments forwarded to the original + :func:`os.makedirs` on the fallback path. + **kwargs: Extra keyword arguments forwarded to the original + :func:`os.makedirs` on the fallback path. 
+ + Returns: + The result of :func:`pycapio_mkdir` when ``path_val`` is inside the + active CAPIO directory, otherwise the result of the original + :func:`os.makedirs`. + """ global _CAPIO_DIR target_path = _BUILTIN_STACK["os.path"].abspath(path_val) @@ -108,6 +233,16 @@ def makedirs_proxy(path_val, mode=0o777, *args, **kwargs): def listdir_proxy(dirpath: str): + """CAPIO-aware replacement for :func:`os.listdir`. + + Args: + dirpath: Directory whose entries should be listed. + + Returns: + A list of entry names. Inside the CAPIO directory the names are gathered + by iterating a :class:`PyCapioScandirWrapper`; outside it the original + :func:`os.listdir` is used. + """ global _CAPIO_DIR dirpath = _BUILTIN_STACK["os.path"].abspath(dirpath) @@ -132,6 +267,47 @@ def CapioContext(*, await_server_timeout_seconds=2, teardown_server=True ): + """Decorator factory that runs a function with CAPIO interception enabled. + + On first use within a process this initialises CAPIO (starting/attaching to + the CAPIO server) and registers teardown via :mod:`atexit`. For every call + of the decorated function it swaps the built-in I/O callables for their + CAPIO-aware proxies, runs the function, and restores the originals + afterwards -- so interception is scoped to the decorated call only. + + All arguments are keyword-only. + + Args: + capio_dir: Root directory managed by CAPIO. I/O on paths *inside* this + directory is intercepted; everything else falls back to the standard + library. Defaults to ``"."``. + app_name: Logical application name reported to CAPIO. Defaults to + :data:`CAPIO_DEFAULT_APP_NAME`. + workflow_name: Logical workflow name reported to CAPIO. Defaults to + :data:`CAPIO_DEFAULT_WORKFLOW_NAME`. + silent: When ``True`` (default) sets the ``SILENT`` environment variable + to ``"ON"`` to suppress CAPIO server chatter. + server_exec_path: Path or name of the CAPIO server executable. Defaults + to ``"capio_server"``. 
+ capio_cl_configuration_file: Optional path to a CAPIO-CL configuration + file. Empty string means no configuration file. + await_server_timeout_seconds: How long to wait for the CAPIO server to + become ready before giving up. Defaults to ``2``. + teardown_server: When ``True`` (default) the CAPIO server is torn down at + process exit. + + Returns: + A decorator that wraps the target function with CAPIO setup, + I/O patching and cleanup. + + Example: + :: + + @CapioContext(capio_dir="./data", app_name="writer") + def produce(path, payload): + with open(path, "w") as f: + f.write(payload) + """ def _CapioContext(func): @wraps(func) def wrapper(*args, **kwargs): diff --git a/pycapio/__main__.py b/pycapio/__main__.py index e1da278..44af3bf 100644 --- a/pycapio/__main__.py +++ b/pycapio/__main__.py @@ -1,3 +1,17 @@ +"""Command-line entry point for PyCAPIO. + +This module wraps an arbitrary Python script or module in a +:func:`pycapio.CapioContext` so that *every* I/O operation performed by that +program is transparently intercepted by CAPIO. No edits to the target program +are required. + +Usage: + .. code-block:: console + + pycapio --capio-dir ./dir --app-name app \\ + --workflow-name my_workflow my_step.py [script args...] +""" + import argparse import os import runpy @@ -8,6 +22,32 @@ def main(): + """Parse CLI arguments and run the target script under CAPIO interception. + + The launcher accepts the following command-line arguments: + + ``--capio-dir`` + CAPIO directory to manage (default ``"."``). + ``--workflow-name`` + CAPIO workflow name (default :data:`CAPIO_DEFAULT_WORKFLOW_NAME`). + ``--app-name`` + CAPIO application name (default :data:`CAPIO_DEFAULT_APP_NAME`). + ``--capio-cl`` + Path to a CAPIO-CL configuration file (default ``""``). + ``script_path`` + Path to the Python script, package directory, or importable module to + execute. 
+ ``script_args`` + Remaining arguments forwarded verbatim to the target program via + ``sys.argv``. + + The target is launched inside a :func:`pycapio.CapioContext`. Execution is + attempted first as a filesystem path with :func:`runpy.run_path` (handling + ``.py`` files and package directories that expose ``__main__.py`` or + ``main.py``); if that fails it is retried as an importable module with + :func:`runpy.run_module`. The process exit code mirrors the target's exit + status. + """ parser = argparse.ArgumentParser(description="PyCAPIO module launcher") parser.add_argument("--capio-dir", help="CAPIO directory", default=".") parser.add_argument("--workflow-name", help="CAPIO workflow name", default=CAPIO_DEFAULT_WORKFLOW_NAME) @@ -21,6 +61,7 @@ def main(): @CapioContext(capio_dir=args.capio_dir, workflow_name=args.workflow_name, app_name=args.app_name, teardown_server=True, capio_cl_configuration_file=args.capio_cl) def _pycapio_launcher(): + """Resolve and execute the user-provided target inside the context.""" # Update sys.argv so the target script sees its own arguments target = args.script_path sys.argv = [target] + args.script_args diff --git a/pycapio/internals/__init__.py b/pycapio/internals/__init__.py index 86f04ff..781da4f 100644 --- a/pycapio/internals/__init__.py +++ b/pycapio/internals/__init__.py @@ -1,3 +1,43 @@ +"""Low-level CAPIO primitives re-exported from the native extension. + +This subpackage exposes the raw building blocks that the high-level +:mod:`pycapio` proxies are built on top of. Import it when you need to write +custom I/O modules that talk to the CAPIO server directly:: + + from pycapio.internals import * + +All names below are defined in the compiled extension +:mod:`pycapio._pycapio`; this module simply curates the subset that is safe and +useful for downstream code, and is documented in detail under *Native API*. + +Exported names: + +``CAPIO_DEFAULT_APP_NAME`` + Default application name used when none is supplied.
+``CAPIO_DEFAULT_WORKFLOW_NAME`` + Default workflow name used when none is supplied. +``pycapio_get_capio_dir`` + Return the active CAPIO directory. +``pycapio_init`` + Initialise CAPIO and start/attach to the server. +``pycapio_mkdir`` + Create a directory inside the CAPIO directory. +``pycapio_open`` + Open a CAPIO-managed file and return its descriptor. +``pycapio_teardown`` + Tear down CAPIO (optionally stopping the server). +``DirEntry`` + A single entry yielded while scanning a CAPIO directory. +``PyCapioBinaryIOWrapper`` + File-like wrapper for binary CAPIO streams. +``PyCapioScandirWrapper`` + Iterator over the contents of a CAPIO directory. +``PyCapioTextIOWrapper`` + File-like wrapper for text CAPIO streams. +``FILE_MODES`` + Mapping of POSIX open-flag names to their integer values. +""" + from .._pycapio import ( CAPIO_DEFAULT_APP_NAME, CAPIO_DEFAULT_WORKFLOW_NAME, From 53686e98cdd098f6bda6569c5058325260f0df27 Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Mon, 22 Jun 2026 10:32:30 +0200 Subject: [PATCH 2/5] Added documentation --- docs/.gitignore | 2 + docs/Makefile | 31 +++++ docs/README.md | 59 +++++++++ docs/requirements.txt | 7 ++ docs/source/_static/custom.css | 3 + docs/source/api/internals.md | 12 ++ docs/source/api/native.md | 42 +++++++ docs/source/api/pycapio.md | 24 ++++ docs/source/conf.py | 211 +++++++++++++++++++++++++++++++++ docs/source/cpp/index.md | 51 ++++++++ docs/source/index.md | 68 +++++++++++ docs/source/installation.md | 44 +++++++ docs/source/usage.md | 54 +++++++++ 13 files changed, 608 insertions(+) create mode 100644 docs/.gitignore create mode 100644 docs/Makefile create mode 100644 docs/README.md create mode 100644 docs/requirements.txt create mode 100644 docs/source/_static/custom.css create mode 100644 docs/source/api/internals.md create mode 100644 docs/source/api/native.md create mode 100644 docs/source/api/pycapio.md create mode 100644 docs/source/conf.py create mode 100644 docs/source/cpp/index.md 
create mode 100644 docs/source/index.md create mode 100644 docs/source/installation.md create mode 100644 docs/source/usage.md diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..e08e13e --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,2 @@ +_build +doxygen \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..a7d88d4 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,31 @@ +# Minimal makefile for PyCAPIO documentation. +# Usage: +# make html -> Doxygen XML + HTML site in _build/html +# make pdf -> Doxygen XML + PDF in _build/latex/pycapio.pdf +# make clean -> remove generated output + +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +.PHONY: help doxygen html pdf latexpdf clean + +help: + @echo "Targets: html, pdf (alias latexpdf), doxygen, clean" + +# Generate the C++ XML/HTML used by Breathe. +doxygen: + @command -v doxygen >/dev/null 2>&1 || { echo "doxygen not found; C++ API pages will be empty"; exit 0; } + doxygen Doxyfile + +html: doxygen + $(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)/html" + @echo "HTML written to $(BUILDDIR)/html/index.html" + +latexpdf pdf: doxygen + $(SPHINXBUILD) -b latex "$(SOURCEDIR)" "$(BUILDDIR)/latex" + $(MAKE) -C "$(BUILDDIR)/latex" all-pdf + @echo "PDF written to $(BUILDDIR)/latex/pycapio.pdf" + +clean: + rm -rf "$(BUILDDIR)" doxygen diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..7bf4bfd --- /dev/null +++ b/docs/README.md @@ -0,0 +1,59 @@ +# PyCAPIO documentation + +This directory contains the [Sphinx](https://www.sphinx-doc.org) documentation +for PyCAPIO. It documents both the Python API (via `autodoc`, reading the +docstrings in `pycapio/`) and the C++ layer (via [Doxygen](https://www.doxygen.nl) +and [Breathe](https://breathe.readthedocs.io)), and can produce both an HTML site +and a PDF.
+ +## Build locally + +Install the documentation dependencies (a virtual environment is recommended): + +```console +pip install -r docs/requirements.txt +``` + +For the C++ API pages you also need Doxygen, and for the PDF you need a LaTeX +toolchain: + +```console +# Debian/Ubuntu +sudo apt-get install doxygen texlive-latex-recommended texlive-latex-extra latexmk +# macOS (Homebrew) +brew install doxygen # and a LaTeX distribution such as MacTeX for PDF +``` + +Then, from inside `docs/`: + +```console +make html # HTML site -> docs/_build/html/index.html +make pdf # PDF -> docs/_build/latex/pycapio.pdf +make clean # remove generated output +``` + +`make` runs Doxygen first (when available) and then Sphinx. The Python pages +build even without Doxygen — only the C++ pages will be empty. + +> **Note** — The docs build does **not** require compiling the native +> `pycapio._pycapio` extension. `conf.py` registers a lightweight stand-in for +> it so `autodoc` can import the pure-Python modules anywhere. The native +> classes are documented from the C++ sources instead. + +## Publishing + +Two ready-to-use options are included: + +- **GitHub Pages** — `.github/workflows/docs.yml` builds the HTML (and a PDF, + copied to `pycapio.pdf` at the site root) on every push to `main` and deploys + it. Enable it under *Settings → Pages → Build and deployment → Source: + GitHub Actions*. +- **Read the Docs** — `.readthedocs.yaml` builds HTML, PDF and ePub. Import the + repository on the Read the Docs dashboard and it works out of the box. + +## Writing docstrings + +Python docstrings use the [Google style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) +parsed by `napoleon`. C++ entities use Doxygen comments (`/** ... */` or +`///`). Adding a docstring to a function or class is enough for it to appear in +the rendered API reference — no manual page edits required.
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..c9b63ab --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,7 @@ +# Documentation build dependencies. +# Install with: pip install -r docs/requirements.txt +sphinx>=7.2 +furo>=2024.1.29 +myst-parser>=2.0 +breathe>=4.35 +sphinx-autodoc-typehints>=2.0 diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css new file mode 100644 index 0000000..79b61c7 --- /dev/null +++ b/docs/source/_static/custom.css @@ -0,0 +1,3 @@ +/* Small tweaks on top of the Furo theme. */ +.wy-table-responsive table td { white-space: normal; } +code.literal { font-size: 0.85em; } diff --git a/docs/source/api/internals.md b/docs/source/api/internals.md new file mode 100644 index 0000000..aa0639d --- /dev/null +++ b/docs/source/api/internals.md @@ -0,0 +1,12 @@ +# `pycapio.internals` — low-level primitives + +This subpackage re-exports the raw CAPIO building blocks from the native +extension. The page below lists what is available; the behaviour of each symbol +is documented from the C++ sources in {doc}`native`. + +```{eval-rst} +.. automodule:: pycapio.internals + :members: + :undoc-members: + :no-index: +``` diff --git a/docs/source/api/native.md b/docs/source/api/native.md new file mode 100644 index 0000000..8ae16e0 --- /dev/null +++ b/docs/source/api/native.md @@ -0,0 +1,42 @@ +# Native API + +Several names exported from {doc}`internals` (and re-exported at the top level) +are implemented in C++ and exposed to Python through pybind11 — the compiled +`pycapio._pycapio` extension. The table below maps each Python name to the C++ +entity that implements it; follow the links for the full, source-generated +reference on the {doc}`../cpp/index` page. 
+ +| Python name | C++ entity | Reference | +| ------------------------ | --------------------------- | --------- | +| `PyCapioTextIOWrapper` | `IOWrapper<IOMode::Text>` | {cpp:class}`IOWrapper` | +| `PyCapioBinaryIOWrapper` | `IOWrapper<IOMode::Binary>` | {cpp:class}`IOWrapper` | +| `PyCapioPath` | `OsPath` | {cpp:class}`OsPath` | +| `DirEntry` | `CapioDirEntry` | {cpp:class}`CapioDirEntry` | +| `PyCapioScandirWrapper` | `ScandirIteratorWrapper` | {cpp:class}`ScandirIteratorWrapper` | +| `PyCAPIOException` | `PyCapioException` | {cpp:class}`PyCapioException` | + +| Python function | C++ function | Reference | +| ------------------------------------- | --------------------- | --------- | +| `pycapio_init` | `libcapio_init` | {cpp:func}`libcapio_init` | +| `pycapio_teardown` | `libcapio_teardown` | {cpp:func}`libcapio_teardown` | +| `pycapio_open` | `libcapio_open` | {cpp:func}`libcapio_open` | +| `pycapio_mkdir` | `libcapio_mkdir` | {cpp:func}`libcapio_mkdir` | + +## Behaviour summary + +**IO wrappers.** `PyCapioTextIOWrapper` and `PyCapioBinaryIOWrapper` are the +`Text` and `Binary` instantiations of the same `IOWrapper` class template. They +implement the file-object protocol (`read`, `readline`, `readlines`, `write`, +`writelines`, `seek`, `flush`, `close`, iteration and context-manager support) +on top of a CAPIO file descriptor. Text wrappers return `str`; binary wrappers +return `bytes`. + +**`PyCapioPath`.** A drop-in replacement for {mod}`os.path` whose queries are +answered by CAPIO for paths inside the CAPIO directory, installed in place of +`os.path` while a `CapioContext` is active. + +**Directory scanning.** `PyCapioScandirWrapper` iterates a CAPIO directory and +yields `DirEntry` objects, mirroring {func}`os.scandir` and `os.DirEntry`. + +The full rendered reference for all of these lives on the {doc}`../cpp/index` +page.
diff --git a/docs/source/api/pycapio.md b/docs/source/api/pycapio.md new file mode 100644 index 0000000..036e6a2 --- /dev/null +++ b/docs/source/api/pycapio.md @@ -0,0 +1,24 @@ +# `pycapio` — high-level API + +The top-level package exposes the {func}`~pycapio.CapioContext` decorator and the +CAPIO-aware proxy functions that replace Python's built-in I/O entry points. + +## Package + +```{eval-rst} +.. automodule:: pycapio + :members: + :undoc-members: + :show-inheritance: + :exclude-members: PyCapioPath, PyCapioTextIOWrapper, PyCapioBinaryIOWrapper, + PyCapioScandirWrapper, DirEntry, FILE_MODES, pycapio_init, + pycapio_teardown, pycapio_open, pycapio_mkdir, pycapio_get_capio_dir +``` + +## Command-line launcher + +```{eval-rst} +.. automodule:: pycapio.__main__ + :members: + :undoc-members: +``` diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..4077d3a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,211 @@ +# Configuration file for the Sphinx documentation builder. +# +# Full reference: https://www.sphinx-doc.org/en/master/usage/configuration.html + +import os +import sys +import shutil +import subprocess +import types +from pathlib import Path + +# -- Paths ------------------------------------------------------------------- +# Make the `pycapio` package importable by autodoc. +# CONF_DIR = docs/source (this file's directory; the Sphinx source root) +# DOCS_DIR = docs (holds the Doxyfile and Doxygen output) +# REPO_ROOT = repository root +CONF_DIR = Path(__file__).resolve().parent +DOCS_DIR = CONF_DIR.parent +REPO_ROOT = DOCS_DIR.parent +sys.path.insert(0, str(REPO_ROOT)) + +# -- Stub the compiled extension -------------------------------------------- +# `pycapio` does `from ._pycapio import *` at import time. `_pycapio` is a +# pybind11 extension that is only available after a full native build (MPI, +# CMake, CAPIO). 
To let the docs build anywhere -- locally, on GitHub Actions, +# on Read the Docs -- without that toolchain, we register a lightweight stand-in +# for `pycapio._pycapio` *before* autodoc imports the package. +# +# The pure-Python API (CapioContext, the *_proxy functions, the CLI launcher) +# is then documented from the real source via autodoc, while the native classes +# and functions are documented from the C++ sources via Breathe/Doxygen. + + +def _install_native_stub() -> None: + stub = types.ModuleType("pycapio._pycapio") + + # Constants referenced at import time (default arguments, etc.). + stub.CAPIO_DEFAULT_APP_NAME = "writer" + stub.CAPIO_DEFAULT_WORKFLOW_NAME = "workflow" + stub.FILE_MODES = { + "O_RDONLY": 0, + "O_WRONLY": 1, + "O_RDWR": 2, + "O_CREAT": 64, + "O_APPEND": 1024, + } + + def _make_callable(name): + def _f(*args, **kwargs): # pragma: no cover - never executed + raise RuntimeError( + f"{name} is a native symbol stubbed out for the docs build" + ) + + _f.__name__ = name + return _f + + for fn in ( + "pycapio_init", + "pycapio_teardown", + "pycapio_open", + "pycapio_mkdir", + "pycapio_get_capio_dir", + ): + setattr(stub, fn, _make_callable(fn)) + + for cls in ( + "PyCapioTextIOWrapper", + "PyCapioBinaryIOWrapper", + "PyCapioScandirWrapper", + "PyCapioPath", + "DirEntry", + "PyCAPIOException", + ): + setattr(stub, cls, type(cls, (), {})) + + # Make `from ._pycapio import *` populate every public name. + stub.__all__ = [n for n in vars(stub) if not n.startswith("_")] + + sys.modules["pycapio._pycapio"] = stub + + +_install_native_stub() + +# -- Project information ----------------------------------------------------- +project = "PyCAPIO" +copyright = "2026, Alpha Parallel Computing Research Group, University of Torino, Italy." 
+author = "Marco Edoardo Santimaria" + +# -- Version ----------------------------------------------------------------- +# Read the version straight from CMakeLists.txt, mirroring the regex used by +# scikit-build-core in pyproject.toml. This works without installing the +# package (which the docs build deliberately avoids). +import re + + +def _read_version() -> str: + cmakelists = REPO_ROOT / "CMakeLists.txt" + try: + text = cmakelists.read_text(encoding="utf-8") + m = re.search( + r"(?m)VERSION\s+(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)", + text, + ) + if m: + return f"{m['major']}.{m['minor']}.{m['patch']}" + except OSError: + pass + # Fallback: an installed package, if present. + try: + from importlib.metadata import version as _pkg_version + + return _pkg_version("pycapio") + except Exception: + return "0.0.0" + + +release = _read_version() +version = ".".join(release.split(".")[:2]) + +# -- General configuration --------------------------------------------------- +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "myst_parser", + "breathe", +] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"] + +# Accept both Markdown (MyST) and reStructuredText sources. +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} + +# -- Autodoc / Napoleon ------------------------------------------------------ +autosummary_generate = True +autodoc_member_order = "bysource" +autodoc_typehints = "description" +autodoc_default_options = { + "members": True, + "undoc-members": True, + "show-inheritance": True, +} +# Any genuinely external module that might be imported. The native extension is +# handled by the stub above; list other heavy deps here.
+autodoc_mock_imports = ["py_capio_cl"] + +napoleon_google_docstring = True +napoleon_numpy_docstring = False +napoleon_include_init_with_doc = True +napoleon_use_rtype = True + +# -- Intersphinx ------------------------------------------------------------- +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} + +# -- MyST -------------------------------------------------------------------- +myst_enable_extensions = ["colon_fence", "deflist"] +myst_heading_anchors = 3 + +# -- Breathe (C++ via Doxygen) ---------------------------------------------- +breathe_projects = {"pycapio": str(DOCS_DIR / "doxygen" / "xml")} +breathe_default_project = "pycapio" +breathe_default_members = ("members",) + + +def _run_doxygen() -> None: + """Generate Doxygen XML so Breathe has something to read. + + Runs automatically when the XML output is missing (for example on + Read the Docs, where there is no separate build step). Silently does + nothing if the ``doxygen`` executable is unavailable. 
+ """ + xml_index = DOCS_DIR / "doxygen" / "xml" / "index.xml" + if xml_index.exists(): + return + if shutil.which("doxygen") is None: + print("WARNING: 'doxygen' not found; the C++ API pages will be empty.") + return + print("Running Doxygen to generate C++ XML ...") + subprocess.call(["doxygen", "Doxyfile"], cwd=str(DOCS_DIR)) + + +_run_doxygen() + +# -- HTML output ------------------------------------------------------------- +html_theme = "furo" +html_static_path = ["_static"] +html_title = f"PyCAPIO {release}" +html_theme_options = { + "source_repository": "https://github.com/High-Performance-IO/PyCAPIO", + "source_branch": "main", + "source_directory": "docs/", +} + +# -- LaTeX / PDF output ------------------------------------------------------ +latex_engine = "pdflatex" +latex_elements = { + "papersize": "a4paper", + "pointsize": "11pt", + "preamble": r"\usepackage{enumitem}\setlistdepth{99}", +} +latex_documents = [ + ("index", "pycapio.tex", "PyCAPIO Documentation", author, "manual"), +] diff --git a/docs/source/cpp/index.md b/docs/source/cpp/index.md new file mode 100644 index 0000000..1807a65 --- /dev/null +++ b/docs/source/cpp/index.md @@ -0,0 +1,51 @@ +# C++ API reference + +This section documents the C++ layer of PyCAPIO directly from the source +headers. It is generated by Doxygen and embedded with Breathe, so it stays in +sync with the code as long as the Doxygen comments are kept up to date. + +:::{note} +If this page is empty, the Doxygen XML has not been generated yet. Run +`doxygen Doxyfile` inside the `docs/` directory (or build via the provided +`Makefile`, which does it for you) before building the HTML. +::: + +## Streaming I/O + +```{eval-rst} +.. doxygenenum:: IOMode + +.. doxygenclass:: IOWrapper + :members: + :undoc-members: +``` + +## Filesystem helpers + +```{eval-rst} +.. doxygenclass:: OsPath + :members: + :undoc-members: + +.. doxygenclass:: CapioDirEntry + :members: + :undoc-members: + +.. 
doxygenclass:: ScandirIteratorWrapper + :members: + :undoc-members: +``` + +## Error handling + +```{eval-rst} +.. doxygenclass:: PyCapioException + :members: + :undoc-members: +``` + +## libcapio entry points + +```{eval-rst} +.. doxygenfile:: libcapio.hpp +``` diff --git a/docs/source/index.md b/docs/source/index.md new file mode 100644 index 0000000..c0e5cb8 --- /dev/null +++ b/docs/source/index.md @@ -0,0 +1,68 @@ +# PyCAPIO + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: Getting started + +installation +usage +``` + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: Python API + +api/pycapio +api/internals +api/native +``` + +```{toctree} +:maxdepth: 2 +:hidden: +:caption: C++ API + +cpp/index +``` + +**PyCAPIO** brings transparent data streaming to file-based Python workflows, +minimizing I/O bottlenecks without requiring code modifications. It works by +*monkey patching* Python's built-in I/O methods and classes, interfacing them +natively with the [CAPIO](https://github.com/High-Performance-IO/capio) +methodology. + +When a workflow step runs under PyCAPIO, any access to a path inside the +configured **CAPIO directory** is routed through the CAPIO server instead of the +operating system, while all other I/O behaves exactly as it normally would. + +## Where to start + +- {doc}`installation` — prerequisites and how to install PyCAPIO. +- {doc}`usage` — the three ways to enable interception (CLI, `CapioContext`, + and the low-level internals). +- {doc}`api/pycapio` — the high-level Python API. +- {doc}`api/native` and {doc}`cpp/index` — the native (C++/pybind11) layer. + +## At a glance + +```python +from pycapio import CapioContext + + +@CapioContext(capio_dir=".", app_name="reader", workflow_name="example_workflow") +def read(path): + with open(path, "r") as f: + return f.read() + + +# Only the I/O performed inside this call is intercepted by CAPIO. 
+data = read("example.txt") +``` + +## Indices + +- {ref}`genindex` +- {ref}`modindex` +- {ref}`search` diff --git a/docs/source/installation.md b/docs/source/installation.md new file mode 100644 index 0000000..f2ef73e --- /dev/null +++ b/docs/source/installation.md @@ -0,0 +1,44 @@ +# Installation + +## Prerequisites + +Before installing PyCAPIO, ensure your system meets the following requirements: + +| Dependency | Minimum version / notes | +| --------------- | ------------------------------------------------------ | +| **Python** | ≥ 3.10 (including development libraries / headers) | +| **MPI** | A working MPI implementation (e.g. OpenMPI, MPICH) | +| **Build tools** | CMake and GCC | + +At compile time, [CAPIO](https://github.com/High-Performance-IO/capio) is also +fetched and compiled from source. + +## From PyPI (recommended) + +PyCAPIO is published as prebuilt wheels on PyPI. Install it inside a virtual +environment: + +```console +python3 -m venv venv +source venv/bin/activate +pip install pycapio +``` + +## From source + +To build and install PyCAPIO from the repository: + +```console +git clone https://github.com/High-Performance-IO/PyCAPIO.git +cd PyCAPIO +python3 -m venv venv +source venv/bin/activate +pip install . +``` + +:::{important} +PyCAPIO natively intercepts I/O operations that use Python's built-in methods. +If your script relies on external libraries compiled into shared objects +(`.so` files), bypass Python-level interception and use the standard +`LD_PRELOAD` approach for CAPIO instead. +::: diff --git a/docs/source/usage.md b/docs/source/usage.md new file mode 100644 index 0000000..8a20405 --- /dev/null +++ b/docs/source/usage.md @@ -0,0 +1,54 @@ +# Usage + +PyCAPIO can inject transparent streaming at three levels of granularity. + +## 1. Global interception (CLI) + +Invoke PyCAPIO from the command line to inject streaming across an entire +workflow step. 
The target script runs unmodified; every built-in I/O call it +makes against the CAPIO directory is intercepted. + +```console +pycapio --capio-dir ./dir --app-name app --workflow-name my_workflow my_step.py +``` + +Available options are documented in {func}`pycapio.__main__.main`. Any arguments +after the script path are forwarded verbatim to the script. + +## 2. Fine-grained interception (`CapioContext`) + +For precise control over which regions are intercepted, use the +{func}`pycapio.CapioContext` decorator. Only I/O performed *inside* the decorated +call is routed through CAPIO. + +```python +from pycapio import CapioContext + + +@CapioContext(capio_dir=".", app_name="reader", workflow_name="example_workflow") +def read(path): + with open(path, "r") as f: + data = f.read() + return data + + +# Only I/O within this function call is intercepted. +data = read("example.txt") +``` + +The decorator initialises CAPIO on first use, patches the built-in I/O callables +for the duration of the call, and restores the originals afterwards. See the +{doc}`api/pycapio` reference for every parameter. + +## 3. Low-level internals + +You can also drive CAPIO directly through its primitives to build custom I/O +modules that interface with the CAPIO server: + +```python +from pycapio.internals import * +``` + +This imports the native functions and classes — {func}`~pycapio.internals.pycapio_open`, +the IO wrappers, the scandir iterator, and so on — listed in {doc}`api/internals` +and described in {doc}`api/native`. 
From 32dfe812801c1f878ffe33e4fd2e0cefef916002 Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Mon, 22 Jun 2026 10:58:13 +0200 Subject: [PATCH 3/5] Added publish docs workflow --- .github/workflows/release.yml | 89 ++++++++++++++++++++++++++--------- README.md | 1 - docs/Doxyfile | 43 +++++++++++++++++ docs/Makefile | 6 --- docs/README.md | 11 ----- docs/requirements.txt | 2 - 6 files changed, 110 insertions(+), 42 deletions(-) create mode 100644 docs/Doxyfile diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 84165ec..79b77ef 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: Release to PyPI +name: Release to PyPI and Publish Documentation on: workflow_run: @@ -13,10 +13,15 @@ jobs: runs-on: ubuntu-latest outputs: exists: ${{ steps.check-tag.outputs.exists }} + version: ${{ steps.get-version.outputs.version }} steps: - uses: actions/checkout@v6 - - name: "Get CAPIO-CL version" - run: echo "PYCAPIO_VERSION=$(grep -E 'VERSION [0-9]+\.[0-9]+\.[0-9]+' CMakeLists.txt | awk '{print $2}')" >> $GITHUB_ENV + - name: "Get PYCAPIO version" + id: get-version + run: | + VERSION=$(grep -E 'VERSION [0-9]+\.[0-9]+\.[0-9]+' CMakeLists.txt | awk '{print $2}') + echo "PYCAPIO_VERSION=$VERSION" >> $GITHUB_ENV + echo "version=$VERSION" >> $GITHUB_OUTPUT - name: "Check if tag exists" id: check-tag uses: mukunku/tag-exists-action@v1.7.0 @@ -26,7 +31,7 @@ jobs: build-wheels: name: Build wheels on ${{ matrix.os }} needs: check-tag-existance - if: needs.check-version.outputs.tag_exists == 'false' + if: needs.check-tag-existance.outputs.exists == 'false' runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -35,15 +40,12 @@ jobs: steps: - name: Check out repository uses: actions/checkout@v4 - - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.x" - - name: Install cibuildwheel run: python -m pip install --upgrade cibuildwheel - - name: Build wheels env: CIBW_BUILD: 
"cp310-* cp311-* cp312-* cp313-* cp314-*" @@ -54,11 +56,8 @@ jobs: CIBW_BUILD_VERBOSITY: 1 CIBW_BEFORE_ALL_LINUX: > dnf install -y openmpi-devel - CIBW_ENVIRONMENT_LINUX: 'PATH="/usr/lib64/openmpi/bin:$PATH"' - run: cibuildwheel --output-dir wheelhouse - - name: Upload wheel artifacts uses: actions/upload-artifact@v4 with: @@ -68,22 +67,19 @@ jobs: build-sdist: name: Build source distribution needs: check-tag-existance - if: needs.check-version.outputs.tag_exists == 'false' + if: needs.check-tag-existance.outputs.exists == 'false' runs-on: ubuntu-latest steps: - name: Check out repository uses: actions/checkout@v4 - - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.x" - - name: Build sdist run: | python -m pip install --upgrade build python -m build --sdist - - name: Upload sdist artifact uses: actions/upload-artifact@v4 with: @@ -93,35 +89,31 @@ jobs: publish: name: Release and Publish to PyPI needs: [check-tag-existance, build-wheels, build-sdist] - if: needs.check-version.outputs.tag_exists == 'false' + if: needs.check-tag-existance.outputs.exists == 'false' runs-on: ubuntu-latest permissions: contents: write # Required for creating GitHub Releases steps: - name: Check out repository uses: actions/checkout@v4 - - name: Download all wheels uses: actions/download-artifact@v4 with: pattern: wheels-* merge-multiple: true path: dist - - name: Download sdist uses: actions/download-artifact@v4 with: name: sdist path: dist - - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: - tag_name: "v${{ env.PYCAPIO_VERSION }}" - name: "v${{ env.PYCAPIO_VERSION }}" + tag_name: "v${{ needs.check-tag-existance.outputs.version }}" + name: "v${{ needs.check-tag-existance.outputs.version }}" generate_release_notes: true files: dist/* - - name: Publish to PyPI env: TWINE_USERNAME: __token__ @@ -129,4 +121,57 @@ jobs: run: | python -m pip install --upgrade twine twine check dist/* - twine upload dist/* \ No newline at end of file + twine 
upload dist/* + + build-docs: + name: Build documentation + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install system packages + run: | + sudo apt-get update + sudo apt-get install -y doxygen + + - name: Install documentation dependencies + run: pip install -r docs/requirements.txt + + - name: Build HTML + run: make -C docs html + + - name: Upload documentation artifact + uses: actions/upload-artifact@v4 + with: + name: documentation + path: docs/_build/html + + deploy-docs: + name: Deploy documentation + needs: [ check-tag-existance, build-docs, publish ] + if: needs.check-tag-existance.outputs.exists == 'false' + runs-on: ubuntu-24.04 + concurrency: + group: docs-deploy + cancel-in-progress: false + steps: + - name: Download documentation artifact + uses: actions/download-artifact@v4 + with: + name: documentation + path: site + + - name: "Deploy documentation page" + uses: appleboy/scp-action@v1.0.0 + with: + host: capio.hpc4ai.it + username: capio-user + key: ${{ secrets.PYCAPIO_DOC_DEPLOY_KEY }} + rm: true + source: "site/*" + strip_components: 1 + target: /mnt/services/capio/nginx/html/pycapio \ No newline at end of file diff --git a/README.md b/README.md index 90a9e9c..2b9d675 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![codecov](https://codecov.io/gh/High-Performance-IO/PyCAPIO/graph/badge.svg?token=YOUR_TOKEN)](https://codecov.io/gh/OWNER/REPO) [![CI Tests](https://github.com/High-Performance-IO/PyCAPIO/actions/workflows/ci_cd.yml/badge.svg)](https://github.com/High-Performance-IO/PyCAPIO/actions) - [![PyPI version](https://img.shields.io/pypi/v/pycapio.svg)](https://pypi.org/project/pycapio/) [![Python Versions](https://img.shields.io/pypi/pyversions/pycapio.svg)](https://pypi.org/project/pycapio/) diff --git a/docs/Doxyfile b/docs/Doxyfile new file mode 100644 index 0000000..62726e2 --- /dev/null +++ b/docs/Doxyfile @@ -0,0 +1,43 @@ +# Doxyfile for PyCAPIO — tuned
to feed Breathe (XML) and provide a standalone +# C++ HTML reference. Run from the docs/ directory: doxygen Doxyfile + +PROJECT_NAME = "PyCAPIO" +PROJECT_BRIEF = "Native C++/pybind11 layer of PyCAPIO" +OUTPUT_DIRECTORY = doxygen + +# Parse the public headers and their implementations. +INPUT = ../include ../src +FILE_PATTERNS = *.hpp *.h *.cpp +RECURSIVE = YES + +# We document the public interface; private members are surfaced by Breathe +# only where requested. +EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = YES +HIDE_UNDOC_MEMBERS = NO + +# Breathe consumes the XML output. +GENERATE_XML = YES +XML_OUTPUT = xml +XML_PROGRAMLISTING = YES + +# A standalone HTML C++ reference is handy on its own; disable if not wanted. +GENERATE_HTML = YES +HTML_OUTPUT = html +GENERATE_LATEX = NO + +# C++ niceties. +BUILTIN_STL_SUPPORT = YES +TEMPLATE_RELATIONS = YES +JAVADOC_AUTOBRIEF = YES +QT_AUTOBRIEF = YES +MARKDOWN_SUPPORT = YES +SORT_MEMBER_DOCS = NO + +# Keep the log readable. +QUIET = YES +WARN_IF_UNDOCUMENTED = NO + +# The CAPIO headers pulled in transitively are not part of our public API. +EXCLUDE_PATTERNS = */common/* */_libcapio_impl.hpp diff --git a/docs/Makefile b/docs/Makefile index a7d88d4..846ad74 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,9 +1,3 @@ -# Minimal makefile for PyCAPIO documentation. -# Usage: -# make html -> Doxygen XML + HTML site in _build/html -# make pdf -> Doxygen XML + PDF in _build/latex/pycapio.pdf -# make clean -> remove generated output - SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = _build diff --git a/docs/README.md b/docs/README.md index 7bf4bfd..da807ee 100644 --- a/docs/README.md +++ b/docs/README.md @@ -40,17 +40,6 @@ build even without Doxygen — only the C++ pages will be empty. > it so `autodoc` can import the pure-Python modules anywhere. The native > classes are documented from the C++ sources instead. 
-## Publishing - -Two ready-to-use options are included: - -- **GitHub Pages** — `.github/workflows/docs.yml` builds the HTML (and a PDF, - copied to `pycapio.pdf` at the site root) on every push to `main` and deploys - it. Enable it under *Settings → Pages → Build and deployment → Source: - GitHub Actions*. -- **Read the Docs** — `.readthedocs.yaml` builds HTML, PDF and ePub. Import the - repository at and it works out of the box. - ## Writing docstrings Python docstrings use the [Google style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) diff --git a/docs/requirements.txt b/docs/requirements.txt index c9b63ab..0cbb984 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,3 @@ -# Documentation build dependencies. -# Install with: pip install -r docs/requirements.txt sphinx>=7.2 furo>=2024.1.29 myst-parser>=2.0 From 10e1269ff527d865e3e19ebf22be320d19f3c605 Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Mon, 22 Jun 2026 11:22:43 +0200 Subject: [PATCH 4/5] Updated README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2b9d675..75387ca 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # PyCAPIO -[![codecov](https://codecov.io/gh/High-Performance-IO/PyCAPIO/graph/badge.svg?token=YOUR_TOKEN)](https://codecov.io/gh/OWNER/REPO) -[![CI Tests](https://github.com/High-Performance-IO/PyCAPIO/actions/workflows/ci_cd.yml/badge.svg)](https://github.com/High-Performance-IO/PyCAPIO/actions) -[![PyPI version](https://img.shields.io/pypi/v/pycapio.svg)](https://pypi.org/project/pycapio/) -[![Python Versions](https://img.shields.io/pypi/pyversions/pycapio.svg)](https://pypi.org/project/pycapio/) +[![codecov](https://img.shields.io/codecov/c/github/High-Performance-IO/PyCAPIO?logo=codecov)](https://codecov.io/gh/High-Performance-IO/PyCAPIO) +[![CI 
Tests](https://img.shields.io/github/actions/workflow/status/High-Performance-IO/PyCAPIO/ci_cd.yml?logo=githubactions&label=CI%20Tests)](https://github.com/High-Performance-IO/PyCAPIO/actions) +[![PyPI version](https://img.shields.io/pypi/v/pycapio.svg?logo=pypi)](https://pypi.org/project/pycapio/) +![Python](https://img.shields.io/badge/dynamic/regex?url=https%3A%2F%2Fraw.githubusercontent.com%2FHigh-Performance-IO%2FPyCAPIO%2Frefs%2Fheads%2Fmain%2Fpyproject.toml&search=requires-python%20%3D%20%22%28%5B%5E%22%5D%2B%29%22&replace=%241&label=Python&logo=python) **PyCAPIO** brings transparent data streaming to file-based Python workflows, minimizing I/O bottlenecks without requiring code modifications. From 19be26c0bb107987ed5638483f29b8bb5143affe Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Mon, 22 Jun 2026 11:25:50 +0200 Subject: [PATCH 5/5] Updated fixed CAPIO tag --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index decb135..b4a34bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ cmake.version = ">=3.15" wheel.packages = ["pycapio"] [tool.scikit-build.cmake.define] -CAPIO_RELEASE_TAG = "6b14036e85678f54cf9fa265141edb98b3394c8a" +CAPIO_RELEASE_TAG = "07427e39a4e475011e790580a4961ac947728f7d" CMAKE_BUILD_TYPE = "Release" CAPIO_LOG = "OFF" CAPIO_BUILD_POSIX = "OFF"