diff --git a/components/BlockParser/README.md b/components/BlockParser/README.md new file mode 100644 index 00000000..2cf95fb9 --- /dev/null +++ b/components/BlockParser/README.md @@ -0,0 +1,218 @@ +# BlockParser + +A standalone extraction of WordPress core's block parser. It takes a document containing WordPress block markup (`<!-- wp:paragraph -->...<!-- /wp:paragraph -->`) and returns a structured array of parsed blocks with their attributes, inner HTML, inner blocks, and content interleaving. This is the same parser that powers `parse_blocks()` in WordPress core, packaged as an independent library with no WordPress dependency. + +## Installation + +``` +composer require wp-php-toolkit/blockparser +``` + +## Quick Start + +```php +$document = <<<HTML +<!-- wp:heading --> +<h2>Welcome</h2> +<!-- /wp:heading --> + +<!-- wp:paragraph --> +<p>Hello from the block editor.</p> +<!-- /wp:paragraph --> +HTML; + +$parser = new WP_Block_Parser(); +$blocks = $parser->parse( $document ); + +foreach ( $blocks as $block ) { + if ( 'core/heading' === $block['blockName'] ) { + echo 'Found heading: ' . strip_tags( $block['innerHTML'] ); + // "Found heading: Welcome" + } +} +``` + +## Usage + +### Parsing a Document + +Call `parse()` with any string containing block markup. It returns an array of block arrays, each with the following keys: + +```php +$parser = new WP_Block_Parser(); +$blocks = $parser->parse( $document ); + +// Each element in $blocks is an array: +// array( +// 'blockName' => 'core/paragraph', // Fully-qualified block name, or null for freeform HTML. +// 'attrs' => array(), // Attributes from the block comment delimiter. +// 'innerBlocks' => array(), // Nested blocks (same structure, recursive). +// 'innerHTML' => '

Text

', // The HTML inside the block, with inner blocks removed. +// 'innerContent' => array( '

Text

' ), // Interleaved HTML strings and null markers for inner block positions. +// ) +``` + +### Block Types + +The parser recognizes three kinds of block tokens: + +**Standard blocks** have an opener and closer: + +```php +$blocks = ( new WP_Block_Parser() )->parse( + '

Hello

' +); +// $blocks[0]['blockName'] === 'core/paragraph' +// $blocks[0]['innerHTML'] === '

Hello

' +``` + +**Self-closing (void) blocks** end with `/-->`: + +```php +$blocks = ( new WP_Block_Parser() )->parse( + '<!-- wp:spacer {"height":"50px"} /-->' +); +// $blocks[0]['blockName'] === 'core/spacer' +// $blocks[0]['attrs'] === array( 'height' => '50px' ) +// $blocks[0]['innerHTML'] === '' +``` + +**Freeform HTML** is any content outside of block delimiters: + +```php +$blocks = ( new WP_Block_Parser() )->parse( + '

Just some HTML, no blocks here.

' +); +// $blocks[0]['blockName'] === null +// $blocks[0]['innerHTML'] === '

Just some HTML, no blocks here.

' +``` + +### Block Attributes + +Attributes are encoded as JSON inside the block comment delimiter. The parser decodes them into a PHP associative array: + +```php +$blocks = ( new WP_Block_Parser() )->parse( + '' . + '
' . + '' +); + +$attrs = $blocks[0]['attrs']; +// array( +// 'id' => 123, +// 'sizeSlug' => 'large', +// 'linkDestination' => 'none', +// ) +``` + +### Nested Blocks + +Blocks can contain other blocks. Inner blocks appear in the `innerBlocks` array, and `innerContent` interleaves the HTML fragments with `null` markers showing where each inner block was located: + +```php +$document = << +
+ +
+ +

Left column

+ +
+ + +
+ +

Right column

+ +
+ +
+ +HTML; + +$parser = new WP_Block_Parser(); +$blocks = $parser->parse( $document ); + +$columns = $blocks[0]; +// $columns['blockName'] === 'core/columns' +// count( $columns['innerBlocks'] ) === 2 + +$left_column = $columns['innerBlocks'][0]; +// $left_column['blockName'] === 'core/column' +// $left_column['innerBlocks'][0]['blockName'] === 'core/paragraph' + +// innerContent shows the interleaving of HTML and inner block positions: +// array( +// '
\n', // HTML before first inner block +// null, // Position of first inner block (core/column) +// '\n', // HTML between inner blocks +// null, // Position of second inner block (core/column) +// '\n
\n', // HTML after last inner block +// ) +``` + +### Namespaced Blocks + +The parser handles both core blocks (`wp:paragraph`) and namespaced third-party blocks (`wp:my-plugin/custom-block`). Block names without an explicit namespace are prefixed with `core/`: + +```php +$blocks = ( new WP_Block_Parser() )->parse( + '' . + '
Great product!
' . + '' +); +// $blocks[0]['blockName'] === 'my-plugin/testimonial' +// $blocks[0]['attrs'] === array( 'author' => 'Jane' ) +``` + +### Error Recovery + +The parser is designed to never fail. When it encounters malformed markup such as missing closers or mismatched block names, it produces a best-effort parse rather than returning an error: + +```php +// Missing closer -- the parser treats it as implicitly closed. +$blocks = ( new WP_Block_Parser() )->parse( + '

No closer here' +); +// $blocks[0]['blockName'] === 'core/paragraph' +// $blocks[0]['innerHTML'] === '

No closer here' +``` + +## API Reference + +### WP_Block_Parser + +| Method | Description | +|--------|-------------| +| `parse( $document )` | Parse block markup and return an array of block structures | + +### Block Structure (array keys) + +| Key | Type | Description | +|-----|------|-------------| +| `blockName` | `string\|null` | Fully-qualified name (e.g. `core/paragraph`), or `null` for freeform HTML | +| `attrs` | `array` | Block attributes decoded from the JSON in the comment delimiter | +| `innerBlocks` | `array` | Nested blocks, same structure recursively | +| `innerHTML` | `string` | HTML content with inner blocks stripped out | +| `innerContent` | `array` | Interleaved HTML strings and `null` markers for inner block positions | + +### WP_Block_Parser_Block + +| Property | Type | Description | +|----------|------|-------------| +| `$blockName` | `string\|null` | Block name | +| `$attrs` | `array\|null` | Block attributes | +| `$innerBlocks` | `array` | Nested block instances | +| `$innerHTML` | `string` | Inner HTML content | +| `$innerContent` | `array` | Interleaved content with `null` placeholders | + +## Attribution + +This component is extracted from [WordPress core](https://github.com/WordPress/wordpress-develop). The `WP_Block_Parser`, `WP_Block_Parser_Block`, and `WP_Block_Parser_Frame` classes are maintained as part of the WordPress block editor infrastructure. Licensed under GPL v2. + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/Blueprints/README.md b/components/Blueprints/README.md new file mode 100644 index 00000000..23f4519e --- /dev/null +++ b/components/Blueprints/README.md @@ -0,0 +1,296 @@ +# Blueprints + +Declarative WordPress site provisioning. Define a site's desired state as a JSON blueprint -- which plugins to install, which options to set, which content to import -- and let the runner execute it. 
Blueprints can create a new WordPress site from scratch or modify an existing one, making them useful for development environments, demo sites, automated testing, and reproducible WordPress setups. + +## Installation + +``` +composer require wp-php-toolkit/blueprints +``` + +## Quick Start + +Create a new WordPress site from a blueprint JSON file: + +```php +use WordPress\Blueprints\Runner; +use WordPress\Blueprints\RunnerConfiguration; +use WordPress\Blueprints\DataReference\AbsoluteLocalPath; + +$config = ( new RunnerConfiguration() ) + ->set_execution_mode( Runner::EXECUTION_MODE_CREATE_NEW_SITE ) + ->set_blueprint( new AbsoluteLocalPath( '/path/to/blueprint.json' ) ) + ->set_target_site_root( '/var/www/my-site' ) + ->set_target_site_url( 'http://localhost:8080' ) + ->set_database_engine( 'sqlite' ); + +$runner = new Runner( $config ); +$runner->run(); +``` + +Where `blueprint.json` looks like: + +```json +{ + "version": 2, + "steps": [ + { + "step": "installPlugin", + "pluginData": "https://downloads.wordpress.org/plugin/gutenberg.zip" + }, + { + "step": "setSiteOptions", + "options": { + "blogname": "My Test Site", + "blogdescription": "Built with Blueprints" + } + } + ] +} +``` + +## Usage + +### Execution modes + +Blueprints supports two execution modes: + +- **`EXECUTION_MODE_CREATE_NEW_SITE`** -- Downloads WordPress, creates the database, and applies the blueprint steps. Use this for spinning up fresh sites. +- **`EXECUTION_MODE_APPLY_TO_EXISTING_SITE`** -- Applies the blueprint steps to an already-installed WordPress site. Use this for modifying live or staging sites. 
+ +```php +use WordPress\Blueprints\Runner; +use WordPress\Blueprints\RunnerConfiguration; +use WordPress\Blueprints\DataReference\AbsoluteLocalPath; + +// Apply a blueprint to an existing site +$config = ( new RunnerConfiguration() ) + ->set_execution_mode( Runner::EXECUTION_MODE_APPLY_TO_EXISTING_SITE ) + ->set_blueprint( new AbsoluteLocalPath( '/path/to/blueprint.json' ) ) + ->set_target_site_root( '/var/www/existing-site' ) + ->set_target_site_url( 'http://localhost:8080' ) + ->set_database_engine( 'mysql' ) + ->set_database_credentials( array( + 'host' => '127.0.0.1', + 'port' => 3306, + 'user' => 'wp', + 'password' => 'secret', + 'dbname' => 'wordpress', + ) ); + +$runner = new Runner( $config ); +$runner->run(); +``` + +### Blueprint JSON structure + +A blueprint is a JSON document with a `version` field and a `steps` array. Each step declares a single operation: + +```json +{ + "version": 2, + "steps": [ + { + "step": "mkdir", + "path": "wp-content/custom-dir" + }, + { + "step": "writeFiles", + "files": { + "wp-content/custom-dir/config.txt": { + "data": "inline", + "content": "key=value" + } + } + }, + { + "step": "installPlugin", + "pluginData": "https://downloads.wordpress.org/plugin/akismet.zip" + }, + { + "step": "activatePlugin", + "plugin": "akismet/akismet.php" + }, + { + "step": "installTheme", + "themeData": "https://downloads.wordpress.org/theme/twentytwentyfour.zip" + }, + { + "step": "activateTheme", + "theme": "twentytwentyfour" + }, + { + "step": "setSiteOptions", + "options": { + "blogname": "My Site", + "permalink_structure": "/%postname%/" + } + }, + { + "step": "runPHP", + "code": "on( + 'progress', + function ( $event ) { + echo sprintf( + "[%d%%] %s\n", + $event->progress, + $event->caption + ); + } +); + +$config = ( new RunnerConfiguration() ) + ->set_progress_observer( $observer ); + // ... other configuration ... 
+``` + +### Blueprint validation + +Validate a blueprint against the JSON schema before executing it: + +```php +use WordPress\Blueprints\Validator\HumanFriendlySchemaValidator; + +$schema = array( + 'type' => 'object', + 'properties' => array( + 'version' => array( 'type' => 'integer' ), + 'steps' => array( 'type' => 'array' ), + ), + 'required' => array( 'version' ), +); + +$validator = new HumanFriendlySchemaValidator( $schema ); +$error = $validator->validate( json_decode( $blueprint_json ) ); + +if ( null !== $error ) { + echo 'Validation failed: ' . $error->get_message(); +} +``` + +## API Reference + +### Core classes + +| Class | Purpose | +|-------|---------| +| `Runner` | Executes a blueprint. Constructor takes a `RunnerConfiguration`. Call `run()` to execute. | +| `RunnerConfiguration` | Fluent configuration builder. Key methods: `set_blueprint()`, `set_execution_mode()`, `set_target_site_root()`, `set_target_site_url()`, `set_database_engine()`, `set_database_credentials()`, `set_progress_observer()`. | +| `Runtime` | Execution context available to steps. Provides `get_target_filesystem()`, `eval_php_code_in_subprocess()`. | + +### Execution mode constants + +| Constant | Value | +|----------|-------| +| `Runner::EXECUTION_MODE_CREATE_NEW_SITE` | `'create-new-site'` | +| `Runner::EXECUTION_MODE_APPLY_TO_EXISTING_SITE` | `'apply-to-existing-site'` | + +### Data reference classes + +| Class | Purpose | +|-------|---------| +| `DataReference` | Factory class. Use `DataReference::create( $value )` to auto-detect the source type. | +| `InlineFile` | Embed file content directly. Constructor takes `array( 'filename' => '...', 'content' => '...' )`. | +| `AbsoluteLocalPath` | Reference a file by its absolute path on disk. | +| `ExecutionContextPath` | Reference a file relative to the blueprint's directory. | +| `URLReference` | Reference a file by URL (downloaded at execution time). | +| `WordPressOrgPlugin` | Reference a plugin on wordpress.org by slug. 
| +| `WordPressOrgTheme` | Reference a theme on wordpress.org by slug. | + +### Validation + +| Class | Purpose | +|-------|---------| +| `HumanFriendlySchemaValidator` | Validates data against a JSON Schema. Returns `null` on success or a `ValidationError` on failure. | + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/ByteStream/README.md b/components/ByteStream/README.md new file mode 100644 index 00000000..30809cef --- /dev/null +++ b/components/ByteStream/README.md @@ -0,0 +1,255 @@ +# ByteStream + +Composable streaming primitives for reading, writing, and transforming byte data in pure PHP. ByteStream provides a pull-based model where you request bytes from a source, peek at or consume them, and optionally transform them through filters like compression or checksums -- all without loading entire files into memory. + +## Installation + +```bash +composer require wp-php-toolkit/bytestream +``` + +## Quick Start + +```php +use WordPress\ByteStream\ReadStream\FileReadStream; + +// Read a file in chunks +$reader = FileReadStream::from_path( '/path/to/file.txt' ); +while ( ! $reader->reached_end_of_data() ) { + $available = $reader->pull( 1024 ); + $chunk = $reader->consume( $available ); + // Process $chunk... +} +$reader->close_reading(); +``` + +## Usage + +### Reading Files + +`FileReadStream` opens a file and exposes it through the pull/consume model. Use `pull()` to buffer bytes, `peek()` to inspect them without advancing, and `consume()` to read and advance the position. 
+ +```php +use WordPress\ByteStream\ReadStream\FileReadStream; + +$reader = FileReadStream::from_path( '/path/to/data.bin' ); + +// Pull up to 100 bytes into the internal buffer +$reader->pull( 100 ); + +// Peek at the first 10 bytes without consuming them +$header = $reader->peek( 10 ); + +// Consume (read and advance past) 10 bytes +$header = $reader->consume( 10 ); + +// Read the current position +$offset = $reader->tell(); // 10 + +// Seek to a specific offset +$reader->seek( 0 ); + +// Read all remaining bytes at once +$rest = $reader->consume_all(); + +$reader->close_reading(); +``` + +You can also create a `FileReadStream` from an existing resource handle: + +```php +$handle = fopen( '/path/to/file.txt', 'r' ); +$reader = FileReadStream::from_resource( $handle, filesize( '/path/to/file.txt' ) ); +``` + +### In-Memory Streams with MemoryPipe + +`MemoryPipe` holds data in memory and supports both reading and writing. It is useful for testing, for wrapping string data in the stream interface, or for piping data between components. + +```php +use WordPress\ByteStream\MemoryPipe; + +// Wrap a string as a readable stream +$pipe = new MemoryPipe( 'Hello, world!' ); +$pipe->pull( 5 ); +echo $pipe->consume( 5 ); // "Hello" + +// Use as a write-then-read pipe +$pipe = new MemoryPipe( null, 1024 ); // Expected length of 1024 +$pipe->append_bytes( 'chunk one ' ); +$pipe->append_bytes( 'chunk two' ); +$pipe->close_writing(); + +echo $pipe->consume_all(); // "chunk one chunk two" +``` + +### Writing Files + +`FileWriteStream` appends data to a file. It supports truncating or appending to existing files. 
+ +```php +use WordPress\ByteStream\WriteStream\FileWriteStream; + +// Truncate and write +$writer = FileWriteStream::from_path( '/path/to/output.txt', 'truncate' ); +$writer->append_bytes( 'First line' ); +$writer->append_bytes( "\nSecond line" ); +$writer->close_writing(); + +// Append to existing file +$writer = FileWriteStream::from_path( '/path/to/output.txt', 'append' ); +$writer->append_bytes( "\nThird line" ); +$writer->close_writing(); +``` + +### Reading and Writing the Same File + +`FileReadWriteStream` provides both read and write access to a single file. Writes always append to the end while reads track their own position independently. + +```php +use WordPress\ByteStream\FileReadWriteStream; + +$stream = FileReadWriteStream::from_path( '/tmp/buffer.bin', true ); +$stream->append_bytes( 'Hello' ); +$stream->append_bytes( ' World' ); + +// Read back from the beginning +$stream->pull( 11 ); +echo $stream->consume( 11 ); // "Hello World" + +$stream->close_writing(); +$stream->close_reading(); +``` + +### Compression and Decompression + +`DeflateReadStream` compresses data as you read it, and `InflateReadStream` decompresses. They wrap any `ByteReadStream` and produce a new stream of transformed bytes. + +```php +use WordPress\ByteStream\MemoryPipe; +use WordPress\ByteStream\ReadStream\DeflateReadStream; +use WordPress\ByteStream\ReadStream\InflateReadStream; + +$original = 'The quick brown fox jumps over the lazy dog.'; + +// Compress +$source = new MemoryPipe( $original ); +$deflated = new DeflateReadStream( $source, ZLIB_ENCODING_DEFLATE ); +$compressed = $deflated->consume_all(); + +// Decompress +$compressed_source = new MemoryPipe( $compressed ); +$inflated = new InflateReadStream( $compressed_source, ZLIB_ENCODING_DEFLATE ); +echo $inflated->consume_all(); // "The quick brown fox jumps over the lazy dog." 
+``` + +### Transforming Streams with Filters + +`TransformedReadStream` and `TransformedWriteStream` apply a chain of `ByteTransformer` filters as data flows through the stream. Built-in transformers include `ChecksumTransformer`, `DeflateTransformer`, and `InflateTransformer`. + +```php +use WordPress\ByteStream\ReadStream\FileReadStream; +use WordPress\ByteStream\ReadStream\TransformedReadStream; +use WordPress\ByteStream\ByteTransformer\ChecksumTransformer; + +// Read a file and compute its SHA-1 hash as you go +$checksum = new ChecksumTransformer( 'sha1' ); +$reader = FileReadStream::from_path( '/path/to/file.txt' ); +$stream = new TransformedReadStream( $reader, array( 'checksum' => $checksum ) ); + +$contents = $stream->consume_all(); +echo $stream['checksum']->get_hash(); // SHA-1 hex digest +``` + +Compress data as you write it: + +```php +use WordPress\ByteStream\WriteStream\FileWriteStream; +use WordPress\ByteStream\WriteStream\TransformedWriteStream; +use WordPress\ByteStream\ByteTransformer\DeflateTransformer; + +$file_writer = FileWriteStream::from_path( '/path/to/output.deflate', 'truncate' ); +$writer = new TransformedWriteStream( + $file_writer, + array( new DeflateTransformer( ZLIB_ENCODING_DEFLATE ) ) +); +$writer->append_bytes( 'Data to compress...' ); +$writer->close_writing(); +$file_writer->close_writing(); +``` + +### Limiting Read Length + +`LimitedByteReadStream` restricts reading to a fixed number of bytes from a larger stream. This is useful for reading structured binary formats where you know the length of each section. 
+ +```php +use WordPress\ByteStream\ReadStream\FileReadStream; +use WordPress\ByteStream\ReadStream\LimitedByteReadStream; + +$reader = FileReadStream::from_path( '/path/to/archive.bin' ); + +// Read only the first 256 bytes +$header_reader = new LimitedByteReadStream( $reader, 256 ); +$header = $header_reader->consume_all(); +echo $header_reader->length(); // 256 +``` + +### Pull Modes + +The `pull()` method supports two modes that control how bytes are buffered: + +```php +use WordPress\ByteStream\ReadStream\ByteReadStream; + +// PULL_NO_MORE_THAN (default): pull up to N bytes, may return fewer +$available = $reader->pull( 1024 ); +$chunk = $reader->consume( $available ); + +// PULL_EXACTLY: pull exactly N bytes, throws NotEnoughDataException if +// the stream doesn't have enough data +$reader->pull( 100, ByteReadStream::PULL_EXACTLY ); +$chunk = $reader->consume( 100 ); +``` + +## API Reference + +### Interfaces + +| Interface | Methods | +|---|---| +| `ByteReadStream` | `pull()`, `peek()`, `consume()`, `consume_all()`, `seek()`, `tell()`, `length()`, `reached_end_of_data()`, `close_reading()` | +| `ByteWriteStream` | `append_bytes()`, `close_writing()` | +| `BytePipe` | Combines `ByteReadStream` and `ByteWriteStream` | +| `ByteTransformer` | `filter_bytes()`, `flush()` | + +### Read Stream Classes + +| Class | Description | +|---|---| +| `FileReadStream` | Reads from a file via `from_path()` or `from_resource()` | +| `InflateReadStream` | Decompresses a wrapped `ByteReadStream` | +| `DeflateReadStream` | Compresses a wrapped `ByteReadStream` | +| `TransformedReadStream` | Applies a chain of `ByteTransformer` filters while reading | +| `LimitedByteReadStream` | Limits reading to a fixed byte count from a larger stream | + +### Write Stream Classes + +| Class | Description | +|---|---| +| `FileWriteStream` | Writes to a file via `from_path()` or `from_resource_handle()` | +| `TransformedWriteStream` | Applies a chain of `ByteTransformer` filters while writing | + 
+### Other Classes + +| Class | Description | +|---|---| +| `MemoryPipe` | In-memory read/write buffer (implements `BytePipe`) | +| `FileReadWriteStream` | File-backed read/write stream (implements `BytePipe`) | +| `ChecksumTransformer` | Computes a hash (SHA-1, MD5, etc.) as bytes flow through | +| `DeflateTransformer` | Compresses bytes as a write-side transformer | +| `InflateTransformer` | Decompresses bytes as a write-side transformer | + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/CLI/README.md b/components/CLI/README.md new file mode 100644 index 00000000..a9a1047f --- /dev/null +++ b/components/CLI/README.md @@ -0,0 +1,158 @@ +# CLI + +A POSIX-style command-line argument parser for PHP. It handles long options (`--verbose`), short options (`-v`), bundled short options (`-abc`), inline values (`--port=8080`, `-p=8080`), and positional arguments -- all in a single static method call with no external dependencies. + +## Installation + +```bash +composer require wp-php-toolkit/cli +``` + +## Quick Start + +```php +use WordPress\CLI\CLI; + +$option_defs = array( + 'output' => array( 'o', true, null, 'Output file path' ), + 'force' => array( 'f', false, false, 'Overwrite existing files' ), +); + +$argv = array( '--output', '/tmp/result.txt', '-f', 'input.json' ); + +list( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); + +// $positionals = array( 'input.json' ) +// $options = array( 'output' => '/tmp/result.txt', 'force' => true ) +``` + +## Usage + +### Defining Options + +Each option is defined as an entry in an associative array. 
The key is the long option name, and the value is a four-element array: + +```php +$option_defs = array( + // 'long-name' => array( short, hasValue, default, description ) + 'site-url' => array( 'u', true, null, 'Public site URL' ), + 'site-path' => array( null, true, null, 'Target directory (no short form)' ), + 'help' => array( 'h', false, false, 'Show help message' ), + 'verbose' => array( 'v', false, false, 'Enable verbose output' ), +); +``` + +| Element | Type | Meaning | +|-----------|----------------|------------------------------------------------------| +| `short` | `string\|null` | Single-character short alias, or `null` for none | +| `hasValue`| `bool` | `true` if the option takes a value, `false` for flags | +| `default` | `mixed` | Default value when the option is not provided | +| `description` | `string` | Human-readable description (for help text) | + +### Long Options + +Long options can be passed with `=` or as a separate argument: + +```php +$option_defs = array( + 'port' => array( 'p', true, '3000', 'Server port' ), +); + +// These are equivalent: +// --port=8080 +// --port 8080 + +$argv = array( '--port=8080' ); +list( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); +// $options['port'] === '8080' +``` + +### Short Options + +Short options work the same way as long options. 
Boolean flags can be bundled: + +```php +$option_defs = array( + 'all' => array( 'a', false, false, 'Process all items' ), + 'force' => array( 'f', false, false, 'Force overwrite' ), + 'verbose' => array( 'v', false, false, 'Verbose output' ), + 'output' => array( 'o', true, null, 'Output path' ), +); + +// Bundle boolean flags: -afv is the same as -a -f -v +$argv = array( '-afv' ); +list( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); +// $options['all'] === true +// $options['force'] === true +// $options['verbose'] === true + +// A value-bearing short option can appear at the end of a bundle: +$argv = array( '-afo', '/tmp/out.txt' ); +list( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); +// $options['all'] === true +// $options['force'] === true +// $options['output'] === '/tmp/out.txt' +``` + +### Positional Arguments + +Any argument that is not an option or an option value is collected as a positional argument: + +```php +$option_defs = array( + 'help' => array( 'h', false, false, 'Show help' ), +); + +$argv = array( 'blueprint.json', '-h', 'extra-arg' ); +list( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); +// $positionals = array( 'blueprint.json', 'extra-arg' ) +// $options['help'] === true +``` + +### Error Handling + +The parser throws `InvalidArgumentException` for unknown options or missing required values: + +```php +use InvalidArgumentException; + +$option_defs = array( + 'port' => array( 'p', true, null, 'Server port' ), +); + +try { + $argv = array( '--unknown' ); + CLI::parse_command_args_and_options( $argv, $option_defs ); +} catch ( InvalidArgumentException $e ) { + // "Unknown option --unknown" +} + +try { + $argv = array( '--port' ); // missing value + CLI::parse_command_args_and_options( $argv, $option_defs ); +} catch ( InvalidArgumentException $e ) { + // "Option --port requires a value" +} +``` + +## API Reference + +### 
`CLI` (class) + +| Method | Description | +|--------|-------------| +| `CLI::parse_command_args_and_options( array $argv, array $option_defs ): array` | Parses CLI arguments and returns `array( $positionals, $options )`. | + +**Parameters:** + +- `$argv` -- Array of command-line arguments (typically `array_slice( $argv, 1 )` to skip the script name). +- `$option_defs` -- Associative array of option definitions. Each key is a long option name and each value is `array( $short, $has_value, $default, $description )`. + +**Returns:** A two-element array: `array( $positionals, $options )` where `$positionals` is a list of non-option arguments and `$options` is an associative array of option values. + +**Throws:** `InvalidArgumentException` for unknown options or missing values. + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/CORSProxy/README.md b/components/CORSProxy/README.md index f16caed1..d91e033c 100644 --- a/components/CORSProxy/README.md +++ b/components/CORSProxy/README.md @@ -1,36 +1,182 @@ -## PHP CORS Proxy +# CORSProxy -A PHP CORS proxy need to integrate git clone via fetch(). +A PHP CORS proxy that lets browser-based JavaScript make cross-origin requests to external services. Built for WordPress Playground to bridge `fetch()` calls to git servers and other APIs that don't set CORS headers. The proxy streams data bidirectionally, blocks requests to private IP ranges, filters sensitive headers, and enforces size limits -- all without external dependencies. -### Configuration +## Installation -In order to avoid running a CORS proxy that is easy to abuse by default, the proxy requires administrators to explicitly -declare what to do about rate-limiting, by doing one of the following: +``` +composer require wp-php-toolkit/corsproxy +``` -- Provide a rate-limiting function `playground_cors_proxy_maybe_rate_limit()`. -- Define a truthy `PLAYGROUND_CORS_PROXY_DISABLE_RATE_LIMIT` to explicitly disable rate-limiting. 
+## Quick Start -These can be provided in an optional `cors-proxy-config.php` file in the same directory as `cors-proxy.php` or in a PHP -file that is loaded before all PHP execution via the [ -`auto_prepend_file`](https://www.php.net/manual/en/ini.core.php#ini.auto-prepend-file) php.ini option. +Deploy `cors-proxy.php` behind a web server. Clients make requests through the proxy by appending the target URL to the proxy's path: -### Usage +``` +GET https://your-server.com/cors-proxy.php/https://api.example.com/data +``` -Request http://127.0.0.1:5263/proxy.php/https://w.org/?test=1 to get the response from https://w.org/?test=1 plus the -CORS headers. +The proxy fetches `https://api.example.com/data`, streams the response back with CORS headers attached, and the browser's same-origin policy is satisfied. -### Development and testing +## Usage -- Run `dev.sh` to start a local server, then go to http://127.0.0.1:5263/proxy.php/https://w.org/ and confirm it worked. -- Run `test.sh` to run PHPUnit tests, confirm they all pass. -- Run `test-watch.sh` to run PHPUnit tests in watch mode. +### Deployment -### Design decisions +Place `cors-proxy.php` and `cors-proxy-functions.php` in a web-accessible directory. The proxy works with Apache, Nginx, or PHP's built-in development server. -- Stream data both ways, don't buffer. -- Don't pass auth headers in either direction. - - Opt-in for request headers possible using `X-Cors-Proxy-Allowed-Request-Headers`. -- Refuse to request private IPs. -- Refuse to process non-GET non-POST non-OPTIONS requests. -- Refuse to process POST request body larger than, say, 100KB. -- Refuse to process responses larger than, say, 100MB. +For local development: + +```bash +php -S 127.0.0.1:5263 cors-proxy.php +# Then request: http://127.0.0.1:5263/cors-proxy.php/https://w.org/ +``` + +### Rate limiting + +The proxy refuses to run without rate limiting configured. You must do one of the following: + +1. 
**Define a rate-limiting function** in a `cors-proxy-config.php` file placed alongside `cors-proxy.php`: + +```php + 'application/json', + 'Content-Type' => 'application/json', + 'Cookie' => 'session=abc', + 'Host' => 'example.com', + 'Authorization' => 'Bearer token123', + ), + array( 'Cookie', 'Host' ), // always stripped + array( 'Authorization' ) // requires opt-in +); +// Result: ['Accept' => 'application/json', 'Content-Type' => 'application/json'] +// Authorization was stripped because the client did not send +// X-Cors-Proxy-Allowed-Request-Headers: Authorization +``` + +**URL validation.** Target URLs are validated for scheme (only `http` and `https`), checked for embedded credentials, and verified not to point back at the proxy server itself. + +### Redirect handling + +When the target server returns a redirect, the proxy rewrites the `Location` header so the client follows the redirect back through the proxy: + +```php +$rewritten = rewrite_relative_redirect( + 'https://w.org/hosting', // original request + '/hosting/', // redirect location + 'https://cors.example.com/proxy.php' // proxy URL +); +// Result: "https://cors.example.com/proxy.php?https://w.org/hosting/" +``` + +This works for both relative and absolute redirects. 
+ +### Extracting the target URL + +The proxy extracts the target URL from either `PATH_INFO` or `QUERY_STRING`: + +```php +// PATH_INFO style: +// GET /cors-proxy.php/https://example.com +get_target_url( array( 'PATH_INFO' => '/https://example.com' ) ); +// Returns: "https://example.com" + +// Query string style: +// GET /cors-proxy.php?https://example.com +get_target_url( array( 'QUERY_STRING' => 'https://example.com' ) ); +// Returns: "https://example.com" +``` + +### CORS headers + +CORS response headers are added for requests originating from: + +- `https://playground.wordpress.net` (when the proxy is hosted elsewhere) +- `localhost` or `127.0.0.1` (for local development) + +The proxy responds to `OPTIONS` preflight requests with appropriate `Access-Control-Allow-*` headers. + +## API Reference + +### Functions + +| Function | Purpose | +|----------|---------| +| `get_target_url( $server_data )` | Extracts the target URL from `$_SERVER` (or a custom array). Returns the URL string or `false`. | +| `get_current_script_uri( $target_url, $request_uri )` | Returns the proxy's own URI prefix (everything before the target URL in the request). | +| `url_validate_and_resolve( $url, $resolve_function )` | Validates a URL (scheme, no credentials, no private IPs) and resolves the hostname. Returns `array( 'host' => ..., 'ip' => ... )` or throws `CorsProxyException`. | +| `is_private_ip( $ip )` | Returns `true` if the IP address falls within any private, loopback, link-local, or reserved range. Supports both IPv4 and IPv6. | +| `filter_headers_by_name( $headers, $disallowed, $opt_in )` | Filters an associative array of headers, removing disallowed ones and enforcing opt-in for sensitive headers. | +| `rewrite_relative_redirect( $request_url, $redirect_location, $proxy_url )` | Rewrites a redirect `Location` to route back through the proxy. | +| `should_respond_with_cors_headers( $host, $origin )` | Returns `true` if the given origin should receive CORS response headers. 
| + +### Classes + +| Class | Purpose | +|-------|---------| +| `IpUtils` | Static methods for private IP detection: `isPrivateIp( $ip )`. Covers RFC 1918, RFC 4193, loopback, link-local, carrier-grade NAT, and more. | +| `CorsProxyException` | Thrown when URL validation fails (invalid scheme, private IP, unresolvable hostname, etc.). | + +## Requirements + +- PHP 7.2+ +- `curl` extension (for proxying HTTP requests) +- No other external dependencies diff --git a/components/DataLiberation/README.md b/components/DataLiberation/README.md new file mode 100644 index 00000000..e260a2fe --- /dev/null +++ b/components/DataLiberation/README.md @@ -0,0 +1,321 @@ +# DataLiberation + +Streaming data import and export for WordPress. Reads and writes WordPress content in multiple formats -- WXR (WordPress eXtended RSS), SQL dumps, block markup, and more -- without loading everything into memory. Designed for migrating content between WordPress sites, converting between formats, and processing large exports that would otherwise exhaust PHP's memory limits. + +## Installation + +``` +composer require wp-php-toolkit/data-liberation +``` + +## Quick Start + +Export a WordPress post to WXR format: + +```php +use WordPress\ByteStream\MemoryPipe; +use WordPress\DataLiberation\EntityWriter\WXRWriter; +use WordPress\DataLiberation\ImportEntity; + +$output = new MemoryPipe(); +$writer = new WXRWriter( $output ); + +$post = new ImportEntity( 'post', array( + 'post_title' => 'Hello World', + 'post_date' => '2024-01-15', + 'guid' => 'https://example.com/?p=1', + 'content' => '

Welcome to my site.

', + 'excerpt' => 'A short summary.', + 'post_id' => '1', + 'post_name' => 'hello-world', + 'status' => 'publish', + 'post_type' => 'post', +) ); + +$writer->append_entity( $post ); +$writer->finalize(); +$writer->close_writing(); +$output->close_writing(); + +echo $output->consume_all(); +// Outputs a complete WXR XML document with the post. +``` + +## Usage + +### Writing WXR exports + +`WXRWriter` generates WordPress eXtended RSS (WXR) XML files. You feed it entities one at a time -- posts, metadata, terms, and comments -- and it produces valid WXR output. Entities must be appended in logical order: metadata, terms, and comments belong to the most recently appended post. + +```php +use WordPress\ByteStream\MemoryPipe; +use WordPress\DataLiberation\EntityWriter\WXRWriter; +use WordPress\DataLiberation\ImportEntity; + +$output = new MemoryPipe(); +$writer = new WXRWriter( $output ); + +// Write a post +$writer->append_entity( new ImportEntity( 'post', array( + 'post_title' => 'My Article', + 'post_date' => '2024-03-01', + 'guid' => 'https://example.com/?p=42', + 'content' => '

Article body.

', + 'post_id' => '42', + 'post_name' => 'my-article', + 'status' => 'publish', + 'post_type' => 'post', + 'comment_status' => 'open', +) ) ); + +// Attach metadata to that post +$writer->append_entity( new ImportEntity( 'post_meta', array( + 'meta_key' => '_thumbnail_id', + 'meta_value' => '99', +) ) ); + +// Attach a term +$writer->append_entity( new ImportEntity( 'term', array( + 'term_id' => '5', + 'taxonomy' => 'category', + 'slug' => 'tutorials', + 'parent' => '0', +) ) ); + +// Attach a comment +$writer->append_entity( new ImportEntity( 'comment', array( + 'comment_id' => '1', + 'comment_author' => 'Jane', + 'comment_content' => 'Great post!', + 'comment_date' => '2024-03-02', + 'comment_approved' => '1', +) ) ); + +$writer->finalize(); +$writer->close_writing(); +$output->close_writing(); +``` + +The writer supports pausing and resuming via a reentrancy cursor. This lets you split large exports across multiple PHP requests: + +```php +// Save state after writing some entities +$cursor = $writer->get_reentrancy_cursor(); +$writer->close_writing(); + +// Later, resume from where you left off +$writer = new WXRWriter( $output, $cursor ); +$writer->append_entity( $next_post ); +``` + +### Writing SQL dumps + +`MySQLDumpWriter` produces SQL INSERT statements from entity data: + +```php +use WordPress\ByteStream\MemoryPipe; +use WordPress\DataLiberation\EntityWriter\MySQLDumpWriter; +use WordPress\DataLiberation\ImportEntity; + +$output = new MemoryPipe(); +$writer = new MySQLDumpWriter( $output ); + +$writer->append_entity( new ImportEntity( 'database_row', array( + 'table' => 'wp_posts', + 'record' => array( + 'ID' => 1, + 'post_title' => 'First Post', + 'post_content' => 'Hello World', + ), +) ) ); + +$writer->close_writing(); +echo $output->consume_all(); +// INSERT INTO wp_posts (ID, post_title, post_content) VALUES (1, 'First Post', 'Hello World'); +``` + +String values are automatically escaped. NULL values are written as SQL NULL. 
+ +### Reading WXR files + +`WXREntityReader` streams through WXR files and emits entities as it encounters them. It never loads the full document into memory, so it can handle exports of any size: + +```php +use WordPress\DataLiberation\EntityReader\WXREntityReader; + +$reader = WXREntityReader::create(); +$reader->append_bytes( file_get_contents( 'export.xml' ) ); +$reader->input_finished(); + +while ( $reader->next_entity() ) { + $entity = $reader->get_entity(); + switch ( $entity->get_type() ) { + case 'site_option': + $data = $entity->get_data(); + // $data['option_name'], $data['option_value'] + break; + + case 'post': + $data = $entity->get_data(); + // $data['post_title'], $data['post_content'], etc. + break; + + case 'comment': + $data = $entity->get_data(); + // $data['comment_author'], $data['comment_content'], etc. + break; + } +} +``` + +For streaming large files without reading them entirely into memory: + +```php +$reader = WXREntityReader::create(); +$handle = fopen( 'large-export.xml', 'r' ); + +while ( ! feof( $handle ) ) { + $reader->append_bytes( fread( $handle, 65536 ) ); + + while ( $reader->next_entity() ) { + $entity = $reader->get_entity(); + // Process entity... + } +} +fclose( $handle ); +``` + +### Processing block markup + +`BlockMarkupProcessor` parses WordPress block comments (like ``) and lets you inspect and modify block names, attributes, and content: + +```php +use WordPress\DataLiberation\BlockMarkup\BlockMarkupProcessor; + +$markup = '' + . '' + . ''; + +$p = new BlockMarkupProcessor( $markup ); + +while ( $p->next_token() ) { + if ( '#block-comment' === $p->get_token_type() ) { + echo $p->get_block_name(); // "wp:image" + $attrs = $p->get_block_attributes(); // ["url" => "/photo.jpg", "class" => "wide"] + echo $p->is_block_closer() ? 
'closer' : 'opener'; + } +} +``` + +Iterate over individual block attributes and modify them: + +```php +$p = new BlockMarkupProcessor( + '' +); +$p->next_token(); + +while ( $p->next_block_attribute() ) { + $key = $p->get_block_attribute_key(); // "class", then "url" + $value = $p->get_block_attribute_value(); // "wp-bold", then "old.png" + + if ( 'url' === $key ) { + $p->set_block_attribute_value( 'new.png' ); + } +} + +echo $p->get_updated_html(); +// +``` + +### Rewriting URLs in block markup + +`BlockMarkupUrlProcessor` finds and rewrites URLs across all parts of block markup -- HTML attributes, block comment attributes, text nodes, and inline CSS: + +```php +use WordPress\DataLiberation\BlockMarkup\BlockMarkupUrlProcessor; + +$markup = 'About' + . ''; + +$p = new BlockMarkupUrlProcessor( $markup, 'https://old-site.com' ); + +while ( $p->next_url() ) { + $raw = $p->get_raw_url(); // "https://old-site.com/about", etc. + $parsed = $p->get_parsed_url(); // URL object with host, path, etc. + + // Rewrite to a new domain + $new_url = str_replace( 'old-site.com', 'new-site.com', $raw ); + $p->set_raw_url( $new_url ); +} + +echo $p->get_updated_html(); +``` + +### CSS tokenization + +`CSSProcessor` tokenizes CSS according to the CSS Syntax Level 3 specification. It processes stylesheets one token at a time without building a full AST: + +```php +use WordPress\DataLiberation\CSS\CSSProcessor; + +$css = 'body { background: url("image.png"); color: red; }'; +$processor = CSSProcessor::create( $css ); + +while ( $processor->next_token() ) { + echo $processor->get_token_type() . ': ' . $processor->get_normalized_token() . 
"\n"; +} +``` + +## API Reference + +### Entity types (ImportEntity) + +| Type | Constants | Key data fields | +|------|-----------|----------------| +| `post` | `ImportEntity::TYPE_POST` | `post_title`, `post_content`, `post_date`, `guid`, `post_name`, `status`, `post_type`, `post_id` | +| `post_meta` | `ImportEntity::TYPE_POST_META` | `meta_key`, `meta_value` | +| `comment` | `ImportEntity::TYPE_COMMENT` | `comment_id`, `comment_author`, `comment_content`, `comment_date`, `comment_approved` | +| `term` | `ImportEntity::TYPE_TERM` | `term_id`, `taxonomy`, `slug`, `parent` | +| `site_option` | `ImportEntity::TYPE_SITE_OPTION` | `option_name`, `option_value` | +| `database_row` | -- | `table`, `record` (associative array of column => value) | + +### Writers (EntityWriter interface) + +| Class | Purpose | +|-------|---------| +| `WXRWriter` | Writes WXR XML exports. Constructor takes a `ByteWriteStream`. | +| `MySQLDumpWriter` | Writes SQL INSERT statements. Constructor takes a `ByteWriteStream`. | + +Shared methods: `append_entity( ImportEntity )`, `close_writing()`, `get_reentrancy_cursor()`. + +### Readers (EntityReader interface) + +| Class | Purpose | +|-------|---------| +| `WXREntityReader` | Streams WXR XML files. Use `WXREntityReader::create()`. | +| `HTMLEntityReader` | Converts an HTML file into WordPress entities. | +| `EPubEntityReader` | Reads EPUB documents as WordPress entities. | +| `DatabaseRowsEntityReader` | Reads database query results as entities. | +| `FilesystemEntityReader` | Reads a directory tree as entities. | + +Shared methods: `next_entity()`, `get_entity()`, `is_finished()`, `get_reentrancy_cursor()`. + +### Block markup processors + +| Class | Purpose | +|-------|---------| +| `BlockMarkupProcessor` | Parses block comments. Key methods: `next_token()`, `get_block_name()`, `get_block_attributes()`, `is_self_closing_block()`, `is_block_closer()`, `next_block_attribute()`, `set_block_attribute_value()`. 
| +| `BlockMarkupUrlProcessor` | Finds and rewrites URLs in block markup. Key methods: `next_url()`, `get_raw_url()`, `get_parsed_url()`, `set_raw_url()`. | + +### CSS processors + +| Class | Purpose | +|-------|---------| +| `CSSProcessor` | CSS Syntax Level 3 tokenizer. Key methods: `next_token()`, `get_token_type()`, `get_normalized_token()`. | +| `CSSURLProcessor` | Finds and rewrites URLs inside CSS. | + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/Encoding/README.md b/components/Encoding/README.md new file mode 100644 index 00000000..add93761 --- /dev/null +++ b/components/Encoding/README.md @@ -0,0 +1,138 @@ +# Encoding + +Pure PHP utilities for UTF-8 validation, scrubbing, and conversion. This component detects invalid byte sequences, replaces them with the Unicode Replacement Character using the maximal subpart algorithm, and provides low-level tools for working with Unicode code points -- all without requiring the `mbstring` extension. When `mbstring` is available, the library delegates to it for better performance. + +## Installation + +```bash +composer require wp-php-toolkit/encoding +``` + +## Quick Start + +```php +use function WordPress\Encoding\wp_is_valid_utf8; +use function WordPress\Encoding\wp_scrub_utf8; + +// Validate a string +wp_is_valid_utf8( 'Hello, world!' ); // true +wp_is_valid_utf8( "invalid \xC0 byte" ); // false + +// Replace invalid bytes with the replacement character +echo wp_scrub_utf8( "caf\xC0 latte" ); // "caf\xEF\xBF\xBD latte" (caf? latte) +``` + +## Usage + +### Validating UTF-8 + +`wp_is_valid_utf8()` checks whether a byte string is well-formed UTF-8. It rejects overlong sequences, surrogate halves, bytes that are never valid in UTF-8, and incomplete multi-byte sequences. 
+ +```php +use function WordPress\Encoding\wp_is_valid_utf8; + +// Valid UTF-8 +wp_is_valid_utf8( '' ); // true (empty string) +wp_is_valid_utf8( 'just a test' ); // true (plain ASCII) +wp_is_valid_utf8( "\xE2\x9C\x8F" ); // true (Pencil, U+270F) + +// Invalid UTF-8 +wp_is_valid_utf8( "just \xC0 test" ); // false (0xC0 is never valid) +wp_is_valid_utf8( "\xE2\x9C" ); // false (incomplete 3-byte sequence) +wp_is_valid_utf8( "\xC1\xBF" ); // false (overlong encoding) +wp_is_valid_utf8( "\xED\xB0\x80" ); // false (surrogate half U+DC00) +wp_is_valid_utf8( "B\xFCch" ); // false (ISO-8859-1 high byte) +``` + +### Scrubbing Invalid Bytes + +`wp_scrub_utf8()` replaces ill-formed byte sequences with the Unicode Replacement Character (U+FFFD). It follows the "maximal subpart" algorithm recommended by the Unicode Standard for secure and interoperable string handling. + +```php +use function WordPress\Encoding\wp_scrub_utf8; + +// Valid strings pass through unchanged +wp_scrub_utf8( 'test' ); // "test" + +// Single invalid byte becomes one replacement character +wp_scrub_utf8( ".\xC0." ); // ".\xEF\xBF\xBD." (i.e., ".?.") + +// Incomplete multi-byte sequence +wp_scrub_utf8( ".\xE2\x8C." ); // ".?." (missing third byte) + +// Each maximal subpart gets its own replacement character +wp_scrub_utf8( ".\xC1\xBF." ); // ".??." (overlong: two invalid subparts) + +// Surrogate half U+D800 encoded as three bytes -- all three are invalid +wp_scrub_utf8( ".\xED\xA0\x80." ); // ".???." +``` + +### Detecting Noncharacters + +`wp_has_noncharacters()` checks whether a string contains Unicode noncharacters -- code points that are permanently reserved and should not appear in open data interchange. 
+ +```php +use function WordPress\Encoding\wp_has_noncharacters; + +// U+FFFE is a noncharacter +wp_has_noncharacters( "\xEF\xBF\xBE" ); // true + +// Normal text +wp_has_noncharacters( 'Hello' ); // false +``` + +The noncharacters are the 32 code points U+FDD0-U+FDEF, plus the last two code points of every plane: U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, and so on through U+10FFFE, U+10FFFF. + +### Converting Code Points to UTF-8 + +`codepoint_to_utf8_bytes()` encodes a Unicode code point number into its UTF-8 byte representation. Invalid code points (surrogate halves, values above U+10FFFF) produce the replacement character. + +```php +use function WordPress\Encoding\codepoint_to_utf8_bytes; + +echo codepoint_to_utf8_bytes( 0x41 ); // "A" +echo codepoint_to_utf8_bytes( 0x270F ); // "\xE2\x9C\x8F" (Pencil) +echo codepoint_to_utf8_bytes( 0x1F170 ); // "\xF0\x9F\x85\xB0" (Negative Squared Latin Capital Letter A) + +// Invalid code points produce the replacement character +echo codepoint_to_utf8_bytes( 0xD83C ); // "\xEF\xBF\xBD" (surrogate half) +``` + +### Decoding UTF-8 to Code Points + +`utf8_ord()` converts a single UTF-8 character (byte sequence) back to its Unicode code point number. + +```php +use function WordPress\Encoding\utf8_ord; + +echo utf8_ord( 'A' ); // 65 (0x41) +echo utf8_ord( "\xE2\x9C\x8F" ); // 9999 (0x270F, Pencil) +echo utf8_ord( "\xF0\x9F\x85\xB0" ); // 127344 (0x1F170) +``` + +### How the Fallback Works + +When `mbstring` is available, `wp_is_valid_utf8()` delegates to `mb_check_encoding()` and `wp_scrub_utf8()` delegates to `mb_scrub()`. Without `mbstring`, the library uses a pure-PHP byte scanner (`_wp_scan_utf8()`) that validates byte sequences against the UTF-8 well-formedness table from the Unicode Standard. This fallback is fully conformant and handles all edge cases, including the maximal subpart algorithm for scrubbing. + +The PCRE-based implementation of `wp_has_noncharacters()` is preferred when PCRE supports UTF-8 mode (the `u` pattern modifier). Otherwise, a byte-level fallback scans the string directly. 
+ +## API Reference + +### Functions + +| Function | Description | +|---|---| +| `wp_is_valid_utf8( $bytes )` | Returns `true` if the string is well-formed UTF-8 | +| `wp_scrub_utf8( $text )` | Replaces invalid byte sequences with U+FFFD | +| `wp_has_noncharacters( $text )` | Returns `true` if the string contains Unicode noncharacters | +| `codepoint_to_utf8_bytes( $codepoint )` | Encodes a code point number to its UTF-8 byte sequence | +| `utf8_ord( $character )` | Decodes a UTF-8 character to its code point number | + +## Attribution + +The `wp_is_valid_utf8()`, `wp_scrub_utf8()`, and `wp_has_noncharacters()` functions originate from [WordPress core](https://github.com/WordPress/wordpress-develop). The pure PHP fallback scanner implements the UTF-8 well-formedness rules from the Unicode Standard. Licensed under GPL v2. + +## Requirements + +- PHP 7.2+ +- No external dependencies (`mbstring` is used when available but is not required) diff --git a/components/Filesystem/README.md b/components/Filesystem/README.md new file mode 100644 index 00000000..17a60541 --- /dev/null +++ b/components/Filesystem/README.md @@ -0,0 +1,240 @@ +# Filesystem + +A unified filesystem abstraction that lets you work with local disks, in-memory trees, SQLite-backed storage, and other backends through a single interface. Every implementation uses forward slashes as path separators regardless of the host OS, so code that works on Linux works identically on Windows and macOS. + +## Installation + +```bash +composer require wp-php-toolkit/filesystem +``` + +## Quick Start + +```php +use WordPress\Filesystem\InMemoryFilesystem; + +$fs = InMemoryFilesystem::create(); +$fs->mkdir( '/docs' ); +$fs->put_contents( '/docs/readme.txt', 'Hello, world!' ); +echo $fs->get_contents( '/docs/readme.txt' ); // "Hello, world!" +``` + +## Usage + +### Local Filesystem + +`LocalFilesystem` wraps the real disk. Pass a root directory to `create()` and all paths are resolved relative to it. 
+ +```php +use WordPress\Filesystem\LocalFilesystem; + +$fs = LocalFilesystem::create( '/var/www/mysite' ); + +// Write and read files +$fs->put_contents( '/config.json', '{"debug": true}' ); +echo $fs->get_contents( '/config.json' ); // '{"debug": true}' + +// Directory operations +$fs->mkdir( '/uploads/2024', array( 'recursive' => true ) ); +$fs->put_contents( '/uploads/2024/photo.txt', 'image data here' ); + +// List directory contents +$entries = $fs->ls( '/uploads/2024' ); // ['photo.txt'] + +// Check paths +$fs->is_dir( '/uploads' ); // true +$fs->is_file( '/config.json' ); // true +$fs->exists( '/missing' ); // false +``` + +Without a root argument, `LocalFilesystem::create()` defaults to the system root (`/` on Unix, the system drive on Windows). + +### In-Memory Filesystem + +`InMemoryFilesystem` stores everything in PHP arrays. It is useful for tests, temporary processing, and anywhere you need a fast, disposable filesystem. + +```php +use WordPress\Filesystem\InMemoryFilesystem; + +$fs = InMemoryFilesystem::create(); + +$fs->mkdir( '/src/components', array( 'recursive' => true ) ); +$fs->put_contents( '/src/components/button.php', 'put_contents( '/src/components/form.php', 'ls( '/src/components' ); // ['button.php', 'form.php'] +``` + +### SQLite Filesystem + +`SQLiteFilesystem` persists files and directories in a SQLite database. It requires the `sqlite3` PHP extension. The package lists that extension only as a dev dependency, so the rest of the library runs without it. + +```php +use WordPress\Filesystem\SQLiteFilesystem; + +// In-memory SQLite database +$fs = SQLiteFilesystem::create( ':memory:' ); + +// Or persist to a file +$fs = SQLiteFilesystem::create( '/tmp/my-files.sqlite' ); + +$fs->mkdir( '/data' ); +$fs->put_contents( '/data/report.csv', "id,name\n1,Alice" ); +echo $fs->get_contents( '/data/report.csv' ); +``` + +### File and Directory Operations + +All filesystem implementations share the same interface. These operations work identically across backends. 
+ +```php +// Rename (move) a file +$fs->put_contents( '/old-name.txt', 'content' ); +$fs->rename( '/old-name.txt', '/new-name.txt' ); + +// Copy a file +$fs->put_contents( '/source.txt', 'content' ); +$fs->copy( '/source.txt', '/dest.txt' ); + +// Copy a directory tree +$fs->mkdir( '/src/lib', array( 'recursive' => true ) ); +$fs->put_contents( '/src/lib/utils.php', 'copy( '/src', '/backup', array( 'recursive' => true ) ); +echo $fs->get_contents( '/backup/lib/utils.php' ); // 'rm( '/dest.txt' ); +$fs->rmdir( '/backup', array( 'recursive' => true ) ); +``` + +### Streaming Reads and Writes + +Every filesystem can open byte streams for reading and writing. This integrates with the ByteStream component for chunk-based processing of large files. + +```php +// Write via stream +$writer = $fs->open_write_stream( '/output.bin' ); +$writer->append_bytes( 'chunk 1' ); +$writer->append_bytes( 'chunk 2' ); +$writer->close_writing(); + +// Read via stream +$reader = $fs->open_read_stream( '/output.bin' ); +$contents = $reader->consume_all(); +$reader->close_reading(); +``` + +### Copying Between Filesystems + +The `copy_between_filesystems()` function streams data from one filesystem to another, even across different backends. + +```php +use WordPress\Filesystem\LocalFilesystem; +use WordPress\Filesystem\InMemoryFilesystem; + +use function WordPress\Filesystem\copy_between_filesystems; + +$local = LocalFilesystem::create( '/var/www/site' ); +$memory = InMemoryFilesystem::create(); + +// Copy an entire directory tree from disk into memory +copy_between_filesystems( array( + 'source_filesystem' => $local, + 'source_path' => '/wp-content/themes/flavor', + 'target_filesystem' => $memory, + 'target_path' => '/theme', +) ); + +echo $memory->get_contents( '/theme/style.css' ); +``` + +### Traversing a Filesystem + +`FilesystemVisitor` walks a filesystem tree depth-first, emitting enter and exit events for each directory along with its files. 
+ +```php +use WordPress\Filesystem\Visitor\FilesystemVisitor; +use WordPress\Filesystem\Visitor\FileVisitorEvent; + +$visitor = new FilesystemVisitor( $fs ); +while ( $visitor->next() ) { + $event = $visitor->get_event(); + if ( $event->is_entering() ) { + echo "Entering: " . $event->dir . "\n"; + foreach ( $event->files as $file ) { + echo " File: " . $file . "\n"; + } + } +} +``` + +### Path Helpers + +The Filesystem component provides Unix-style path utilities that behave consistently on every OS. + +```php +use function WordPress\Filesystem\wp_join_unix_paths; +use function WordPress\Filesystem\wp_unix_dirname; +use function WordPress\Filesystem\wp_unix_path_resolve_dots; + +// Join path segments, collapsing duplicate slashes +echo wp_join_unix_paths( '/var/www', 'site', 'index.php' ); +// "/var/www/site/index.php" + +// Get the parent directory +echo wp_unix_dirname( '/var/www/site/index.php' ); +// "/var/www/site" + +// Resolve . and .. segments +echo wp_unix_path_resolve_dots( '/var/www/site/../other/./page.php' ); +// "/var/www/other/page.php" +``` + +## API Reference + +### Filesystem Interface + +All implementations provide these methods: + +| Method | Description | +|---|---| +| `ls( $dir )` | List entries in a directory | +| `is_dir( $path )` | Check if path is a directory | +| `is_file( $path )` | Check if path is a file | +| `exists( $path )` | Check if path exists | +| `mkdir( $path, $options )` | Create a directory. Use `['recursive' => true]` for nested paths | +| `rm( $path )` | Remove a file | +| `rmdir( $path, $options )` | Remove a directory. 
Use `['recursive' => true]` for non-empty dirs | +| `put_contents( $path, $data )` | Write a string to a file | +| `get_contents( $path )` | Read a file into a string | +| `open_read_stream( $path )` | Open a `ByteReadStream` for chunk-based reading | +| `open_write_stream( $path )` | Open a `ByteWriteStream` for chunk-based writing | +| `copy( $from, $to, $options )` | Copy a file or directory | +| `rename( $from, $to )` | Move/rename a file or directory | + +### Implementations + +| Class | Description | +|---|---| +| `LocalFilesystem` | Wraps the real disk via `LocalFilesystem::create( $root )` | +| `InMemoryFilesystem` | Array-backed filesystem via `InMemoryFilesystem::create()` | +| `SQLiteFilesystem` | SQLite-backed filesystem via `SQLiteFilesystem::create( $path )` | +| `UploadedFilesystem` | Read-only filesystem for handling REST API file uploads | + +Other packages extend this interface with additional backends: `GitFilesystem` (from the Git component) and `ZipFilesystem` (from the Zip component). + +### Helper Functions + +| Function | Description | +|---|---| +| `wp_join_unix_paths( ...$segments )` | Join path segments with forward slashes | +| `wp_unix_dirname( $path )` | Get parent directory (Unix semantics on all OSes) | +| `wp_unix_path_resolve_dots( $path )` | Resolve `.` and `..` segments | +| `wp_unix_sys_get_temp_dir()` | Like `sys_get_temp_dir()` but always uses forward slashes | +| `copy_between_filesystems( $args )` | Stream data between two filesystem instances | +| `pipe_stream( $from, $to )` | Pipe a read stream into a write stream | + +## Requirements + +- PHP 7.2+ +- No external dependencies (SQLiteFilesystem requires the `sqlite3` extension, which is a dev-only dependency) diff --git a/components/Git/README.md b/components/Git/README.md new file mode 100644 index 00000000..ed61f56d --- /dev/null +++ b/components/Git/README.md @@ -0,0 +1,229 @@ +# Git + +A pure PHP implementation of a Git client and server. 
It can create repositories, read and write objects, commit files, manage branches, diff, merge, and communicate with remote servers over HTTP -- all without shelling out to the `git` binary or requiring any native extensions. + +## Installation + +```bash +composer require wp-php-toolkit/git +``` + +## Quick Start + +```php +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Git\GitRepository; +use WordPress\Git\Model\Commit; + +// Create a repository backed by an in-memory filesystem. +// You can also use a local filesystem for on-disk storage. +$repo = new GitRepository( InMemoryFilesystem::create() ); + +// Commit files directly -- the repository builds the +// blob, tree, and commit objects for you. +$commit_oid = $repo->commit( array( + 'updates' => array( + 'README.md' => '# My Project', + 'src/hello-world.php' => 'read_object_by_path( '/README.md' )->consume_all(); +// "# My Project" +``` + +## Usage + +### Creating and reading objects + +Every piece of data in Git is an object identified by its SHA-1 hash. You can create blobs, trees, and commits directly: + +```php +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Git\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); + +// Store a blob and get its SHA-1 hash. +$blob_oid = $repo->add_object( 'blob', 'Hello, world!' ); +// "5dd01c177f5d7d1be5346a5bc18a569a7410c2ef" + +// Read it back. +$reader = $repo->read_object( $blob_oid ); +$reader->pull( 8096 ); +$data = $reader->peek( 8096 ); +// "Hello, world!" +``` + +### Committing files + +The `commit()` method handles building the tree hierarchy, creating blob objects, and wiring up parent commits automatically: + +```php +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Git\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); + +// First commit. 
+$first_oid = $repo->commit( array( + 'updates' => array( + 'dir1/file1.txt' => 'Initial content of file1', + 'dir2/file2.txt' => 'Initial content of file2', + ), +) ); + +// Second commit -- only the changed files are updated. +$second_oid = $repo->commit( array( + 'updates' => array( + 'dir1/file1.txt' => 'Updated file1', + ), +) ); + +// Delete a file in a commit. +$third_oid = $repo->commit( array( + 'deletes' => array( 'dir2/file2.txt' ), +) ); +``` + +### Branch management + +```php +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Git\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +$initial_oid = $repo->commit( array( + 'updates' => array( 'file.txt' => 'initial' ), +) ); + +// Create a new branch pointing at the current commit. +$repo->create_branch( 'refs/heads/feature', $initial_oid ); + +// Switch to it. +$repo->checkout( 'refs/heads/feature' ); + +// Commit on the new branch. +$repo->commit( array( + 'updates' => array( 'file.txt' => 'changed on feature' ), +) ); + +// Switch back to the default branch. +$repo->checkout( 'refs/heads/trunk' ); + +// Read the current branch tip hash. +$head_hash = $repo->get_branch_tip( 'HEAD' ); +``` + +### Merging + +```php +$repo->checkout( 'refs/heads/trunk' ); +$result = $repo->merge( 'refs/heads/feature' ); + +// $result['new_head'] -- the hash of the merge commit +// $result['conflicts'] -- array of conflicting paths (empty if none) +``` + +### Using GitFilesystem + +`GitFilesystem` wraps a `GitRepository` with the standard `Filesystem` interface, so you can read and write files as if working with a regular filesystem. Each write creates a new commit. 
+ +```php +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Git\GitFilesystem; +use WordPress\Git\GitRepository; +use WordPress\Git\Model\Commit; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +$repo->commit( array( + 'updates' => array( + 'README.md' => 'Hello, world!', + 'subdirectory/hello-world.txt' => 'Hello, world!', + ), +) ); + +$fs = GitFilesystem::create( $repo ); + +$fs->ls( '/' ); +// ['README.md', 'subdirectory'] + +$fs->is_file( '/README.md' ); // true +$fs->is_dir( '/subdirectory' ); // true +$fs->get_contents( '/README.md' ); // "Hello, world!" + +// Writing creates a new commit automatically. +$fs->put_contents( '/new-file.txt', 'content' ); + +// Rename a directory. +$fs->rename( '/subdirectory', '/renamed' ); +``` + +### Working with remotes + +```php +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Git\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +$repo->add_remote( 'origin', 'https://github.com/user/repo' ); + +$remote = $repo->get_remote_client( 'origin' ); + +// List remote refs. +$refs = $remote->ls_refs( 'refs/heads/' ); + +// Pull a branch. +$remote->pull( 'refs/heads/trunk' ); + +// Push local changes. 
+$remote->push( 'trunk' ); +``` + +## API Reference + +### GitRepository + +| Method | Description | +|---|---| +| `__construct( Filesystem $fs )` | Create a repository backed by a filesystem | +| `add_object( $type, $content )` | Store a blob, tree, or commit; returns its SHA-1 hash | +| `read_object( $oid )` | Read an object by hash; returns a stream with `consume_all()` and `as_commit()` / `as_tree()` | +| `has_object( $oid )` | Check whether an object exists locally | +| `find_hash_by_path( $path, $commit )` | Resolve a file path to its object hash | +| `read_object_by_path( $path, $commit )` | Read a file's content by path | +| `commit( $options )` | Create a commit with `'updates'`, `'deletes'`, and `'move_trees'` | +| `create_branch( $name, $oid )` | Create a new branch | +| `checkout( $branch_or_hash )` | Switch HEAD to a branch or commit | +| `get_branch_tip( $name )` | Get the commit hash a branch points to | +| `set_branch_tip( $name, $oid )` | Point a branch at a specific commit | +| `merge( $branch_name, $options )` | Three-way merge; returns `['new_head' => ..., 'conflicts' => [...]]` | +| `diff_commits( $hash1, $hash2 )` | Diff two commits | +| `add_remote( $name, $url )` | Register a remote | +| `get_remote_client( $name )` | Get a `GitRemote` for push/pull operations | + +### GitFilesystem + +| Method | Description | +|---|---| +| `GitFilesystem::create( $repo )` | Wrap a repository with the Filesystem interface | +| `ls( $path )` | List directory entries | +| `is_file( $path )` / `is_dir( $path )` | Check entry type | +| `get_contents( $path )` | Read file contents | +| `put_contents( $path, $data )` | Write a file (creates a commit) | +| `rename( $from, $to )` | Rename a file or directory | +| `rm( $path )` / `rmdir( $path )` | Delete a file or directory | + +### Model classes + +| Class | Key properties | +|---|---| +| `Commit` | `$hash`, `$tree`, `$parents`, `$author`, `$message` | +| `Tree` | `$entries` (map of name to `TreeEntry`) | +| 
`TreeEntry` | `$mode`, `$name`, `$hash`; constants `FILE_MODE_REGULAR_NON_EXECUTABLE`, `FILE_MODE_DIRECTORY` | + +## Requirements + +- PHP 7.2+ +- No external dependencies (no `git` binary required) diff --git a/components/HTML/README.md b/components/HTML/README.md new file mode 100644 index 00000000..b034be17 --- /dev/null +++ b/components/HTML/README.md @@ -0,0 +1,260 @@ +# HTML + +A full HTML5 parser and tag processor implemented in pure PHP, mirroring WordPress core's HTML API. It provides two levels of access: `WP_HTML_Tag_Processor` for fast, linear scanning and modification of HTML attributes, and `WP_HTML_Processor` for structure-aware parsing that understands nested elements, implicit tag closers, and the HTML5 insertion algorithm. No libxml2, no DOM extension, no external dependencies. + +## Installation + +``` +composer require wp-php-toolkit/html +``` + +## Quick Start + +Find and modify HTML tags: + +```php +$html = '

Hello

'; + +$tags = new WP_HTML_Tag_Processor( $html ); +if ( $tags->next_tag( 'img' ) ) { + $tags->set_attribute( 'loading', 'lazy' ); + $tags->add_class( 'responsive' ); +} + +echo $tags->get_updated_html(); +//

Hello

+``` + +## Usage + +### Tag Processor: Linear Scanning + +`WP_HTML_Tag_Processor` scans through HTML linearly, finding tags by name, class, or other criteria. It does not parse the DOM tree -- it operates on a flat stream of tags, which makes it fast and predictable. + +```php +$html = ''; +$tags = new WP_HTML_Tag_Processor( $html ); + +// Find tags by name. +while ( $tags->next_tag( 'li' ) ) { + $tags->set_attribute( 'role', 'listitem' ); +} +echo $tags->get_updated_html(); +// Every
<li> now has role="listitem". +``` + +#### Querying with Arrays + +Pass an array to `next_tag()` to match by tag name, class, or both: + +```php +$tags = new WP_HTML_Tag_Processor( $html ); + +// Find by tag name. +$tags->next_tag( array( 'tag_name' => 'img' ) ); + +// Find by CSS class. +$tags->next_tag( array( 'class_name' => 'hero' ) ); + +// Find by both. +$tags->next_tag( array( 'tag_name' => 'div', 'class_name' => 'sidebar' ) ); +``` + +#### Reading Attributes + +```php +$html = '<a href="https://wordpress.org" title="WP" class="button">Visit</a>'; +$tags = new WP_HTML_Tag_Processor( $html ); + +if ( $tags->next_tag( 'a' ) ) { + $tags->get_tag(); // 'A' + $tags->get_attribute( 'href' ); // 'https://wordpress.org' + $tags->get_attribute( 'title' ); // 'WP' + $tags->get_attribute( 'missing' ); // null (attribute not present) + $tags->has_class( 'button' ); // true + $tags->has_class( 'danger' ); // false +} +``` + +#### Modifying Attributes and Classes + +```php +$tags = new WP_HTML_Tag_Processor( '
    ' ); +$tags->next_tag(); + +$tags->set_attribute( 'id', 'main' ); // Add a new attribute. +$tags->set_attribute( 'data-x', '2' ); // Update an existing attribute. +$tags->remove_attribute( 'data-x' ); // Remove an attribute. +$tags->add_class( 'new' ); // Add a CSS class. +$tags->remove_class( 'old' ); // Remove a CSS class. + +echo $tags->get_updated_html(); +//
    +``` + +#### Custom Filtering + +When the query syntax is not enough, loop through tags and inspect them directly: + +```php +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag() ) { + if ( + ( 'DIV' === $tags->get_tag() || 'SPAN' === $tags->get_tag() ) && + 'highlight' === $tags->get_attribute( 'data-style' ) + ) { + $tags->add_class( 'theme-highlight' ); + } +} +``` + +#### Bookmarks + +Bookmarks let you save a position and return to it later. This is the one exception to the forward-only scanning rule: + +```php +$tags = new WP_HTML_Tag_Processor( '
    <div><span>text</span></div>
    ' ); +$tags->next_tag( 'div' ); +$tags->set_bookmark( 'the-div' ); + +$tags->next_tag( 'span' ); +$tags->set_attribute( 'class', 'inner' ); + +// Jump back to the bookmarked position. +$tags->seek( 'the-div' ); +$tags->set_attribute( 'class', 'outer' ); + +$tags->release_bookmark( 'the-div' ); +echo $tags->get_updated_html(); +//
    <div class="outer"><span class="inner">text</span></div>
    +``` + +### HTML Processor: Structure-Aware Parsing + +`WP_HTML_Processor` extends the tag processor with HTML5-compliant structural parsing. It understands nested elements, implied closers, and can query by element nesting (breadcrumbs). + +```php +$html = '
    <figure><img src="photo.jpg"><figcaption>A lovely day</figcaption></figure>
    '; + +$processor = WP_HTML_Processor::create_fragment( $html ); + +// Find an IMG that is a direct child of FIGURE. +if ( $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG' ) ) ) ) { + $processor->set_attribute( 'loading', 'lazy' ); +} +``` + +#### Breadcrumbs + +Breadcrumbs represent the stack of open elements from the root down to the current tag. They work like a CSS child combinator (`FIGURE > IMG`): + +```php +$html = '

    <div><p>One</p><p>Two <em>Three</em></p></div>

    '; +$processor = WP_HTML_Processor::create_fragment( $html ); + +while ( $processor->next_tag() ) { + $crumbs = $processor->get_breadcrumbs(); + // First match: array( 'HTML', 'BODY', 'DIV' ) + // Second match: array( 'HTML', 'BODY', 'DIV', 'P' ) + // ... and so on for each tag encountered. +} +``` + +#### Token-Level Access + +Both processors support token-level iteration via `next_token()`, which visits every token in the document including text nodes, comments, and tags: + +```php +$processor = WP_HTML_Processor::create_fragment( '

    <p>Hello <b>world</b></p>

    ' ); + +while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + // '#tag' for HTML tags (openers and closers) + // '#text' for text content + // Other types for comments, doctypes, etc. + + if ( '#text' === $type ) { + echo $processor->get_modifiable_text(); + // "Hello ", then "world" + } +} +``` + +#### Serialization + +The processor can serialize its parsed document back to a well-formed HTML string: + +```php +$messy = '

    <p>one<p>two'; // Missing closer -- valid HTML5, parsed as two paragraphs. +$processor = WP_HTML_Processor::create_fragment( $messy ); +echo $processor->serialize(); +// <p>one</p><p>two</p>

    +``` + +### HTML Decoder + +`WP_HTML_Decoder` decodes HTML character references in text nodes and attribute values, handling named entities, numeric references, and edge cases from the HTML5 spec: + +```php +$decoded = WP_HTML_Decoder::decode_text_node( 'AT&amp;T &mdash; 100%' ); +// 'AT&T — 100%' + +$decoded = WP_HTML_Decoder::decode_attribute( 'path?a=1&amp;b=2' ); +// 'path?a=1&b=2' + +// Check if an encoded attribute value starts with a given string. +$starts = WP_HTML_Decoder::attribute_starts_with( 'http://example.com', 'http:', 'ascii-case-insensitive' ); +// true +``` + +## API Reference + +### WP_HTML_Tag_Processor + +| Method | Description | +|--------|-------------| +| `__construct( $html )` | Create a processor for the given HTML string | +| `next_tag( $query = null )` | Advance to the next matching tag; returns `bool` | +| `next_token()` | Advance to the next token (tag, text, comment); returns `bool` | +| `get_tag()` | Get the uppercase tag name of the current tag | +| `get_token_type()` | Get the token type (`#tag`, `#text`, `#comment`, etc.) 
| +| `get_attribute( $name )` | Get an attribute value, `null` if missing, `true` for boolean attributes | +| `set_attribute( $name, $value )` | Set or update an attribute | +| `remove_attribute( $name )` | Remove an attribute | +| `add_class( $class_name )` | Add a CSS class | +| `remove_class( $class_name )` | Remove a CSS class | +| `has_class( $wanted_class )` | Check if a CSS class is present | +| `get_updated_html()` | Get the modified HTML string | +| `get_modifiable_text()` | Get the text content of the current text node | +| `set_bookmark( $name )` | Save the current position | +| `seek( $bookmark_name )` | Return to a bookmarked position | +| `release_bookmark( $name )` | Free a bookmark | + +### WP_HTML_Processor + +| Method | Description | +|--------|-------------| +| `create_fragment( $html )` | Create a processor for an HTML fragment (static factory) | +| `next_tag( $query = null )` | Find the next tag, supports `'breadcrumbs'` queries | +| `next_token()` | Advance to the next token with structural awareness | +| `get_breadcrumbs()` | Get the stack of open elements as an array of tag names | +| `serialize()` | Serialize the parsed document to well-formed HTML | + +Inherits all attribute and class methods from `WP_HTML_Tag_Processor`. + +### WP_HTML_Decoder + +| Method | Description | +|--------|-------------| +| `decode_text_node( $text )` | Decode character references in an HTML text node | +| `decode_attribute( $text )` | Decode character references in an attribute value | +| `attribute_starts_with( $haystack, $search, $case )` | Check if an encoded attribute starts with a plain string | + +## Attribution + +This component is extracted from [WordPress core's HTML API](https://developer.wordpress.org/reference/classes/wp_html_processor/). The `WP_HTML_Tag_Processor` and `WP_HTML_Processor` were created by the WordPress core team to provide a safe, spec-compliant way to modify HTML without regular expressions. Licensed under GPL v2. 
+ +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/HttpClient/README.md b/components/HttpClient/README.md index 394adf5f..c83b488e 100644 --- a/components/HttpClient/README.md +++ b/components/HttpClient/README.md @@ -1,43 +1,231 @@ -# HTTP Client +# HttpClient -An asynchronous HTTP client library. +An asynchronous HTTP client that works on vanilla PHP without requiring `curl` or any other extensions. It can use `curl` when available for better performance, but falls back to pure PHP sockets automatically. Supports concurrent requests, streaming responses, redirects, chunked encoding, gzip decompression, and basic auth. -### Key Features +## Installation -- **No dependencies:** Works on vanilla PHP without external libraries. `SocketClient` uses `stream_socket_client()` for non-blocking HTTP requests and `CurlClient` uses `curl_multi` for parallel requests. -- **Streaming support:** Enables efficient handling of large response bodies. -- **Progress monitoring:** Track the progress of requests and responses. -- **Concurrency limits:** Control the number of simultaneous connections. -- **PHP 7.2+ support and no dependencies:** Works on vanilla PHP without external libraries. +```bash +composer require wp-php-toolkit/http-client +``` + +## Quick Start + +```php +use WordPress\HttpClient\Client; +use WordPress\HttpClient\Request; + +$client = new Client(); + +// Fetch a URL and read the entire response body. +$stream = $client->fetch( 'https://api.example.com/data.json' ); +$body = $stream->consume_all(); + +// Or parse JSON directly. 
+$stream = $client->fetch( 'https://api.example.com/data.json' ); +$data = $stream->json(); +``` + +## Usage -### Usage Example +### Simple GET request ```php -$requests = [ - new Request("[https://wordpress.org/latest.zip](https://wordpress.org/latest.zip)"), - new Request("[https://raw.githubusercontent.com/wpaccessibility/a11y-theme-unit-test/master/a11y-theme-unit-test-data.xml](https://raw.githubusercontent.com/wpaccessibility/a11y-theme-unit-test/master/a11y-theme-unit-test-data.xml)"), -]; +use WordPress\HttpClient\Client; +use WordPress\HttpClient\Request; -// Creates the most appropriate client based for your environment. -$client = Client::create(); -$client->enqueue($requests); +$client = new Client(); +$request = new Request( 'https://wordpress.org/' ); +$stream = $client->fetch( $request ); -while ($client->await_next_event()) { - $event = $client->get_event(); +// Wait for the response headers to arrive. +$response = $stream->await_response(); +echo $response->status_code; // 200 + +// Read the full body. 
+$html = $stream->consume_all(); +``` + +### POST request with a body + +```php +use WordPress\HttpClient\Client; +use WordPress\HttpClient\Request; +use WordPress\ByteStream\MemoryPipe; + +$client = new Client(); +$request = new Request( 'https://httpbin.org/post', array( + 'method' => 'POST', + 'headers' => array( 'content-type' => 'application/json' ), + 'body_stream' => new MemoryPipe( '{"key": "value"}' ), +) ); + +$stream = $client->fetch( $request ); +$response = $stream->await_response(); +$body = $stream->consume_all(); +``` + +### Concurrent downloads + +Multiple requests run concurrently, whether using the curl or socket transport: + +```php +use WordPress\HttpClient\Client; +use WordPress\HttpClient\Request; + +$requests = array( + new Request( 'https://wordpress.org/latest.zip' ), + new Request( 'https://example.com/large-file.xml' ), +); + +$client = new Client(); +$client->enqueue( $requests ); + +while ( $client->await_next_event() ) { $request = $client->get_request(); - if ($event === Client::EVENT_BODY_CHUNK_AVAILABLE) { - $chunk = $client->get_response_body_chunk(); - // Process the chunk... + switch ( $client->get_event() ) { + case Client::EVENT_GOT_HEADERS: + // Response headers are available. + echo $request->response->status_code . "\n"; + break; + + case Client::EVENT_BODY_CHUNK_AVAILABLE: + // Stream body chunks to disk as they arrive. + $chunk = $client->get_response_body_chunk(); + file_put_contents( + '/tmp/download-' . $request->id, + $chunk, + FILE_APPEND + ); + break; + + case Client::EVENT_FINISHED: + echo "Done: " . $request->url . "\n"; + break; + + case Client::EVENT_FAILED: + echo "Failed: " . $request->error->message . "\n"; + break; } - // Handle other events... } ``` -### TODO +### Choosing a transport + +The client automatically picks `curl` if the extension is loaded, otherwise it uses pure PHP sockets. You can force a specific transport: + +```php +// Force pure PHP sockets (no curl dependency). 
+$client = new Client( array( 'transport' => 'sockets' ) ); + +// Force curl. +$client = new Client( array( 'transport' => 'curl' ) ); +``` + +### Response caching + +Enable disk-based caching by providing a cache directory: + +```php +$client = new Client( array( + 'cache_dir' => '/tmp/http-cache', +) ); +``` + +### Redirect handling + +Redirects are followed automatically (up to 5 by default). You can traverse the redirect chain through the request object: + +```php +$stream = $client->fetch( new Request( 'https://example.com/old-page' ) ); +$response = $stream->await_response(); + +// The request object tracks the full redirect chain. +$original = $stream->get_request(); +if ( $original->is_redirected() ) { + $final_request = $original->latest_redirect(); + echo $final_request->url; // the final URL after redirects +} +``` + +### Custom headers + +```php +$request = new Request( 'https://api.example.com/resource', array( + 'method' => 'GET', + 'headers' => array( + 'authorization' => 'Bearer my-token', + 'accept' => 'application/json', + ), +) ); +``` + +### Basic auth via URL + +Credentials embedded in the URL are automatically extracted and sent as a Basic Authorization header: + +```php +$request = new Request( 'https://user:pass@api.example.com/resource' ); +// Sends "Authorization: Basic dXNlcjpwYXNz" header automatically. +``` + +## API Reference + +### Client + +| Method | Description | +|---|---| +| `__construct( $options )` | Create a client. 
Options: `transport` (`'curl'`, `'sockets'`, `'auto'`), `cache_dir` | +| `fetch( $request )` | Start a request; returns a `RequestReadStream` | +| `fetch_many( $requests )` | Start multiple requests; returns an array of `RequestReadStream` | +| `enqueue( $requests )` | Queue requests for async processing | +| `await_next_event( $query )` | Block until the next event; returns `false` when all done | +| `get_event()` | The event type from the last `await_next_event()` call | +| `get_request()` | The `Request` associated with the last event | +| `get_response_body_chunk()` | The body chunk from an `EVENT_BODY_CHUNK_AVAILABLE` event | + +### Request + +| Method / Property | Description | +|---|---| +| `__construct( $url, $request_info )` | Create a request. Info keys: `method`, `headers`, `body_stream`, `http_version` | +| `$url` | The request URL | +| `$method` | HTTP method (default: `'GET'`) | +| `$headers` | Associative array of headers | +| `$response` | The `Response` object (available after headers arrive) | +| `$error` | An `HttpError` if the request failed | +| `latest_redirect()` | Follow the redirect chain to the final request | +| `is_redirected()` | Whether this request was redirected | + +### RequestReadStream + +| Method | Description | +|---|---| +| `consume_all()` | Read the entire response body as a string | +| `json()` | Parse the response body as JSON | +| `await_response()` | Block until response headers arrive; returns a `Response` | +| `get_request()` | The underlying `Request` object | +| `length()` | Content length if known, `null` otherwise | + +### Response + +| Method / Property | Description | +|---|---| +| `$status_code` | HTTP status code | +| `$headers` | Associative array of response headers (lowercase keys) | +| `get_header( $name )` | Get a single header value | +| `ok()` | `true` if status is 200-399 | + +### Events + +| Constant | When it fires | +|---|---| +| `Client::EVENT_GOT_HEADERS` | Response headers have been received | +| 
`Client::EVENT_BODY_CHUNK_AVAILABLE` | A chunk of the response body is ready | +| `Client::EVENT_FINISHED` | The request completed successfully | +| `Client::EVENT_FAILED` | The request failed (check `$request->error`) | + +## Requirements -* Request headers – accept string lines such as "Content-type: text/plain" instead of key-value pairs. K/V pairs - are confusing and lead to accidental errors such as `0: Content-type: text/plain`. They also diverge from the - format that curl accepts. -* Response caching – add a custom cache handler for easy caching of the same URLs -* Response caching – support HTTP cache-control headers +- PHP 7.2+ +- No external dependencies (`curl` used when available but not required) diff --git a/components/HttpServer/README.md b/components/HttpServer/README.md new file mode 100644 index 00000000..51ab7623 --- /dev/null +++ b/components/HttpServer/README.md @@ -0,0 +1,201 @@ +# HttpServer + +A minimal, blocking TCP-based HTTP server written in pure PHP. It is designed for CLI tools, local development servers, and test harnesses where you need a lightweight HTTP endpoint without pulling in a full web server. + +## Installation + +```bash +composer require wp-php-toolkit/http-server +``` + +## Quick Start + +```php +use WordPress\HttpServer\TcpServer; +use WordPress\HttpServer\IncomingRequest; +use WordPress\HttpServer\Response\ResponseWriteStream; + +$server = new TcpServer( '127.0.0.1', 8080 ); + +$server->set_handler( function ( IncomingRequest $request, ResponseWriteStream $response ) { + $response->send_http_code( 200 ); + $response->send_header( 'Content-Type', 'text/plain' ); + $response->append_bytes( 'Hello, world!' ); +} ); + +echo "Listening on http://127.0.0.1:8080\n"; +$server->serve(); +``` + +## Usage + +### Routing by path + +The handler receives an `IncomingRequest` which extends the HttpClient `Request` class. 
You can inspect the method, URL, headers, and body to decide how to respond: + +```php +use WordPress\HttpServer\TcpServer; +use WordPress\HttpServer\IncomingRequest; +use WordPress\HttpServer\Response\ResponseWriteStream; + +$server = new TcpServer( '127.0.0.1', 8080 ); + +$server->set_handler( function ( IncomingRequest $request, ResponseWriteStream $response ) { + $parsed = $request->get_parsed_url(); + $path = $parsed->pathname; + + if ( '/api/status' === $path && 'GET' === $request->method ) { + $response->send_http_code( 200 ); + $response->send_header( 'Content-Type', 'application/json' ); + $response->append_bytes( '{"status": "ok"}' ); + return; + } + + if ( '/api/echo' === $path && 'POST' === $request->method ) { + // Read the incoming request body. + $body = ''; + while ( ! $request->body_stream->reached_end_of_data() ) { + $n = $request->body_stream->pull( 4096 ); + if ( $n > 0 ) { + $body .= $request->body_stream->consume( $n ); + } + } + + $response->send_http_code( 200 ); + $response->send_header( 'Content-Type', 'text/plain' ); + $response->append_bytes( $body ); + return; + } + + $response->send_http_code( 404 ); + $response->send_header( 'Content-Type', 'text/plain' ); + $response->append_bytes( 'Not Found' ); +} ); + +$server->serve(); +``` + +### Chunked transfer encoding + +For large or streaming responses, enable chunked encoding on the response writer. 
This sends data in chunks without needing to know the total content length upfront: + +```php +use WordPress\HttpServer\TcpServer; +use WordPress\HttpServer\IncomingRequest; +use WordPress\HttpServer\Response\TcpResponseWriteStream; + +$server = new TcpServer( '127.0.0.1', 8080 ); + +$server->set_handler( function ( IncomingRequest $request, TcpResponseWriteStream $response ) { + $response->send_http_code( 200 ); + $response->send_header( 'Content-Type', 'text/plain' ); + $response->use_chunked_encoding(); + + for ( $i = 0; $i < 10; $i++ ) { + $response->append_bytes( "Chunk $i\n" ); + } +} ); + +$server->serve(); +``` + +### Buffering the response + +`BufferingResponseWriter` collects the entire response in memory before sending it. This is useful when you need to compute `Content-Length` automatically or when using `php-cgi`: + +```php +use WordPress\HttpServer\Response\BufferingResponseWriter; + +$writer = new BufferingResponseWriter(); +$writer->send_http_code( 200 ); +$writer->send_header( 'Content-Type', 'text/html' ); +$writer->append_bytes( '

    Hello

    ' ); + +// Sends all headers (including Content-Length) and the body at once. +$writer->close_writing(); +``` + +### Streaming via php://output + +`StreamingResponseWriter` writes directly to PHP's output stream using `http_response_code()` and `header()`. Use it when running behind Apache/nginx as a CGI script: + +```php +use WordPress\HttpServer\Response\StreamingResponseWriter; + +$writer = new StreamingResponseWriter(); +$writer->send_http_code( 200 ); +$writer->send_header( 'Content-Type', 'text/plain' ); +$writer->append_bytes( 'streamed directly to the client' ); +$writer->close_writing(); +``` + +### Startup callback + +Pass a callback to `serve()` to be notified when the server is ready to accept connections. This is handy for tests or scripts that need to know the exact host and port: + +```php +$server->serve( function ( $host, $port ) { + echo "Server ready at http://{$host}:{$port}\n"; +} ); +``` + +## API Reference + +### TcpServer + +| Method | Description | +|---|---| +| `__construct( $host, $port )` | Create a server bound to the given host and port | +| `set_handler( callable $handler )` | Set the request handler. Receives `(IncomingRequest, ResponseWriteStream, $socket)` | +| `serve( callable $on_accept )` | Start the blocking server loop. Optional callback fires when listening begins | + +### IncomingRequest + +Extends `WordPress\HttpClient\Request`. + +| Method / Property | Description | +|---|---| +| `IncomingRequest::from_resource( $stream )` | Parse an HTTP request from a socket resource | +| `$method` | HTTP method (`GET`, `POST`, etc.) 
| +| `$url` | Full request URL | +| `$headers` | Associative array of request headers (lowercase keys) | +| `$body_stream` | A `ByteReadStream` for reading the request body | +| `get_parsed_url()` | Returns a parsed URL object with `->pathname` | +| `get_header( $name )` | Get a single header value | + +### ResponseWriteStream (interface) + +| Method | Description | +|---|---| +| `send_http_code( $code )` | Set the HTTP status code (must be called before writing body) | +| `send_header( $name, $value )` | Add a response header (must be called before writing body) | +| `append_bytes( $bytes )` | Write bytes to the response body | +| `close_writing()` | Finalize and close the response | + +### TcpResponseWriteStream + +Implements `ResponseWriteStream`. Writes directly to a TCP socket. + +| Method | Description | +|---|---| +| `use_chunked_encoding()` | Enable HTTP chunked transfer encoding | +| `is_writing_closed()` | Check if the response has been finalized | + +### BufferingResponseWriter + +Implements `ResponseWriteStream`. Buffers the entire response in memory and sends it on `close_writing()` with an automatic `Content-Length` header. + +### StreamingResponseWriter + +Implements `ResponseWriteStream`. Writes headers via `header()` and body via `echo`, suitable for CGI environments. + +### StatusCode + +| Method | Description | +|---|---| +| `StatusCode::text( $code )` | Return the standard reason phrase for an HTTP status code (e.g., `200` -> `'OK'`) | + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/Markdown/README.md b/components/Markdown/README.md new file mode 100644 index 00000000..d4fe3a82 --- /dev/null +++ b/components/Markdown/README.md @@ -0,0 +1,154 @@ +# Markdown + +Bidirectional converter between Markdown and WordPress block markup. Use `MarkdownConsumer` to parse Markdown (with optional YAML frontmatter) into WordPress blocks, and `MarkdownProducer` to serialize blocks back to Markdown. 
Designed for content synchronization workflows where round-trip fidelity and whitespace preservation matter, such as three-way merging of static Markdown files with a WordPress database. + +## Installation + +``` +composer require wp-php-toolkit/markdown +``` + +## Quick Start + +Convert a Markdown string into WordPress block markup: + +```php +use WordPress\Markdown\MarkdownConsumer; + +$markdown = "# Hello World\n\nThis is a paragraph with **bold** text."; + +$consumer = new MarkdownConsumer( $markdown ); +$result = $consumer->consume(); + +$block_markup = $result->get_block_markup(); +// +//

    Hello World

    +// +// +// +//

    This is a paragraph with bold text.

    +// +``` + +## Usage + +### Markdown to Blocks + +Pass any Markdown string to `MarkdownConsumer` and call `consume()`. The returned `BlocksWithMetadata` object gives you both the block markup and any frontmatter metadata: + +```php +use WordPress\Markdown\MarkdownConsumer; + +$markdown = <<consume(); + +// Get YAML frontmatter as metadata. +// Each value is wrapped in an array to match the WP_Block_Markup_Converter interface. +$metadata = $result->get_all_metadata(); +// array( +// 'post_title' => array( 'WordPress 6.8 was released' ), +// 'post_date' => array( '2024-12-16' ), +// 'post_author' => array( '1' ), +// ) + +$blocks = $result->get_block_markup(); +``` + +### Supported Markdown Elements + +The consumer handles paragraphs, headings (all levels), bold, italic, inline code, links, images, ordered and unordered lists (including nested lists), blockquotes, fenced and indented code blocks, tables, horizontal rules, and raw HTML blocks. + +```php +use WordPress\Markdown\MarkdownConsumer; + +// Lists convert to wp:list and wp:list-item blocks. +$consumer = new MarkdownConsumer( "- Item 1\n - Item 1.1\n - Item 1.2\n- Item 2" ); +$result = $consumer->consume(); +$blocks = $result->get_block_markup(); +// +//
      +//
    • Item 1 +// +//
        +//
      • Item 1.1
      • +//
      • Item 1.2
      • +//
      +// +//
    • +//
    • Item 2
    • +//
    +// + +// Tables convert to wp:table blocks with thead/tbody structure. +$table_md = "| Name | Role |\n|------|------|\n| Ada | Dev |"; +$consumer = new MarkdownConsumer( $table_md ); +$result = $consumer->consume(); +``` + +### Blocks to Markdown + +Convert WordPress block markup back to Markdown using `MarkdownProducer`. Pass a `BlocksWithMetadata` instance containing the block markup and any metadata to include as YAML frontmatter: + +```php +use WordPress\DataLiberation\DataFormatConsumer\BlocksWithMetadata; +use WordPress\Markdown\MarkdownProducer; + +$blocks = '

    <!-- wp:paragraph --><p>A paragraph with a <a href="https://wordpress.org">link</a>.</p><!-- /wp:paragraph -->

    '; + +$metadata = array( + 'post_title' => 'My Post', +); + +$producer = new MarkdownProducer( new BlocksWithMetadata( $blocks, $metadata ) ); +$markdown = $producer->produce(); +// --- +// post_title: "My Post" +// --- +// +// A paragraph with a [link](https://wordpress.org). +``` + +The producer converts headings to `#` syntax, lists to `-` or `1.` syntax, images to `![alt](url)` syntax, bold/italic to `**`/`*`, inline code to backticks, code blocks to fenced blocks, tables to pipe tables, and blockquotes to `>` prefixed lines. Blocks that cannot be represented in Markdown are serialized as fenced code blocks with the `block` language tag, preserving them for round-trip conversion. + +## API Reference + +### MarkdownConsumer + +| Method | Description | +|--------|-------------| +| `__construct( $markdown )` | Create a consumer from a Markdown string | +| `consume()` | Parse and return a `BlocksWithMetadata` instance | +| `get_all_metadata()` | Get frontmatter as `array( 'key' => array( value ) )` | +| `get_meta_value( $key )` | Get a single metadata value by key | +| `get_block_markup()` | Get the resulting block markup string | + +### MarkdownProducer + +| Method | Description | +|--------|-------------| +| `__construct( BlocksWithMetadata $blocks_with_meta )` | Create a producer from blocks and metadata | +| `produce()` | Convert to Markdown string with optional YAML frontmatter | + +### BlocksWithMetadata + +| Method | Description | +|--------|-------------| +| `get_block_markup()` | Get the block markup string | +| `get_all_metadata()` | Get all metadata as an associative array | + +## Requirements + +- PHP 7.2+ +- No external dependencies beyond other `wp-php-toolkit` components diff --git a/components/Merge/README.md b/components/Merge/README.md new file mode 100644 index 00000000..6bee0599 --- /dev/null +++ b/components/Merge/README.md @@ -0,0 +1,253 @@ +# Merge + +A three-way merge and diff library for PHP. 
Given a common base version and two diverging branches, it computes diffs and merges the changes together, detecting conflicts along the way. The architecture is pluggable: swap out the differ (line-based or character-based), the merger (line-level or chunk-level), and add optional validation of the merged result. + +## Installation + +``` +composer require wp-php-toolkit/merge +``` + +## Quick Start + +```php +use WordPress\Merge\Diff\LineDiffer; +use WordPress\Merge\Merge\LineMerger; +use WordPress\Merge\MergeStrategy; + +$strategy = new MergeStrategy( + new LineDiffer(), + new LineMerger() +); + +$base = "Line 1\nLine 2\nLine 3\n"; +$branch_a = "Line 1\nLine 2 modified\nLine 3\n"; +$branch_b = "Line 1\nLine 2\nLine 3\nLine 4\n"; + +$result = $strategy->merge( $base, $branch_a, $branch_b ); +echo $result->get_merged_content(); +// Line 1 +// Line 2 modified +// Line 3 +// Line 4 +``` + +## Usage + +### Computing Diffs + +The `Diff` class represents a sequence of operations: equal, insert, and delete. You can create diffs manually or through a `Differ` implementation. + +```php +use WordPress\Merge\Diff\Diff; +use WordPress\Merge\Diff\LineDiffer; + +$differ = new LineDiffer(); +$diff = $differ->diff( + "The quick brown fox\njumps over the lazy dog.\n", + "The quick brown fox\njumps over the lazy cat.\nA new line.\n" +); + +// Inspect the changes +foreach ( $diff->get_changes() as $change ) { + $op = $change[0]; // Diff::DIFF_EQUAL, DIFF_DELETE, or DIFF_INSERT + $text = $change[1]; +} + +// Reconstruct the original and modified documents +echo $diff->get_old_document(); +// The quick brown fox +// jumps over the lazy dog. + +echo $diff->get_new_document(); +// The quick brown fox +// jumps over the lazy cat. +// A new line. +``` + +### Delta Format + +The delta format is a compact representation of a diff. Equal spans are encoded as byte counts, deletions as negative byte counts, and insertions as literal text. 
+ +```php +use WordPress\Merge\Diff\Diff; + +$diff = new Diff( array( + array( Diff::DIFF_EQUAL, "Line 1: The quick brown fox\n" ), + array( Diff::DIFF_DELETE, "Line 2: jumps over the lazy dog.\n" ), + array( Diff::DIFF_INSERT, 'A new line' ), +) ); + +echo $diff->format_as_delta(); +// =28\r-33\r+A new line +// +// =28 means "keep 28 bytes unchanged" +// -33 means "delete 33 bytes" +// +A new line means "insert this text" +``` + +### Git Patch Format + +Generate standard unified diffs that look like `git diff` output. + +```php +use WordPress\Merge\Diff\Diff; + +$diff = new Diff( array( + array( Diff::DIFF_EQUAL, "Line 1: The quick brown fox\n" ), + array( Diff::DIFF_DELETE, "Line 2: jumps over the lazy dog.\n" ), + array( Diff::DIFF_INSERT, "Line 2: jumps over the lazy cat.\n" ), + array( Diff::DIFF_EQUAL, "Line 3: consectetur adipiscing elit.\n" ), +) ); + +echo $diff->format_as_git_patch(); +// diff --git a/string b/string +// --- a/string +// +++ b/string +// @@ -1,3 +1,3 @@ Line 1: The quick brown fox +// - Line 2: jumps over the lazy dog. +// + Line 2: jumps over the lazy cat. +// Line 3: consectetur adipiscing elit. +``` + +### Three-Way Merge + +`MergeStrategy` orchestrates the full merge workflow. It diffs each branch against the common base and then merges the two diffs together. + +```php +use WordPress\Merge\Diff\MyersDiffer; +use WordPress\Merge\Merge\ChunkMerger; +use WordPress\Merge\MergeStrategy; + +$strategy = new MergeStrategy( + new MyersDiffer(), + new ChunkMerger() +); + +$base = '{"level":1}'; +$branch_a = '{"newattr": "before", "level":1}'; +$branch_b = '{"level":2}'; + +$result = $strategy->merge( $base, $branch_a, $branch_b ); +echo $result->get_merged_content(); +// {"newattr": "before", "level":2} +``` + +### Handling Merge Conflicts + +When both branches modify the same region, the merger produces a `MergeConflict`. You can inspect conflicts programmatically or render them as git-style conflict markers. 
+ +```php +use WordPress\Merge\Diff\LineDiffer; +use WordPress\Merge\Merge\LineMerger; +use WordPress\Merge\MergeStrategy; + +$strategy = new MergeStrategy( + new LineDiffer(), + new LineMerger() +); + +$result = $strategy->merge( + "Line 1\nLine 2\n", + "Line 1\nLine 2 from branch A\n", + "Line 1\nLine 2 from branch B\n" +); + +if ( $result->has_conflicts() ) { + foreach ( $result->get_conflicts() as $conflict ) { + echo 'Ours: ' . $conflict->ours . "\n"; + echo 'Theirs: ' . $conflict->theirs . "\n"; + } +} + +// The merged content includes git-style conflict markers +echo $result->get_merged_content(); +``` + +### Merge Validation + +Add a `MergeValidator` to reject merges that produce structurally invalid output, even when there are no textual conflicts. The built-in `BlockMarkupMergeValidator` validates WordPress block markup. + +```php +use WordPress\Merge\Diff\MyersDiffer; +use WordPress\Merge\Merge\ChunkMerger; +use WordPress\Merge\MergeStrategy; +use WordPress\Merge\Validate\BlockMarkupMergeValidator; + +$strategy = new MergeStrategy( + new MyersDiffer(), + new ChunkMerger(), + new BlockMarkupMergeValidator() +); + +$result = $strategy->merge( $base, $branch_a, $branch_b ); + +if ( $result->has_conflicts() ) { + // The merge produced valid text but invalid block markup, + // so it was converted into a conflict. 
+ $message = $result->get_conflicts()[0]->get_message(); +} +``` + +## API Reference + +### MergeStrategy + +| Method | Description | +|--------|-------------| +| `__construct( Differ, Merger, ?MergeValidator )` | Create a strategy with pluggable components | +| `merge( $base, $branch_a, $branch_b )` | Perform a three-way merge, returns `MergeResult` | + +### Diff + +| Method | Description | +|--------|-------------| +| `__construct( array $changes )` | Create from an array of `[op, text]` pairs | +| `get_changes()` | Get the raw array of diff operations | +| `get_old_document()` | Reconstruct the original document from the diff | +| `get_new_document()` | Reconstruct the modified document from the diff | +| `format_as_delta()` | Compact delta format (`=28`, `-33`, `+text`) | +| `format_as_git_patch( $options )` | Unified diff format like `git diff` | + +### Diff Constants + +| Constant | Value | Meaning | +|----------|-------|---------| +| `Diff::DIFF_EQUAL` | `0` | Text is the same in both versions | +| `Diff::DIFF_DELETE` | `-1` | Text was removed | +| `Diff::DIFF_INSERT` | `1` | Text was added | + +### MergeResult + +| Method | Description | +|--------|-------------| +| `get_merged_content()` | Get the merged text, with conflict markers if applicable | +| `has_conflicts()` | Whether the merge has unresolved conflicts | +| `get_conflicts()` | Get an array of `MergeConflict` objects | + +### MergeConflict + +| Property/Method | Description | +|-----------------|-------------| +| `$ours` | Text from branch A | +| `$theirs` | Text from branch B | +| `get_message()` | Human-readable conflict description | + +### Differ Implementations + +| Class | Description | +|-------|-------------| +| `LineDiffer` | Line-by-line diff using longest common subsequence | +| `MyersDiffer` | Character-level diff using the Myers algorithm (via diff-match-patch) | + +### Merger Implementations + +| Class | Description | +|-------|-------------| +| `LineMerger` | Merges line-by-line 
diffs | +| `ChunkMerger` | Merges character-level chunk diffs | + +## Requirements + +- PHP 7.4+ +- `ext-mbstring` diff --git a/components/Polyfill/README.md b/components/Polyfill/README.md new file mode 100644 index 00000000..276e3cd6 --- /dev/null +++ b/components/Polyfill/README.md @@ -0,0 +1,205 @@ +# Polyfill + +Provides polyfills for PHP functions and WordPress core APIs so that WordPress-adjacent code can run in standalone PHP applications without a full WordPress installation. It backports PHP 8.0 string functions to PHP 7.2, stubs common WordPress escaping and translation functions, and implements a minimal but functional WordPress hook system (`add_filter`/`apply_filters`/`add_action`/`do_action`). + +## Installation + +```bash +composer require wp-php-toolkit/polyfill +``` + +All polyfills are loaded automatically via Composer's `autoload.files` mechanism. No manual `require` or initialization is needed. + +## Quick Start + +```php +// After `composer require`, all polyfills are available globally. + +// PHP 8.0 string functions work on PHP 7.2+: +str_starts_with( 'hello world', 'hello' ); // true +str_contains( 'hello world', 'world' ); // true +str_ends_with( 'hello world', 'world' ); // true + +// WordPress functions work without WordPress: +$safe = esc_html( '' ); +$text = __( 'Translatable string' ); // returns the string as-is + +// WordPress hook system works standalone: +add_filter( 'the_title', 'strtoupper' ); +$title = apply_filters( 'the_title', 'hello world' ); // 'HELLO WORLD' +``` + +## Usage + +### PHP Function Polyfills + +These functions are defined only when they do not already exist, so they are safe to use alongside PHP 8.0+ or other polyfill libraries. 
+ +```php +// str_starts_with (PHP 8.0+) +str_starts_with( '/var/www/html', '/var' ); // true +str_starts_with( '/var/www/html', '/tmp' ); // false + +// str_ends_with (PHP 8.0+) +str_ends_with( 'image.png', '.png' ); // true +str_ends_with( 'image.png', '.jpg' ); // false + +// str_contains (PHP 8.0+) +str_contains( 'WordPress Toolkit', 'Toolkit' ); // true +str_contains( 'WordPress Toolkit', 'Drupal' ); // false + +// array_key_first (PHP 7.3+) +$data = array( 'alpha' => 1, 'beta' => 2 ); +array_key_first( $data ); // 'alpha' +``` + +### WordPress Function Stubs + +These stubs provide pass-through implementations of common WordPress functions. They allow code that calls WordPress APIs to run without modification in non-WordPress environments. + +```php +// Translation: returns the input string unchanged. +echo __( 'Hello' ); // 'Hello' + +// Escaping: applies htmlspecialchars(). +echo esc_html( '<b>Bold</b>' ); // '&lt;b&gt;Bold&lt;/b&gt;' +echo esc_attr( 'a "quoted" value' ); // 'a &quot;quoted&quot; value' +echo esc_url( 'https://example.com/?a=1&b=2' ); + +// Error reporting stubs: +_doing_it_wrong( 'my_function', 'Use new_function() instead.', '2.0.0' ); +// Stores messages in $GLOBALS['_doing_it_wrong_messages'] + +wp_trigger_error( 'my_function', 'Something went wrong', E_USER_NOTICE ); +// Triggers a PHP notice. E_USER_ERROR throws a WP_Exception instead. +``` + +### WordPress Hook System + +A minimal but fully functional implementation of the WordPress filter and action system. Hooks support priorities and multiple callbacks. + +```php +// Filters transform a value through one or more callbacks. +add_filter( 'sanitize_title', 'strtolower' ); +add_filter( 'sanitize_title', 'trim' ); + +$title = apply_filters( 'sanitize_title', ' My Post Title ' ); +// $title === 'my post title' + +// Priorities control execution order (default is 10, lower runs first). 
+add_filter( 'the_content', 'first_callback', 5 ); +add_filter( 'the_content', 'second_callback', 20 ); + +// Actions are hooks that do not return a value. +add_action( 'init', function () { + // Perform initialization... +} ); +do_action( 'init' ); + +// Actions can pass arguments to callbacks. +add_action( 'save_post', function ( $post_id ) { + // React to a post being saved... +}, 10, 1 ); +do_action( 'save_post', 42 ); +``` + +### WordPress Classes + +#### WP_Error + +A minimal stub of the WordPress `WP_Error` class: + +```php +$error = new WP_Error( 'not_found', 'The item was not found.', array( 'status' => 404 ) ); +echo $error->code; // 'not_found' +echo $error->message; // 'The item was not found.' +``` + +#### WP_Exception + +Extends PHP's base `Exception` class. Used by `wp_trigger_error()` when called with `E_USER_ERROR`: + +```php +try { + wp_trigger_error( 'my_function', 'Fatal problem', E_USER_ERROR ); +} catch ( WP_Exception $e ) { + echo $e->getMessage(); // 'my_function(): Fatal problem' +} +``` + +### Block Parser and Serializer + +When the `BlockParser` component is available, the polyfill provides `parse_blocks()` and `serialize_blocks()`: + +```php +$html = '

    <!-- wp:paragraph --><p>Hello</p><!-- /wp:paragraph -->

    '; +$blocks = parse_blocks( $html ); +$output = serialize_blocks( $blocks ); +// $output === $html +``` + +### mbstring Polyfills + +Safe encoding helpers for working with binary data when `mbstring.func_overload` is enabled: + +```php +// Switch mbstring to binary-safe encoding. +mbstring_binary_safe_encoding(); +$length = strlen( $binary_data ); // byte length, not character length +reset_mbstring_encoding(); + +// mb_str_split (PHP 7.4+) +$chars = mb_str_split( 'Hello', 1 ); // array( 'H', 'e', 'l', 'l', 'o' ) +``` + +## API Reference + +### PHP Function Polyfills + +| Function | Polyfills | Description | +|----------|-----------|-------------| +| `str_starts_with( $haystack, $needle )` | PHP 8.0 | Check if string starts with substring | +| `str_ends_with( $haystack, $needle )` | PHP 8.0 | Check if string ends with substring | +| `str_contains( $haystack, $needle )` | PHP 8.0 | Check if string contains substring | +| `array_key_first( $array )` | PHP 7.3 | Get the first key of an array | + +### mbstring Polyfills + +| Function | Description | +|----------|-------------| +| `mbstring_binary_safe_encoding( $reset = false )` | Switch to binary-safe encoding | +| `reset_mbstring_encoding()` | Restore previous mbstring encoding | +| `mb_str_split( $string, $split_length, $encoding )` | Split a multibyte string into an array | + +### WordPress Function Stubs + +| Function | Description | +|----------|-------------| +| `__( $input )` | Translation stub (returns input unchanged) | +| `esc_attr( $input )` | Attribute escaping via `htmlspecialchars()` | +| `esc_html( $input )` | HTML escaping via `htmlspecialchars()` | +| `esc_url( $url )` | URL escaping via `htmlspecialchars()` | +| `add_filter( $hook, $callback, $priority, $accepted_args )` | Register a filter callback | +| `apply_filters( $hook, $value, ...$args )` | Apply all registered filter callbacks | +| `add_action( $hook, $callback, $priority, $accepted_args )` | Register an action callback | +| `do_action( 
$hook, ...$args )` | Execute all registered action callbacks | +| `parse_blocks( $input )` | Parse block markup into an array of blocks | +| `serialize_blocks( $blocks )` | Serialize an array of blocks back to markup | +| `_doing_it_wrong( $method, $message, $version )` | Log a developer notice | +| `wp_trigger_error( $function_name, $message, $error_level )` | Trigger a PHP error or throw `WP_Exception` | + +### WordPress Classes + +| Class | Description | +|-------|-------------| +| `WP_Error` | Minimal error container with `$code`, `$message`, and `$data` properties | +| `WP_Exception` | Exception subclass used by `wp_trigger_error()` | + +## Attribution + +The WordPress function stubs and `WP_Error` class are modeled after their counterparts in [WordPress core](https://github.com/WordPress/wordpress-develop). The hook system (`add_filter`/`apply_filters`/`add_action`/`do_action`) implements the same interface as WordPress core's plugin API. Licensed under GPL v2. + +## Requirements + +- PHP 7.2+ +- No external dependencies diff --git a/components/ToolkitCodingStandards/README.md b/components/ToolkitCodingStandards/README.md new file mode 100644 index 00000000..b9d1ed88 --- /dev/null +++ b/components/ToolkitCodingStandards/README.md @@ -0,0 +1,120 @@ +# ToolkitCodingStandards + +Custom PHP_CodeSniffer sniffs used internally by the PHP Toolkit project. This component provides two sniffs that enforce WordPress-style coding conventions: one requires Yoda-style comparisons (literal on the left side of `===`), and the other forbids the short ternary (Elvis) operator `?:`. Both sniffs support automatic fixing via `phpcbf`. + +This is internal tooling for the toolkit's own linter pipeline, not a general-purpose coding standard. + +## Installation + +```bash +composer require wp-php-toolkit/toolkit-coding-standards +``` + +In practice this component is used through the toolkit's root `composer.json` configuration. 
It is referenced alongside the main phpcs ruleset in `.phpcs.xml.dist`. + +## Usage + +### Adding to a PHPCS Configuration + +Reference the coding standard in your `phpcs.xml` or `.phpcs.xml.dist` file: + +```xml + + + + + +``` + +Or enable individual sniffs selectively: + +```xml + + +``` + +### Running the Linter + +From the toolkit root: + +```bash +# Check for violations +composer lint + +# Auto-fix violations +composer lint-fix +``` + +Or directly with phpcs/phpcbf: + +```bash +vendor/bin/phpcs -d memory_limit=1G --standard=WordPressToolkitCodingStandards . +vendor/bin/phpcbf -d memory_limit=1G --standard=WordPressToolkitCodingStandards . +``` + +### Sniff: EnforceYodaComparison + +Requires Yoda-style comparisons where the literal or constant value is placed on the left side of a comparison operator. This prevents accidental assignment (`=` instead of `===`) and follows WordPress coding standards. + +```php +// Wrong -- variable on the left: +if ( $value === true ) { /* ... */ } +if ( $name === 'admin' ) { /* ... */ } +if ( $count === 0 ) { /* ... */ } + +// Correct -- literal on the left (Yoda style): +if ( true === $value ) { /* ... */ } +if ( 'admin' === $name ) { /* ... */ } +if ( 0 === $count ) { /* ... */ } +``` + +When both sides are dynamic expressions (function calls, variables, etc.), the sniff does not report an error since neither side is "more constant" than the other: + +```php +// Both sides are dynamic -- no error: +if ( get_option( 'a' ) === get_option( 'b' ) ) { /* ... */ } +``` + +The sniff applies to `===`, `!==`, `==`, and `!=` operators. + +### Sniff: DisallowShortTernary + +Forbids the short ternary (Elvis) operator `?:` and auto-fixes it to a full ternary by duplicating the condition: + +```php +// Wrong -- short ternary: +$name = $input ?: 'default'; + +// Auto-fixed to full ternary: +$name = $input ? 
$input : 'default'; +``` + +The WordPress coding standards discourage the short ternary because it is often used incorrectly and can reduce readability. + +## API Reference + +### Sniff Classes + +| Class | Code | Description | +|-------|------|-------------| +| `EnforceYodaComparisonSniff` | `DisallowedYodaComparison` | Enforces Yoda-style comparisons (literal on left) | +| `DisallowShortTernarySniff` | `ShortTernaryUsed` | Forbids the Elvis operator `?:`, auto-fixes to full ternary | + +### Ruleset + +The `WordPressToolkitCodingStandards/ruleset.xml` file registers both sniffs. Including the standard by name activates both rules at once. + +### Dependencies + +These sniffs extend helpers from the `SlevomatCodingStandard` package: + +- `SlevomatCodingStandard\Helpers\YodaHelper` -- used by the Yoda comparison sniff for dynamism analysis and auto-fixing +- `SlevomatCodingStandard\Helpers\TernaryOperatorHelper` -- used by the short ternary sniff to locate operand boundaries +- `SlevomatCodingStandard\Helpers\TokenHelper` and `FixerHelper` -- token navigation and fixer utilities + +## Requirements + +- PHP 7.2+ +- PHP_CodeSniffer 3.x (dev dependency, provided by the toolkit root) +- slevomat/coding-standard (dev dependency, provided by the toolkit root) +- No runtime external dependencies diff --git a/components/XML/README.md b/components/XML/README.md new file mode 100644 index 00000000..bc37ccb7 --- /dev/null +++ b/components/XML/README.md @@ -0,0 +1,223 @@ +# XML + +A pure PHP XML processor that parses and modifies XML documents without requiring the `libxml2` extension. It implements a subset of the XML 1.0 specification and operates as a streaming, forward-only scanner with namespace support, attribute manipulation, and bookmark-based seeking. Designed for environments where native XML extensions are unavailable, such as sandboxed WordPress installations. 
+ +## Installation + +``` +composer require wp-php-toolkit/xml +``` + +## Quick Start + +```php +use WordPress\XML\XMLProcessor; + +$xml = '<book price="29.99"><title>PHP Internals</title></book>'; +$processor = XMLProcessor::create_from_string( $xml ); + +if ( $processor->next_tag( 'book' ) ) { + $price = $processor->get_attribute( '', 'price' ); // "29.99" + $processor->set_attribute( '', 'price', '24.99' ); +} + +echo $processor->get_updated_xml(); +// <book price="24.99"><title>PHP Internals</title></book> +``` + +## Usage + +### Navigating Tags + +Use `next_tag()` to move the cursor forward through the document. It accepts a tag name string, a namespace-qualified array, or a query array. + +```php +$xml = '
    <root><section id="intro"><p>Hello</p></section></root>
    '; +$processor = XMLProcessor::create_from_string( $xml ); + +// Find any tag +$processor->next_tag(); +echo $processor->get_tag_local_name(); // "root" + +// Find a specific tag by name +$processor->next_tag( 'section' ); +echo $processor->get_attribute( '', 'id' ); // "intro" +``` + +### Working with Namespaces + +Namespaces are first-class citizens. Methods like `get_attribute()` and `set_attribute()` take the full namespace URI as the first argument, not a prefix. + +```php +$xml = '' + . 'Content' + . ''; + +$processor = XMLProcessor::create_from_string( $xml ); +$ns = 'http://wordpress.org/export/1.2/'; + +// Find a namespaced tag by passing array( namespace_uri, local_name ) +if ( $processor->next_tag( array( $ns, 'post' ) ) ) { + echo $processor->get_tag_local_name(); // "post" + echo $processor->get_tag_namespace(); // "http://wordpress.org/export/1.2/" + + // Read and write namespaced attributes + echo $processor->get_attribute( $ns, 'status' ); // "draft" + $processor->set_attribute( $ns, 'status', 'published' ); +} + +echo $processor->get_updated_xml(); +// Content +``` + +### Modifying Attributes + +```php +$xml = ''; +$processor = XMLProcessor::create_from_string( $xml ); + +if ( $processor->next_tag( 'setting' ) ) { + // Update an attribute + $processor->set_attribute( '', 'value', '60' ); + + // Remove an attribute + $processor->remove_attribute( '', 'deprecated' ); + + // Add a new attribute + $processor->set_attribute( '', 'unit', 'seconds' ); +} + +echo $processor->get_updated_xml(); +// +``` + +### Token-Level Processing + +Use `next_token()` to visit every lexical token in the document, including text nodes, comments, CDATA sections, and processing instructions. + +```php +$xml = '
    <root><title>Hello World</title><!-- a comment --><note>Some text</note></root>
    '; +$processor = XMLProcessor::create_from_string( $xml ); + +$text_content = ''; +while ( $processor->next_token() ) { + if ( '#text' === $processor->get_token_name() ) { + $text_content .= $processor->get_modifiable_text(); + } +} + +echo $text_content; // "Hello WorldSome text" +``` + +### Modifying Text Content + +```php +$xml = '<greeting>Hello</greeting>'; +$processor = XMLProcessor::create_from_string( $xml ); + +$processor->next_tag( 'greeting' ); +$processor->next_token(); // Move to the text node +$processor->set_modifiable_text( 'Goodbye' ); + +echo $processor->get_updated_xml(); +// <greeting>Goodbye</greeting> +``` + +### Self-Closing Elements + +```php +$xml = '

    <root><img src="photo.jpg" /><br /><p>Text</p></root>

    '; +$processor = XMLProcessor::create_from_string( $xml ); + +while ( $processor->next_tag( array( 'tag_closers' => 'visit' ) ) ) { + if ( $processor->is_empty_element() ) { + echo $processor->get_tag_local_name() . ' is self-closing' . "\n"; + } +} +// img is self-closing +// br is self-closing +``` + +### Bookmarks + +Bookmarks let you save a position in the document and return to it later. This is useful when you need to inspect downstream content before deciding how to modify an earlier tag. + +```php +$xml = 'ABC'; +$processor = XMLProcessor::create_from_string( $xml ); + +$processor->next_tag( 'list' ); +$processor->set_bookmark( 'list-start' ); + +// Count items +$count = 0; +while ( $processor->next_tag( 'item' ) ) { + $count++; +} + +// Go back and annotate the list with the count +$processor->seek( 'list-start' ); +$processor->set_attribute( '', 'data-count', (string) $count ); + +echo $processor->get_updated_xml(); +// ABC +``` + +### Streaming XML Processing + +For large documents, use `create_for_streaming()` to feed XML in chunks and process it incrementally. + +```php +$processor = XMLProcessor::create_for_streaming(); + +// Feed chunks of XML data +$processor->append_bytes( 'First' ); +$processor->append_bytes( 'Second' ); +$processor->input_finished(); + +// Process all tags +while ( $processor->next_tag( 'item' ) ) { + echo $processor->get_attribute( '', 'id' ) . "\n"; +} +// 1 +// 2 +``` + +## API Reference + +### XMLProcessor + +| Method | Description | +|--------|-------------| +| `create_from_string( $xml )` | Create a processor for a complete XML string | +| `create_for_streaming( $xml )` | Create a processor that accepts incremental input | +| `next_tag( $query )` | Advance to the next matching tag. 
Returns `true` if found | +| `next_token()` | Advance to the next lexical token of any kind | +| `get_tag_local_name()` | Get the local name of the current tag | +| `get_tag_namespace()` | Get the namespace URI of the current tag | +| `is_tag_opener()` | Whether the current tag is an opening tag | +| `is_tag_closer()` | Whether the current tag is a closing tag | +| `is_empty_element()` | Whether the current tag is self-closing | +| `get_attribute( $ns, $name )` | Get the decoded value of an attribute | +| `set_attribute( $ns, $name, $value )` | Set or add an attribute on the current tag | +| `remove_attribute( $ns, $name )` | Remove an attribute from the current tag | +| `get_modifiable_text()` | Get decoded text content of the current text/CDATA/comment node | +| `set_modifiable_text( $value )` | Replace text content of the current node | +| `get_token_name()` | Get the name of the current token (tag name, `#text`, `#comment`, etc.) | +| `set_bookmark( $name )` | Save the current position with a name | +| `seek( $name )` | Return to a previously saved bookmark | +| `release_bookmark( $name )` | Free a bookmark | +| `get_updated_xml()` | Get the full XML document with all modifications applied | +| `append_bytes( $chunk )` | Feed more XML bytes (streaming mode) | +| `input_finished()` | Signal that all XML bytes have been provided | +| `is_paused_at_incomplete_input()` | Whether the parser stopped due to incomplete input | + +## Attribution + +The `XMLProcessor` follows the same architecture and API patterns as [WordPress core's HTML API](https://developer.wordpress.org/reference/classes/wp_html_processor/), extending the streaming tag-processor approach from HTML to XML. Licensed under GPL v2. 
+ +## Requirements + +- PHP 7.2+ +- No external PHP extensions required (no libxml2) diff --git a/components/Zip/README.md b/components/Zip/README.md new file mode 100644 index 00000000..29ba4df9 --- /dev/null +++ b/components/Zip/README.md @@ -0,0 +1,173 @@ +# Zip + +A pure PHP library for reading and writing ZIP archives without the `libzip` extension or `ZipArchive` class. It provides a streaming `ZipFilesystem` reader that exposes ZIP contents through a standard filesystem interface, and a `ZipEncoder` that writes ZIP files incrementally. Handles both stored and deflate-compressed entries. + +## Installation + +``` +composer require wp-php-toolkit/zip +``` + +## Quick Start + +```php +use WordPress\ByteStream\ReadStream\FileReadStream; +use WordPress\Zip\ZipFilesystem; + +// Open a ZIP file and read its contents +$zip = ZipFilesystem::create( FileReadStream::from_path( 'archive.zip' ) ); + +// List top-level entries +$entries = $zip->ls(); // ['readme.txt', 'src', 'images'] + +// Read a file +$content = $zip->get_contents( 'readme.txt' ); +``` + +## Usage + +### Reading ZIP Archives + +`ZipFilesystem` implements the `Filesystem` interface, so you can list directories, check paths, and read files just like a regular filesystem. + +```php +use WordPress\ByteStream\ReadStream\FileReadStream; +use WordPress\Zip\ZipFilesystem; + +$zip = ZipFilesystem::create( FileReadStream::from_path( 'book.epub' ) ); + +// List the root directory +$entries = $zip->ls(); +// ['mimetype', 'EPUB', 'META-INF'] + +// List a subdirectory +$epub_files = $zip->ls( '/EPUB' ); +// ['cover.xhtml', 'css', 'images', 'nav.xhtml', 'package.opf', ...] + +// Check if a path exists +$zip->exists( 'mimetype' ); // true +$zip->is_file( 'mimetype' ); // true +$zip->is_dir( 'EPUB' ); // true +$zip->is_file( 'EPUB' ); // false + +// Read file contents +$mimetype = $zip->get_contents( 'mimetype' ); +// "application/epub+zip" + +$cover = $zip->get_contents( 'EPUB/cover.xhtml' ); +// "..." 
+``` + +### Streaming File Reads + +For large files inside the archive, use `open_read_stream()` to read data incrementally instead of loading everything into memory. + +```php +use WordPress\ByteStream\ReadStream\FileReadStream; +use WordPress\Zip\ZipFilesystem; + +$zip = ZipFilesystem::create( FileReadStream::from_path( 'archive.zip' ) ); + +$stream = $zip->open_read_stream( 'large-dataset.csv' ); +while ( $bytes = $stream->pull( 4096 ) ) { + $chunk = $stream->consume( $bytes ); + // Process the chunk... +} +``` + +### Creating ZIP Archives + +Use `ZipEncoder` to build ZIP files from scratch. Write individual files with `append_file()`, or copy an entire filesystem tree with `append_from_filesystem()`. + +```php +use WordPress\ByteStream\MemoryPipe; +use WordPress\ByteStream\WriteStream\FileWriteStream; +use WordPress\Zip\FileEntry; +use WordPress\Zip\ZipDecoder; +use WordPress\Zip\ZipEncoder; + +// Create a new ZIP file +$output = FileWriteStream::from_path( 'output.zip', 'truncate' ); +$encoder = new ZipEncoder( $output ); + +// Add a file with no compression +$encoder->append_file( + new FileEntry( array( + 'path' => 'hello.txt', + 'compression_method' => ZipDecoder::COMPRESSION_NONE, + 'body_reader' => new MemoryPipe( 'Hello, world!' ), + ) ) +); + +// Add a file with deflate compression +$encoder->append_file( + new FileEntry( array( + 'path' => 'data/notes.txt', + 'compression_method' => ZipDecoder::COMPRESSION_DEFLATE, + 'body_reader' => new MemoryPipe( 'This will be compressed.' ), + ) ) +); + +// Finalize and close +$encoder->close(); +$output->close_writing(); +``` + +### Copying from One ZIP to Another + +Because `ZipFilesystem` implements the standard `Filesystem` interface, you can pass it directly to `ZipEncoder::append_from_filesystem()` to repackage a ZIP archive. 
+ +```php +use WordPress\ByteStream\ReadStream\FileReadStream; +use WordPress\ByteStream\WriteStream\FileWriteStream; +use WordPress\Zip\ZipEncoder; +use WordPress\Zip\ZipFilesystem; + +// Open the source ZIP +$source = ZipFilesystem::create( FileReadStream::from_path( 'original.zip' ) ); + +// Create a new ZIP with the same contents +$output = FileWriteStream::from_path( 'copy.zip', 'truncate' ); +$encoder = new ZipEncoder( $output ); +$encoder->append_from_filesystem( $source ); +$encoder->close(); +$output->close_writing(); +``` + +## API Reference + +### ZipFilesystem + +| Method | Description | +|--------|-------------| +| `create( ByteReadStream $reader )` | Create a filesystem view of a ZIP archive | +| `ls( $dir = '/' )` | List entries in a directory | +| `is_file( $path )` | Check if a path is a file | +| `is_dir( $path )` | Check if a path is a directory | +| `exists( $path )` | Check if a path exists | +| `get_contents( $path )` | Read an entire file as a string | +| `open_read_stream( $path )` | Open a streaming reader for a file | + +### ZipEncoder + +| Method | Description | +|--------|-------------| +| `__construct( ByteWriteStream $output )` | Create an encoder that writes to the given stream | +| `append_file( FileEntry $entry )` | Add a single file to the archive | +| `append_from_filesystem( Filesystem $fs, $path )` | Recursively add files from a filesystem | +| `close()` | Write the central directory and finalize the archive | + +### FileEntry + +Constructed with an associative array of header fields: + +| Field | Description | +|-------|-------------| +| `path` | File path inside the archive | +| `body_reader` | A `ByteReadStream` with the file data | +| `compression_method` | `ZipDecoder::COMPRESSION_NONE` or `ZipDecoder::COMPRESSION_DEFLATE` | + +## Requirements + +- PHP 7.2+ +- No external PHP extensions required (no libzip)