diff --git a/experiments/remote-wp-cow/.dockerignore b/experiments/remote-wp-cow/.dockerignore new file mode 100644 index 00000000..aef7117e --- /dev/null +++ b/experiments/remote-wp-cow/.dockerignore @@ -0,0 +1,8 @@ +/target/ +/.adversarial-loop/ +/.git/ +/.env +/.env.* +!/.env.example +/*.log +/.wp-cow/ diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example new file mode 100644 index 00000000..30c45414 --- /dev/null +++ b/experiments/remote-wp-cow/.env.example @@ -0,0 +1,67 @@ +# Docker lab defaults for wp-cow. +# +# Copy this file to .env, fill in the remote site values, then run: +# docker compose build +# docker compose up -d +# docker compose exec wp-cow-lab bash + +# Host port exposed by Docker Desktop. The container always listens on 8080. +WPCOW_HTTP_PORT=9481 + +# Clone identity and remote WordPress site. +WPCOW_NAME=example +WPCOW_SSH= +WPCOW_PATH= +WPCOW_REMOTE_URL= + +# Leave blank to derive http://localhost:${WPCOW_HTTP_PORT}. +WPCOW_LOCAL_URL= + +# Docker Desktop resolver fallback. +WPCOW_DNS1=1.1.1.1 +WPCOW_DNS2=8.8.8.8 + +# Runtime defaults. +WPCOW_WEB_SERVER=frankenphp +WPCOW_SPLASH=1 +WPCOW_REMOTE_DB_TUNNEL=0 +WPCOW_REMOTE_DB_HELPER=1 +WPCOW_RUNTIME_CODE_PACK=1 +WPCOW_RUNTIME_CODE_PACK_MAX_MB=256 +WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB=8 +WPCOW_RUNTIME_CODE_PACK_MAX_FILES=20000 +WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS=180 +WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN=0 +WPCOW_MATERIALIZE_OPTIONS_TABLE=1 +WPCOW_REMOTE_QUERY_CACHE=1 +WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 +# Keep arbitrary production plugins disabled unless explicitly debugging them. 
+WPCOW_ENABLE_PLUGINS=0 +WPCOW_PLUGIN_MODE=auto +WPCOW_PLUGIN_ADMISSION=1 +WPCOW_PLUGIN_ADMISSION_DELAY_SECS=20 +WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS=15 +WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0 +WPCOW_PHP_DISABLE_FUNCTIONS=exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec +WPCOW_CACHE_MAX_FILE_MB=64 +WPCOW_REMOTE_STAT_PREFETCH_MAX_KB=0 +WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB=0 +WPCOW_FUSE_TTL_SECS=60 +WPCOW_REMOTE_METADATA_CACHE_TTL_SECS=3600 +WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 +WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 +WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 +WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 +WPCOW_PHP_MAX_EXECUTION_SECS=90 +WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 +WPCOW_PHP_WORKERS=4 +WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0 + +# Optional local-only admin override used with wp-cow-lab-server. +# This updates only the local clone DB after the relevant user rows are copied. +WPCOW_LOCAL_ADMIN_LOGIN= +WPCOW_LOCAL_ADMIN_PASSWORD= + +# Testing/debug switches. +WPCOW_SKIP_SCHEMA=0 +WPCOW_NO_PROBE=0 diff --git a/experiments/remote-wp-cow/.gitignore b/experiments/remote-wp-cow/.gitignore new file mode 100644 index 00000000..f346ebb6 --- /dev/null +++ b/experiments/remote-wp-cow/.gitignore @@ -0,0 +1,2 @@ +/target/ +/.env diff --git a/experiments/remote-wp-cow/Cargo.lock b/experiments/remote-wp-cow/Cargo.lock new file mode 100644 index 00000000..b9b2487a --- /dev/null +++ b/experiments/remote-wp-cow/Cargo.lock @@ -0,0 +1,2552 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + 
+[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "ascii" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bigdecimal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + 
"clang-sys", + "itertools", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.117", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" +dependencies = [ + "objc2", +] + +[[package]] +name = "borsh" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" +dependencies = [ + "borsh-derive", + "bytes", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfcfdc083699101d5a7965e49925975f2f55060f94f9a05e7187be95d530ca59" +dependencies = [ + "once_cell", + "proc-macro-crate 3.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "btoi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd6407f73a9b8b6162d8a2ef999fe6afd7cc15902ebf42c5cd296addf17e0ad" +dependencies = [ + "num-traits", +] + +[[package]] +name = "btoi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3b5ab9db53bcda568284df0fd39f6eac24ad6f7ba7ff1168b9e76eba6576b976" +dependencies = [ + "num-traits", +] + +[[package]] +name = "bufstream" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "chunked_transfer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "ctrlc" +version = "3.5.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0b1fab2ae45819af2d0731d60f2afe17227ebb1a1538a236da84c93e9a60162" +dependencies = [ + "dispatch2", + "nix 0.31.2", + "windows-sys", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_utils" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "362f47930db19fe7735f527e6595e4900316b893ebf6d48ad3d31be928d57dd6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags", + "block2", + "libc", + "objc2", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "libz-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "frunk" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28aef0f9aa070bce60767c12ba9cb41efeaf1a2bc6427f87b7d83f11239a16d7" +dependencies = [ + "frunk_core", + "frunk_derives", + "frunk_proc_macros", + "serde", +] + +[[package]] +name = "frunk_core" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "476eeaa382e3462b84da5d6ba3da97b5786823c2d0d3a0d04ef088d073da225c" +dependencies = [ + "serde", +] + +[[package]] +name = "frunk_derives" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0b4095fc99e1d858e5b8c7125d2638372ec85aa0fe6c807105cf10b0265ca6c" +dependencies = [ + "frunk_proc_macro_helpers", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "frunk_proc_macro_helpers" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1952b802269f2db12ab7c0bd328d0ae8feaabf19f352a7b0af7bb0c5693abfce" +dependencies = [ + "frunk_core", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "frunk_proc_macros" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3462f590fa236005bd7ca4847f81438bd6fe0febd4d04e11968d4c2e96437e78" +dependencies = [ + "frunk_core", + "frunk_proc_macro_helpers", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = 
"fuser" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb29a3ae32279fe3e79a958fe01899f5fb23eadccee919cf88e145b54ed9367" +dependencies = [ + "libc", + "log", + "memchr", + "nix 0.29.0", + "page_size", + "smallvec", + "zerocopy", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + 
"libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + 
"js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + 
+[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.0", + "serde", + "serde_core", +] + +[[package]] +name = "io-enum" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de9008599afe8527a8c9d70423437363b321649161e98473f433de802d76107" +dependencies = [ + "derive_utils", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name 
= "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] 
+name = "libz-sys" +version = "1.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3a226e576f50782b3305c5ccf458698f92798987f551c6a02efe8276721e22" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "msql-srv" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b821d09e9a4ed6b61015a889597446b3b6c7721544d0f4b617bcfdacf6ee7877" +dependencies = [ + "byteorder", + "chrono", + "mysql_common 0.31.0", + "nom", +] + +[[package]] +name = "mysql" 
+version = "28.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a732193888328fc060ab901c0ed1355521267a51ffbfd9a0b3786434c6b8e7f" +dependencies = [ + "bufstream", + "bytes", + "crossbeam-queue", + "crossbeam-utils", + "flate2", + "io-enum", + "libc", + "lru", + "mysql_common 0.37.1", + "named_pipe", + "pem", + "percent-encoding", + "socket2", + "twox-hash", + "url", +] + +[[package]] +name = "mysql-common-derive" +version = "0.30.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f" +dependencies = [ + "darling", + "heck 0.4.1", + "num-bigint", + "proc-macro-crate 1.3.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.117", + "termcolor", + "thiserror 1.0.69", +] + +[[package]] +name = "mysql_common" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef" +dependencies = [ + "base64 0.21.7", + "bigdecimal", + "bindgen", + "bitflags", + "bitvec", + "btoi 0.4.3", + "byteorder", + "bytes", + "cc", + "chrono", + "cmake", + "crc32fast", + "flate2", + "frunk", + "lazy_static", + "mysql-common-derive", + "num-bigint", + "num-traits", + "rand", + "regex", + "rust_decimal", + "saturating", + "serde", + "serde_json", + "sha1", + "sha2", + "smallvec", + "subprocess", + "thiserror 1.0.69", + "time", + "uuid", + "zstd", +] + +[[package]] +name = "mysql_common" +version = "0.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffc2127d4035fa5a614935c663a15a4468e64e798473e0cc21c8df40a607588" +dependencies = [ + "base64 0.22.1", + "bitflags", + "btoi 0.5.0", + "byteorder", + "bytes", + "crc32fast", + "flate2", + "getrandom 0.3.4", + "num-bigint", + "num-traits", + "regex", + "saturating", + "serde", + "serde_json", + "sha1", + "sha2", + "thiserror 2.0.18", + "uuid", +] + +[[package]] +name = 
"named_pipe" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad9c443cce91fc3e12f017290db75dde490d685cdaaf508d7159d7cf41f0eb2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nix" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit 0.19.15", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.11+spec-1.1.0", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] 
+name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = 
"rust_decimal" +version = "1.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ce901f9a19d251159075a4c37af514c3b8ef99c22e02dd8c19161cf397ee94a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", + "wasm-bindgen", +] + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "saturating" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" 
+version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subprocess" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c56e8662b206b9892d7a5a3f2ecdbcb455d3d6b259111373b7e08b8055158a8" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny_http" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" +dependencies = [ + "ascii", + "chunked_transfer", + "httpdate", + "log", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime 0.6.11", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.25.11+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +dependencies = [ + "indexmap", + "toml_datetime 1.1.1+spec-1.1.0", + "toml_parser", + "winnow 1.0.2", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.2", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + 
+[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "serde", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.120" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = 
"wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "wp-cow" +version = "0.1.0" +dependencies = [ + "anyhow", + "base64 0.22.1", + "clap", + "ctrlc", + "fuser", + "hex", + "libc", + "msql-srv", + "mysql", + "serde", + "serde_json", + "sha2", + "tempfile", + "tiny_http", + "url", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + 
"yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/experiments/remote-wp-cow/Cargo.toml b/experiments/remote-wp-cow/Cargo.toml new file mode 100644 index 00000000..56dd4d9f --- /dev/null +++ b/experiments/remote-wp-cow/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "wp-cow" +version = "0.1.0" +edition = "2021" +license = "MIT" + +[workspace] + +[dependencies] +anyhow = "1.0" +base64 = "0.22" +clap = { version = "4.5", features = ["derive"] } +ctrlc = "3.4" +fuser = "0.16" +hex = "0.4" +libc = "0.2" +msql-srv = { version = "0.11", default-features = false } +mysql = { version = "28", default-features = false, features = ["minimal-rust"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10" +tiny_http = "0.12" +url = "2.5" + +[dev-dependencies] +tempfile = "3.13" diff --git a/experiments/remote-wp-cow/DB_ROW_COW.md b/experiments/remote-wp-cow/DB_ROW_COW.md new file 
mode 100644 index 00000000..fb96b0fb --- /dev/null +++ b/experiments/remote-wp-cow/DB_ROW_COW.md @@ -0,0 +1,91 @@ +# Row-Level Database COW Design + +## Goal + +The remote WordPress clone must avoid dumping whole database tables before the +first ordinary edit. For simple primary-key operations, wp-cow treats the remote +database as the lower layer and the local database as the upper layer: + +- remote rows are read-only lower data, +- copied or inserted local rows shadow remote rows with the same primary key, +- local tombstones hide remote rows without deleting them remotely, +- ambiguous SQL is not treated as row-level safe. + +This is the hard database COW path that lets a large site stay lazy at row +granularity. + +## Model Implemented In This Iteration + +The Rust row-COW engine has a conservative SQL planner and a fakeable backend +trait. It supports single-table WordPress primary-key operations for these +columns: + +- `ID` +- `option_id` +- `umeta_id` +- `meta_id` +- `term_id` +- `term_taxonomy_id` +- `object_id` +- `comment_ID` +- `link_id` + +Supported row-level statements: + +- `SELECT ... FROM table WHERE pk = value` +- `SELECT ... FROM table WHERE pk IN (...)` +- `UPDATE table SET ... WHERE pk = value` or `pk IN (...)` +- `DELETE FROM table WHERE pk = value` or `pk IN (...)` +- `INSERT INTO table ...` as local-only + +`UPDATE` first copies up exactly the requested remote primary keys, excluding +locally tombstoned keys, then the caller runs the update locally. `DELETE` +records local tombstones and deletes any matching local upper rows; it never +sends a write to remote. Row-level `SELECT` merges remote rows, local rows, and +tombstones so deleted remote rows do not reappear and local rows shadow remote +rows. + +The production control server exposes `/row-cow`. The generated `wp-content/db.php` +drop-in calls it before the older full-table materialization path. 
If row-COW +handles a statement, WordPress continues against the local database or receives +the merged result. If a write is not row-level safe, the existing table +promotion/materialization fallback remains the conservative path. + +`wp-cow run` also exposes a local MySQL protocol proxy. The generated +`wp-config.php` points `DB_HOST` at this proxy so plugins that bypass `$wpdb` +still go through the COW routing layer. The drop-in itself uses +`WPCOW_LOCAL_DB_HOST` to connect directly to local MariaDB and avoid recursively +calling the proxy. + +Promotion is overlay-preserving. Before importing a full remote table, wp-cow +dumps the current local upper rows for that table, imports the remote lower +table, restores the local upper rows, then reapplies tombstones. This keeps +later complex SQL correct after earlier row-level edits: local updates and +inserts survive promotion, and deleted remote rows do not reappear. + +## Conservative Fallbacks + +The planner returns `PromoteTable` or `Unsupported`, never row-level safe, for: + +- joins and multi-table statements, +- non-primary-key writes, +- aggregate reads, +- `DISTINCT`, grouping, ordering, or limiting that cannot be merged safely, +- malformed or ambiguous SQL. + +The strict unit harness uses an in-memory fake remote/local backend. It fails if +write-class SQL reaches the fake remote, if update/delete preparation fetches +more than the requested primary keys, if tombstoned remote rows reappear, if +local inserts are sent to remote, or if ambiguous SQL is planned as row-level +safe. 
+ +## Out Of Scope + +This iteration intentionally does not solve every MySQL/WordPress query shape: + +- joins, aggregates, range predicates, secondary-index predicates, subqueries, + and complex expressions remain full-table-promotion or unsupported cases; +- safe merge support for `ORDER BY` and `LIMIT` is not implemented; +- auto-increment ID allocation for local inserts is still delegated to the + local database and is not reconciled with the remote lower layer; +- no real remote SiteGround instance is required or touched by the test harness. diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md new file mode 100644 index 00000000..aaaa6cc4 --- /dev/null +++ b/experiments/remote-wp-cow/PRD.md @@ -0,0 +1,179 @@ +# PRD: Fast Remote WordPress COW Serve + +## Problem + +`wp-cow-lab-serve` must make a remote WordPress site locally usable quickly. +The current prototype can still feel stuck because WordPress boot touches many +files and options before the first byte reaches the browser. A design that is +theoretically lazy but blocks the first page for minutes is not acceptable. + +## Goal + +Given SSH access, a remote WordPress path, and a local port, one command should +produce a responsive local WordPress server without copying media uploads or all +database rows. + +Target command shape: + +```bash +wp-cow-lab-serve +``` + +## Success Criteria + +- First run reaches a local HTTP response in under 15 seconds for the + SiteGround test site, excluding Docker image build time. +- Repeated runs reach a local HTTP response in under 5 seconds. +- Startup output shows timed phases so slow work is visible. +- The browser must not spin indefinitely. Slow or failing remote work must + return a visible error with timing context. +- If the first real WordPress response is still warming files, the browser + should receive a local splash/progress page quickly instead of a blank loading + tab. +- The clone must serve the actual remote-backed site. 
A WordPress installation + wizard indicates an empty or unavailable DB lower layer and must be surfaced + as a wp-cow runtime error, not success. +- No full `wp-content/uploads` copy. +- No optimistic full database row dump. +- Local writes must not reach production. +- A warmed clone can be explicitly severed from the remote lower layers, then + refreshed and used in `wp-admin` without opening SSH or remote DB reads. +- A local admin password reset must affect only the local materialized DB. + +## Non-Goals + +- Perfect visual fidelity for every media asset on first page load. +- Full production snapshot semantics. +- Supporting non-Linux runtime hosts. + +## Product Shape + +The default runtime is request-driven. It must not assume plugin/theme/runtime +directories are small: any directory may contain large generated artifacts, +vendor caches, backups, or media-like data. Files are fetched only when the +local request path opens them, then remembered in the persistent local cache. + +Startup should do: + +1. Probe WordPress and remote DB credentials. +2. Export schema only. +3. Initialize empty local DB schema. +4. Start a persistent SSH tunnel for safe remote DB reads when the remote DB is + reachable over TCP from the SSH host. +5. Start a local MySQL protocol proxy for plugins that bypass `$wpdb` and use + the generated `DB_HOST` constant directly. +6. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, + and safety MU plugin. +7. Serve files lazily and persistently cache only the files touched by requests. +8. For the first dynamic browser request, show a temporary splash page that + polls real file-cache progress while a bypass request warms WordPress. + +## File Materialization Policy + +Fetch locally on demand: + +- Any remote file that WordPress, PHP, or the browser actually opens. +- Remote directory entries only when a request actually lists that directory. +- Remote metadata needed for opened/listed files. 
+ +Remember: + +- Cached file bytes in `file-cache/`. +- Cached remote metadata in `file-cache/metadata.json`. +- Local mutations separately in `upper/`. + +Do not: + +- Batch copy runtime directories by default. +- Copy `wp-content/uploads` up front. +- Assume plugin/theme directories are small. +- Re-fetch cached file bytes or metadata on subsequent runs unless explicitly + refreshed. + +## DB Policy + +Initial startup must not dump rows. Reads should use a persistent tunneled +remote DB connection when available, not per-query SSH/PHP subprocesses. If a +query touches a locally materialized table, the involved tables should be routed +locally. Writes must be local-only. + +If remote DB reads are too slow for first page boot, the next fallback should +be a bounded bootstrap materialization of only essential option rows, not a full +table dump. + +The MVP implements that fallback for `*_options`: on the first matching +autoload/core-option read, it copies only autoloaded rows and core +identity/theme/plugin option names into the local database and routes matching +reads locally. Arbitrary non-bootstrap option reads still go through the remote +read path unless the table has been fully materialized. + +## Severed Mode + +`wp-cow sever <name>` turns a clone from live-lower mode into local-only mode. +It is not the default startup path because it must copy database rows, but it is +the expected path when the user wants to disconnect from production and keep +working locally. + +Severing should: + +- Materialize the core WordPress tables needed for local frontend/admin/content + edits: options, users, usermeta, posts, postmeta, terms, term_taxonomy, + term_relationships, comments, commentmeta, and links. +- Cache WordPress admin/runtime program files needed for offline `wp-admin` + access without copying uploads. +- Optionally set a local administrator password in the local DB only. 
+- Write an offline marker that makes future `wp-cow run` skip SSH control + masters, remote DB tunnels, remote filesystem reads, and daemon remote + `/query` calls. +- Continue serving local upper-layer file writes and local DB writes after the + remote link is severed. + +## Observability + +The CLI should print phase names and durations: + +- probe +- schema export +- local schema init +- file cache hits/misses where practical +- mount +- php start +- first request file-cache progress through `/__wp-cow/progress` + +## Test Site + +Use the SiteGround WordPress site supplied by the user: + +```text +SSH: u2199-yx4tznmyunag@calm-cottage-mindfulness.com:18765 +Key: ~/.ssh/id_siteground +Path: /home/u2199-yx4tznmyunag/www/calm-cottage-mindfulness.com/public_html +Remote URL: https://calm-cottage-mindfulness.com +Local URL: http://localhost:9481 +``` + +Do not print secrets. Do not modify production data. + +## Acceptance Test + +From a clean clone state: + +```bash +WPCOW_NAME=calm-cottage \ +WPCOW_SSH=wp-cow-siteground-calm-cottage \ +WPCOW_PATH=/home/u2199-yx4tznmyunag/www/calm-cottage-mindfulness.com/public_html \ +WPCOW_REMOTE_URL=https://calm-cottage-mindfulness.com \ +WPCOW_LOCAL_URL=http://localhost:9481 \ +wp-cow-lab-serve +``` + +Then: + +```bash +curl -I --max-time 10 http://localhost:9481/ +``` + +The response must complete within the timeout. A splash/progress page is +acceptable while the first real page warms. A WordPress error page is acceptable +during development only if it returns quickly with diagnostic output. The +WordPress installation wizard is not acceptable as a successful response. diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md new file mode 100644 index 00000000..72251aef --- /dev/null +++ b/experiments/remote-wp-cow/README.md @@ -0,0 +1,395 @@ +# wp-cow + +`wp-cow` is a Linux-only prototype for a lazy local WordPress clone runtime. 
+It creates a local clone description instead of copying a whole site, mounts a +copy-on-write FUSE filesystem over SSH/PHP, and keeps database writes local by +materializing remote tables into a local MySQL database before writes run. + +## ForkPress exploration status + +This directory is an isolated experiment. It is not wired into the ForkPress +workspace, release artifact, runtime, or CI path. The goal is to explore whether +ForkPress should have a remote-site onboarding mode where a very large +production WordPress tree can be made locally usable without first copying every +file and every database row. + +This deliberately violates the current ForkPress product shape in a few ways: +it uses a long-running local helper, FUSE, local MySQL, and SSH/PHP calls to the +remote host. Those choices are useful for proving out the lazy-lower-layer +model, but they should not be read as a proposed final integration shape. + +## Build + +```bash +cargo build +``` + +## Strict harness + +```bash +scripts/strict-harness.sh +``` + +The harness runs the full Rust/PHP test suite plus targeted checks for lazy +file caching, installer blocking, row-level DB write isolation, offline guards, +FrankenPHP routing, local admin override wiring, and Docker lab port exposure. + +## Docker lab on macOS + +Use this when you are on a Mac and want a Linux shell with FUSE, FrankenPHP, +SSH, and local MariaDB available. The container is intentionally privileged so +FUSE can mount inside Docker Desktop's Linux VM. The Docker image uses the +official FrankenPHP PHP 8.3 image and installs `mysqli`, `opcache`, and `pdo_mysql` for +WordPress. + +From this directory: + +```bash +cp .env.example .env +$EDITOR .env +docker compose build +docker compose up -d +docker compose exec wp-cow-lab bash +``` + +The Compose host port is created from `WPCOW_HTTP_PORT` when the container is +created. FrankenPHP still listens on port `8080` inside the container. 
If you +want to open port 9481 on the Mac, set it in `.env` or pass it when starting +the lab: + +```bash +WPCOW_HTTP_PORT=9481 docker compose up -d +``` + +If you change the port after the container already exists, recreate it: + +```bash +docker compose down +WPCOW_HTTP_PORT=9481 docker compose up -d --force-recreate +``` + +Inside the container, keep `WPCOW_HTTP=0.0.0.0:8080`. `wp-cow-lab-serve` +derives `WPCOW_LOCAL_URL` from `WPCOW_HTTP_PORT` when the URL is not explicitly +overridden. + +Inside the container, check the lab: + +```bash +wp-cow-lab-check +``` + +If DNS fails inside Docker Desktop with an error such as +`Temporary failure in name resolution`, check and temporarily repair the +container resolver: + +```bash +wp-cow-lab-dns +wp-cow-lab-dns --fix +``` + +If `--fix` works, keep these values in `.env` and recreate the container: + +```bash +WPCOW_DNS1=1.1.1.1 +WPCOW_DNS2=8.8.8.8 +``` + +If your SSH command has flags, put them in `~/.ssh/config` on the Mac before +starting the container. For example: + +```sshconfig +Host mysite + HostName example.com + User user + Port 2222 + IdentityFile ~/.ssh/id_ed25519 +``` + +Docker Compose mounts your Mac `~/.ssh` read-only at `/host-ssh`, copies it +into the container, and removes Apple-only OpenSSH options such as +`UseKeychain`. It also forwards the Docker Desktop SSH agent socket at +`/run/host-services/ssh-auth.sock`. + +Set the real site values in `.env`, or export them inside the container. 
+`WPCOW_SSH` can be either a host alias from `~/.ssh/config` or a simple SSH +command copied from a host dashboard: + +```bash +export WPCOW_NAME=example +export WPCOW_SSH=mysite +export WPCOW_PATH=/home/user/public_html +export WPCOW_REMOTE_URL=https://example.com +export WPCOW_LOCAL_URL=http://localhost:9481 +``` + +For example, this is accepted: + +```bash +export WPCOW_SSH='ssh -p18765 -i ~/.ssh/id_siteground user@example.com' +``` + +For a full local WordPress runtime, use one command: + +```bash +wp-cow-lab-serve +``` + +That is the normal path. It creates or reuses the lazy clone, exports schema +only if needed, initializes an empty local MariaDB database if needed, mounts +the lazy filesystem, starts the DB control layer, and starts FrankenPHP. It does +not download media, runtime directories, or table rows up front. + +File reads are request-driven. When WordPress opens a remote file, `wp-cow` +fetches that file into the persistent `file-cache/` and records the remote +metadata beside it. Later reads and later runs use the local cached copy instead +of fetching the file or statting it remotely again. Runtime batch sync is not +part of `serve`; old `WPCOW_RUNTIME_SYNC` environment values are ignored so +plugin/theme/runtime trees stay lazy too. + +The first browser hit can still spend time fetching the exact PHP files needed +to boot WordPress. With `WPCOW_SPLASH=1` (the Docker default), `wp-cow` returns a +temporary local splash page immediately and starts the real request in the +browser. The splash polls `/__wp-cow/progress`, which is backed by the local file +cache progress file, then swaps in the warmed WordPress response. FrankenPHP is +started with multiple PHP threads (`WPCOW_PHP_WORKERS`, default `4`) so progress +polling can continue while the warm request is running. Set +`WPCOW_WEB_SERVER=php` only when you explicitly want the old PHP built-in +development server fallback. + +Remote database reads are mediated by the local daemon by default. 
Generated +PHP does not contain the production DB name, user, password, or host, so plugins +using the normal `DB_*` constants see only the local COW proxy. Write-class SQL +is blocked from the remote database and materialized locally first. +`WPCOW_REMOTE_DB_HELPER=1` keeps a read-only PHP/MySQL helper open over SSH so a +cold render does not spawn a fresh remote PHP process for every safe lower-layer +read. +`WPCOW_REMOTE_DB_TUNNEL=1` is an opt-in debugging/performance mode for hosts +where you explicitly accept opening a local SSH tunnel to the remote DB. + +`wp-cow run` also starts a local MySQL protocol proxy on the generated `DB_HOST` +port. Core WordPress still uses the generated `db.php` drop-in with a direct +local MariaDB connection to avoid recursion, but plugins that open their own +`mysqli` connection using `DB_HOST` hit the proxy instead of the empty local +schema. The proxy applies the same row-COW/read-routing/write-blocking rules as +the drop-in before forwarding anything to local MariaDB or the remote read-only +lower layer. + +On first WordPress boot, `wp-cow` special-cases the options-table bootstrap +query. It materializes only autoloaded option rows plus core identity/theme/plugin +option names into the local database, then routes those matching reads locally. +That keeps the common `SELECT ... FROM *_options WHERE autoload IN (...)` query +off the slow remote `/query` fallback without dumping the whole database. + +Remote read queries that still need the lower database are cached under +`~/.wp-cow/clones/<name>/db/query-cache` by default. This makes repeated page +loads reuse local query results instead of crossing SSH/remote MySQL again. +Set `WPCOW_REMOTE_QUERY_CACHE=0` to disable it or adjust +`WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS` for large result sets. Local write-class SQL +does not globally clear this cache; cached remote reads are used only while the +referenced tables have no dirty local overlay state. 
+ +The FUSE mount also keeps warmed path metadata live long enough for repeat +renders to reuse the program files WordPress just touched. The Docker lab +defaults `WPCOW_FUSE_TTL_SECS` to `60` for kernel attribute caching and +`WPCOW_REMOTE_METADATA_CACHE_TTL_SECS` to `3600` for daemon-side remote metadata, +including negative lookups for files or directories that WordPress probes but +the remote site does not have. Lower values make live remote changes visible +sooner, while higher values reduce repeated path walking. +FrankenPHP also enables OPcache for parsed PHP code in the local web runtime. +By default `WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0`, so warmed PHP files do not get +restatted through FUSE on every render. Restart `wp-cow run` after editing PHP +program files, or set `WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=1` while actively +working on plugin/theme code. +There is no recursive runtime warm-up: PHP files, themes, plugins, and uploads +are fetched only when a request touches them, then cached for repeated reads. +There are experimental cold-start knobs for hosts where batch reads beat +single-file SSH round trips. Set `WPCOW_REMOTE_STAT_PREFETCH_MAX_KB` to prefetch +PHP/JSON/translation file bytes during stat, and +`WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB` to batch same-directory runtime siblings. +Both default to `0` because the real SiteGround trace showed broad prefetching +can make cold start worse when WordPress stats assets it will not read during +render. These knobs never recurse into uploads and never fetch CSS, JS, or media +unless the browser asks for them. +Remote plugin and language directories stay visible through the lazy lower +layer, but plugin execution defaults to policy mode. With +`WPCOW_PLUGIN_MODE=auto`, the generated safety mu-plugin starts with no +production plugins enabled. 
After the first successful local render, the daemon +tries active plugins one at a time using a bounded PHP smoke boot +(`WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS`) and records the result in +`run/plugin-policy.json`. Admitted plugins are enabled on later requests; +failing plugins are quarantined locally. Set `WPCOW_PLUGIN_MODE=off` to suppress +all plugins, or `WPCOW_PLUGIN_MODE=full` / `WPCOW_ENABLE_PLUGINS=1` only when +you intentionally want every active production plugin to run in the clone. + +Because active plugins are production code, the launched PHP runtime also +disables common side-effect escape hatches by default: process spawning, +`mail()`, raw socket clients, and URL-based includes. That is in addition to the +mu-plugin guards for WordPress mail and HTTP APIs. The disabled PHP function +list is configurable with `WPCOW_PHP_DISABLE_FUNCTIONS`; set it to `0` only for +debugging. The generated DB drop-in still needs local HTTP for daemon control +calls, so this is not a kernel sandbox. Set +`WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=1` only when you intentionally want to +let plugin code spawn local processes, send raw socket traffic, or bypass these +PHP-level guards. + +The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH +host, or slow remote file read should fail visibly instead of leaving the +browser spinning forever. Adjust the defaults with +`WPCOW_CONTROL_REQUEST_TIMEOUT_SECS`, `WPCOW_REMOTE_COMMAND_TIMEOUT_SECS`, +`WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS`, `WPCOW_PHP_MAX_EXECUTION_SECS`, and +`WPCOW_PHP_SOCKET_TIMEOUT_SECS`. + +If WordPress tries to show the installation wizard, the router treats that as a +wp-cow DB/runtime failure. The clone should either show the real remote-backed +site or a diagnostic error; the installer is not considered a successful local +copy. 
+ +To sever a warmed clone from the remote lower layers, run: + +```bash +export WPCOW_LOCAL_ADMIN_PASSWORD='8u239huiwdsj91das' +wp-cow-lab-sever +wp-cow-lab-run +``` + +`wp-cow-lab-sever` materializes only the WordPress tables already touched by the +clone, plus the user tables needed for a requested local admin password +override. It does not walk or prefetch the remote WordPress tree; pages and +admin screens you want available offline must be loaded once before severing so +their PHP files and DB rows are already materialized. It then writes +`run/offline.json`. After that marker exists, `wp-cow run` does not open SSH, +does not start the remote DB tunnel, and routes DB reads locally. + +Open this on the Mac: + +```text +http://localhost:9481/ +``` + +For a filesystem-only smoke test that does not touch the remote DB, skip schema +export and mount the remote tree lazily: + +```bash +export WPCOW_SKIP_SCHEMA=1 +wp-cow-lab-clone +wp-cow-lab-mount +``` + +If you already created a filesystem-only clone and then want to open WordPress +in a browser, initialize the local database schema without deleting the clone or +its file cache: + +```bash +wp-cow-lab-db-init +wp-cow-lab-run +``` + +Then open another shell: + +```bash +docker compose exec wp-cow-lab bash +ls -la /mnt/wp-cow/example +cat /mnt/wp-cow/example/wp-config.php +``` + +Remote file contents are cached separately from local mutations in +`~/.wp-cow/clones/<name>/file-cache`, which is persisted by the Docker +`wp-cow-state` volume. Files up to `WPCOW_CACHE_MAX_FILE_MB` are cached as whole +files on first touch/read, and their remote metadata is recorded in +`file-cache/metadata.json` so later runs do not need to stat those files +remotely again. Negative lookups are recorded in `file-cache/missing.json` for +the metadata TTL so repeated renders do not keep rechecking absent plugins, +languages, template directories, or optional WordPress files. Larger files are +streamed by range. 
The Docker lab defaults the whole-file cache limit to 64 MB +and leaves experimental cold-start prefetch knobs off unless explicitly enabled. +Check or clear the cache with: + +```bash +wp-cow-lab-cache status +wp-cow-lab-cache warm-core +wp-cow-lab-cache clear +``` + +Stop the lab: + +```bash +docker compose down +``` + +Remove persisted clone state and local MariaDB data: + +```bash +docker compose down -v +``` + +## Typical flow + +```bash +wp-cow serve \ + --name example \ + --ssh user@example.com \ + --path /home/user/public_html \ + --remote-url https://example.com \ + --local-url http://example.test \ + --mountpoint /mnt/wp-cow/example \ + --http 127.0.0.1:8080 +``` + +`wp-cow serve` is the one-command runtime. It prepares only the metadata needed +to boot WordPress locally and leaves file contents and database rows lazy until +WordPress actually asks for them. + +The clone state is stored under `~/.wp-cow/clones/<name>/`: + +```text +manifest.json +upper/ +whiteouts.json +file-cache/ +db/ + schema.sql + state.json +generated/ +run/ +``` + +## What is implemented + +- SSH session reuse through OpenSSH control sockets. +- Remote WordPress probe through an ephemeral PHP script. +- Lazy remote file operations through PHP over SSH. +- Local COW filesystem through FUSE: + - upper layer shadows remote files, + - remote reads are fetched lazily, + - small remote files are cached separately from local mutations, + - deletions are recorded as whiteouts. +- Generated local `wp-config.php`, `wp-content/db.php`, and safety MU plugin. +- Schema import and full-table DB materialization through remote `mysqldump`. +- A local control HTTP server used by the DB drop-in and MySQL proxy: + - read queries can be served from the remote DB through daemon-mediated PHP, + - write-class SQL is never sent to the remote DB, + - writes materialize affected table groups before executing locally. 
+- A local MySQL protocol proxy for code paths that bypass WordPress's `$wpdb` + object and connect with the generated `DB_HOST` constant. + +## Requirements + +Local machine: + +- Linux with `/dev/fuse` access. +- `ssh`, `frankenphp`, `php`, `mysql`, and `mysqldump` on `PATH`. +- A local MySQL/MariaDB server reachable by the generated DB settings. + +Remote host: + +- SSH access. +- PHP CLI. +- `mysqldump`. +- WordPress files at the supplied `--path`. + +## Notes + +This is an MVP. The DB layer now has both a WordPress `db.php` drop-in and a +local MySQL protocol proxy, but it is still conservative: complex SQL promotes +tables instead of attempting unsafe partial merges, and it does not provide true +point-in-time snapshot support without cooperation from the remote host. diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml new file mode 100644 index 00000000..891b56a9 --- /dev/null +++ b/experiments/remote-wp-cow/compose.yaml @@ -0,0 +1,82 @@ +services: + wp-cow-lab: + build: + context: . 
+ dockerfile: docker/Dockerfile + container_name: wp-cow-lab + privileged: true + cap_add: + - SYS_ADMIN + devices: + - /dev/fuse:/dev/fuse + security_opt: + - apparmor:unconfined + dns: + - "${WPCOW_DNS1:-1.1.1.1}" + - "${WPCOW_DNS2:-8.8.8.8}" + ports: + - "${WPCOW_HTTP_PORT:-8080}:8080" + environment: + WPCOW_HOME: /root/.wp-cow + WPCOW_NAME: "${WPCOW_NAME:-example}" + WPCOW_SSH: "${WPCOW_SSH:-}" + WPCOW_PATH: "${WPCOW_PATH:-}" + WPCOW_REMOTE_URL: "${WPCOW_REMOTE_URL:-}" + WPCOW_LOCAL_URL: "${WPCOW_LOCAL_URL-}" + WPCOW_SKIP_SCHEMA: "${WPCOW_SKIP_SCHEMA:-0}" + WPCOW_NO_PROBE: "${WPCOW_NO_PROBE:-0}" + WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" + WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" + WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" + WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" + WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-0}" + WPCOW_REMOTE_DB_HELPER: "${WPCOW_REMOTE_DB_HELPER:-1}" + WPCOW_RUNTIME_CODE_PACK: "${WPCOW_RUNTIME_CODE_PACK:-1}" + WPCOW_RUNTIME_CODE_PACK_MAX_MB: "${WPCOW_RUNTIME_CODE_PACK_MAX_MB:-256}" + WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB: "${WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB:-8}" + WPCOW_RUNTIME_CODE_PACK_MAX_FILES: "${WPCOW_RUNTIME_CODE_PACK_MAX_FILES:-20000}" + WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS: "${WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS:-180}" + WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN: "${WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0}" + WPCOW_MATERIALIZE_OPTIONS_TABLE: "${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" + WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-60}" + WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" + WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" + WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" + WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" + WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-0}" + WPCOW_PLUGIN_MODE: "${WPCOW_PLUGIN_MODE:-auto}" + WPCOW_PLUGIN_ADMISSION: 
"${WPCOW_PLUGIN_ADMISSION:-1}" + WPCOW_PLUGIN_ADMISSION_DELAY_SECS: "${WPCOW_PLUGIN_ADMISSION_DELAY_SECS:-20}" + WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS: "${WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS:-15}" + WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS: "${WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS:-0}" + WPCOW_PHP_DISABLE_FUNCTIONS: "${WPCOW_PHP_DISABLE_FUNCTIONS:-exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec}" + WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" + WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" + WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" + WPCOW_REMOTE_METADATA_CACHE_TTL_SECS: "${WPCOW_REMOTE_METADATA_CACHE_TTL_SECS:-3600}" + WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" + WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" + WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" + WPCOW_PHP_WORKERS: "${WPCOW_PHP_WORKERS:-4}" + WPCOW_OPCACHE_VALIDATE_TIMESTAMPS: "${WPCOW_OPCACHE_VALIDATE_TIMESTAMPS:-0}" + WPCOW_WEB_SERVER: "${WPCOW_WEB_SERVER:-frankenphp}" + WPCOW_SPLASH: "${WPCOW_SPLASH:-1}" + WPCOW_LOCAL_ADMIN_PASSWORD: "${WPCOW_LOCAL_ADMIN_PASSWORD:-}" + WPCOW_LOCAL_ADMIN_LOGIN: "${WPCOW_LOCAL_ADMIN_LOGIN:-}" + WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" + WPCOW_HTTP: 0.0.0.0:8080 + SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock + volumes: + - wp-cow-state:/root/.wp-cow + - wp-cow-mounts:/mnt/wp-cow + - type: bind + source: ${HOME}/.ssh + target: /host-ssh + read_only: true + - type: bind + source: /run/host-services/ssh-auth.sock + target: /run/host-services/ssh-auth.sock + +volumes: + wp-cow-state: + wp-cow-mounts: diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile new file mode 100644 index 00000000..b0b2bd1c --- /dev/null +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -0,0 +1,56 @@ 
+FROM rust:1-bookworm AS builder + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + fuse3 \ + libfuse3-dev \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace +COPY Cargo.toml Cargo.lock ./ +COPY src ./src +RUN cargo build --release \ + && cp target/release/wp-cow /usr/local/bin/wp-cow + +FROM dunglas/frankenphp:1-php8.3-bookworm + +ENV DEBIAN_FRONTEND=noninteractive +ENV WPCOW_HOME=/root/.wp-cow + +RUN install-php-extensions mysqli opcache pdo_mysql \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + bash \ + ca-certificates \ + curl \ + fuse3 \ + less \ + libfuse3-dev \ + mariadb-client \ + mariadb-server \ + openssh-client \ + pkg-config \ + tini \ + vim-tiny \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/local/bin/wp-cow /usr/local/bin/wp-cow + +COPY docker/wp-cow-lab-entrypoint /usr/local/bin/wp-cow-lab-entrypoint +COPY docker/wp-cow-lab-check /usr/local/bin/wp-cow-lab-check +COPY docker/wp-cow-lab-clone /usr/local/bin/wp-cow-lab-clone +COPY docker/wp-cow-lab-mount /usr/local/bin/wp-cow-lab-mount +COPY docker/wp-cow-lab-run /usr/local/bin/wp-cow-lab-run +COPY docker/wp-cow-lab-ssh-target /usr/local/bin/wp-cow-lab-ssh-target +COPY docker/wp-cow-lab-dns /usr/local/bin/wp-cow-lab-dns +COPY docker/wp-cow-lab-db-init /usr/local/bin/wp-cow-lab-db-init +COPY docker/wp-cow-lab-cache /usr/local/bin/wp-cow-lab-cache +COPY docker/wp-cow-lab-serve /usr/local/bin/wp-cow-lab-serve +COPY docker/wp-cow-lab-sever /usr/local/bin/wp-cow-lab-sever +RUN chmod +x /usr/local/bin/wp-cow-lab-* + +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] +CMD ["sleep", "infinity"] diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-cache b/experiments/remote-wp-cow/docker/wp-cow-lab-cache new file mode 100755 index 00000000..a2f220e2 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-cache @@ -0,0 +1,45 @@ +#!/usr/bin/env bash 
#!/usr/bin/env bash
set -euo pipefail

# Inspect, clear, or pre-warm the clone's remote-file cache.
# Usage: wp-cow-lab-cache [status|clear|warm-core]

clone_name="${WPCOW_NAME:-example}"
clone_root="${WPCOW_HOME:-/root/.wp-cow}/clones/$clone_name"
cache_dir="$clone_root/file-cache"
mount_dir="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$clone_name}"
action="${1:-status}"

case "$action" in
  status)
    echo "cache directory: $cache_dir"
    if [ -d "$cache_dir" ]; then
      du -sh "$cache_dir" 2>/dev/null || true
      find "$cache_dir" -type f 2>/dev/null | wc -l | awk '{ print "cached files: " $1 }'
    else
      echo "cache directory does not exist yet"
    fi
    ;;
  clear)
    rm -rf "$cache_dir"
    mkdir -p "$cache_dir"
    echo "cleared $cache_dir"
    ;;
  warm-core)
    # Read the WordPress bootstrap files through the FUSE mount so the
    # lower-layer fetch populates the cache before the first web request.
    core_files=(
      index.php
      wp-blog-header.php
      wp-load.php
      wp-settings.php
      wp-includes/version.php
      wp-includes/load.php
      wp-includes/plugin.php
    )
    for rel in "${core_files[@]}"; do
      if [ -e "$mount_dir/$rel" ]; then
        dd if="$mount_dir/$rel" of=/dev/null bs=1M status=none || true
      fi
    done
    "$0" status
    ;;
  *)
    echo "usage: wp-cow-lab-cache [status|clear|warm-core]" >&2
    exit 2
    ;;
esac
#!/usr/bin/env bash
set -euo pipefail

# Create (or forcibly recreate) the wp-cow clone described by the WPCOW_*
# environment, then initialize its local DB schema unless probing/schema
# export was explicitly disabled.

# Abort unless the named environment variable is set and non-empty.
require_env() {
  local var="$1"
  if [ -z "${!var:-}" ]; then
    echo "missing required environment variable: $var" >&2
    exit 2
  fi
}

require_env WPCOW_SSH
require_env WPCOW_PATH
require_env WPCOW_REMOTE_URL
require_env WPCOW_LOCAL_URL

clone="${WPCOW_NAME:-example}"
ssh_target="$(wp-cow-lab-ssh-target)"

clone_args=(
  clone
  --force
  --name "$clone"
  --ssh "$ssh_target"
  --path "$WPCOW_PATH"
  --remote-url "$WPCOW_REMOTE_URL"
  --local-url "$WPCOW_LOCAL_URL"
)

if [ "${WPCOW_NO_PROBE:-0}" = "1" ]; then
  clone_args+=(--no-probe)
fi

if [ "${WPCOW_SKIP_SCHEMA:-0}" = "1" ]; then
  clone_args+=(--skip-schema)
fi

wp-cow "${clone_args[@]}"

# Schema init needs both the probe and the schema export to have happened.
if [ "${WPCOW_SKIP_SCHEMA:-0}" != "1" ] && [ "${WPCOW_NO_PROBE:-0}" != "1" ]; then
  wp-cow init-db "$clone"
else
  echo
  echo "warning: DB schema was not initialized, so wp-cow-lab-run will not boot full WordPress yet." >&2
  echo "run wp-cow-lab-db-init before opening the site in a browser." >&2
fi

echo
echo "clone ready: $clone"
echo "mount files with: wp-cow-lab-mount"
echo "run local PHP with: wp-cow-lab-run"
#!/usr/bin/env bash
set -euo pipefail

# Export the remote schema and load it into the local MariaDB for the
# configured clone. Requires that wp-cow-lab-clone already ran.

clone="${WPCOW_NAME:-example}"
manifest="${WPCOW_HOME:-/root/.wp-cow}/clones/$clone/manifest.json"

if [ ! -f "$manifest" ]; then
  echo "clone '$clone' does not exist yet; run wp-cow-lab-clone first" >&2
  exit 2
fi

echo "exporting remote schema for $clone"
wp-cow export-schema "$clone"

echo "initializing local MariaDB schema for $clone"
wp-cow init-db "$clone"

echo
echo "database schema is ready. Remote read queries will be routed by wp-content/db.php."
#!/usr/bin/env bash
set -euo pipefail

# Container entrypoint: prepare state directories, import host SSH material,
# and ensure the lab MariaDB on 127.0.0.1:33071 is running before handing
# control to the requested command.

export WPCOW_HOME="${WPCOW_HOME:-/root/.wp-cow}"

mkdir -p "$WPCOW_HOME" /mnt/wp-cow /run/mysqld /var/log
chown -R mysql:mysql /run/mysqld /var/lib/mysql

# Copy the host's ~/.ssh (bind-mounted read-only at /host-ssh) so keys and
# config are usable inside the container, then normalize permissions.
mkdir -p /root/.ssh
chmod 700 /root/.ssh
if [ -d /host-ssh ]; then
  cp -a /host-ssh/. /root/.ssh/ 2>/dev/null || true
fi
if [ -f /root/.ssh/config ]; then
  # Linux OpenSSH rejects Apple-specific options such as UseKeychain.
  sed -i.bak -E '/^[[:space:]]*UseKeychain([[:space:]]|$)/Id' /root/.ssh/config
fi
find /root/.ssh -type d -exec chmod 700 {} + 2>/dev/null || true
find /root/.ssh -type f -exec chmod 600 {} + 2>/dev/null || true

# True once the lab MariaDB answers on 127.0.0.1:33071.
db_ready() {
  mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1
}

if [ ! -d /var/lib/mysql/mysql ]; then
  mariadb-install-db \
    --user=mysql \
    --datadir=/var/lib/mysql \
    --auth-root-authentication-method=normal \
    >/var/log/wp-cow-mariadb-install.log 2>&1
fi

if ! db_ready; then
  mariadbd \
    --user=mysql \
    --datadir=/var/lib/mysql \
    --socket=/run/mysqld/mysqld.sock \
    --bind-address=127.0.0.1 \
    --port=33071 \
    --skip-networking=0 \
    --skip-name-resolve \
    --skip-grant-tables \
    >/var/log/wp-cow-mariadb.log 2>&1 &

  # Poll for up to ~20 seconds (80 * 0.25s) while the server starts.
  for _ in $(seq 1 80); do
    if db_ready; then
      break
    fi
    sleep 0.25
  done
fi

if ! db_ready; then
  echo "MariaDB did not start. See /var/log/wp-cow-mariadb.log" >&2
  exit 1
fi

exec "$@"
#!/usr/bin/env bash
set -euo pipefail

# Mount the clone's FUSE filesystem in the foreground.

clone="${WPCOW_NAME:-example}"
target="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$clone}"

mkdir -p "$target"
echo "mounting $clone at $target"
echo "this command stays in the foreground; open another shell with: docker compose exec wp-cow-lab bash"
exec wp-cow mount "$clone" --mountpoint "$target"
#!/usr/bin/env bash
set -euo pipefail

# Clone (if needed) and serve the remote WordPress site in one step.

# Abort unless the named environment variable is set and non-empty.
require_env() {
  local var="$1"
  if [ -z "${!var:-}" ]; then
    echo "missing required environment variable: $var" >&2
    exit 2
  fi
}

require_env WPCOW_SSH
require_env WPCOW_PATH
require_env WPCOW_REMOTE_URL

clone="${WPCOW_NAME:-example}"
mount_dir="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$clone}"
listen="${WPCOW_HTTP:-0.0.0.0:8080}"
local_url="${WPCOW_LOCAL_URL:-http://localhost:${WPCOW_HTTP_PORT:-8080}}"
# If the URL still points at 8080 while a different host port is published,
# prefer the published port so browser-facing links resolve.
if [ "$local_url" = "http://localhost:8080" ] && [ "${WPCOW_HTTP_PORT:-8080}" != "8080" ]; then
  local_url="http://localhost:$WPCOW_HTTP_PORT"
fi
ssh_target="$(wp-cow-lab-ssh-target)"

mkdir -p "$mount_dir"

exec wp-cow serve \
  --name "$clone" \
  --ssh "$ssh_target" \
  --path "$WPCOW_PATH" \
  --remote-url "$WPCOW_REMOTE_URL" \
  --local-url "$local_url" \
  --mountpoint "$mount_dir" \
  --http "$listen"
#!/usr/bin/env bash
set -euo pipefail

# Resolve WPCOW_SSH into a plain SSH destination for the other lab scripts.
#
# Accepted shapes:
#   * "host", "user@host", or a ~/.ssh/config Host alias -> printed as-is;
#   * a pasted OpenSSH command, e.g.
#       ssh -p18765 -i ~/.ssh/id_siteground user@example.com
#     which is materialized as a generated Host alias in ~/.ssh/config;
#   * "-F /path/to/config alias", which adds an Include for that file to
#     ~/.ssh/config and prints the alias.
#
# Prints the destination or alias on stdout; exits 2 on unusable input.

spec="${WPCOW_SSH:-}"
if [ -z "$spec" ]; then
  echo "missing required environment variable: WPCOW_SSH" >&2
  exit 2
fi

# Fast path: no whitespace means the value is already a destination/alias.
if [[ "$spec" != *[[:space:]]* ]]; then
  echo "$spec"
  exit 0
fi

alias_name="${WPCOW_SSH_ALIAS:-wp-cow-target}"
config="${HOME}/.ssh/config"
mkdir -p "${HOME}/.ssh"
touch "$config"
chmod 600 "$config"

# Parse the common OpenSSH command shape people paste from hosting dashboards,
# for example: ssh -p18765 -i ~/.ssh/id_siteground user@example.com
# It also accepts ssh option strings such as: -F /tmp/wp-cow.conf mysite
# This eval is intentionally local to the developer-controlled lab container.
eval "set -- $spec"
if [ "${1:-}" = "ssh" ]; then
  shift
fi

host=""
user=""
port=""
identity_file=""
config_file=""
declare -a options=()

while [ "$#" -gt 0 ]; do
  case "$1" in
    -p)
      port="${2:-}"
      shift 2
      ;;
    -p*)
      port="${1#-p}"
      shift
      ;;
    -i)
      identity_file="${2:-}"
      shift 2
      ;;
    -i*)
      identity_file="${1#-i}"
      shift
      ;;
    -F)
      config_file="${2:-}"
      shift 2
      ;;
    -F*)
      config_file="${1#-F}"
      shift
      ;;
    -l)
      user="${2:-}"
      shift 2
      ;;
    -l*)
      user="${1#-l}"
      shift
      ;;
    -o)
      options+=("${2:-}")
      shift 2
      ;;
    -o*)
      options+=("${1#-o}")
      shift
      ;;
    --)
      shift
      break
      ;;
    -*)
      echo "unsupported SSH option in WPCOW_SSH: $1" >&2
      echo "put complex SSH settings in ~/.ssh/config and set WPCOW_SSH to the Host alias" >&2
      exit 2
      ;;
    *)
      host="$1"
      shift
      ;;
  esac
done

# Anything left after a literal "--" is treated as the destination.
if [ -z "$host" ] && [ "$#" -gt 0 ]; then
  host="$1"
fi

if [ -z "$host" ]; then
  echo "could not find user@host in WPCOW_SSH: $spec" >&2
  exit 2
fi

if [ -n "$config_file" ]; then
  if [ ! -f "$config_file" ]; then
    echo "SSH config from WPCOW_SSH does not exist: $config_file" >&2
    exit 2
  fi
  # Dedup with a fixed-string whole-line match; the previous regex
  # interpolated the unescaped path, so metacharacters in it mis-matched.
  if ! grep -qxF -- "Include $config_file" "$config"; then
    tmp="${config}.tmp"
    {
      echo "Include $config_file"
      cat "$config"
    } > "$tmp"
    mv "$tmp" "$config"
  fi
  chmod 600 "$config"
  echo "$host"
  exit 0
fi

# Split user@host when the user was not already given via -l.
if [[ "$host" == *@* ]]; then
  if [ -z "$user" ]; then
    user="${host%@*}"
  fi
  host="${host#*@}"
fi

# Rewrite ~/.ssh/config: emit a fresh alias block first, then copy every
# existing stanza except a previous definition of the same alias.
tmp="${config}.tmp"
{
  echo "Host $alias_name"
  echo "  HostName $host"
  if [ -n "$user" ]; then
    echo "  User $user"
  fi
  if [ -n "$port" ]; then
    echo "  Port $port"
  fi
  if [ -n "$identity_file" ]; then
    echo "  IdentityFile $identity_file"
    echo "  IdentitiesOnly yes"
  fi
  for option in "${options[@]}"; do
    case "$option" in
      *=*)
        # -o Key=Value form.
        echo "  ${option%%=*} ${option#*=}"
        ;;
      *[[:space:]]*)
        # -o "Key value" form; the old key/value split dropped these silently.
        echo "  $option"
        ;;
      *)
        # Bare word with neither '=' nor a value; warn instead of dropping.
        echo "ignoring malformed -o option in WPCOW_SSH: $option" >&2
        ;;
    esac
  done
  echo
  awk -v alias="$alias_name" '
    BEGIN { skip = 0 }
    /^[[:space:]]*Host[[:space:]]+/ {
      skip = 0
      for (i = 2; i <= NF; i++) {
        if ($i == alias) {
          skip = 1
        }
      }
    }
    skip == 0 { print }
  ' "$config"
} > "$tmp"
mv "$tmp" "$config"

chmod 600 "$config"
echo "$alias_name"
# ---- live-acceptance helper functions ----

# Print an error and abort the acceptance run.
fail() {
  echo "live-acceptance: $*" >&2
  exit 1
}

# Abort unless the named command is on PATH.
need_cmd() {
  command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1"
}

# Abort unless the named environment variable is set and non-empty.
require_env() {
  local var="$1"
  if [ -z "${!var:-}" ]; then
    fail "missing required environment variable: $var"
  fi
}

# Poll (every 0.2s, via bash's /dev/tcp) until a TCP connect to host:port
# succeeds; returns 1 once the timeout (default 20s) elapses.
wait_for_tcp() {
  local host="$1"
  local port="$2"
  local timeout="${3:-20}"
  local deadline
  deadline=$(( $(date +%s) + timeout ))
  while :; do
    if (exec 3<>"/dev/tcp/$host/$port") >/dev/null 2>&1; then
      exec 3>&-
      exec 3<&-
      return 0
    fi
    if [ "$(date +%s)" -ge "$deadline" ]; then
      return 1
    fi
    sleep 0.2
  done
}

# Poll until a TCP connect to host:port FAILS; returns 1 if the port is
# still accepting connections after the timeout (default 20s).
wait_for_tcp_closed() {
  local host="$1"
  local port="$2"
  local timeout="${3:-20}"
  local deadline
  deadline=$(( $(date +%s) + timeout ))
  while :; do
    if ! (exec 3<>"/dev/tcp/$host/$port") >/dev/null 2>&1; then
      return 0
    fi
    exec 3>&-
    exec 3<&-
    if [ "$(date +%s)" -ge "$deadline" ]; then
      return 1
    fi
    sleep 0.2
  done
}

# Best-effort kill of processes whose command line references this run's
# WORK_DIR, sparing the current shell and the MariaDB tooling.
kill_lingering_runtime_processes() {
  local pid args
  while read -r pid args; do
    [ -n "${pid:-}" ] || continue
    [ "$pid" = "$$" ] && continue
    case "$args" in
      *"$WORK_DIR"*)
        case "$args" in
          *mariadbd*|*mariadb-install-db*|*mysqladmin*|*mysql\ *) ;;
          *) kill "$pid" >/dev/null 2>&1 || true ;;
        esac
        ;;
    esac
  done < <(ps -eo pid=,args=)
}

# Unmount the acceptance FUSE mountpoint: fusermount3 then fusermount,
# normal then lazy; never fails.
unmount_acceptance_mountpoint() {
  [ -n "${MOUNTPOINT:-}" ] || return 0
  fusermount3 -u "$MOUNTPOINT" >/dev/null 2>&1 ||
    fusermount3 -uz "$MOUNTPOINT" >/dev/null 2>&1 ||
    fusermount -u "$MOUNTPOINT" >/dev/null 2>&1 ||
    fusermount -uz "$MOUNTPOINT" >/dev/null 2>&1 ||
    true
}

# Fetch a URL into a file; succeeds only on a 2xx/3xx final status after
# following redirects.
http_body() {
  local url="$1"
  local output="$2"
  local max_time="${3:-60}"
  local status
  status="$(curl -L -sS --max-time "$max_time" --connect-timeout 5 \
    -o "$output" -w '%{http_code}' "$url")"
  case "$status" in
    2*|3*) return 0 ;;
    *) echo "HTTP $status for $url" >&2; return 1 ;;
  esac
}
rg -qi 'Fatal error|There has been a critical error|WordPress › Error|WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error|wp-cow did not load the remote site' "$file"; then + sed -n '1,100p' "$file" >&2 + fail "$label returned installer, fatal error, or wp-cow runtime error" + fi +} + +mysql_exec() { + mysql --protocol=TCP -h127.0.0.1 -P33071 -uroot "$@" +} + +mysql_scalar() { + mysql_exec --batch --raw --skip-column-names --execute "$1" +} + +remote_post_count() { + local title="$1" + local code + code=' +error_reporting(0); +if (!defined("WP_INSTALLING")) { define("WP_INSTALLING", true); } +require_once rtrim(getcwd(), "/") . "/wp-load.php"; +global $wpdb; +$title = $argv[1]; +$count = (int) $wpdb->get_var($wpdb->prepare("SELECT COUNT(*) FROM {$wpdb->posts} WHERE post_title = %s", $title)); +echo $count, "\n"; +' + HOME="$SSH_HOME" ssh "$SSH_TARGET" "cd '$WPCOW_PATH' && php -r $(printf '%q' "$code") -- $(printf '%q' "$title")" +} + +cleanup() { + set +e + if [ -n "${SERVE_PID:-}" ] && kill -0 "$SERVE_PID" >/dev/null 2>&1; then + kill "$SERVE_PID" >/dev/null 2>&1 || true + wait "$SERVE_PID" >/dev/null 2>&1 || true + fi + if [ -n "${WORK_DIR:-}" ]; then + for pid in $(pgrep -f "$WORK_DIR" 2>/dev/null || true); do + if [ "$pid" != "$$" ]; then + kill "$pid" >/dev/null 2>&1 || true + fi + done + fi + unmount_acceptance_mountpoint + if [ -n "${MYSQL_PID:-}" ] && kill -0 "$MYSQL_PID" >/dev/null 2>&1; then + kill "$MYSQL_PID" >/dev/null 2>&1 || true + wait "$MYSQL_PID" >/dev/null 2>&1 || true + fi + if [ "${WPCOW_KEEP_ACCEPTANCE_STATE:-0}" != "1" ] && [ -n "${WORK_DIR:-}" ]; then + rm -rf "$WORK_DIR" + elif [ -n "${WORK_DIR:-}" ]; then + echo "live-acceptance: kept state at $WORK_DIR" + fi +} +trap cleanup EXIT + +require_env WPCOW_SSH +require_env WPCOW_PATH +require_env WPCOW_REMOTE_URL + +need_cmd cargo +need_cmd curl +need_cmd mariadb-install-db +need_cmd mariadbd +need_cmd mysql +need_cmd mysqladmin +need_cmd php +need_cmd ssh +need_cmd fusermount3 
+ +cargo build --locked + +WP_COW_BIN="${WP_COW_BIN:-$ROOT/target/debug/wp-cow}" +NAME="${WPCOW_NAME:-live-acceptance}" +HTTP_PORT="${WPCOW_HTTP_PORT:-9481}" +HTTP_ADDR="${WPCOW_HTTP:-127.0.0.1:${HTTP_PORT}}" +LOCAL_URL="${WPCOW_LOCAL_URL:-http://127.0.0.1:${HTTP_PORT}}" +ADMIN_PASSWORD="${WPCOW_LOCAL_ADMIN_PASSWORD:-8u239huiwdsj91das}" +EXPECT_TEXT="${WPCOW_EXPECT_TEXT:-}" +WORK_DIR="${WPCOW_ACCEPTANCE_WORK_DIR:-$(mktemp -d /tmp/wp-cow-live-acceptance.XXXXXX)}" +STATE_DIR="$WORK_DIR/state" +MOUNTPOINT="$WORK_DIR/mount" +UPPER_DIR="$STATE_DIR/clones/$NAME/upper" +MYSQL_DATA="$WORK_DIR/mysql-data" +MYSQL_SOCKET="$WORK_DIR/mysql.sock" +MYSQL_LOG="$WORK_DIR/mariadb.log" +MYSQL_INSTALL_LOG="$WORK_DIR/mariadb-install.log" +SERVE_LOG="$WORK_DIR/serve.log" +COOKIE_JAR="$WORK_DIR/cookies.txt" +TITLE="WP COW Local Only $(date +%s)-$$" + +mkdir -p "$MOUNTPOINT" "$STATE_DIR" + +SSH_TARGET="$WPCOW_SSH" +SSH_HOME="$HOME" +if [[ "$WPCOW_SSH" == *[[:space:]]* ]]; then + # Accept the same pasted SSH command shape as the Docker helper by creating a + # temporary OpenSSH host alias for this acceptance run. 
+ SSH_TARGET="wp-cow-live-acceptance" + SSH_HOME="$WORK_DIR/ssh-home" + SSH_CONFIG="$SSH_HOME/.ssh/config" + mkdir -p "$SSH_HOME/.ssh" + chmod 700 "$SSH_HOME/.ssh" + eval "set -- $WPCOW_SSH" + [ "${1:-}" = "ssh" ] && shift + host="" + user="" + port="" + identity="" + while [ "$#" -gt 0 ]; do + case "$1" in + -p) port="${2:-}"; shift 2 ;; + -p*) port="${1#-p}"; shift ;; + -i) identity="${2:-}"; shift 2 ;; + -i*) identity="${1#-i}"; shift ;; + -l) user="${2:-}"; shift 2 ;; + -l*) user="${1#-l}"; shift ;; + -o) shift 2 ;; + -o*) shift ;; + ssh) shift ;; + --) shift; break ;; + -*) fail "unsupported SSH option in WPCOW_SSH for live acceptance: $1" ;; + *) host="$1"; shift ;; + esac + done + if [[ "$host" == *@* ]]; then + [ -z "$user" ] && user="${host%@*}" + host="${host#*@}" + fi + [ -n "$host" ] || fail "could not parse SSH host from WPCOW_SSH" + if [[ "$identity" == "~/"* ]]; then + identity="$HOME/${identity#~/}" + fi + { + echo "Host $SSH_TARGET" + echo " HostName $host" + [ -n "$user" ] && echo " User $user" + [ -n "$port" ] && echo " Port $port" + [ -n "$identity" ] && echo " IdentityFile $identity" + [ -n "$identity" ] && echo " IdentitiesOnly yes" + echo " BatchMode yes" + echo " StrictHostKeyChecking accept-new" + } > "$SSH_CONFIG" + chmod 600 "$SSH_CONFIG" +fi + +if mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + fail "port 33071 already has a MySQL server; stop it or run inside the Docker lab" +fi + +MARIADBD_PATH="$(readlink -f "$(command -v mariadbd)")" +BASE_DIR="$(cd "$(dirname "$MARIADBD_PATH")/.." && pwd)" +if ! 
mariadb-install-db \ + "--basedir=$BASE_DIR" \ + "--datadir=$MYSQL_DATA" \ + --innodb-log-file-size=16M \ + --innodb-buffer-pool-size=64M \ + --auth-root-authentication-method=normal \ + --skip-test-db \ + >"$MYSQL_INSTALL_LOG" 2>&1; then + cat "$MYSQL_INSTALL_LOG" >&2 + fail "mariadb-install-db failed" +fi + +mariadbd \ + --no-defaults \ + "--basedir=$BASE_DIR" \ + "--datadir=$MYSQL_DATA" \ + "--socket=$MYSQL_SOCKET" \ + --port=33071 \ + --bind-address=127.0.0.1 \ + "--pid-file=$WORK_DIR/mysql.pid" \ + --innodb-log-file-size=16M \ + --innodb-buffer-pool-size=64M \ + --aria-pagecache-buffer-size=8M \ + --key-buffer-size=8M \ + --skip-networking=0 \ + --skip-grant-tables \ + >"$MYSQL_LOG" 2>&1 & +MYSQL_PID="$!" + +for _ in $(seq 1 100); do + if mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + break + fi + sleep 0.2 +done +mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1 || + fail "temporary MariaDB did not start; see $MYSQL_LOG" + +before_remote="$(remote_post_count "$TITLE" | tr -d '[:space:]')" +[ "$before_remote" = "0" ] || fail "remote already has unexpected acceptance title" + +WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ +WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ +WPCOW_PROXY_FRONTEND=0 \ +WPCOW_REMOTE_DB_HELPER="${WPCOW_REMOTE_DB_HELPER:-1}" \ +WPCOW_RUNTIME_CODE_PACK="${WPCOW_RUNTIME_CODE_PACK:-1}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_MB:-256}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB:-8}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILES="${WPCOW_RUNTIME_CODE_PACK_MAX_FILES:-20000}" \ +WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS="${WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS:-180}" \ +WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN="${WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0}" \ +WPCOW_MATERIALIZE_OPTIONS_TABLE="${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" \ +WPCOW_REMOTE_QUERY_CACHE=1 \ 
+WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS="${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" \ +WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ +WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ +WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ +WPCOW_PLUGIN_MODE="${WPCOW_PLUGIN_MODE:-auto}" \ +WPCOW_PLUGIN_ADMISSION="${WPCOW_PLUGIN_ADMISSION:-1}" \ +WPCOW_PLUGIN_ADMISSION_DELAY_SECS="${WPCOW_PLUGIN_ADMISSION_DELAY_SECS:-20}" \ +WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS="${WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS:-15}" \ +WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ +HOME="$SSH_HOME" \ +"$WP_COW_BIN" serve \ + --state-dir "$STATE_DIR" \ + --name "$NAME" \ + --ssh "$SSH_TARGET" \ + --path "$WPCOW_PATH" \ + --remote-url "$WPCOW_REMOTE_URL" \ + --local-url "$LOCAL_URL" \ + --mountpoint "$MOUNTPOINT" \ + --http "$HTTP_ADDR" \ + >"$SERVE_LOG" 2>&1 & +SERVE_PID="$!" + +host="${HTTP_ADDR%:*}" +port="${HTTP_ADDR##*:}" +wait_for_tcp "$host" "$port" 45 || { + tail -n 200 "$SERVE_LOG" >&2 || true + fail "wp-cow server did not open $HTTP_ADDR" +} + +first_splash="$WORK_DIR/first-splash.html" +http_body "$LOCAL_URL/" "$first_splash" 10 || { + tail -n 200 "$SERVE_LOG" >&2 || true + fail "first splash/progress request failed" +} +deny_runtime_error_body "$first_splash" "first splash request" +pack_wait="${WPCOW_RUNTIME_CODE_PACK_WAIT_SECS:-120}" +pack_started="$(date +%s)" +progress_path="$STATE_DIR/clones/$NAME/file-cache/progress.json" +while true; do + if [ -f "$progress_path" ]; then + progress_json="$(cat "$progress_path")" + else + progress_json="$(curl -sS --max-time 5 --connect-timeout 2 "$LOCAL_URL/__wp-cow/progress" || true)" + fi + phase="$(php -r '$j=json_decode(stream_get_contents(STDIN), true); echo is_array($j) && isset($j["phase"]) ? 
$j["phase"] : "";' <<<"$progress_json")" + if [ "${WPCOW_RUNTIME_CODE_PACK:-1}" = "0" ] && [ -z "$phase" ]; then + break + fi + case "$phase" in + runtime-code-pack-starting|runtime-code-pack|"") + if [ $(( $(date +%s) - pack_started )) -ge "$pack_wait" ]; then + echo "$progress_json" >&2 + fail "runtime code pack did not finish within ${pack_wait}s" + fi + sleep 0.5 + ;; + *) break ;; + esac +done + +first_body="$WORK_DIR/first.html" +actual_timeout="${WPCOW_ACTUAL_TIMEOUT_SECS:-180}" +http_body "$LOCAL_URL/?__wp_cow_bypass_splash=1" "$first_body" "$actual_timeout" || { + tail -n 200 "$SERVE_LOG" >&2 || true + fail "first WordPress request failed" +} +deny_runtime_error_body "$first_body" "first request" +if [ -n "$EXPECT_TEXT" ]; then + rg -q "$EXPECT_TEXT" "$first_body" || fail "first response did not contain WPCOW_EXPECT_TEXT=$EXPECT_TEXT" +fi + +second_body="$WORK_DIR/second.html" +http_body "$LOCAL_URL/?__wp_cow_bypass_splash=1" "$second_body" "${WPCOW_SECOND_TIMEOUT_SECS:-60}" || + fail "second cached WordPress request failed" +deny_runtime_error_body "$second_body" "second cached WordPress request" + +php_create="$WORK_DIR/create-local-page.php" +cat > "$php_create" <<'PHP' + $title, + 'post_content' => 'local-only acceptance content', + 'post_status' => 'publish', + 'post_type' => 'page', +), true); +if (is_wp_error($post_id)) { + fwrite(STDERR, $post_id->get_error_message() . "\n"); + exit(1); +} +echo 'WPCOW_POST_ID=' . (int) $post_id . 
"\n"; +PHP +mkdir -p "$UPPER_DIR" +cp "$php_create" "$UPPER_DIR/.wp-cow-create-local-page.php" +post_output="$( + cd "$MOUNTPOINT" && + WPCOW_ACCEPTANCE_TITLE="$TITLE" \ + WPCOW_ACCEPTANCE_HTTP_HOST="${LOCAL_URL#http://}" \ + php .wp-cow-create-local-page.php +)" +printf '%s\n' "$post_output" > "$WORK_DIR/create-local-page.out" +post_id="$(sed -n 's/^WPCOW_POST_ID=//p' "$WORK_DIR/create-local-page.out" | tail -n 1 | tr -dc '0-9')" +[ -n "$post_id" ] || fail "local wp_insert_post did not return a post id" + +local_body="$WORK_DIR/local-page.html" +http_body "$LOCAL_URL/?p=$post_id&__wp_cow_bypass_splash=1" "$local_body" 30 || + fail "local-only page did not render" +deny_runtime_error_body "$local_body" "local-only page request" +rg -q "$TITLE" "$local_body" || fail "local-only page response did not contain its title" + +after_remote="$(remote_post_count "$TITLE" | tr -d '[:space:]')" +[ "$after_remote" = "0" ] || fail "local-only page title appeared in remote database" + +sever_log="$WORK_DIR/sever.log" +HOME="$SSH_HOME" \ +"$WP_COW_BIN" sever "$NAME" \ + --state-dir "$STATE_DIR" \ + --admin-password "$ADMIN_PASSWORD" \ + >"$sever_log" 2>&1 || { + cat "$sever_log" >&2 + fail "wp-cow sever failed" + } +admin_user="$(sed -n "s/.*set local administrator password for '\([^']*\)'.*/\1/p" "$sever_log" | tail -n 1)" +[ -n "$admin_user" ] || fail "could not determine local admin user from sever output" + +kill "$SERVE_PID" >/dev/null 2>&1 || true +wait "$SERVE_PID" >/dev/null 2>&1 || true +SERVE_PID="" +kill_lingering_runtime_processes +wait_for_tcp_closed "$host" "$port" 30 || + fail "old web server did not release $HTTP_ADDR before offline restart" +wait_for_tcp_closed 127.0.0.1 39070 30 || + fail "old control server did not release 127.0.0.1:39070 before offline restart" +wait_for_tcp_closed 127.0.0.1 33070 30 || + fail "old MySQL proxy did not release 127.0.0.1:33070 before offline restart" +wait_for_tcp_closed 127.0.0.1 33072 30 || true +unmount_acceptance_mountpoint 
+ +manifest="$STATE_DIR/clones/$NAME/manifest.json" +php -r '$p=$argv[1]; $j=json_decode(file_get_contents($p), true); $j["ssh"]="wp-cow-offline-should-not-connect"; file_put_contents($p, json_encode($j, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES) . "\n");' "$manifest" + +WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ +WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ +WPCOW_REMOTE_DB_HELPER="${WPCOW_REMOTE_DB_HELPER:-1}" \ +WPCOW_RUNTIME_CODE_PACK="${WPCOW_RUNTIME_CODE_PACK:-1}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_MB:-256}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB:-8}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILES="${WPCOW_RUNTIME_CODE_PACK_MAX_FILES:-20000}" \ +WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS="${WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS:-180}" \ +WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN="${WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0}" \ +WPCOW_MATERIALIZE_OPTIONS_TABLE="${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" \ +WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ +WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ +WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ +WPCOW_PLUGIN_MODE="${WPCOW_PLUGIN_MODE:-auto}" \ +WPCOW_PLUGIN_ADMISSION="${WPCOW_PLUGIN_ADMISSION:-1}" \ +WPCOW_PLUGIN_ADMISSION_DELAY_SECS="${WPCOW_PLUGIN_ADMISSION_DELAY_SECS:-20}" \ +WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS="${WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS:-15}" \ +WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ +HOME="$SSH_HOME" \ +"$WP_COW_BIN" run "$NAME" \ + --state-dir "$STATE_DIR" \ + --mountpoint "$MOUNTPOINT" \ + --http "$HTTP_ADDR" \ + >"$SERVE_LOG.offline" 2>&1 & +SERVE_PID="$!" 
+wait_for_tcp "$host" "$port" 30 || { + tail -n 200 "$SERVE_LOG.offline" >&2 || true + fail "offline wp-cow server did not open $HTTP_ADDR" +} + +offline_body="$WORK_DIR/offline-local-page.html" +http_body "$LOCAL_URL/?p=$post_id&__wp_cow_bypass_splash=1" "$offline_body" 30 || + fail "offline local-only page did not render" +deny_runtime_error_body "$offline_body" "offline local-only page request" +rg -q "$TITLE" "$offline_body" || fail "offline refresh did not use local materialized post" + +login_body="$WORK_DIR/login.html" +login_status="$(curl -L -sS --max-time 30 --connect-timeout 5 \ + -c "$COOKIE_JAR" -b "$COOKIE_JAR" \ + -d "log=$admin_user" \ + -d "pwd=$ADMIN_PASSWORD" \ + -d "wp-submit=Log In" \ + -d "redirect_to=$LOCAL_URL/wp-admin/" \ + -d "testcookie=1" \ + -o "$login_body" \ + -w '%{http_code}' \ + "$LOCAL_URL/wp-login.php")" +case "$login_status" in + 2*|3*) ;; + *) fail "local admin login returned HTTP $login_status" ;; +esac +deny_runtime_error_body "$login_body" "local admin login" +rg -q 'wordpress_logged_in' "$COOKIE_JAR" || fail "local admin login did not set wordpress_logged_in cookie" + +admin_body="$WORK_DIR/admin.html" +http_status="$(curl -L -sS --max-time 30 --connect-timeout 5 \ + -c "$COOKIE_JAR" -b "$COOKIE_JAR" \ + -o "$admin_body" \ + -w '%{http_code}' \ + "$LOCAL_URL/wp-admin/")" +case "$http_status" in + 2*|3*) ;; + *) fail "wp-admin returned HTTP $http_status after login" ;; +esac +if rg -qi ']+id="loginform"|name="loginform"' "$admin_body"; then + fail "wp-admin still shows login form after local admin login" +fi +deny_runtime_error_body "$admin_body" "wp-admin" +if ! 
rg -q 'id="wpbody-content"|id="dashboard-widgets"|wp-admin-bar' "$admin_body"; then + sed -n '1,120p' "$admin_body" >&2 + fail "wp-admin response did not look like an authenticated dashboard" +fi + +cache_files="$(find "$STATE_DIR/clones/$NAME/file-cache" -type f | wc -l | tr -d ' ')" +cache_bytes="$(du -sb "$STATE_DIR/clones/$NAME/file-cache" | awk '{print $1}')" +if [ -d "$STATE_DIR/clones/$NAME/file-cache/mirror/wp-content/uploads" ]; then + fail "uploads directory was mirrored into the file cache" +fi + +cat <&2 + exit 1 +} + +need_pattern() { + local file="$1" + local pattern="$2" + local label="$3" + rg -q "$pattern" "$file" || fail "$label missing in $file" +} + +deny_pattern() { + local file="$1" + local pattern="$2" + local label="$3" + if rg -q "$pattern" "$file"; then + fail "$label found in $file" + fi +} + +run_exact_test() { + local test_name="$1" + cargo test --locked "$test_name" -- --exact --nocapture +} + +run_exact_ignored_test() { + local test_name="$1" + cargo test --locked "$test_name" -- --exact --ignored --nocapture +} + +echo "== full Rust/PHP unit suite ==" +cargo test --locked + +echo "== targeted behavior proofs ==" +run_exact_test overlay::tests::lazy_remote_file_is_cached_and_survives_remote_loss +run_exact_test overlay::tests::stat_prefetched_bytes_are_reused_without_remote_read +run_exact_test overlay::tests::cached_metadata_refreshes_when_another_overlay_appends_journal +run_exact_ignored_test remote::tests::stat_prefetch_returns_small_file_bytes_from_helper +run_exact_ignored_test remote::tests::prefetch_dir_batches_only_runtime_file_types +run_exact_ignored_test remote::tests::runtime_code_pack_streams_bounded_runtime_files +run_exact_test cli::tests::offline_core_runtime_cache_is_bounded_to_wordpress_core +run_exact_test plugin_policy::tests::policy_starts_auto_with_no_allowed_plugins +run_exact_test plugin_policy::tests::candidate_policy_allows_one_extra_plugin +run_exact_test 
runtime_cache::tests::runtime_code_roots_are_bounded_to_core_theme_and_active_plugins +run_exact_test runtime_cache::tests::runtime_code_roots_respect_disabled_plugins +run_exact_test overlay::tests::cached_only_copy_up_uses_materialized_files_without_remote +run_exact_test fusefs::tests::offline_readdir_uses_cached_remote_metadata_without_remote +run_exact_test fusefs::tests::remote_stat_metadata_survives_severed_mode_without_remote +run_exact_test fusefs::tests::remote_missing_metadata_survives_daemon_restart +run_exact_test fusefs::tests::stat_prefetch_is_limited_to_runtime_read_files +run_exact_test generate::tests::router_splash_and_progress_smoke_responds_quickly +run_exact_test db::tests::remote_query_cache_round_trips_safe_read_results +run_exact_test db::tests::dirty_row_overlay_tables_are_local_state +run_exact_test row_cow::tests::select_materializes_remote_rows_for_later_offline_reads +run_exact_test row_cow::tests::primary_key_single_row_selects_allow_safe_order_and_limit_clauses +run_exact_test row_cow::tests::local_insert_is_not_sent_to_remote_and_appears_in_merged_select +run_exact_test row_cow::tests::update_copy_up_fetches_only_affected_primary_keys +run_exact_test row_cow::tests::delete_tombstone_hides_remote_row_from_merged_selects +run_exact_test run::tests::frankenphp_routes_wp_admin_directory_to_index +run_exact_test run::tests::frankenphp_routes_installer_paths_through_runtime_guard +run_exact_test run::tests::web_runtime_disables_common_plugin_side_effect_primitives +run_exact_test run::tests::web_runtime_defaults_to_no_opcache_timestamp_revalidation +run_exact_test sql::tests::extract_tables_preserves_wordpress_table_case_for_proxy_cow +run_exact_ignored_test generate::tests::runtime_cow_harness_proves_admin_login_local_mutation_and_offline_refresh +run_exact_ignored_test generate::tests::production_run_harness_proves_fuse_rust_control_and_offline_refresh + +echo "== implementation invariants ==" +need_pattern src/cli.rs 'Command::Serve' 
"one-command serve subcommand" +need_pattern src/cli.rs 'WPCOW_MATERIALIZE_OPTIONS_TABLE' "serve materializes bounded WordPress options table for plugin bootstrap" +need_pattern src/cli.rs 'Command::Sever' "sever/offline subcommand" +need_pattern src/cli.rs 'cache_offline_core_runtime' "offline login/admin core runtime cache" +need_pattern src/cli.rs 'wp-content/uploads' "offline core runtime cache excludes uploads" +need_pattern src/config.rs 'offline\.json' "offline marker" +need_pattern src/run.rs 'WPCOW_WEB_SERVER' "web-server selection" +need_pattern src/run.rs 'falling back to PHP' "FrankenPHP unavailable fallback" +need_pattern src/run.rs 'start_php_dev_server' "PHP dev-server fallback" +need_pattern src/run.rs '@wpCowInstaller path /wp-admin/install\.php /wp-admin/setup-config\.php' "FrankenPHP installer guard route" +need_pattern src/run.rs '__wp_cow_installer_guard=1' "FrankenPHP installer guard router flag" +need_pattern src/fusefs.rs 'clone is severed and file is not cached locally' "offline cached-file guard" +need_pattern src/fusefs.rs 'copy_up_cached_only' "offline write-open cached-only copy-up" +need_pattern src/fusefs.rs 'put_cached_entry\(rel, &entry\)' "FUSE stat metadata persistence" +need_pattern src/fusefs.rs 'put_cached_missing' "FUSE missing metadata persistence" +need_pattern src/overlay.rs 'clone is severed and writable lower file is not cached locally' "offline write-open remote guard" +need_pattern src/overlay.rs 'missing\.json' "persistent missing metadata cache" +need_pattern src/control.rs 'clone is severed from the remote database' "offline remote-DB guard" +need_pattern src/generate.rs 'will not fall back to the empty local schema' "installer/runtime failure guard" +need_pattern src/generate.rs 'wp_cow_looks_like_installer' "installer response detector" +need_pattern src/generate.rs '__wp_cow_installer_guard' "direct installer route guard" +need_pattern src/generate.rs "'1' !== getenv\\( 'WPCOW_PROXY_FRONTEND' \\)" "local-first 
frontend default" +need_pattern src/db.rs 'cached_remote_readonly_query' "Rust remote read cache for MySQL proxy/control" +need_pattern src/db.rs 'dirty_tables' "dirty row-overlay table routing state" +need_pattern src/generate.rs 'cow_cached_remote_read_is_safe_without_control' "PHP cached remote read fast path" +need_pattern src/generate.rs 'cow_safe_local_read_without_control' "PHP local read fast path for materialized runtime data" +need_pattern src/remote.rs 'WPCOW_REMOTE_DB_TUNNEL", false' "remote DB SSH tunnel is opt-in" +need_pattern src/remote.rs 'WPCOW_REMOTE_DB_HELPER", true' "remote DB lower reads use persistent helper by default" +need_pattern src/remote.rs 'remote_db_helper_php' "remote DB helper keeps one read-only mysqli session open" +need_pattern src/remote.rs 'is_remote_db_connection_lost' "remote DB helper reconnects after idle connection loss" +need_pattern src/remote.rs 'WPCOW_REFUSED_WRITE' "remote DB helper refuses write-shaped SQL" +need_pattern src/remote.rs 'runtime_code_pack_php' "remote runtime code pack streams bounded executable files" +need_pattern src/runtime_cache.rs 'warm_runtime_code_cache' "one-command runtime code warmup" +need_pattern src/runtime_cache.rs 'WPCOW_RUNTIME_CODE_PACK_MAX_MB' "runtime code cache has a byte cap" +need_pattern src/runtime_cache.rs 'warm_runtime_code_cache_with_admin' "sever path explicitly warms admin runtime" +need_pattern src/runtime_cache.rs 'wp-content/uploads' "runtime code cache excludes uploads" +need_pattern src/runtime_cache.rs 'active_plugins' "runtime code cache includes active plugin roots" +need_pattern src/plugin_policy.rs 'PluginPolicy' "plugin admission policy state" +need_pattern src/run.rs 'WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS' "bounded plugin admission timeout" +need_pattern src/run.rs 'WPCOW_PLUGIN_POLICY_FILE' "candidate plugin policy smoke override" +need_pattern src/generate.rs 'WPCOW_PLUGIN_POLICY_FILE' "generated plugin policy path" +need_pattern src/cli.rs 'uploads/media 
remain lazy' "serve explains media remains lazy after runtime code pack" +need_pattern src/fusefs.rs 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB' "FUSE stat path can prefetch small file bytes" +need_pattern src/fusefs.rs 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB' "FUSE batches same-directory runtime files with a byte cap" +need_pattern src/remote.rs '\$op === "prefetch_dir"' "remote file helper supports bounded directory batch reads" +need_pattern src/overlay.rs 'put_cached_file_bytes' "stat-prefetched file bytes are stored in the normal file cache" +need_pattern src/overlay.rs 'metadata_journal_len_on_disk' "mounted FUSE metadata view refreshes runtime cache journal writes" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_REMOTE_DB_HELPER="\$\{WPCOW_REMOTE_DB_HELPER:-1\}"' "live acceptance runs through persistent remote DB helper" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_RUNTIME_CODE_PACK="\$\{WPCOW_RUNTIME_CODE_PACK:-1\}"' "live acceptance runs through bounded runtime code cache" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_MATERIALIZE_OPTIONS_TABLE="\$\{WPCOW_MATERIALIZE_OPTIONS_TABLE:-1\}"' "live acceptance materializes options table" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "live acceptance keeps experimental stat prefetch off by default" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "live acceptance keeps experimental sibling prefetch off by default" +need_pattern src/run.rs 'WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS' "plugin side-effect escape hatch is explicit" +need_pattern src/run.rs 'disable_functions' "PHP side-effect functions are disabled by default" +need_pattern src/run.rs 'stream_socket_client' "raw plugin socket egress is disabled by default" +need_pattern src/run.rs 'curl_exec' "direct plugin cURL egress is disabled by default" +need_pattern src/run.rs 
'WPCOW_OPCACHE_VALIDATE_TIMESTAMPS' "OPcache timestamp validation is configurable" +need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" +need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" +need_pattern src/row_cow.rs 'LocalOnlyInsert' "local-only content mutation path" +need_pattern src/generate.rs 'production_run_harness_proves_fuse_rust_control_and_offline_refresh' "strict production FUSE/control harness" +need_pattern src/generate.rs 'run_site_with_shutdown' "strict harness production run entry" +need_pattern src/generate.rs 'install_fake_ssh' "strict harness fake SSH remote" +need_pattern src/generate.rs 'read_line_count\(&fake_ssh_log\)' "strict harness offline no-SSH assertion" +need_pattern compose.yaml '\$\{WPCOW_HTTP_PORT:-8080\}:8080' "Docker host HTTP port exposure" +need_pattern compose.yaml 'WPCOW_HTTP: 0\.0\.0\.0:8080' "Docker in-container HTTP listener" +need_pattern compose.yaml 'WPCOW_REMOTE_DB_HELPER: "\$\{WPCOW_REMOTE_DB_HELPER:-1\}"' "Docker compose defaults persistent DB helper on" +need_pattern compose.yaml 'WPCOW_RUNTIME_CODE_PACK: "\$\{WPCOW_RUNTIME_CODE_PACK:-1\}"' "Docker compose defaults bounded runtime code cache on" +need_pattern compose.yaml 'WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN: "\$\{WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0\}"' "Docker compose keeps admin pack out of frontend warmup" +need_pattern compose.yaml 'WPCOW_MATERIALIZE_OPTIONS_TABLE: "\$\{WPCOW_MATERIALIZE_OPTIONS_TABLE:-1\}"' "Docker compose defaults options table materialization on" +need_pattern compose.yaml 'WPCOW_PLUGIN_MODE: "\$\{WPCOW_PLUGIN_MODE:-auto\}"' "Docker compose defaults plugin admission mode" +need_pattern compose.yaml 'WPCOW_PLUGIN_ADMISSION: "\$\{WPCOW_PLUGIN_ADMISSION:-1\}"' "Docker compose defaults plugin admission on" +need_pattern compose.yaml 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "Docker compose defaults experimental stat prefetch off" +need_pattern 
compose.yaml 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "Docker compose defaults experimental sibling prefetch off" +need_pattern .dockerignore '^/target/$' "Docker build context target exclusion" +need_pattern .dockerignore '^/\.env$' "Docker build context local env exclusion" +need_pattern .dockerignore '^!/\.env\.example$' "Docker build context env example inclusion" +need_pattern docker/wp-cow-lab-serve 'wp-cow serve' "Docker one-command serve wrapper" +need_pattern docker/wp-cow-lab-sever 'WPCOW_LOCAL_ADMIN_PASSWORD' "Docker local admin override wiring" +need_pattern .env.example '^WPCOW_HTTP_PORT=9481$' "Docker lab example host HTTP port" +need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example FrankenPHP preference" +need_pattern .env.example '^WPCOW_REMOTE_DB_TUNNEL=0$' "Docker lab example disables remote DB tunnel by default" +need_pattern .env.example '^WPCOW_REMOTE_DB_HELPER=1$' "Docker lab example uses persistent remote DB helper" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK=1$' "Docker lab example uses bounded runtime code cache" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_MAX_MB=256$' "Docker lab example caps runtime code cache" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_MAX_FILES=20000$' "Docker lab example allows large active plugin sets within cap" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN=0$' "Docker lab example keeps admin pack out of frontend warmup" +need_pattern .env.example '^WPCOW_MATERIALIZE_OPTIONS_TABLE=1$' "Docker lab example materializes options table" +need_pattern .env.example '^WPCOW_ENABLE_PLUGINS=0$' "Docker lab example keeps arbitrary production plugins disabled by default" +need_pattern .env.example '^WPCOW_PLUGIN_MODE=auto$' "Docker lab example plugin admission mode" +need_pattern .env.example '^WPCOW_PLUGIN_ADMISSION=1$' "Docker lab example plugin admission enabled" +need_pattern .env.example 
'^WPCOW_SPLASH=1$' "Docker lab example splash default" +need_pattern .env.example '^WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0$' "Docker lab example keeps PHP side-effect guards enabled" +need_pattern .env.example '^WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0$' "Docker lab example keeps warm render OPcache fast path enabled" +need_pattern .env.example '^WPCOW_REMOTE_STAT_PREFETCH_MAX_KB=0$' "Docker lab example keeps experimental stat prefetch off by default" +need_pattern .env.example '^WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB=0$' "Docker lab example keeps experimental sibling prefetch off by default" +need_pattern .env.example '^WPCOW_REMOTE_METADATA_CACHE_TTL_SECS=3600$' "Docker lab example keeps remote metadata warm long enough for rerenders" +need_pattern .env.example '^WPCOW_LOCAL_ADMIN_PASSWORD=$' "Docker lab example local admin override" + +deny_pattern src 'rsync|scp[[:space:]]+-r' "eager source tree copy command" +deny_pattern src/cli.rs 'wordpress_offline_table_names' "full core-table sever materialization" +deny_pattern src/cli.rs 'prefetch_runtime_files' "sever-triggered runtime prefetch" +deny_pattern src/run.rs 'WPCOW_PREFETCH_RUNTIME|prefetch_runtime_files|wp-cow-runtime-prefetch' "background runtime prefetch" +deny_pattern src/run.rs 'tar[[:space:]]+-cf[[:space:]]+-' "recursive remote tar runtime prefetch" +deny_pattern src/generate.rs 'function cow_remote_query_cache_clear|cow_remote_query_cache_clear\(\);' "global remote query cache invalidation" +deny_pattern src/generate.rs "define\\( 'WPCOW_REMOTE_DB_(NAME|USER|PASSWORD|HOST)'|function cow_remote_mysqli|real_connect" "remote DB credentials in generated PHP" +deny_pattern docker/Dockerfile 'rsync|scp[[:space:]]+-r' "eager copy tooling" +deny_pattern docker/wp-cow-lab-serve 'rsync|scp[[:space:]]+-r' "eager lab serve copy command" +deny_pattern docker/wp-cow-lab-run 'rsync|scp[[:space:]]+-r' "eager lab run copy command" + +echo "strict-harness: PASS" diff --git a/experiments/remote-wp-cow/src/cli.rs 
b/experiments/remote-wp-cow/src/cli.rs new file mode 100644 index 00000000..11f4c003 --- /dev/null +++ b/experiments/remote-wp-cow/src/cli.rs @@ -0,0 +1,728 @@ +use anyhow::{anyhow, Context, Result}; +use clap::{Args, Parser, Subcommand}; +use std::collections::{BTreeSet, VecDeque}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; + +use crate::config::{ + clone_paths, default_state_dir, derive_name, ensure_clone_dirs, load_manifest, write_manifest, + write_offline_marker, Manifest, OfflineMarker, Probe, +}; +use crate::db; +use crate::generate; +use crate::overlay::OverlayStore; +use crate::remote::{probe_wordpress, RemoteClient}; +use crate::run::{self, RunOptions}; +use crate::runtime_cache; + +#[derive(Debug, Parser)] +#[command(name = "wp-cow")] +#[command(about = "Lazy local WordPress clone runtime over SSH")] +pub struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + #[command(name = "clone")] + Clone(CloneArgs), + #[command(name = "serve")] + Serve(ServeArgs), + #[command(name = "init-db")] + InitDb(NameArgs), + #[command(name = "export-schema")] + ExportSchema(NameArgs), + #[command(name = "materialize")] + Materialize(MaterializeArgs), + #[command(name = "sever")] + Sever(SeverArgs), + #[command(name = "mount")] + Mount(MountArgs), + #[command(name = "run")] + Run(RunArgs), + #[command(name = "probe")] + Probe(ProbeArgs), +} + +#[derive(Debug, Args)] +struct CloneArgs { + #[arg(long = "ssh")] + ssh: String, + #[arg(long = "path")] + path: String, + #[arg(long = "remote-url")] + remote_url: String, + #[arg(long = "local-url")] + local_url: String, + #[arg(long)] + name: Option, + #[arg(long)] + state_dir: Option, + #[arg(long)] + force: bool, + #[arg(long)] + no_probe: bool, + #[arg(long)] + skip_schema: bool, +} + +#[derive(Debug, Args)] +struct ServeArgs { + #[arg(long = "ssh")] + ssh: String, + #[arg(long = "path")] + path: String, + #[arg(long = 
"remote-url")] + remote_url: String, + #[arg(long = "local-url")] + local_url: String, + #[arg(long)] + name: Option, + #[arg(long)] + state_dir: Option, + #[arg(long)] + force: bool, + #[arg(long)] + no_probe: bool, + #[arg(long)] + mountpoint: Option, + #[arg(long, default_value = "127.0.0.1:8080")] + http: String, + #[arg(long)] + no_php: bool, + #[arg(long, hide = true)] + no_runtime_sync: bool, +} + +#[derive(Debug, Args)] +struct NameArgs { + name: String, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct MaterializeArgs { + name: String, + #[arg(long = "table", required = true)] + tables: Vec, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct SeverArgs { + name: String, + #[arg(long = "admin-password")] + admin_password: Option, + #[arg(long = "admin-login")] + admin_login: Option, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct MountArgs { + name: String, + #[arg(long)] + mountpoint: Option, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct RunArgs { + name: String, + #[arg(long)] + mountpoint: Option, + #[arg(long, default_value = "127.0.0.1:8080")] + http: String, + #[arg(long)] + no_php: bool, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct ProbeArgs { + #[arg(long = "ssh")] + ssh: String, + #[arg(long = "path")] + path: String, +} + +pub fn run() -> Result<()> { + let cli = Cli::parse(); + match cli.command { + Command::Clone(args) => clone_site(args), + Command::Serve(args) => serve_site(args), + Command::InitDb(args) => init_db(args), + Command::ExportSchema(args) => export_schema(args), + Command::Materialize(args) => materialize(args), + Command::Sever(args) => sever(args), + Command::Mount(args) => mount(args), + Command::Run(args) => run_clone(args), + Command::Probe(args) => { + let probe = probe_wordpress(&args.ssh, &args.path)?; + println!("{}", serde_json::to_string_pretty(&probe)?); + Ok(()) + } + } +} + +fn clone_site(args: 
CloneArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let name = args + .name + .unwrap_or_else(|| derive_name(&args.remote_url, &args.local_url)); + let paths = clone_paths(&state_dir, &name); + + if paths.root.exists() { + if !args.force { + return Err(anyhow!( + "{} already exists; pass --force to replace generated clone metadata", + paths.root.display() + )); + } + fs::remove_dir_all(&paths.root)?; + } + + ensure_clone_dirs(&paths)?; + + let probe = if args.no_probe { + Probe { + abspath: args.path.clone(), + wp_content_dir: format!("{}/wp-content", args.path.trim_end_matches('/')), + uploads_dir: format!("{}/wp-content/uploads", args.path.trim_end_matches('/')), + table_prefix: "wp_".to_string(), + siteurl: args.remote_url.clone(), + home: args.remote_url.clone(), + ..Probe::default() + } + } else { + probe_wordpress(&args.ssh, &args.path)? + }; + + let manifest = Manifest::new( + name, + args.ssh, + args.path, + args.remote_url, + args.local_url, + probe, + ); + + write_manifest(&paths.manifest, &manifest)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + db::write_state(&paths, &db::DbState::default())?; + + if !args.skip_schema && !args.no_probe { + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); + remote.ensure_master()?; + db::export_schema(&remote, &paths).context("export schema")?; + } + + println!( + "created clone '{}': {}", + manifest.name, + paths.root.display() + ); + Ok(()) +} + +fn serve_site(args: ServeArgs) -> Result<()> { + let serve_started = Instant::now(); + let state_dir = args.state_dir.clone().unwrap_or(default_state_dir()?); + let name = args + .name + .clone() + .unwrap_or_else(|| derive_name(&args.remote_url, &args.local_url)); + let paths = clone_paths(&state_dir, &name); + + let metadata_started = Instant::now(); + let manifest = if !paths.root.exists() || args.force { + if paths.root.exists() { + 
fs::remove_dir_all(&paths.root)?; + } + ensure_clone_dirs(&paths)?; + + let probe = if args.no_probe { + Probe { + abspath: args.path.clone(), + wp_content_dir: format!("{}/wp-content", args.path.trim_end_matches('/')), + uploads_dir: format!("{}/wp-content/uploads", args.path.trim_end_matches('/')), + table_prefix: "wp_".to_string(), + siteurl: args.remote_url.clone(), + home: args.remote_url.clone(), + ..Probe::default() + } + } else { + probe_wordpress(&args.ssh, &args.path)? + }; + + let manifest = Manifest::new( + name, + args.ssh.clone(), + args.path.clone(), + args.remote_url.clone(), + args.local_url.clone(), + probe, + ); + + write_manifest(&paths.manifest, &manifest)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + db::write_state(&paths, &db::DbState::default())?; + println!("created lazy clone '{}'", manifest.name); + manifest + } else { + let mut manifest = load_manifest(&paths.manifest)?; + let mut changed = false; + let mut should_probe = false; + + if manifest.ssh != args.ssh { + manifest.ssh = args.ssh.clone(); + changed = true; + should_probe = true; + } + if manifest.remote_path != args.path { + manifest.remote_path = args.path.clone(); + changed = true; + should_probe = true; + } + if manifest.remote_url != args.remote_url { + manifest.remote_url = args.remote_url.clone(); + changed = true; + } + if manifest.local_url != args.local_url { + manifest.local_url = args.local_url.clone(); + changed = true; + } + + if !args.no_probe + && (should_probe + || manifest.probe.db_name.is_empty() + || manifest.probe.db_host.is_empty() + || manifest.probe.db_user.is_empty() + || (manifest.probe.template.is_empty() && manifest.probe.stylesheet.is_empty())) + { + manifest.probe = probe_wordpress(&manifest.ssh, &manifest.remote_path)?; + changed = true; + } + + if changed { + write_manifest(&paths.manifest, &manifest)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + println!("updated lazy clone '{}'", manifest.name); + } else { + 
println!("using existing lazy clone '{}'", manifest.name); + } + + manifest + }; + println!( + "prepared clone metadata in {:.2}s", + metadata_started.elapsed().as_secs_f64() + ); + + if args.no_runtime_sync { + println!( + "--no-runtime-sync is now the fixed serve behavior for '{}'", + manifest.name + ); + } + if std::env::var_os("WPCOW_RUNTIME_SYNC").is_some() + || std::env::var_os("WPCOW_RUNTIME_SYNC_FORCE").is_some() + { + println!( + "ignoring runtime sync environment for '{}'; requested files will be fetched on demand", + manifest.name + ); + } + println!( + "runtime code is cached in a bounded pack for '{}'; uploads/media remain lazy and are cached on demand", + manifest.name + ); + + generate::write_wordpress_overrides(&paths, &manifest)?; + + if !paths.db.join("schema.sql").exists() { + let phase_started = Instant::now(); + if args.no_probe { + return Err(anyhow!( + "schema is missing and --no-probe prevents discovering remote DB settings" + )); + } + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); + remote.ensure_master()?; + db::export_schema(&remote, &paths).context("export schema")?; + println!( + "exported schema only for '{}' in {:.2}s", + manifest.name, + phase_started.elapsed().as_secs_f64() + ); + } + + let phase_started = Instant::now(); + if db::init_local_db_if_empty(&manifest, &paths)? 
// NOTE(review): this chunk opens mid-function — the lines below finish the
// `serve` entry point whose beginning is outside this view. Generic type
// parameters in this file were stripped by the paste (e.g. `collect::>()`);
// the concrete types written below are reconstructed from usage — confirm
// against the original source.
{
    println!(
        "initialized empty local database '{}' in {:.2}s",
        manifest.local_db.name,
        phase_started.elapsed().as_secs_f64()
    );
} else {
    println!(
        "using existing local database '{}' ({:.2}s)",
        manifest.local_db.name,
        phase_started.elapsed().as_secs_f64()
    );
}

// Optionally pre-copy the wp_options table so plugin/runtime reads are
// served locally from the start (toggled via WPCOW_MATERIALIZE_OPTIONS_TABLE).
if materialize_options_table_enabled() {
    let phase_started = Instant::now();
    let remote = RemoteClient::new(
        manifest.clone(),
        Some(crate::config::ssh_control_path(&paths)),
    );
    remote.ensure_master()?;
    let options_table = format!("{}options", manifest.probe.table_prefix);
    let materialized = db::materialize_tables(
        &remote,
        &manifest,
        &paths,
        std::slice::from_ref(&options_table),
    )
    .context("materialize WordPress options table")?;
    println!(
        "materialized {} WordPress options table(s) for local plugin/runtime reads in {:.2}s",
        materialized.len(),
        phase_started.elapsed().as_secs_f64()
    );
}

println!(
    "starting lazy COW server after {:.2}s; files and database rows are fetched on demand, not copied up front",
    serve_started.elapsed().as_secs_f64()
);

// Default the FUSE mountpoint to /mnt/wp-cow/<clone-name> unless overridden.
let mountpoint = args
    .mountpoint
    .unwrap_or_else(|| PathBuf::from("/mnt/wp-cow").join(&manifest.name));
let options = RunOptions {
    mountpoint,
    http_addr: args.http,
    skip_php: args.no_php,
};
run::run_site(manifest, paths, options)
}

/// Initialize the clone's empty local database from the previously exported
/// remote schema. Fails if `db::init_local_db` cannot find `schema.sql`.
fn init_db(args: NameArgs) -> Result<()> {
    let state_dir = args.state_dir.unwrap_or(default_state_dir()?);
    let paths = clone_paths(&state_dir, &args.name);
    let manifest = load_manifest(&paths.manifest)?;
    db::init_local_db(&manifest, &paths)?;
    println!("initialized local database '{}'", manifest.local_db.name);
    Ok(())
}

/// Dump the remote WordPress schema (structure only, no data) into the
/// clone's state directory via the SSH control master.
fn export_schema(args: NameArgs) -> Result<()> {
    let state_dir = args.state_dir.unwrap_or(default_state_dir()?);
    let paths = clone_paths(&state_dir, &args.name);
    let manifest = load_manifest(&paths.manifest)?;
    let remote = RemoteClient::new(
        manifest.clone(),
        Some(crate::config::ssh_control_path(&paths)),
    );
    remote.ensure_master()?;
    db::export_schema(&remote, &paths)?;
    println!("exported remote schema for '{}'", manifest.name);
    Ok(())
}

/// Copy the requested remote tables into the local database and print the
/// list of tables that were actually materialized as pretty JSON.
fn materialize(args: MaterializeArgs) -> Result<()> {
    let state_dir = args.state_dir.unwrap_or(default_state_dir()?);
    let paths = clone_paths(&state_dir, &args.name);
    let manifest = load_manifest(&paths.manifest)?;
    let remote = RemoteClient::new(
        manifest.clone(),
        Some(crate::config::ssh_control_path(&paths)),
    );
    remote.ensure_master()?;
    let materialized = db::materialize_tables(&remote, &manifest, &paths, &args.tables)?;
    println!("{}", serde_json::to_string_pretty(&materialized)?);
    Ok(())
}

/// Detach the clone from its remote lower layers so it can run fully offline:
/// refresh bootstrap option rows, materialize every previously-touched table,
/// optionally set a local-only admin password, warm the runtime code cache,
/// and finally write the offline marker.
fn sever(args: SeverArgs) -> Result<()> {
    let started = Instant::now();
    let state_dir = args.state_dir.unwrap_or(default_state_dir()?);
    let paths = clone_paths(&state_dir, &args.name);
    let manifest = load_manifest(&paths.manifest)?;
    let remote = RemoteClient::new(
        manifest.clone(),
        Some(crate::config::ssh_control_path(&paths)),
    );

    remote.ensure_master()?;
    // Make sure the local schema + database exist before copying data down.
    if !paths.db.join("schema.sql").exists() {
        db::export_schema(&remote, &paths).context("export schema")?;
    }
    if db::init_local_db_if_empty(&manifest, &paths)? {
        println!(
            "initialized empty local database '{}'",
            manifest.local_db.name
        );
    }

    let refreshed_options = db::refresh_option_bootstrap_for_offline(&remote, &manifest, &paths)
        .context("refresh remote option bootstrap rows for offline use")?;
    println!(
        "refreshed {} WordPress option bootstrap rows for local/offline use",
        refreshed_options.len()
    );

    // Collect every table already materialized; add users/usermeta when an
    // admin password override is requested so those rows exist locally.
    let mut requested_tables = db::load_state(&paths)?
        .materialized_tables
        .into_iter()
        .collect::<BTreeSet<String>>(); // generic lost in paste — TODO confirm
    if args.admin_password.is_some() {
        requested_tables.insert(format!("{}users", manifest.probe.table_prefix));
        requested_tables.insert(format!("{}usermeta", manifest.probe.table_prefix));
    }
    let requested_tables = requested_tables.into_iter().collect::<Vec<String>>();
    // Drop tables that exist in state but not in the local schema (e.g. a
    // plugin table the schema export never saw).
    let tables = db::existing_local_tables(&manifest, &requested_tables)?;
    let skipped = requested_tables.len().saturating_sub(tables.len());
    if skipped > 0 {
        println!(
            "skipping {} previously materialized WordPress tables that are not present in the local schema",
            skipped
        );
    }
    let materialized = db::materialize_tables(&remote, &manifest, &paths, &tables)
        .context("materialize local offline database lower layer")?;
    println!(
        "materialized {} WordPress tables for local/offline use",
        materialized.len()
    );

    // Local-only admin override: never written back to the remote DB.
    let admin = if let Some(password) = args.admin_password.as_deref() {
        let admin = db::set_local_admin_password(&manifest, args.admin_login.as_deref(), password)
            .context("set local administrator password")?;
        println!(
            "set local administrator password for '{}' without writing to the remote DB",
            admin.user_login
        );
        Some(admin.user_login)
    } else {
        None
    };

    if admin.is_some() {
        // Prefer the bounded runtime-code cache; fall back to crawling
        // wp-admin/wp-includes when it caches nothing.
        let warmed = runtime_cache::warm_runtime_code_cache_with_admin(&remote, &manifest, &paths)
            .context("cache WordPress runtime code for offline login")?;
        if warmed.files > 0 {
            println!(
                "cached {} bounded runtime code files for offline login",
                warmed.files
            );
        } else {
            let cached = cache_offline_core_runtime(&remote, &manifest, &paths)
                .context("cache WordPress core/admin runtime for offline login")?;
            println!("cached {cached} WordPress core/admin runtime files for offline login");
        }
    }

    let marker = OfflineMarker {
        severed_at_unix: SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs(),
        materialized_tables: tables,
        admin_user: admin,
    };
    write_offline_marker(&paths,
    &marker)?;
    generate::write_wordpress_overrides(&paths, &manifest)?;
    // Best-effort: a stale control master is only a warning once severed.
    if let Err(err) = remote.stop_master() {
        eprintln!("warning: could not close SSH control master after severing: {err:#}");
    }

    println!(
        "severed clone '{}' from remote lower layers in {:.2}s",
        manifest.name,
        started.elapsed().as_secs_f64()
    );
    Ok(())
}

/// Breadth-first crawl of the remote tree that caches WordPress core/admin
/// runtime files (wp-admin, wp-includes, and top-level wp-*.php) into the
/// overlay so an offline login can be served. Returns how many files were
/// cached. Files larger than `cache_max_file_bytes` are skipped.
fn cache_offline_core_runtime(
    remote: &RemoteClient,
    manifest: &Manifest,
    paths: &crate::config::ClonePaths,
) -> Result<usize> {
    // NOTE(review): return type reconstructed as `usize` from `cached`.
    let overlay = OverlayStore::new(paths);
    let mut queue = VecDeque::from([PathBuf::new()]);
    let mut cached = 0_usize;

    while let Some(dir) = queue.pop_front() {
        let entries = match remote.readdir(&dir) {
            Ok(entries) => entries,
            // A directory that vanished remotely is not fatal for a cache warm.
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
            Err(err) => {
                return Err(err).with_context(|| {
                    format!(
                        "read remote runtime directory {}",
                        OverlayStore::rel_string(&dir)
                    )
                })
            }
        };

        for entry in entries {
            let rel = dir.join(&entry.name);
            // Record metadata even for entries we do not cache (best-effort).
            let _ = overlay.put_cached_entry(&rel, &entry);

            if entry.kind == "dir" && should_descend_offline_core_runtime_dir(&rel) {
                queue.push_back(rel);
                continue;
            }

            if !should_cache_offline_core_runtime_file(&rel, &entry.kind) {
                continue;
            }
            if entry.size > manifest.cache_max_file_bytes {
                continue;
            }

            overlay
                .read_cached_or_remote_with_entry(
                    remote,
                    &rel,
                    0,
                    1,
                    manifest.cache_max_file_bytes,
                    Some(entry),
                )
                .with_context(|| {
                    format!(
                        "cache remote runtime file {}",
                        OverlayStore::rel_string(&rel)
                    )
                })?;
            cached += 1;
        }
    }

    Ok(cached)
}

/// Only wp-admin and wp-includes subtrees are crawled for the offline cache.
fn should_descend_offline_core_runtime_dir(rel: &Path) -> bool {
    rel == Path::new("wp-admin")
        || rel.starts_with(Path::new("wp-admin/"))
        || rel == Path::new("wp-includes")
        || rel.starts_with(Path::new("wp-includes/"))
}

/// Cache regular files inside wp-admin/wp-includes, plus top-level
/// `index.php` and `wp-*.php`; never anything under wp-content.
fn should_cache_offline_core_runtime_file(rel: &Path, kind: &str) -> bool {
    if kind != "file" {
        return false;
    }
    if rel.starts_with(Path::new("wp-content")) {
        return false;
    }
    if rel.starts_with(Path::new("wp-admin")) || rel.starts_with(Path::new("wp-includes")) {
        return true;
    }
    let Some(name) = rel.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    // Top-level (no parent component) core entry points only.
    rel.parent()
        .is_none_or(|parent| parent.as_os_str().is_empty())
        && (name == "index.php" || (name.starts_with("wp-") && name.ends_with(".php")))
}

/// Mount the clone's FUSE filesystem without starting the PHP/HTTP stack.
fn mount(args: MountArgs) -> Result<()> {
    let state_dir = args.state_dir.unwrap_or(default_state_dir()?);
    let paths = clone_paths(&state_dir, &args.name);
    let manifest = load_manifest(&paths.manifest)?;
    let mountpoint = args
        .mountpoint
        .unwrap_or_else(|| PathBuf::from("/mnt/wp-cow").join(&manifest.name));
    run::mount_only(manifest, paths, &mountpoint)
}

/// Run an existing clone (mount + serve) without re-probing the remote.
fn run_clone(args: RunArgs) -> Result<()> {
    let state_dir = args.state_dir.unwrap_or(default_state_dir()?);
    let paths = clone_paths(&state_dir, &args.name);
    let manifest = load_manifest(&paths.manifest)?;
    let mountpoint = args
        .mountpoint
        .unwrap_or_else(|| PathBuf::from("/mnt/wp-cow").join(&manifest.name));
    let options = RunOptions {
        mountpoint,
        http_addr: args.http,
        skip_php: args.no_php,
    };
    run::run_site(manifest, paths, options)
}

/// WPCOW_MATERIALIZE_OPTIONS_TABLE toggle; defaults to enabled when unset
/// or unrecognized.
fn materialize_options_table_enabled() -> bool {
    std::env::var("WPCOW_MATERIALIZE_OPTIONS_TABLE")
        .ok()
        .map(|raw| {
            matches!(
                raw.to_ascii_lowercase().as_str(),
                "1" | "true" | "yes" | "on"
            )
        })
        .unwrap_or(true)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn offline_core_runtime_cache_is_bounded_to_wordpress_core() {
        assert!(should_cache_offline_core_runtime_file(
            Path::new("wp-login.php"),
            "file"
        ));
        assert!(should_cache_offline_core_runtime_file(
            Path::new("wp-admin/admin.php"),
            "file"
        ));
        assert!(should_cache_offline_core_runtime_file(
            Path::new("wp-includes/version.php"),
            "file"
        ));
        assert!(should_descend_offline_core_runtime_dir(Path::new(
            "wp-admin/includes"
        )));
        assert!(should_descend_offline_core_runtime_dir(Path::new(
            "wp-includes/blocks"
        )));

        assert!(!should_cache_offline_core_runtime_file(
            Path::new("wp-content/uploads/2026/05/large.mov"),
            "file"
        ));
        assert!(!should_cache_offline_core_runtime_file(
            Path::new("wp-content/plugins/woocommerce/woocommerce.php"),
            "file"
        ));
        assert!(!should_cache_offline_core_runtime_file(
            Path::new("wp-content/themes/neve/functions.php"),
            "file"
        ));
        assert!(!should_descend_offline_core_runtime_dir(Path::new(
            "wp-content/uploads"
        )));
    }
}
diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs new file mode 100644 index 00000000..6894584b --- /dev/null +++ b/experiments/remote-wp-cow/src/config.rs @@ -0,0 +1,351 @@
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::fs::{self, File, OpenOptions};
use std::hash::{Hash, Hasher};
use std::io::{Read, Write};
use std::os::unix::fs::OpenOptionsExt;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use url::Url;

pub const MANIFEST_VERSION: u32 = 1;
const OFFLINE_MARKER: &str = "offline.json";
const DEFAULT_CACHE_MAX_FILE_BYTES: u64 = 8 * 1024 * 1024;
const DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS: u64 = 3600;

/// Persistent description of one clone: remote identity, probed WordPress
/// facts, and local service endpoints. Serialized to `manifest.json`.
/// `#[serde(default = …)]` fields keep older manifests loadable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    pub version: u32,
    pub name: String,
    pub ssh: String,
    pub remote_path: String,
    pub remote_url: String,
    pub local_url: String,
    pub created_at_unix: u64,
    pub probe: Probe,
    pub local_db: LocalDb,
    #[serde(default = "default_db_proxy")]
    pub db_proxy: DbProxy,
    #[serde(default = "default_remote_db_tunnel")]
    pub remote_db_tunnel: RemoteDbTunnel,
    pub control_url: String,
    #[serde(default = "default_cache_max_file_bytes")]
    pub cache_max_file_bytes: u64,
    #[serde(default = "default_remote_metadata_cache_ttl_secs")]
    pub remote_metadata_cache_ttl_secs: u64,
}
// NOTE(review): several generic parameters below (`Vec<…>`, `Option<…>`)
// were stripped by the paste and are reconstructed from usage — confirm
// against the original file.

/// Facts discovered by probing the remote WordPress install (paths, DB
/// credentials, active theme/plugins). Empty-string fields are omitted from
/// the serialized manifest.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Probe {
    pub abspath: String,
    pub wp_content_dir: String,
    pub uploads_dir: String,
    pub table_prefix: String,
    pub db_name: String,
    pub db_host: String,
    pub db_user: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub db_password: String,
    pub siteurl: String,
    pub home: String,
    #[serde(default)]
    pub template: String,
    #[serde(default)]
    pub stylesheet: String,
    #[serde(default)]
    pub active_plugins: Vec<String>,
    #[serde(default)]
    pub active_sitewide_plugins: Vec<String>,
}

/// Connection settings for the clone's local MySQL database.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocalDb {
    pub name: String,
    pub user: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub password: String,
    pub host: String,
    pub port: u16,
}

/// Local endpoint of the SSH tunnel to the remote database.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RemoteDbTunnel {
    pub host: String,
    pub port: u16,
}

/// Endpoint of the local DB proxy that routes queries local/remote.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DbProxy {
    pub host: String,
    pub port: u16,
}

/// On-disk layout of one clone under `<state>/clones/<name>`.
#[derive(Debug, Clone)]
pub struct ClonePaths {
    pub root: PathBuf,
    pub manifest: PathBuf,
    pub upper: PathBuf,
    pub file_cache: PathBuf,
    pub db: PathBuf,
    pub generated: PathBuf,
    pub run: PathBuf,
    pub whiteouts: PathBuf,
}

/// Marker written by `sever`; its presence makes the clone run offline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OfflineMarker {
    pub severed_at_unix: u64,
    pub materialized_tables: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub admin_user: Option<String>,
}

impl Manifest {
    /// Build a fresh manifest with sanitized name, derived local DB
    /// identity (`cow_<name>` on fixed local ports), and env-tunable cache
    /// limits captured at creation time.
    pub fn new(
        name: String,
        ssh: String,
        remote_path: String,
        remote_url: String,
        local_url: String,
        probe: Probe,
    ) -> Self {
        let safe_name = sanitize_name(&name);
        Self {
            version: MANIFEST_VERSION,
            name: safe_name.clone(),
            ssh,
            remote_path,
            remote_url,
            local_url,
            created_at_unix: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
            probe,
            local_db: LocalDb {
                // MySQL identifiers: '-' is normalized to '_'.
                name: format!("cow_{}", safe_name.replace('-', "_")),
                user: format!("cow_{}", safe_name.replace('-', "_")),
                password: String::new(),
                host: "127.0.0.1".to_string(),
                port: 33071,
            },
            db_proxy: default_db_proxy(),
            remote_db_tunnel: default_remote_db_tunnel(),
            control_url: "http://127.0.0.1:39070".to_string(),
            cache_max_file_bytes: cache_max_file_bytes_from_env(),
            remote_metadata_cache_ttl_secs: remote_metadata_cache_ttl_secs_from_env(),
        }
    }
}

fn default_cache_max_file_bytes() -> u64 {
    DEFAULT_CACHE_MAX_FILE_BYTES
}

fn default_remote_metadata_cache_ttl_secs() -> u64 {
    DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS
}

/// TTL from WPCOW_REMOTE_METADATA_CACHE_TTL_SECS; zero/invalid → default.
fn remote_metadata_cache_ttl_secs_from_env() -> u64 {
    std::env::var("WPCOW_REMOTE_METADATA_CACHE_TTL_SECS")
        .ok()
        .and_then(|raw| raw.parse::<u64>().ok())
        .filter(|ttl| *ttl > 0)
        .unwrap_or(DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS)
}

fn default_remote_db_tunnel() -> RemoteDbTunnel {
    RemoteDbTunnel {
        host: "127.0.0.1".to_string(),
        port: 33072,
    }
}

fn default_db_proxy() -> DbProxy {
    DbProxy {
        host: "127.0.0.1".to_string(),
        port: 33070,
    }
}

/// Max cached file size from WPCOW_CACHE_MAX_FILE_MB (megabytes);
/// zero/invalid → default 8 MiB.
fn cache_max_file_bytes_from_env() -> u64 {
    std::env::var("WPCOW_CACHE_MAX_FILE_MB")
        .ok()
        .and_then(|raw| raw.parse::<u64>().ok())
        .map(|mb| mb.saturating_mul(1024 * 1024))
        .filter(|bytes| *bytes > 0)
        .unwrap_or(DEFAULT_CACHE_MAX_FILE_BYTES)
}

/// State dir: $WPCOW_HOME if set, else $HOME/.wp-cow; errors when neither
/// is available (caller should pass --state-dir).
pub fn default_state_dir() -> Result<PathBuf> {
    if let Ok(home) = std::env::var("WPCOW_HOME") {
        return Ok(PathBuf::from(home));
    }
    let home = std::env::var("HOME").context("HOME is not set; pass --state-dir")?;
    Ok(PathBuf::from(home).join(".wp-cow"))
}

/// Compute (but do not create) the clone's directory layout.
pub fn clone_paths(state_dir: &Path, name: &str) -> ClonePaths {
    let root = state_dir.join("clones").join(name);
    ClonePaths {
        manifest: root.join("manifest.json"),
        upper: root.join("upper"),
        file_cache: root.join("file-cache"),
        db: root.join("db"),
        generated: root.join("generated"),
        run: root.join("run"),
        whiteouts: root.join("whiteouts.json"),
        root,
    }
}

/// Short, deterministic control-socket path: OpenSSH Unix-socket paths must
/// stay well under the ~104/108-byte sockaddr limit, so the clone root is
/// hashed instead of embedded.
pub fn ssh_control_path(paths: &ClonePaths) -> PathBuf {
    let mut hasher = DefaultHasher::new();
    paths.root.hash(&mut hasher);
    let hash = hasher.finish();
    let dir = std::env::var_os("WPCOW_SSH_CONTROL_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(std::env::temp_dir);
    dir.join(format!("wp-cow-ssh-{hash:016x}.sock"))
}

pub fn offline_marker_path(paths: &ClonePaths) -> PathBuf {
    paths.run.join(OFFLINE_MARKER)
}

/// Offline when the marker file exists or WPCOW_OFFLINE is truthy.
pub fn is_offline(paths: &ClonePaths) -> bool {
    offline_marker_path(paths).is_file()
        || std::env::var("WPCOW_OFFLINE")
            .ok()
            .map(|raw| {
                matches!(
                    raw.to_ascii_lowercase().as_str(),
                    "1" | "true" | "yes" | "on"
                )
            })
            .unwrap_or(false)
}

pub fn write_offline_marker(paths: &ClonePaths, marker: &OfflineMarker) -> Result<()> {
    fs::create_dir_all(&paths.run)?;
    let json = serde_json::to_vec_pretty(marker)?;
    fs::write(offline_marker_path(paths), [json, b"\n".to_vec()].concat())?;
    Ok(())
}

pub fn ensure_clone_dirs(paths: &ClonePaths) -> Result<()> {
    fs::create_dir_all(&paths.upper)?;
    fs::create_dir_all(&paths.file_cache)?;
    fs::create_dir_all(&paths.db)?;
    fs::create_dir_all(paths.db.join("local-mysql"))?;
    fs::create_dir_all(&paths.generated)?;
    fs::create_dir_all(&paths.run)?;
    Ok(())
}

/// Write the manifest with mode 0600 (it can hold the remote DB password).
pub fn write_manifest(path: &Path, manifest: &Manifest) -> Result<()> {
    let json = serde_json::to_vec_pretty(manifest)?;
    let mut file = OpenOptions::new()
        .create(true)
        .truncate(true)
        .write(true)
        .mode(0o600)
        .open(path)
        .with_context(|| format!("write {}", path.display()))?;
    file.write_all(&json)?;
    file.write_all(b"\n")?;
    Ok(())
}

/// Load and version-check a manifest; rejects any version other than
/// MANIFEST_VERSION.
pub fn load_manifest(path: &Path) -> Result<Manifest> {
    let mut json = String::new();
    File::open(path)
        .with_context(|| format!("open {}", path.display()))?
        .read_to_string(&mut json)?;
    let manifest: Manifest = serde_json::from_str(&json)?;
    if manifest.version != MANIFEST_VERSION {
        return Err(anyhow!(
            "unsupported manifest version {} in {}",
            manifest.version,
            path.display()
        ));
    }
    Ok(manifest)
}

/// Derive a clone name from the remote URL's host (first label, "www."
/// stripped), falling back to the local URL, then to "site".
pub fn derive_name(remote_url: &str, local_url: &str) -> String {
    let from_url = |raw: &str| -> Option<String> {
        let parsed = Url::parse(raw).ok()?;
        let host = parsed.host_str()?;
        let host = host.strip_prefix("www.").unwrap_or(host);
        let first = host.split('.').next().unwrap_or(host);
        Some(sanitize_name(first))
    };

    from_url(remote_url)
        .or_else(|| from_url(local_url))
        .filter(|s| !s.is_empty())
        .unwrap_or_else(|| "site".to_string())
}

/// Lowercase alphanumerics; whitespace/`-`/`_`/`.` become `-`; everything
/// else is dropped; runs of `-` are collapsed and edges trimmed.
pub fn sanitize_name(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_alphanumeric() {
            out.push(ch.to_ascii_lowercase());
        } else if ch.is_ascii_whitespace() || ch == '-' || ch == '_' || ch == '.' {
            out.push('-');
        }
    }
    while out.contains("--") {
        out = out.replace("--", "-");
    }
    out.trim_matches('-').to_string()
}

/// Split "host:port", falling back to `default_port` when no parsable port
/// suffix exists.
/// NOTE(review): splits at the LAST ':', so a bare IPv6 literal like "::1"
/// would be misparsed as host "::" port 1 — confirm whether IPv6 hosts can
/// reach this path.
pub fn parse_host_port(host: &str, default_port: u16) -> (String, u16) {
    if let Some((h, p)) = host.rsplit_once(':') {
        if let Ok(port) = p.parse::<u16>() {
            return (h.to_string(), port);
        }
    }
    (host.to_string(), default_port)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn derives_name_from_remote_url() {
        assert_eq!(
            derive_name("https://www.example.com", "http://x.test"),
            "example"
        );
        assert_eq!(
            derive_name("not a url", "http://local-site.test"),
            "local-site"
        );
    }

    #[test]
    fn sanitizes_name() {
        assert_eq!(sanitize_name("Example Site_1"), "example-site-1");
        assert_eq!(sanitize_name("...Cow!!!"), "cow");
    }

    #[test]
    fn ssh_control_path_stays_short_for_long_clone_paths() {
        let paths = clone_paths(
            Path::new("/tmp/wp-cow-live-acceptance.with-a-long-random-name/state"),
            "calm-cottage-core-live-with-a-long-name",
        );
        let path = ssh_control_path(&paths);
        assert!(
            path.to_string_lossy().len() < 100,
            "OpenSSH Unix-domain control sockets need a short path: {}",
            path.display()
        );
    }
}
diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs new file mode 100644 index 00000000..180e970c --- /dev/null +++ b/experiments/remote-wp-cow/src/control.rs @@ -0,0 +1,184 @@
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tiny_http::{Header, Request, Response, Server, StatusCode};

use crate::config::{self, ClonePaths, Manifest};
use crate::db;
use crate::remote::RemoteClient;

/// JSON body accepted by every control endpoint.
/// NOTE(review): generic parameters were stripped by the paste; types below
/// reconstructed from usage — confirm against the original file.
#[derive(Debug, Deserialize)]
struct ControlRequest {
    #[allow(dead_code)]
    clone: Option<String>,
    tables: Option<Vec<String>>,
    sql: Option<String>,
}

/// Minimal JSON reply used for transport-level errors.
#[derive(Debug, Serialize)]
struct BasicResponse<'a> {
    ok: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<&'a str>,
}

/// Single-threaded control-plane HTTP loop. Polls with a 250 ms receive
/// timeout so the `shutdown` flag is observed promptly; per-request errors
/// are logged, not fatal.
pub fn serve_control(
    addr: &str,
    manifest: Manifest,
    paths: ClonePaths,
    remote: RemoteClient,
    shutdown: Arc<AtomicBool>,
) -> Result<()> {
    let server =
        Server::http(addr).map_err(|err| anyhow!("bind control server {}: {}", addr, err))?;
    while !shutdown.load(Ordering::SeqCst) {
        match server.recv_timeout(Duration::from_millis(250)) {
            Ok(Some(request)) => {
                if let Err(err) = handle_request(request, &manifest, &paths, &remote) {
                    eprintln!("wp-cow control error: {err:#}");
                }
            }
            Ok(None) => {}
            Err(err) => return Err(anyhow!("control server receive failed: {}", err)),
        }
    }
    Ok(())
}

/// Decode one POSTed control request, dispatch it, and map the JSON `ok` /
/// `error` fields onto an HTTP status (200 / 404 "not found" / 500).
fn handle_request(
    mut request: Request,
    manifest: &Manifest,
    paths: &ClonePaths,
    remote: &RemoteClient,
) -> Result<()> {
    if request.method().as_str() != "POST" {
        return send_json(
            request,
            StatusCode(405),
            &BasicResponse {
                ok: false,
                error: Some("method not allowed"),
            },
        );
    }

    let mut body = String::new();
    request.as_reader().read_to_string(&mut body)?;

    // Errors at either decode or dispatch are folded into a JSON error body
    // rather than an HTTP-level failure.
    let response = match serde_json::from_str::<ControlRequest>(&body) {
        Ok(input) => match control_response(request.url(), input, manifest, paths, remote) {
            Ok(response) => response,
            Err(err) => json!({ "ok": false, "error": format!("{err:#}") }),
        },
        Err(err) => json!({ "ok": false, "error": format!("decode control JSON: {err}") }),
    };

    let status = match response.get("ok").and_then(|v| v.as_bool()) {
        Some(true) => StatusCode(200),
        Some(false) if response.get("error").and_then(|v| v.as_str()) == Some("not found") => {
            StatusCode(404)
        }
        Some(false) => StatusCode(500),
        None => StatusCode(500),
    };
    send_json(request, status, &response)
}

/// Route a decoded control request by URL path. Every handler short-circuits
/// with an "offline" reply when the clone is severed, so no remote traffic
/// happens after `sever`.
/// NOTE(review): return type reconstructed as `serde_json::Value` — confirm.
fn control_response(
    url: &str,
    input: ControlRequest,
    manifest: &Manifest,
    paths: &ClonePaths,
    remote: &RemoteClient,
) -> Result<serde_json::Value> {
    match url {
        "/materialize" => {
            let tables = input.tables.unwrap_or_default();
            if config::is_offline(paths) {
                return Ok(json!({
                    "ok": true,
                    "backend": "local",
                    "materialized": [],
                    "offline": true
                }));
            }
            let materialized = db::materialize_tables(remote, manifest, paths, &tables)?;
            Ok(json!({ "ok": true, "backend": "local", "materialized": materialized }))
        }
        "/route" => {
            let tables = input.tables.unwrap_or_default();
            if config::is_offline(paths) {
                return Ok(json!({
                    "ok": true,
                    "backend": "local",
                    "materialized": [],
                    "offline": true
                }));
            }
            // SQL-aware routing when a statement is supplied, else table-based.
            let decision = if let Some(sql) = input.sql.as_deref() {
                db::route_for_query(remote, manifest, paths, sql, &tables)?
            } else {
                db::route_for_tables(remote, manifest, paths, &tables)?
            };
            Ok(
                json!({ "ok": true, "backend": decision.backend, "materialized": decision.materialized }),
            )
        }
        "/row-cow" => {
            if config::is_offline(paths) {
                return Ok(json!({
                    "ok": true,
                    "handled": false,
                    "backend": "local",
                    "materialized": [],
                    "offline": true
                }));
            }
            let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?;
            let tables = input.tables.unwrap_or_default();
            let response = db::row_cow_query(remote, manifest, paths, &sql, &tables)?;
            Ok(json!({
                "ok": true,
                "handled": response.handled,
                "backend": response.backend,
                "materialized": response.materialized,
                "fallback": response.fallback,
                "result": response.result
            }))
        }
        "/query" => {
            // Read-only remote passthrough; hard error when severed.
            if config::is_offline(paths) {
                return Ok(json!({
                    "ok": false,
                    "error": "clone is severed from the remote database"
                }));
            }
            let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?;
            let result = db::cached_remote_readonly_query(remote, paths, &sql)?;
            Ok(json!({
                "ok": result.ok,
                "error": result.error,
                "rows": result.rows,
                "fields": result.fields,
                "affected": result.affected
            }))
        }
        _ => Ok(json!({ "ok": false, "error": "not found" })),
    }
}

/// Serialize `value` and send it with the given status and a JSON
/// content-type header. NOTE(review): the `<T: Serialize>` bound was lost in
/// the paste — confirm.
fn send_json<T: Serialize>(request: Request, status: StatusCode, value: &T) -> Result<()> {
    let body = serde_json::to_vec(value)?;
    let header = Header::from_bytes("Content-Type", "application/json")
        .map_err(|_| anyhow!("invalid content-type header"))?;
    request
        .respond(
            Response::from_data(body)
                .with_status_code(status)
                .with_header(header),
        )
        .map_err(|err| anyhow!("send control response: {}", err))
}
diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs new file mode 100644 index 00000000..e0299534 --- /dev/null +++ b/experiments/remote-wp-cow/src/db.rs @@ -0,0 +1,1951 @@
use anyhow::{anyhow, Context, Result};
use mysql::prelude::Queryable;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::BTreeSet;
use std::fs::{self, File};
use std::io::{self, Read};
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::time::{SystemTime, UNIX_EPOCH};

use crate::config::{parse_host_port, ClonePaths, Manifest};
use crate::remote::{shell_quote, RemoteClient, RemoteQueryResult};
use crate::row_cow::{
    self, CowQueryResult, PkValue, Row, RowCowBackend, RowCowExecution, RowCowPlan,
};
use crate::sql;

/// Persistent routing state for the clone DB (which tables/option rows are
/// local). Serialized to `db/state.json`.
/// NOTE(review): the `BTreeSet<String>` element types were stripped by the
/// paste and are reconstructed — confirm against the original file.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct DbState {
    #[serde(default)]
    pub materialized_tables: BTreeSet<String>,
    #[serde(default)]
    pub dirty_tables: BTreeSet<String>,
    #[serde(default)]
    pub option_bootstrap_tables: BTreeSet<String>,
    #[serde(default)]
    pub option_rows: BTreeSet<String>,
    #[serde(default)]
    pub dirty_option_rows: BTreeSet<String>,
}

/// Identity of the local WordPress administrator row that was updated.
#[derive(Debug, Clone, Serialize)]
pub struct LocalAdmin {
    pub id: u64,
    pub user_login: String,
}

pub fn state_path(paths: &ClonePaths) -> PathBuf {
    paths.db.join("state.json")
}

/// Load the routing state, defaulting to empty when no file exists yet.
pub fn load_state(paths: &ClonePaths) -> Result<DbState> {
    let path = state_path(paths);
    if !path.exists() {
        return Ok(DbState::default());
    }
    let mut json = String::new();
    File::open(path)?.read_to_string(&mut json)?;
    Ok(serde_json::from_str(&json)?)
}

pub fn write_state(paths: &ClonePaths, state: &DbState) -> Result<()> {
    fs::create_dir_all(&paths.db)?;
    let json = serde_json::to_vec_pretty(state)?;
    fs::write(state_path(paths), [json, b"\n".to_vec()].concat())?;
    Ok(())
}

/// Run `mysqldump --no-data` on the remote host over SSH and store the
/// structure-only schema as `db/schema.sql`. The DB password travels via
/// MYSQL_PWD in the remote environment (shell-quoted).
pub fn export_schema(remote: &RemoteClient, paths: &ClonePaths) -> Result<()> {
    let probe = &remote.manifest().probe;
    ensure_probe_has_db(probe)?;
    fs::create_dir_all(&paths.db)?;
    let command = format!(
        "MYSQL_PWD={} mysqldump {} --user={} --no-data --skip-lock-tables {}",
        shell_quote(&probe.db_password),
        remote_mysql_cli_options(&probe.db_host),
        shell_quote(&probe.db_user),
        shell_quote(&probe.db_name)
    );
    let schema = remote
        .exec_capture(&command, None)
        .context("export remote schema with mysqldump")?;
    fs::write(paths.db.join("schema.sql"), schema)?;
    Ok(())
}

/// Create the local database (utf8mb4) and import the exported schema by
/// streaming `schema.sql` into a local `mysql` child process.
pub fn init_local_db(manifest: &Manifest, paths: &ClonePaths) -> Result<()> {
    let schema = paths.db.join("schema.sql");
    if !schema.exists() {
        return Err(anyhow!(
            "{} does not exist; run clone without --skip-schema or materialize a table first",
            schema.display()
        ));
    }

    // Backtick-escaped identifier; the DB name comes from sanitize_name so
    // this is belt-and-braces.
    let create_sql = format!(
        "CREATE DATABASE IF NOT EXISTS `{}` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;",
        manifest.local_db.name.replace('`', "``")
    );
    run_mysql_exec(manifest, &create_sql)?;

    let mut mysql = local_mysql_command(manifest);
    mysql.arg(&manifest.local_db.name).stdin(Stdio::piped());
    let mut child = mysql
        .spawn()
        .context("start local mysql for schema import")?;
    let mut stdin = child.stdin.take().expect("mysql stdin piped");
    let mut schema_file = File::open(&schema)?;
    io::copy(&mut schema_file, &mut stdin)?;
    drop(stdin); // close stdin so mysql sees EOF and exits
    let status = child.wait()?;
    if !status.success() {
        return Err(anyhow!(
            "local mysql schema import failed with status {}",
            status
        ));
    }
    Ok(())
}

/// Import the schema only when the local database has no tables yet.
/// Returns whether an import was performed.
pub fn init_local_db_if_empty(manifest: &Manifest, paths: &ClonePaths) -> Result<bool> {
    if local_schema_table_count(manifest)? > 0 {
        return Ok(false);
    }

    init_local_db(manifest, paths)?;
    Ok(true)
}

/// Count tables in the local database via information_schema.
pub fn local_schema_table_count(manifest: &Manifest) -> Result<u64> {
    let sql_text = format!(
        "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '{}';",
        mysql_string_literal(&manifest.local_db.name)
    );
    let output = local_mysql_command(manifest)
        .arg("--batch")
        .arg("--skip-column-names")
        .arg("--execute")
        .arg(sql_text)
        .output()
        .context("query local mysql schema state")?;
    if !output.status.success() {
        return Err(anyhow!(
            "local mysql schema state query failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    stdout
        .trim()
        .parse::<u64>()
        .with_context(|| format!("parse local table count from {}", stdout.trim()))
}

/// Copy each not-yet-materialized table from the remote DB into the local
/// one (WordPress group names like "users" are expanded with the table
/// prefix first). Returns only the tables copied by THIS call; state is
/// persisted so repeats are no-ops.
pub fn materialize_tables(
    remote: &RemoteClient,
    manifest: &Manifest,
    paths: &ClonePaths,
    tables: &[String],
) -> Result<Vec<String>> {
    let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables);
    let mut state = load_state(paths)?;
    let mut changed = Vec::new();

    for table in expanded {
        validate_table_name(&table)?;
        if state.materialized_tables.contains(&table) {
            continue;
        }
        materialize_one_table(remote, manifest, paths, &table)
            .with_context(|| format!("materialize table {}", table))?;
        state.materialized_tables.insert(table.clone());
        changed.push(table);
    }

    write_state(paths, &state)?;
    Ok(changed)
}

/// Filter `tables` down to those that exist in the local schema, preserving
/// input order. Names are validated, then escaped into an IN (...) list.
pub fn existing_local_tables(manifest: &Manifest, tables: &[String]) -> Result<Vec<String>> {
    for table in tables {
        validate_table_name(table)?;
    }
    if tables.is_empty() {
        return Ok(Vec::new());
    }

    let in_list = tables
        .iter()
        .map(|table| format!("'{}'", mysql_string_literal(table)))
        .collect::<Vec<_>>()
        .join(", ");
    let sql_text = format!(
        "SELECT table_name FROM information_schema.tables \
         WHERE table_schema='{}' AND table_name IN ({});",
        mysql_string_literal(&manifest.local_db.name),
        in_list
    );
    let output = local_mysql_command(manifest)
        .arg("--batch")
        .arg("--raw")
        .arg("--skip-column-names")
        .arg("--execute")
        .arg(sql_text)
        .output()
        .context("query local WordPress table list")?;
    if !output.status.success() {
        return Err(anyhow!(
            "local table list query failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }

    // NOTE(review): collection generics below were stripped by the paste and
    // are reconstructed from usage — confirm against the original file.
    let present = String::from_utf8_lossy(&output.stdout)
        .lines()
        .map(|line| line.to_string())
        .collect::<BTreeSet<String>>();
    Ok(tables
        .iter()
        .filter(|table| present.contains(table.as_str()))
        .cloned()
        .collect())
}

/// Overwrite the password of a local administrator row (never touching the
/// remote DB) and drop their session tokens. Picks the first admin when no
/// login is given.
/// NOTE(review): stores an MD5 hash — presumably relying on WordPress
/// accepting legacy MD5 hashes and rehashing on first login; confirm.
pub fn set_local_admin_password(
    manifest: &Manifest,
    login: Option<&str>,
    password: &str,
) -> Result<LocalAdmin> {
    let users_table = format!("{}users", manifest.probe.table_prefix);
    let usermeta_table = format!("{}usermeta", manifest.probe.table_prefix);
    validate_table_name(&users_table)?;
    validate_table_name(&usermeta_table)?;

    let admin = if let Some(login) = login {
        local_user_by_login(manifest, &users_table, login)?
    } else {
        local_first_admin_user(manifest, &users_table, &usermeta_table)?
    };

    let update_sql = format!(
        "UPDATE {} SET user_pass=MD5('{}'), user_activation_key='' WHERE ID={};\
         DELETE FROM {} WHERE user_id={} AND meta_key='session_tokens';",
        qualified_table(manifest, &users_table),
        mysql_string_literal(password),
        admin.id,
        qualified_table(manifest, &usermeta_table),
        admin.id
    );
    run_mysql_exec(manifest, &update_sql)?;
    Ok(admin)
}

/// Table-based routing: if any requested table already has local state, pull
/// ALL of them local (so joins stay consistent) and answer "local";
/// otherwise answer "remote" without copying anything.
pub fn route_for_tables(
    remote: &RemoteClient,
    manifest: &Manifest,
    paths: &ClonePaths,
    tables: &[String],
) -> Result<RouteDecision> {
    let state = load_state(paths)?;
    let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables);
    let touches_local = expanded
        .iter()
        .any(|table| table_has_local_state(&state, table));

    if touches_local {
        let materialized = materialize_tables(remote, manifest, paths, &expanded)?;
        Ok(RouteDecision {
            backend: "local".to_string(),
            materialized,
        })
    } else {
        Ok(RouteDecision {
            backend: "remote".to_string(),
            materialized: Vec::new(),
        })
    }
}

fn table_has_local_state(state: &DbState, table: &str) -> bool {
    state.materialized_tables.contains(table) || state.dirty_tables.contains(table)
}

/// SQL-aware routing: option-bootstrap queries and single-option lookups are
/// satisfied by copying just the needed wp_options rows locally; anything
/// else falls through to table-based routing.
pub fn route_for_query(
    remote: &RemoteClient,
    manifest: &Manifest,
    paths: &ClonePaths,
    sql_text: &str,
    tables: &[String],
) -> Result<RouteDecision> {
    let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables);
    let mut state = load_state(paths)?;

    // WordPress "autoload" bootstrap query: copy the bootstrap rows once,
    // skipping options that already have local (dirty) versions.
    if let Some(options_table) =
        option_bootstrap_table_for_sql(&manifest.probe.table_prefix, sql_text, &expanded)
    {
        if !state.option_bootstrap_tables.contains(&options_table) {
            let excluded = dirty_option_names_for_table(&state, &options_table);
            materialize_option_bootstrap(remote, manifest, &options_table, &excluded)
                .with_context(|| {
                    format!("materialize option bootstrap rows for {}", options_table)
                })?;
            state.option_bootstrap_tables.insert(options_table);
            write_state(paths, &state)?;
        }
        return Ok(RouteDecision {
            backend: "local".to_string(),
            materialized: Vec::new(),
        });
    }

    // Targeted option lookups: copy just the named rows.
    let options_table = format!("{}options", manifest.probe.table_prefix);
    let option_names = option_names_for_sql(sql_text, &options_table, &expanded);
    if !option_names.is_empty() {
        materialize_option_rows(remote, manifest, &mut state, &options_table, &option_names)
            .with_context(|| format!("materialize option rows for {}", options_table))?;
        write_state(paths, &state)?;
        return Ok(RouteDecision {
            backend: "local".to_string(),
            materialized: Vec::new(),
        });
    }

    route_for_tables(remote, manifest, paths, tables)
}

/// Guarded remote read: rejects anything the SQL classifier does not deem a
/// safe read-only statement.
pub fn remote_readonly_query(remote: &RemoteClient, sql_text: &str) -> Result<RemoteQueryResult> {
    if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) {
        return Err(anyhow!("refusing to send non-read SQL to remote"));
    }
    remote.remote_query_readonly(sql_text)
}

/// Same as `remote_readonly_query`, but consults/populates the on-disk
/// per-statement result cache first.
pub fn cached_remote_readonly_query(
    remote: &RemoteClient,
    paths: &ClonePaths,
    sql_text: &str,
) -> Result<RemoteQueryResult> {
    if let Some(result) = remote_query_cache_get(paths, sql_text)?
    {
        return Ok(result);
    }

    let result = remote_readonly_query(remote, sql_text)?;
    remote_query_cache_set(paths, sql_text, &result)?;
    Ok(result)
}

/// Cache file payload: the exact SQL text is stored alongside the result so
/// a (vanishingly unlikely) SHA-256 filename collision can be detected.
#[derive(Debug, Serialize, Deserialize)]
struct RemoteQueryCacheEntry {
    sql: String,
    result: RemoteQueryResult,
}

/// WPCOW_REMOTE_QUERY_CACHE toggle; enabled unless explicitly falsy.
fn remote_query_cache_enabled() -> bool {
    std::env::var("WPCOW_REMOTE_QUERY_CACHE")
        .ok()
        .map(|raw| {
            !matches!(
                raw.to_ascii_lowercase().as_str(),
                "0" | "false" | "no" | "off"
            )
        })
        .unwrap_or(true)
}

/// Row-count cap for cacheable results (WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS,
/// default 5000).
/// NOTE(review): generic parameters in this block were stripped by the
/// paste; reconstructed from usage — confirm against the original file.
fn remote_query_cache_max_rows() -> usize {
    std::env::var("WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS")
        .ok()
        .and_then(|raw| raw.parse::<usize>().ok())
        .filter(|rows| *rows > 0)
        .unwrap_or(5000)
}

fn remote_query_cache_dir(paths: &ClonePaths) -> PathBuf {
    paths.db.join("query-cache")
}

/// One cache file per statement, keyed by SHA-256 of the SQL text.
fn remote_query_cache_file(paths: &ClonePaths, sql_text: &str) -> PathBuf {
    let digest = Sha256::digest(sql_text.as_bytes());
    remote_query_cache_dir(paths).join(format!("{}.json", hex::encode(digest)))
}

/// Cache lookup; any read/parse failure is treated as a miss, and the stored
/// SQL must match exactly (hash-collision guard).
fn remote_query_cache_get(paths: &ClonePaths, sql_text: &str) -> Result<Option<RemoteQueryResult>> {
    if !remote_query_cache_enabled() {
        return Ok(None);
    }
    let path = remote_query_cache_file(paths, sql_text);
    let Ok(bytes) = fs::read(&path) else {
        return Ok(None);
    };
    let entry: RemoteQueryCacheEntry = match serde_json::from_slice(&bytes) {
        Ok(entry) => entry,
        Err(_) => return Ok(None),
    };
    if entry.sql == sql_text {
        Ok(Some(entry.result))
    } else {
        Ok(None)
    }
}

/// Cache store: only successful results under the row cap are written, via
/// a unique temp file + rename so concurrent writers cannot clobber each
/// other mid-write.
fn remote_query_cache_set(
    paths: &ClonePaths,
    sql_text: &str,
    result: &RemoteQueryResult,
) -> Result<()> {
    if !remote_query_cache_enabled() || !result.ok {
        return Ok(());
    }
    if result.rows.len() > remote_query_cache_max_rows() {
        return Ok(());
    }

    let dir = remote_query_cache_dir(paths);
    fs::create_dir_all(&dir)?;
    let path = remote_query_cache_file(paths, sql_text);
    let nonce = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|duration| duration.as_nanos())
        .unwrap_or_default();
    let tmp = path.with_extension(format!("{}.{}.tmp", std::process::id(), nonce));
    let entry = RemoteQueryCacheEntry {
        sql: sql_text.to_string(),
        result: result.clone(),
    };
    fs::write(&tmp, serde_json::to_vec(&entry)?)?;
    fs::rename(tmp, path)?;
    Ok(())
}

/// Re-pull the WordPress option bootstrap rows (except locally-dirtied
/// options) so a severed clone boots from fresh values. Returns the option
/// names that were refreshed.
pub fn refresh_option_bootstrap_for_offline(
    remote: &RemoteClient,
    manifest: &Manifest,
    paths: &ClonePaths,
) -> Result<Vec<String>> {
    let table = format!("{}options", manifest.probe.table_prefix);
    validate_table_name(&table)?;

    let mut state = load_state(paths)?;
    let excluded = dirty_option_names_for_table(&state, &table);
    materialize_option_bootstrap(remote, manifest, &table, &excluded)
        .with_context(|| format!("refresh option bootstrap rows for {}", table))?;

    state.option_bootstrap_tables.insert(table.clone());
    for name in option_bootstrap_names() {
        if !excluded.iter().any(|excluded_name| excluded_name == name) {
            state.option_rows.insert(option_row_key(&table, name));
        }
    }
    write_state(paths, &state)?;
    Ok(option_bootstrap_names()
        .iter()
        .filter(|name| !excluded.iter().any(|excluded_name| excluded_name == *name))
        .map(|name| (*name).to_string())
        .collect())
}

/// Routing verdict returned to the DB proxy: which backend to use and which
/// tables this call copied local.
#[derive(Debug, Serialize)]
pub struct RouteDecision {
    pub backend: String,
    pub materialized: Vec<String>,
}

/// Outcome of a row-level COW attempt; `handled == false` means the caller
/// must re-run the statement itself (see `fallback` for the reason).
#[derive(Debug, Serialize)]
pub struct RowCowResponse {
    pub handled: bool,
    pub backend: String,
    pub materialized: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result: Option<CowQueryResult>,
}

/// Execute a statement with row-level copy-on-write semantics and translate
/// the planner outcome: Select → answered directly; prepared/local-only
/// writes → mark table + option rows dirty; Fallback → optionally escalate
/// to whole-table materialization before telling the caller to retry.
pub fn row_cow_query(
    remote: &RemoteClient,
    manifest: &Manifest,
    paths: &ClonePaths,
    sql_text: &str,
    tables: &[String],
) -> Result<RowCowResponse> {
    let mut backend = MysqlRowCowBackend { remote, manifest };
    match row_cow::execute_row_cow(&mut backend, sql_text)? {
        RowCowExecution::Select(result) => Ok(RowCowResponse {
            handled: true,
            backend: "cow".to_string(),
            materialized: Vec::new(),
            fallback: None,
            result: Some(result),
        }),
        RowCowExecution::PreparedLocalWrite {
            table,
            pk_column,
            pk_values,
            ..
        } => {
            mark_dirty_table(paths, &table)?;
            mark_dirty_option_rows_for_write(
                manifest,
                paths,
                &table,
                pk_column.as_deref(),
                &pk_values,
            )?;
            Ok(RowCowResponse {
                handled: true,
                backend: "local".to_string(),
                materialized: Vec::new(),
                fallback: None,
                result: None,
            })
        }
        RowCowExecution::LocalOnlyInsert { table } => {
            mark_dirty_table(paths, &table)?;
            mark_dirty_option_rows_from_sql(manifest, paths, sql_text, &table)?;
            Ok(RowCowResponse {
                handled: true,
                backend: "local".to_string(),
                materialized: Vec::new(),
                fallback: None,
                result: None,
            })
        }
        RowCowExecution::Fallback(plan) => {
            let (fallback, plan_tables) = fallback_name_and_tables(plan);
            // Prefer the planner's own table list when the fallback kind
            // warrants whole-table materialization.
            if should_materialize_row_cow_fallback(sql_text, &fallback, &plan_tables) {
                let materialized = materialize_tables(remote, manifest, paths, &plan_tables)?;
                return Ok(RowCowResponse {
                    handled: false,
                    backend: "local".to_string(),
                    materialized,
                    fallback: Some(fallback),
                    result: None,
                });
            }

            // Writes with caller-supplied tables: pull those local so the
            // caller's retry executes locally.
            if !tables.is_empty() && sql::is_write_sql(sql_text) {
                let materialized = materialize_tables(remote, manifest, paths, tables)?;
                return Ok(RowCowResponse {
                    handled: false,
                    backend: "local".to_string(),
                    materialized,
                    fallback: Some(fallback),
                    result: None,
                });
            }

            Ok(RowCowResponse {
                handled: false,
                backend: "fallback".to_string(),
                materialized: Vec::new(),
                fallback: Some(fallback),
                result: None,
            })
        }
    }
}

/// Record a table as dirty (locally diverged) unless it is already fully
/// materialized. NOTE: this definition continues past the end of this chunk.
fn mark_dirty_table(paths: &ClonePaths, table: &str) -> Result<()> {
    validate_table_name(table)?;
    let mut state = load_state(paths)?;
    if !state.materialized_tables.contains(table) {
        state.dirty_tables.insert(table.to_string());
        write_state(paths, &state)?;
    }
Ok(()) +} + +fn mark_dirty_option_rows_for_write( + manifest: &Manifest, + paths: &ClonePaths, + table: &str, + pk_column: Option<&str>, + pk_values: &[PkValue], +) -> Result<()> { + let options_table = format!("{}options", manifest.probe.table_prefix); + if table != options_table + || !pk_column.is_some_and(|column| column.eq_ignore_ascii_case("option_name")) + { + return Ok(()); + } + + let mut state = load_state(paths)?; + for value in pk_values { + state + .dirty_option_rows + .insert(option_row_key(table, &value.0)); + } + write_state(paths, &state) +} + +fn mark_dirty_option_rows_from_sql( + manifest: &Manifest, + paths: &ClonePaths, + sql_text: &str, + table: &str, +) -> Result<()> { + let options_table = format!("{}options", manifest.probe.table_prefix); + if table != options_table { + return Ok(()); + } + let names = option_write_names_for_sql(sql_text, &options_table, &[options_table.clone()]); + if names.is_empty() { + return Ok(()); + } + + let mut state = load_state(paths)?; + for name in names { + state.dirty_option_rows.insert(option_row_key(table, &name)); + } + write_state(paths, &state) +} + +fn should_materialize_row_cow_fallback( + sql_text: &str, + fallback: &str, + plan_tables: &[String], +) -> bool { + fallback == "PromoteTable" && !plan_tables.is_empty() && sql::is_write_sql(sql_text) +} + +fn fallback_name_and_tables(plan: RowCowPlan) -> (String, Vec) { + match plan { + RowCowPlan::PromoteTable { tables, .. } => ("PromoteTable".to_string(), tables), + RowCowPlan::Unsupported { .. 
} => ("Unsupported".to_string(), Vec::new()), + RowCowPlan::RowLevel(_) => ("RowLevel".to_string(), Vec::new()), + } +} + +struct MysqlRowCowBackend<'a> { + remote: &'a RemoteClient, + manifest: &'a Manifest, +} + +impl RowCowBackend for MysqlRowCowBackend<'_> { + fn remote_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + let sql_text = row_cow::select_all_by_pk_sql(table, pk_column, pk_values)?; + let result = remote_readonly_query(self.remote, &sql_text)?; + if !result.ok { + return Err(anyhow!("remote row-COW select failed: {}", result.error)); + } + Ok(remote_query_to_cow_result(result)) + } + + fn local_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + let sql_text = format!( + "SELECT * FROM {} WHERE {};", + qualified_table(self.manifest, table), + row_cow::pk_values_where_sql(pk_column, pk_values)? + ); + local_query_result(self.manifest, &sql_text) + } + + fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result { + validate_table_name(table)?; + if rows.is_empty() { + return Ok(0); + } + + let mut columns = Vec::new(); + for row in rows { + for column in row.keys() { + if !columns.iter().any(|existing| existing == column) { + columns.push(column.clone()); + } + } + } + + let column_sql = columns + .iter() + .map(|column| row_cow::quote_identifier(column)) + .collect::>>()? 
+ .join(", "); + let values_sql = rows + .iter() + .map(|row| { + let values = columns + .iter() + .map(|column| mysql_json_value(row.get(column))) + .collect::>() + .join(", "); + format!("({values})") + }) + .collect::>() + .join(", "); + let sql_text = format!( + "REPLACE INTO {} ({column_sql}) VALUES {values_sql};", + qualified_table(self.manifest, table), + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(rows.len()) + } + + fn local_delete_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + let sql_text = format!( + "DELETE FROM {} WHERE {};", + qualified_table(self.manifest, table), + row_cow::pk_values_where_sql(pk_column, pk_values)? + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(pk_values.len()) + } + + fn local_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + ensure_row_cow_meta_table(self.manifest)?; + if pk_values.is_empty() { + return Ok(0); + } + + let values_sql = pk_values + .iter() + .map(|value| { + format!( + "('{}', '{}', '{}')", + mysql_string_literal(table), + mysql_string_literal(pk_column), + mysql_string_literal(&value.0) + ) + }) + .collect::>() + .join(", "); + let sql_text = format!( + "REPLACE INTO {} (table_name, pk_column, pk_value) VALUES {values_sql};", + qualified_table(self.manifest, ROW_COW_TOMBSTONE_TABLE) + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(pk_values.len()) + } + + fn local_clear_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + ensure_row_cow_meta_table(self.manifest)?; + if pk_values.is_empty() { + return Ok(0); + } + + let sql_text = format!( + "DELETE FROM {} WHERE table_name='{}' AND pk_column='{}' AND {};", + qualified_table(self.manifest, ROW_COW_TOMBSTONE_TABLE), + mysql_string_literal(table), + mysql_string_literal(pk_column), + 
row_cow::pk_values_where_sql("pk_value", pk_values)? + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(pk_values.len()) + } + + fn local_reserve_insert_pk(&mut self, table: &str, pk_column: Option<&str>) -> Result<()> { + let Some(pk_column) = pk_column else { + return Ok(()); + }; + if !row_cow::is_auto_increment_pk_for_table(table, pk_column) { + return Ok(()); + } + reserve_local_auto_increment(self.remote, self.manifest, table, pk_column) + } + + fn local_tombstones_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result> { + validate_table_name(table)?; + ensure_row_cow_meta_table(self.manifest)?; + if pk_values.is_empty() { + return Ok(BTreeSet::new()); + } + + let sql_text = format!( + "SELECT pk_value FROM {} WHERE table_name='{}' AND pk_column='{}' AND {};", + qualified_table(self.manifest, ROW_COW_TOMBSTONE_TABLE), + mysql_string_literal(table), + mysql_string_literal(pk_column), + row_cow::pk_values_where_sql("pk_value", pk_values)? + ); + let result = local_query_result(self.manifest, &sql_text)?; + Ok(result + .rows + .into_iter() + .filter_map(|row| { + row.get("pk_value") + .and_then(|value| value.as_str()) + .map(str::to_string) + }) + .map(PkValue) + .collect()) + } +} + +const ROW_COW_TOMBSTONE_TABLE: &str = "_wp_cow_row_tombstones"; + +#[derive(Debug, Clone, PartialEq, Eq)] +struct TombstoneGroup { + pk_column: String, + pk_values: Vec, +} + +fn ensure_row_cow_meta_table(manifest: &Manifest) -> Result<()> { + let sql_text = format!( + "CREATE TABLE IF NOT EXISTS {} (\ + table_name varchar(191) NOT NULL,\ + pk_column varchar(64) NOT NULL,\ + pk_value varchar(191) NOT NULL,\ + deleted_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,\ + PRIMARY KEY (table_name, pk_column, pk_value)\ + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;", + qualified_table(manifest, ROW_COW_TOMBSTONE_TABLE) + ); + run_mysql_exec(manifest, &sql_text) +} + +fn remote_query_to_cow_result(result: RemoteQueryResult) -> CowQueryResult { + 
CowQueryResult { + ok: result.ok, + error: result.error, + rows: result.rows, + fields: result.fields, + affected: result.affected, + } +} + +fn reserve_local_auto_increment( + remote: &RemoteClient, + manifest: &Manifest, + table: &str, + pk_column: &str, +) -> Result<()> { + validate_table_name(table)?; + validate_table_name(pk_column)?; + let remote_max = remote_max_pk(remote, table, pk_column) + .with_context(|| format!("read remote max primary key for {}", table))?; + let local_max = local_max_pk(manifest, table, pk_column) + .with_context(|| format!("read local max primary key for {}", table))?; + let Some(next_id) = remote_max.max(local_max).checked_add(1) else { + return Ok(()); + }; + if next_id <= 1 { + return Ok(()); + } + let sql_text = format!( + "ALTER TABLE {} AUTO_INCREMENT = {};", + qualified_table(manifest, table), + next_id + ); + run_mysql_exec(manifest, &sql_text) +} + +fn remote_max_pk(remote: &RemoteClient, table: &str, pk_column: &str) -> Result { + let sql_text = format!( + "SELECT MAX({}) AS max_pk FROM {};", + row_cow::quote_identifier(pk_column)?, + row_cow::quote_identifier(table)? 
+ ); + let result = remote_readonly_query(remote, &sql_text)?; + if !result.ok { + return Err(anyhow!( + "remote max primary key query failed: {}", + result.error + )); + } + max_pk_from_rows(&result.rows, "max_pk") +} + +fn local_max_pk(manifest: &Manifest, table: &str, pk_column: &str) -> Result { + let sql_text = format!( + "SELECT MAX({}) AS max_pk FROM {};", + row_cow::quote_identifier(pk_column)?, + qualified_table(manifest, table) + ); + let result = local_query_result(manifest, &sql_text)?; + max_pk_from_rows(&result.rows, "max_pk") +} + +fn max_pk_from_rows(rows: &[Row], field: &str) -> Result { + let Some(value) = rows.first().and_then(|row| row.get(field)) else { + return Ok(0); + }; + match value { + serde_json::Value::Null => Ok(0), + serde_json::Value::Number(number) => Ok(number.as_u64().unwrap_or(0)), + serde_json::Value::String(raw) => { + let raw = raw.trim(); + if raw.is_empty() || raw.eq_ignore_ascii_case("null") { + Ok(0) + } else { + raw.parse::() + .with_context(|| format!("parse max primary key value from {}", raw)) + } + } + _ => Ok(0), + } +} + +fn mysql_json_value(value: Option<&serde_json::Value>) -> String { + match value { + None | Some(serde_json::Value::Null) => "NULL".to_string(), + Some(serde_json::Value::Bool(value)) => { + if *value { + "1".to_string() + } else { + "0".to_string() + } + } + Some(serde_json::Value::Number(value)) => value.to_string(), + Some(serde_json::Value::String(value)) => { + format!("'{}'", mysql_string_literal(value)) + } + Some(value) => format!("'{}'", mysql_string_literal(&value.to_string())), + } +} + +fn materialize_one_table( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + table: &str, +) -> Result<()> { + validate_table_name(table)?; + fs::create_dir_all(&paths.db)?; + let overlay_dump = paths.db.join(format!( + ".wp-cow-local-overlay-{}-{}.sql", + std::process::id(), + table + )); + dump_local_table_overlay(manifest, table, &overlay_dump) + .with_context(|| format!("dump 
local overlay rows for {}", table))?; + let tombstones = local_row_cow_tombstones_for_table(manifest, table) + .with_context(|| format!("load local row tombstones for {}", table))?; + + let materialized = materialize_remote_table(remote, manifest, table) + .with_context(|| format!("import remote lower table {}", table)); + if let Err(err) = materialized { + let _ = import_sql_file(manifest, &overlay_dump); + let _ = fs::remove_file(&overlay_dump); + return Err(err); + } + + import_sql_file(manifest, &overlay_dump) + .with_context(|| format!("restore local overlay rows for {}", table))?; + apply_row_cow_tombstones(manifest, table, &tombstones) + .with_context(|| format!("apply local row tombstones for {}", table))?; + fs::remove_file(&overlay_dump).with_context(|| format!("remove {}", overlay_dump.display()))?; + Ok(()) +} + +fn materialize_remote_table(remote: &RemoteClient, manifest: &Manifest, table: &str) -> Result<()> { + let probe = &manifest.probe; + ensure_probe_has_db(probe)?; + let delete_sql = format!("DELETE FROM {};", qualified_table(manifest, table)); + run_mysql_exec(manifest, &delete_sql)?; + + let dump_command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --single-transaction --quick --skip-lock-tables --no-create-info --replace {} {}", + shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&probe.db_name), + shell_quote(table) + ); + + let mut ssh = remote + .command(&dump_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote mysqldump over ssh")?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut mysql_child = mysql.spawn().context("start local mysql import")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut mysql_stdin = mysql_child.stdin.take().expect("mysql stdin piped"); + io::copy(&mut ssh_stdout, &mut mysql_stdin)?; + } + 
+ let ssh_output = ssh.wait_with_output()?; + let mysql_status = mysql_child.wait()?; + + if !ssh_output.status.success() { + return Err(anyhow!( + "remote mysqldump failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !mysql_status.success() { + return Err(anyhow!( + "local mysql import failed with status {}", + mysql_status + )); + } + Ok(()) +} + +fn dump_local_table_overlay(manifest: &Manifest, table: &str, path: &PathBuf) -> Result<()> { + let dump_file = File::create(path).with_context(|| format!("create {}", path.display()))?; + let mut dump = local_mysqldump_command(manifest); + dump.arg("--single-transaction") + .arg("--quick") + .arg("--skip-lock-tables") + .arg("--no-create-info") + .arg("--replace") + .arg(&manifest.local_db.name) + .arg(table) + .stdout(Stdio::from(dump_file)) + .stderr(Stdio::piped()); + + let output = dump + .spawn() + .context("start local mysqldump overlay export")? + .wait_with_output() + .context("wait for local mysqldump overlay export")?; + if !output.status.success() { + return Err(anyhow!( + "local mysqldump overlay export failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + Ok(()) +} + +fn import_sql_file(manifest: &Manifest, path: &PathBuf) -> Result<()> { + let input = File::open(path).with_context(|| format!("open {}", path.display()))?; + let mut mysql = local_mysql_command(manifest); + mysql + .arg(&manifest.local_db.name) + .stdin(Stdio::from(input)) + .stderr(Stdio::piped()); + let output = mysql + .spawn() + .context("start local mysql import")? 
+ .wait_with_output() + .context("wait for local mysql import")?; + if !output.status.success() { + return Err(anyhow!( + "local mysql import failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + Ok(()) +} + +fn local_row_cow_tombstones_for_table( + manifest: &Manifest, + table: &str, +) -> Result> { + validate_table_name(table)?; + ensure_row_cow_meta_table(manifest)?; + let sql_text = format!( + "SELECT pk_column, pk_value FROM {} WHERE table_name='{}' ORDER BY pk_column, pk_value;", + qualified_table(manifest, ROW_COW_TOMBSTONE_TABLE), + mysql_string_literal(table) + ); + let result = local_query_result(manifest, &sql_text)?; + let mut grouped = std::collections::BTreeMap::>::new(); + for row in result.rows { + let Some(pk_column) = row.get("pk_column").and_then(|value| value.as_str()) else { + continue; + }; + let Some(pk_value) = row.get("pk_value").and_then(|value| value.as_str()) else { + continue; + }; + grouped + .entry(pk_column.to_string()) + .or_default() + .push(PkValue(pk_value.to_string())); + } + + Ok(grouped + .into_iter() + .map(|(pk_column, pk_values)| TombstoneGroup { + pk_column, + pk_values, + }) + .collect()) +} + +fn apply_row_cow_tombstones( + manifest: &Manifest, + table: &str, + tombstones: &[TombstoneGroup], +) -> Result<()> { + for sql_text in row_cow_tombstone_delete_sqls(manifest, table, tombstones)? { + run_mysql_exec(manifest, &sql_text)?; + } + Ok(()) +} + +fn row_cow_tombstone_delete_sqls( + manifest: &Manifest, + table: &str, + tombstones: &[TombstoneGroup], +) -> Result> { + validate_table_name(table)?; + tombstones + .iter() + .filter(|group| !group.pk_values.is_empty()) + .map(|group| { + Ok(format!( + "DELETE FROM {} WHERE {};", + qualified_table(manifest, table), + row_cow::pk_values_where_sql(&group.pk_column, &group.pk_values)? 
+ )) + }) + .collect() +} + +fn local_first_admin_user( + manifest: &Manifest, + users_table: &str, + usermeta_table: &str, +) -> Result { + let capabilities_key = format!("{}capabilities", manifest.probe.table_prefix); + let sql_text = format!( + "SELECT u.ID, u.user_login \ + FROM {} u \ + JOIN {} m ON m.user_id = u.ID \ + WHERE m.meta_key = '{}' AND m.meta_value LIKE '%administrator%' \ + ORDER BY u.ID LIMIT 1;", + qualified_table(manifest, users_table), + qualified_table(manifest, usermeta_table), + mysql_string_literal(&capabilities_key) + ); + local_admin_from_query(manifest, &sql_text, "find local administrator user") +} + +fn local_user_by_login(manifest: &Manifest, users_table: &str, login: &str) -> Result { + let sql_text = format!( + "SELECT ID, user_login FROM {} WHERE user_login='{}' LIMIT 1;", + qualified_table(manifest, users_table), + mysql_string_literal(login) + ); + local_admin_from_query(manifest, &sql_text, "find requested local user") +} + +fn local_admin_from_query( + manifest: &Manifest, + sql_text: &str, + context: &'static str, +) -> Result { + let output = local_mysql_command(manifest) + .arg("--batch") + .arg("--raw") + .arg("--skip-column-names") + .arg("--execute") + .arg(sql_text) + .output() + .context(context)?; + if !output.status.success() { + return Err(anyhow!( + "{} failed: {}", + context, + String::from_utf8_lossy(&output.stderr) + )); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let line = stdout + .lines() + .next() + .ok_or_else(|| anyhow!("no local administrator user found"))?; + let (id, user_login) = line + .split_once('\t') + .ok_or_else(|| anyhow!("unexpected local user query output: {}", line))?; + let id = id + .parse::() + .with_context(|| format!("parse local user id from {}", id))?; + Ok(LocalAdmin { + id, + user_login: user_login.to_string(), + }) +} + +fn materialize_option_bootstrap( + remote: &RemoteClient, + manifest: &Manifest, + table: &str, + excluded_names: &[String], +) -> Result<()> { 
+ let probe = &manifest.probe; + ensure_probe_has_db(probe)?; + validate_table_name(table)?; + + let where_sql = option_bootstrap_where_sql_excluding(excluded_names); + let delete_sql = format!( + "DELETE FROM {} WHERE {};", + qualified_table(manifest, table), + where_sql + ); + run_mysql_exec(manifest, &delete_sql)?; + + let dump_command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --single-transaction --quick --skip-lock-tables --no-create-info --replace --where={} {} {}", + shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&where_sql), + shell_quote(&probe.db_name), + shell_quote(table) + ); + + let mut ssh = remote + .command(&dump_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote option bootstrap mysqldump over ssh")?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut mysql_child = mysql.spawn().context("start local mysql option import")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut mysql_stdin = mysql_child.stdin.take().expect("mysql stdin piped"); + io::copy(&mut ssh_stdout, &mut mysql_stdin)?; + } + + let ssh_output = ssh.wait_with_output()?; + let mysql_status = mysql_child.wait()?; + + if !ssh_output.status.success() { + return Err(anyhow!( + "remote option bootstrap mysqldump failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !mysql_status.success() { + return Err(anyhow!( + "local mysql option bootstrap import failed with status {}", + mysql_status + )); + } + Ok(()) +} + +fn materialize_option_rows( + remote: &RemoteClient, + manifest: &Manifest, + state: &mut DbState, + table: &str, + names: &[String], +) -> Result<()> { + let probe = &manifest.probe; + ensure_probe_has_db(probe)?; + validate_table_name(table)?; + + let missing = names + .iter() + .filter(|name| 
!state.option_rows.contains(&option_row_key(table, name))) + .filter(|name| { + !state + .dirty_option_rows + .contains(&option_row_key(table, name)) + }) + .cloned() + .collect::>(); + if missing.is_empty() { + return Ok(()); + } + + let where_sql = option_names_where_sql(&missing); + let delete_sql = format!( + "DELETE FROM {} WHERE {};", + qualified_table(manifest, table), + where_sql + ); + run_mysql_exec(manifest, &delete_sql)?; + + let dump_command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --single-transaction --quick --skip-lock-tables --no-create-info --replace --where={} {} {}", + shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&where_sql), + shell_quote(&probe.db_name), + shell_quote(table) + ); + + let mut ssh = remote + .command(&dump_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote option row mysqldump over ssh")?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut mysql_child = mysql + .spawn() + .context("start local mysql option row import")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut mysql_stdin = mysql_child.stdin.take().expect("mysql stdin piped"); + io::copy(&mut ssh_stdout, &mut mysql_stdin)?; + } + + let ssh_output = ssh.wait_with_output()?; + let mysql_status = mysql_child.wait()?; + + if !ssh_output.status.success() { + return Err(anyhow!( + "remote option row mysqldump failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !mysql_status.success() { + return Err(anyhow!( + "local mysql option row import failed with status {}", + mysql_status + )); + } + + for name in missing { + state.option_rows.insert(option_row_key(table, &name)); + } + Ok(()) +} + +fn option_bootstrap_table_for_sql( + table_prefix: &str, + sql_text: &str, + tables: &[String], +) -> Option { + if 
!sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) {
        return None;
    }

    let options_table = format!("{}options", table_prefix);
    if !tables.iter().any(|table| table == &options_table) {
        return None;
    }

    let lower = sql_text.to_ascii_lowercase();
    // Autoload scans are the classic wp_load_alloptions() bootstrap query.
    if lower.contains("autoload") {
        return Some(options_table);
    }

    // Point lookups of well-known bootstrap options (siteurl, home, ...).
    if lower.contains("option_name")
        && option_bootstrap_names()
            .iter()
            .any(|name| lower.contains(&format!("'{}'", name)))
    {
        return Some(options_table);
    }

    None
}

/// Option names referenced by a safe read-only query against the options
/// table. Returns an empty list for writes or SQL deemed unsafe to inspect.
fn option_names_for_sql(sql_text: &str, options_table: &str, tables: &[String]) -> Vec<String> {
    if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) {
        return Vec::new();
    }
    option_names_for_option_predicate(sql_text, options_table, tables)
}

/// Option names referenced by a write statement against the options table.
fn option_write_names_for_sql(
    sql_text: &str,
    options_table: &str,
    tables: &[String],
) -> Vec<String> {
    if !sql::is_write_sql(sql_text) {
        return Vec::new();
    }
    option_names_for_option_predicate(sql_text, options_table, tables)
}

/// Extracts option names from an `option_name = '...'` or
/// `option_name IN ('...', ...)` predicate.
///
/// This is a deliberate heuristic textual scan, not a SQL parser: only the
/// first `option_name` occurrence is examined, and the string literals that
/// follow it are read out.
fn option_names_for_option_predicate(
    sql_text: &str,
    options_table: &str,
    tables: &[String],
) -> Vec<String> {
    if !tables.iter().any(|table| table == options_table) {
        return Vec::new();
    }

    // ASCII lowercasing preserves byte offsets, so positions found in `lower`
    // are valid slice indices into `sql_text`.
    let lower = sql_text.to_ascii_lowercase();
    let Some(option_name_pos) = lower.find("option_name") else {
        return Vec::new();
    };
    let tail = &sql_text[option_name_pos + "option_name".len()..];
    let lower_tail = &lower[option_name_pos + "option_name".len()..];

    // `option_name = 'x'` — only whitespace or a closing backtick may sit
    // between the column name and the '='.
    if let Some(eq_pos) = lower_tail.find('=') {
        if lower_tail[..eq_pos]
            .chars()
            .all(|ch| ch.is_ascii_whitespace() || ch == '`')
        {
            return first_sql_string_literal(&tail[eq_pos + 1..])
                .into_iter()
                .collect();
        }
    }

    // `option_name IN ('a', 'b')` — requires spaces around IN.
    if let Some(in_pos) = lower_tail.find(" in ") {
        return sql_string_literals_until_closing_paren(&tail[in_pos + 4..]);
    }

    Vec::new()
}

/// WHERE clause selecting the rows WordPress needs to boot: every autoloaded
/// option plus the well-known bootstrap option names.
fn option_bootstrap_where_sql() -> String {
    let names = option_bootstrap_names()
        .iter()
        .map(|name| format!("'{}'", mysql_string_literal(name)))
        .collect::<Vec<_>>()
        .join(", ");
    format!("autoload IN ('yes', 'on', 'auto-on', 'auto') OR option_name IN ({names})")
}

/// Bootstrap WHERE clause minus locally-dirty option rows, so a refresh never
/// clobbers local edits.
fn option_bootstrap_where_sql_excluding(excluded_names: &[String]) -> String {
    let base = option_bootstrap_where_sql();
    if excluded_names.is_empty() {
        return base;
    }

    let excluded = excluded_names
        .iter()
        .map(|name| format!("'{}'", mysql_string_literal(name)))
        .collect::<Vec<_>>()
        .join(", ");
    format!("({base}) AND option_name NOT IN ({excluded})")
}

/// `option_name IN (...)` clause for an explicit list of option names.
fn option_names_where_sql(names: &[String]) -> String {
    let names = names
        .iter()
        .map(|name| format!("'{}'", mysql_string_literal(name)))
        .collect::<Vec<_>>()
        .join(", ");
    format!("option_name IN ({names})")
}

/// State-tracking key for a single option row ("<table>:<option_name>").
fn option_row_key(table: &str, name: &str) -> String {
    format!("{table}:{name}")
}

/// Option names marked dirty for `table` in the persisted DB state.
fn dirty_option_names_for_table(state: &DbState, table: &str) -> Vec<String> {
    let prefix = format!("{table}:");
    state
        .dirty_option_rows
        .iter()
        .filter_map(|key| key.strip_prefix(&prefix).map(str::to_string))
        .collect()
}

/// Options WordPress reads on virtually every request; these are kept locally
/// so the clone can boot without a remote round-trip.
fn option_bootstrap_names() -> &'static [&'static str] {
    &[
        "siteurl",
        "home",
        "blogname",
        "blogdescription",
        "admin_email",
        "active_plugins",
        "template",
        "stylesheet",
        "current_theme",
        "permalink_structure",
        "rewrite_rules",
        "sidebars_widgets",
        "stylesheet_root",
        "template_root",
        "upload_path",
        "upload_url_path",
    ]
}

/// Base `mysql` CLI invocation for the local clone DB. The password goes via
/// the MYSQL_PWD environment variable rather than argv so it is not visible
/// in the local process list.
fn local_mysql_command(manifest: &Manifest) -> Command {
    let mut command = Command::new("mysql");
    command.arg("--host").arg(&manifest.local_db.host);
    command
        .arg("--port")
        .arg(manifest.local_db.port.to_string());
    command.arg("--user").arg(&manifest.local_db.user);
    if !manifest.local_db.password.is_empty() {
        command.env("MYSQL_PWD", &manifest.local_db.password);
    }
    command
}

/// Base `mysqldump` CLI invocation for the local clone DB (same credential
/// handling as `local_mysql_command`).
fn local_mysqldump_command(manifest: &Manifest) -> Command {
    let mut command = Command::new("mysqldump");
    command.arg("--host").arg(&manifest.local_db.host);
    command
        .arg("--port")
        .arg(manifest.local_db.port.to_string());
    command.arg("--user").arg(&manifest.local_db.user);
    if !manifest.local_db.password.is_empty() {
        command.env("MYSQL_PWD", &manifest.local_db.password);
    }
    command
}

/// Fully-qualified, backtick-quoted `db`.`table` reference; embedded backticks
/// are doubled per MySQL identifier-quoting rules.
fn qualified_table(manifest: &Manifest, table: &str) -> String {
    format!(
        "`{}`.`{}`",
        manifest.local_db.name.replace('`', "``"),
        table.replace('`', "``")
    )
}

/// Runs a statement against the local clone DB via the `mysql` CLI.
pub(crate) fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> {
    let mut command = local_mysql_command(manifest);
    command.arg("--execute").arg(sql_text);
    let status = command.status().context("run local mysql")?;
    if !status.success() {
        return Err(anyhow!("local mysql failed with status {}", status));
    }
    Ok(())
}

/// Runs a query against the local clone DB over a native connection and
/// collects rows into the JSON-valued row-COW result shape.
pub(crate) fn local_query_result(manifest: &Manifest, sql_text: &str) -> Result<CowQueryResult> {
    let mut conn = local_mysql_conn(manifest)?;
    let result = conn
        .query_iter(sql_text)
        .with_context(|| format!("run local mysql query: {sql_text}"))?;
    let fields = result
        .columns()
        .as_ref()
        .iter()
        .map(|column| column.name_str().to_string())
        .collect::<Vec<_>>();
    let mut rows = Vec::new();
    for row in result {
        let row = row.context("read local mysql row")?;
        let values = row.unwrap();
        let mut row = Row::new();
        for (idx, field) in fields.iter().enumerate() {
            // Missing cells (shorter row than header) become SQL NULL.
            let value = values.get(idx).cloned().unwrap_or(mysql::Value::NULL);
            row.insert(field.clone(), mysql_value_to_json(value));
        }
        rows.push(row);
    }

    Ok(CowQueryResult::ok(rows, fields))
}

/// Opens a native connection to the local clone DB.
fn local_mysql_conn(manifest: &Manifest) -> Result<mysql::Conn> {
    let mut builder = mysql::OptsBuilder::new()
        .ip_or_hostname(Some(manifest.local_db.host.clone()))
        .tcp_port(manifest.local_db.port)
        .user(Some(manifest.local_db.user.clone()))
        .db_name(Some(manifest.local_db.name.clone()));
    if !manifest.local_db.password.is_empty() {
        builder = builder.pass(Some(manifest.local_db.password.clone()));
    }
    mysql::Conn::new(builder).context("connect to local mysql")
}

/// Converts a mysql cell value into a JSON value. Every non-NULL value is
/// rendered as a string so the consumer sees the same lossless text a
/// `mysql` CLI client would print.
fn mysql_value_to_json(value: mysql::Value) -> serde_json::Value {
    match value {
        mysql::Value::NULL => serde_json::Value::Null,
        mysql::Value::Bytes(bytes) => {
            serde_json::Value::String(String::from_utf8_lossy(&bytes).into())
        }
        mysql::Value::Int(value) => serde_json::Value::String(value.to_string()),
        mysql::Value::UInt(value) => serde_json::Value::String(value.to_string()),
        mysql::Value::Float(value) => serde_json::Value::String(value.to_string()),
        mysql::Value::Double(value) => serde_json::Value::String(value.to_string()),
        mysql::Value::Date(year, month, day, hour, minute, second, micros) => {
            serde_json::Value::String(format!(
                "{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}.{:06}",
                micros
            ))
        }
        mysql::Value::Time(negative, days, hours, minutes, seconds, micros) => {
            let sign = if negative { "-" } else { "" };
            serde_json::Value::String(format!(
                "{sign}{days} {hours:02}:{minutes:02}:{seconds:02}.{:06}",
                micros
            ))
        }
    }
}

/// Escapes a value for embedding in a single-quoted MySQL string literal.
fn mysql_string_literal(value: &str) -> String {
    value.replace('\\', "\\\\").replace('\'', "\\'")
}

/// Returns the first single-quoted string literal found before the next
/// closing parenthesis, if any.
fn first_sql_string_literal(input: &str) -> Option<String> {
    sql_string_literals_until_closing_paren(input)
        .into_iter()
        .next()
}

/// Collects all non-empty single-quoted string literals that appear before
/// the first unquoted `)`. Handles backslash escapes and doubled `''` quotes.
fn sql_string_literals_until_closing_paren(input: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut chars = input.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch == ')' {
            break;
        }
        if ch != '\'' {
            continue;
        }
        let mut value = String::new();
        while let Some(ch) = chars.next() {
            if ch == '\\' {
                // Backslash escape: keep the escaped character verbatim.
                if let Some(next) = chars.next() {
                    value.push(next);
                }
                continue;
            }
            if ch == '\'' {
                // `''` inside a literal is an escaped single quote.
                if chars.peek() == Some(&'\'') {
                    let _ = chars.next();
                    value.push('\'');
                    continue;
                }
                break;
            }
            value.push(ch);
        }
        if !value.is_empty() {
            out.push(value);
        }
    }
    out
}

/// Rejects table names containing anything other than ASCII alphanumerics,
/// `_`, or `$` — the safe identifier subset used before interpolation.
fn validate_table_name(table: &str) -> Result<()> {
    if table.is_empty()
        || !table
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$')
    {
        return Err(anyhow!("unsafe table name {}", table));
    }
    Ok(())
}

/// Asserts the remote probe returned complete database credentials.
fn ensure_probe_has_db(probe: &crate::config::Probe) -> Result<()> {
    if probe.db_name.is_empty() || probe.db_user.is_empty() || probe.db_host.is_empty() {
        return Err(anyhow!("remote probe did not return database credentials"));
    }
    Ok(())
}

/// Renders `--host`/`--port`/`--socket` CLI options for a WordPress-style
/// DB_HOST value, which may be `host`, `host:port`, or `host:/socket/path`.
fn remote_mysql_cli_options(db_host: &str) -> String {
    if let Some(idx) = db_host.find(":/") {
        // `host:/path` form: everything from the `/` onward is a unix socket.
        let host = &db_host[..idx];
        let socket = &db_host[idx + 1..];
        return format!(
            "--host={} --socket={}",
            shell_quote(host),
            shell_quote(socket)
        );
    }

    if let Some((host, port)) = db_host.rsplit_once(':') {
        // Only treat the suffix as a port when it parses as one.
        if port.parse::<u16>().is_ok() {
            return format!("--host={} --port={}", shell_quote(host), shell_quote(port));
        }
    }

    format!("--host={}", shell_quote(db_host))
}

/// Splits the manifest's local DB endpoint into `(host, port)`.
#[allow(dead_code)]
pub fn local_db_host_port(manifest: &Manifest) -> (String, u16) {
    parse_host_port(
        &format!("{}:{}", manifest.local_db.host, manifest.local_db.port),
        manifest.local_db.port,
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    // Minimal manifest fixture pointing at loopback services.
    fn test_manifest() -> Manifest {
        Manifest {
            version: crate::config::MANIFEST_VERSION,
            name: "calm".to_string(),
            ssh: "example".to_string(),
            remote_path: "/srv/www".to_string(),
            remote_url: "https://example.com".to_string(),
            local_url: "http://localhost:9481".to_string(),
            created_at_unix: 1,
            probe: crate::config::Probe::default(),
            local_db: crate::config::LocalDb {
                name: "cow_calm".to_string(),
                user: "cow_calm".to_string(),
                password: String::new(),
                host: "127.0.0.1".to_string(),
                port: 33071,
            },
            db_proxy: crate::config::DbProxy {
                host: "127.0.0.1".to_string(),
                port: 33070,
            },
            remote_db_tunnel: crate::config::RemoteDbTunnel {
                host: "127.0.0.1".to_string(),
                port: 33072,
            },
            control_url: "http://127.0.0.1:39070".to_string(),
            cache_max_file_bytes: 1024,
            remote_metadata_cache_ttl_secs: 30,
        }
    }

    #[test]
    fn rejects_unsafe_table_names() {
assert!(validate_table_name("wp_posts").is_ok()); + assert!(validate_table_name("wp-posts").is_err()); + assert!(validate_table_name("wp_posts;DROP").is_err()); + } + + #[test] + fn formats_remote_mysql_host_variants() { + assert_eq!(remote_mysql_cli_options("localhost"), "--host='localhost'"); + assert_eq!( + remote_mysql_cli_options("db.example.com:3307"), + "--host='db.example.com' --port='3307'" + ); + assert_eq!( + remote_mysql_cli_options("localhost:/tmp/mysql.sock"), + "--host='localhost' --socket='/tmp/mysql.sock'" + ); + } + + #[test] + fn detects_option_bootstrap_reads() { + let tables = vec!["ady_options".to_string()]; + assert_eq!( + option_bootstrap_table_for_sql( + "ady_", + "SELECT option_name, option_value FROM ady_options WHERE autoload IN ( 'yes', 'on', 'auto-on', 'auto' )", + &tables + ), + Some("ady_options".to_string()) + ); + assert_eq!( + option_bootstrap_table_for_sql( + "ady_", + "SELECT option_value FROM ady_options WHERE option_name = 'siteurl' LIMIT 1", + &tables + ), + Some("ady_options".to_string()) + ); + assert_eq!( + option_bootstrap_table_for_sql( + "ady_", + "SELECT option_value FROM ady_options WHERE option_name = 'some_plugin_option' LIMIT 1", + &tables + ), + None + ); + } + + #[test] + fn extracts_targeted_option_reads() { + let tables = vec!["ady_options".to_string()]; + assert_eq!( + option_names_for_sql( + "SELECT option_value FROM ady_options WHERE option_name = 'aioseo_options_internal_localized' LIMIT 1", + "ady_options", + &tables + ), + vec!["aioseo_options_internal_localized".to_string()] + ); + assert_eq!( + option_names_for_sql( + "SELECT * FROM ady_options WHERE option_name IN ('a', 'b')", + "ady_options", + &tables + ), + vec!["a".to_string(), "b".to_string()] + ); + } + + #[test] + fn extracts_dirty_option_write_names() { + let tables = vec!["ady_options".to_string()]; + assert_eq!( + option_write_names_for_sql( + "UPDATE ady_options SET option_value = 'neve' WHERE option_name = 'template'", + "ady_options", + 
&tables, + ), + vec!["template".to_string()] + ); + assert_eq!( + option_write_names_for_sql( + "DELETE FROM ady_options WHERE option_name IN ('template', 'stylesheet')", + "ady_options", + &tables, + ), + vec!["template".to_string(), "stylesheet".to_string()] + ); + } + + #[test] + fn option_bootstrap_refresh_can_preserve_dirty_rows() { + let where_sql = option_bootstrap_where_sql_excluding(&[ + "template".to_string(), + "stylesheet".to_string(), + ]); + assert!(where_sql.contains("autoload IN")); + assert!(where_sql.contains("option_name NOT IN ('template', 'stylesheet')")); + } + + #[test] + fn qualifies_local_tables_for_exec_without_selected_database() { + let manifest = test_manifest(); + assert_eq!( + qualified_table(&manifest, "ady_options"), + "`cow_calm`.`ady_options`" + ); + } + + #[test] + fn tombstone_delete_sql_preserves_overlay_on_table_promotion() { + let manifest = test_manifest(); + let sql = row_cow_tombstone_delete_sqls( + &manifest, + "wp_posts", + &[TombstoneGroup { + pk_column: "ID".to_string(), + pk_values: vec![PkValue("7".to_string()), PkValue("9".to_string())], + }], + ) + .unwrap(); + + assert_eq!( + sql, + vec!["DELETE FROM `cow_calm`.`wp_posts` WHERE `ID` IN ('7', '9');"] + ); + } + + #[test] + fn parses_max_primary_key_rows_for_auto_increment_reservation() { + let mut row = Row::new(); + row.insert( + "max_pk".to_string(), + serde_json::Value::String("184".to_string()), + ); + assert_eq!(max_pk_from_rows(&[row], "max_pk").unwrap(), 184); + + let mut null_row = Row::new(); + null_row.insert("max_pk".to_string(), serde_json::Value::Null); + assert_eq!(max_pk_from_rows(&[null_row], "max_pk").unwrap(), 0); + + assert_eq!(max_pk_from_rows(&[], "max_pk").unwrap(), 0); + } + + #[test] + fn mysql_value_conversion_preserves_multiline_wordpress_content() { + let value = mysql_value_to_json(mysql::Value::Bytes( + b"before\n

About

\nafter\tTabbed".to_vec(), + )); + assert_eq!( + value, + serde_json::Value::String("before\n

About

\nafter\tTabbed".to_string()) + ); + } + + #[test] + fn row_cow_safe_read_fallbacks_do_not_promote_tables() { + let tables = vec!["ady_options".to_string()]; + assert!( + !should_materialize_row_cow_fallback( + "SELECT option_name, option_value FROM ady_options WHERE autoload IN ('yes', 'on')", + "PromoteTable", + &tables, + ), + "safe live-lower reads should route to the remote lower layer instead of dumping full tables" + ); + assert!( + should_materialize_row_cow_fallback( + "UPDATE ady_options SET option_value='x' WHERE autoload='yes'", + "PromoteTable", + &tables, + ), + "write fallbacks still need local table promotion before the write executes" + ); + } + + #[test] + fn dirty_row_overlay_tables_are_local_state() { + let mut state = DbState::default(); + assert!(!table_has_local_state(&state, "wp_posts")); + state.dirty_tables.insert("wp_posts".to_string()); + assert!( + table_has_local_state(&state, "wp_posts"), + "complex plugin reads must not route to remote after local row overlays or tombstones" + ); + } + + #[test] + fn remote_query_cache_round_trips_safe_read_results() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + crate::config::ensure_clone_dirs(&paths).unwrap(); + let sql = "SELECT ID, post_title FROM wp_posts WHERE post_status = 'publish'"; + + let mut row = serde_json::Map::new(); + row.insert("ID".to_string(), serde_json::Value::String("7".to_string())); + row.insert( + "post_title".to_string(), + serde_json::Value::String("Cached".to_string()), + ); + let result = RemoteQueryResult { + ok: true, + error: String::new(), + rows: vec![row], + fields: vec!["ID".to_string(), "post_title".to_string()], + affected: 1, + }; + + remote_query_cache_set(&paths, sql, &result).unwrap(); + assert_eq!( + remote_query_cache_get(&paths, sql) + .unwrap() + .unwrap() + .rows + .len(), + 1 + ); + assert!( + remote_query_cache_get(&paths, "SELECT ID FROM wp_posts") + .unwrap() + .is_none(), + "cache 
files are keyed and verified by SQL text" + ); + } +} diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs new file mode 100644 index 00000000..1f060ee4 --- /dev/null +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -0,0 +1,1341 @@ +use anyhow::Result; +use fuser::{ + FileAttr, FileType, Filesystem, KernelConfig, MountOption, ReplyAttr, ReplyCreate, ReplyData, + ReplyDirectory, ReplyEmpty, ReplyEntry, ReplyOpen, ReplyWrite, Request, +}; +use libc::{EIO, ENOENT, ENOTSUP}; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::ffi::{OsStr, OsString}; +use std::fs::{self, File, OpenOptions}; +use std::io; +use std::os::unix::fs::{FileExt, MetadataExt, OpenOptionsExt, PermissionsExt}; +use std::path::{Path, PathBuf}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; + +use crate::config::{self, ClonePaths, Manifest}; +use crate::overlay::OverlayStore; +use crate::remote::{RemoteClient, RemoteEntry}; + +const ROOT_INO: u64 = 1; +const DEFAULT_KERNEL_CACHE_TTL_SECS: u64 = 60; + +#[derive(Clone)] +struct Timed { + value: T, + expires_at: Instant, +} + +enum Handle { + Local(File), + Remote(PathBuf), +} + +pub struct CowFs { + manifest: Manifest, + remote: RemoteClient, + overlay: OverlayStore, + ino_to_path: HashMap, + path_to_ino: HashMap, + next_ino: u64, + handles: HashMap, + next_fh: u64, + remote_stat_cache: HashMap>, + remote_missing_cache: HashMap, + remote_readdir_cache: HashMap>>, + runtime_prefetch_dirs: HashSet, + remote_cache_ttl: Duration, + kernel_cache_ttl: Duration, + offline: bool, + uid: u32, + gid: u32, +} + +impl CowFs { + pub fn new(manifest: Manifest, paths: &ClonePaths, remote: RemoteClient) -> Self { + let mut ino_to_path = HashMap::new(); + let mut path_to_ino = HashMap::new(); + ino_to_path.insert(ROOT_INO, PathBuf::new()); + path_to_ino.insert(PathBuf::new(), ROOT_INO); + let remote_cache_ttl = Duration::from_secs(manifest.remote_metadata_cache_ttl_secs); + let kernel_cache_ttl = 
Duration::from_secs(env_u64( + "WPCOW_FUSE_TTL_SECS", + DEFAULT_KERNEL_CACHE_TTL_SECS, + )); + let offline = config::is_offline(paths); + Self { + manifest, + remote, + overlay: OverlayStore::new(paths), + ino_to_path, + path_to_ino, + next_ino: ROOT_INO + 1, + handles: HashMap::new(), + next_fh: 1, + remote_stat_cache: HashMap::new(), + remote_missing_cache: HashMap::new(), + remote_readdir_cache: HashMap::new(), + runtime_prefetch_dirs: HashSet::new(), + remote_cache_ttl, + kernel_cache_ttl, + offline, + uid: unsafe { libc::getuid() }, + gid: unsafe { libc::getgid() }, + } + } + + fn ino_for_path(&mut self, rel: &Path) -> u64 { + let rel = rel.to_path_buf(); + if let Some(ino) = self.path_to_ino.get(&rel) { + return *ino; + } + let ino = self.next_ino; + self.next_ino += 1; + self.path_to_ino.insert(rel.clone(), ino); + self.ino_to_path.insert(ino, rel); + ino + } + + fn path_for_ino(&self, ino: u64) -> Option { + self.ino_to_path.get(&ino).cloned() + } + + fn child_path(&self, parent: u64, name: &OsStr) -> io::Result { + let parent_path = self + .path_for_ino(parent) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown parent inode"))?; + let mut child = parent_path; + child.push(name); + OverlayStore::clean_rel(&child) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string())) + } + + fn attr_for_path(&mut self, rel: &Path, ino: u64) -> io::Result { + if self.overlay.is_whiteout(rel).map_err(anyhow_to_io)? { + return Err(io::Error::new(io::ErrorKind::NotFound, "whiteout")); + } + + let upper = self.overlay.upper_path(rel).map_err(anyhow_to_io)?; + if let Ok(metadata) = fs::symlink_metadata(&upper) { + return Ok(self.attr_from_metadata(ino, &metadata)); + } + + let mirror = self.overlay.mirror_path(rel).map_err(anyhow_to_io)?; + if let Ok(metadata) = fs::symlink_metadata(&mirror) { + return Ok(self.attr_from_metadata(ino, &metadata)); + } + + if self.has_opaque_ancestor_active(rel)? 
{ + return Err(io::Error::new( + io::ErrorKind::NotFound, + "hidden by local opaque directory", + )); + } + + let entry = self.remote_stat(rel)?; + Ok(self.attr_from_remote(ino, &entry)) + } + + fn remote_stat(&mut self, rel: &Path) -> io::Result { + if let Some(cached) = self.remote_stat_cache.get(rel) { + if cached.expires_at > Instant::now() { + return Ok(cached.value.clone()); + } + } + if let Some(expires_at) = self.remote_missing_cache.get(rel) { + if *expires_at > Instant::now() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "cached remote miss", + )); + } + } + + if let Some(entry) = self.overlay.cached_entry(rel).map_err(anyhow_to_io)? { + self.remote_missing_cache.remove(rel); + self.remote_stat_cache.insert( + rel.to_path_buf(), + Timed { + value: entry.clone(), + expires_at: Instant::now() + self.remote_cache_ttl, + }, + ); + return Ok(entry); + } + if self.overlay.cached_missing(rel).map_err(anyhow_to_io)? { + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + return Err(io::Error::new( + io::ErrorKind::NotFound, + "cached remote miss", + )); + } + + if self.offline { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "clone is severed and path is not cached locally", + )); + } + + let max_prefetch = if should_prefetch_bytes_during_stat(rel) { + remote_stat_prefetch_max_bytes().min(self.manifest.cache_max_file_bytes) + } else { + 0 + }; + let stat = match self.remote.stat_prefetch(rel, max_prefetch) { + Ok(stat) => stat, + Err(err) if err.kind() == io::ErrorKind::NotFound => { + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + let _ = self + .overlay + .put_cached_missing(rel, self.remote_cache_ttl.as_secs()); + return Err(err); + } + Err(err) => return Err(err), + }; + let entry = stat.entry; + self.remote_missing_cache.remove(rel); + if let Some(bytes) = stat.data { + let _ = self.overlay.put_cached_file_bytes(rel, &entry, &bytes); + } 
else { + let _ = self.overlay.put_cached_entry(rel, &entry); + } + self.prefetch_runtime_siblings(rel); + self.remote_stat_cache.insert( + rel.to_path_buf(), + Timed { + value: entry.clone(), + expires_at: Instant::now() + self.remote_cache_ttl, + }, + ); + Ok(entry) + } + + fn prefetch_runtime_siblings(&mut self, rel: &Path) { + if self.offline || !should_prefetch_bytes_during_stat(rel) { + return; + } + let dir = rel.parent().unwrap_or_else(|| Path::new("")).to_path_buf(); + if !should_prefetch_runtime_sibling_dir(&dir) { + return; + } + if !self.runtime_prefetch_dirs.insert(dir.clone()) { + return; + } + let max_file = remote_stat_prefetch_max_bytes().min(self.manifest.cache_max_file_bytes); + let max_total = runtime_sibling_prefetch_max_bytes(); + let Ok(files) = self.remote.prefetch_dir(&dir, max_file, max_total) else { + return; + }; + for stat in files { + let child = dir.join(&stat.entry.name); + if let Some(bytes) = stat.data { + let _ = self + .overlay + .put_cached_file_bytes(&child, &stat.entry, &bytes); + } else { + let _ = self.overlay.put_cached_entry(&child, &stat.entry); + } + self.remote_stat_cache.insert( + child, + Timed { + value: stat.entry, + expires_at: Instant::now() + self.remote_cache_ttl, + }, + ); + } + } + + fn remote_readdir(&mut self, rel: &Path) -> io::Result> { + if self.offline { + return self + .overlay + .list_cached_metadata_dir(rel) + .map_err(anyhow_to_io); + } + + if let Some(cached) = self.remote_readdir_cache.get(rel) { + if cached.expires_at > Instant::now() { + return Ok(cached.value.clone()); + } + } + if self.overlay.cached_missing(rel).map_err(anyhow_to_io)? 
{ + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + return Err(io::Error::new( + io::ErrorKind::NotFound, + "cached remote miss", + )); + } + + let entries = match self.remote.readdir(rel) { + Ok(entries) => entries, + Err(err) if err.kind() == io::ErrorKind::NotFound => { + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + let _ = self + .overlay + .put_cached_missing(rel, self.remote_cache_ttl.as_secs()); + return Err(err); + } + Err(err) => return Err(err), + }; + let expires_at = Instant::now() + self.remote_cache_ttl; + for entry in &entries { + let _ = self.overlay.put_cached_entry(&rel.join(&entry.name), entry); + self.remote_stat_cache.insert( + rel.join(&entry.name), + Timed { + value: entry.clone(), + expires_at, + }, + ); + } + self.remote_readdir_cache.insert( + rel.to_path_buf(), + Timed { + value: entries.clone(), + expires_at, + }, + ); + Ok(entries) + } + + fn invalidate_remote_cache(&mut self, rel: &Path) { + self.remote_stat_cache.remove(rel); + self.remote_missing_cache.remove(rel); + self.remote_readdir_cache.remove(rel); + if let Some(parent) = rel.parent() { + self.remote_readdir_cache.remove(parent); + } + let _ = self.overlay.remove_cached(rel); + } + + fn attr_from_metadata(&self, ino: u64, metadata: &fs::Metadata) -> FileAttr { + let kind = if metadata.file_type().is_dir() { + FileType::Directory + } else if metadata.file_type().is_symlink() { + FileType::Symlink + } else { + FileType::RegularFile + }; + let mtime = unix_time(metadata.mtime() as u64); + FileAttr { + ino, + size: metadata.len(), + blocks: metadata.blocks(), + atime: unix_time(metadata.atime() as u64), + mtime, + ctime: unix_time(metadata.ctime() as u64), + crtime: mtime, + kind, + perm: (metadata.mode() & 0o7777) as u16, + nlink: metadata.nlink() as u32, + uid: metadata.uid(), + gid: metadata.gid(), + rdev: metadata.rdev() as u32, + blksize: metadata.blksize() as u32, + flags: 
0, + } + } + + fn attr_from_remote(&self, ino: u64, entry: &RemoteEntry) -> FileAttr { + let kind = match entry.kind.as_str() { + "dir" => FileType::Directory, + "symlink" => FileType::Symlink, + _ => FileType::RegularFile, + }; + let default_perm = match kind { + FileType::Directory => 0o755, + FileType::Symlink => 0o777, + _ => 0o644, + }; + FileAttr { + ino, + size: entry.size, + blocks: entry.size.div_ceil(512), + atime: unix_time(entry.mtime), + mtime: unix_time(entry.mtime), + ctime: unix_time(entry.mtime), + crtime: unix_time(entry.mtime), + kind, + perm: ((entry.mode & 0o7777) as u16).max(default_perm), + nlink: if kind == FileType::Directory { 2 } else { 1 }, + uid: self.uid, + gid: self.gid, + rdev: 0, + blksize: 4096, + flags: 0, + } + } + + fn allocate_handle(&mut self, handle: Handle) -> u64 { + let fh = self.next_fh; + self.next_fh += 1; + self.handles.insert(fh, handle); + fh + } +} + +impl Filesystem for CowFs { + fn init( + &mut self, + _req: &Request<'_>, + _config: &mut KernelConfig, + ) -> std::result::Result<(), i32> { + Ok(()) + } + + fn lookup(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEntry) { + let result = (|| { + let rel = self.child_path(parent, name)?; + trace_fuse("lookup", &rel); + let ino = self.ino_for_path(&rel); + self.attr_for_path(&rel, ino) + })(); + match result { + Ok(attr) => reply.entry(&self.kernel_cache_ttl, &attr, 0), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn getattr(&mut self, _req: &Request<'_>, ino: u64, _fh: Option, reply: ReplyAttr) { + let result = (|| { + if ino == ROOT_INO { + return Ok(root_attr(self.uid, self.gid)); + } + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + trace_fuse("getattr", &rel); + self.attr_for_path(&rel, ino) + })(); + match result { + Ok(attr) => reply.attr(&self.kernel_cache_ttl, &attr), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn readlink(&mut self, _req: 
&Request<'_>, ino: u64, reply: ReplyData) { + let result = (|| { + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + if upper.exists() { + return fs::read_link(upper).map(|p| p.to_string_lossy().into_owned()); + } + if self.offline { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "clone is severed and symlink is not cached locally", + )); + } + self.remote.readlink(&rel) + })(); + match result { + Ok(target) => reply.data(target.as_bytes()), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn mkdir( + &mut self, + _req: &Request<'_>, + parent: u64, + name: &OsStr, + mode: u32, + _umask: u32, + reply: ReplyEntry, + ) { + let result = (|| { + let rel = self.child_path(parent, name)?; + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + fs::create_dir_all(&upper)?; + fs::set_permissions(&upper, fs::Permissions::from_mode(mode & 0o7777))?; + self.overlay.clear_whiteout(&rel).map_err(anyhow_to_io)?; + self.invalidate_remote_cache(&rel); + let ino = self.ino_for_path(&rel); + self.attr_for_path(&rel, ino) + })(); + match result { + Ok(attr) => reply.entry(&self.kernel_cache_ttl, &attr, 0), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn unlink(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEmpty) { + self.remove_path(parent, name, reply); + } + + fn rmdir(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEmpty) { + self.remove_path(parent, name, reply); + } + + fn rename( + &mut self, + _req: &Request<'_>, + parent: u64, + name: &OsStr, + newparent: u64, + newname: &OsStr, + flags: u32, + reply: ReplyEmpty, + ) { + let result = (|| { + if flags != 0 { + return Err(io::Error::from_raw_os_error(ENOTSUP)); + } + let old_rel = self.child_path(parent, name)?; + let new_rel = self.child_path(newparent, newname)?; + let old_upper = 
self.overlay.upper_path(&old_rel).map_err(anyhow_to_io)?; + let new_upper = self.overlay.upper_path(&new_rel).map_err(anyhow_to_io)?; + if let Some(parent) = new_upper.parent() { + fs::create_dir_all(parent)?; + } + + if !old_upper.exists() { + let entry = self.remote_stat(&old_rel)?; + if entry.kind == "dir" { + return Err(io::Error::from_raw_os_error(ENOTSUP)); + } + self.copy_up_for_write(&old_rel)?; + } + + fs::rename(&old_upper, &new_upper)?; + self.overlay.add_whiteout(&old_rel).map_err(anyhow_to_io)?; + self.overlay + .clear_whiteout(&new_rel) + .map_err(anyhow_to_io)?; + self.invalidate_remote_cache(&old_rel); + self.invalidate_remote_cache(&new_rel); + let ino = self.ino_for_path(&new_rel); + self.ino_to_path.insert(ino, new_rel.clone()); + self.path_to_ino.insert(new_rel, ino); + Ok(()) + })(); + match result { + Ok(()) => reply.ok(), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn open(&mut self, _req: &Request<'_>, ino: u64, flags: i32, reply: ReplyOpen) { + let result = (|| { + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + trace_fuse("open", &rel); + if wants_write(flags) { + let upper = self.copy_up_for_write(&rel)?; + let mut opts = OpenOptions::new(); + opts.read(true).write(true).create(true); + if flags & libc::O_TRUNC != 0 { + opts.truncate(true); + } + if flags & libc::O_APPEND != 0 { + opts.append(true); + } + let file = opts.open(upper)?; + Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) + } else { + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + if upper.exists() { + let file = File::open(upper)?; + Ok((self.allocate_handle(Handle::Local(file)), 0)) + } else if let Some(cache_path) = self.overlay.cached_file_path(&rel) { + let file = File::open(cache_path)?; + Ok((self.allocate_handle(Handle::Local(file)), 0)) + } else if self.offline { + Err(io::Error::new( + io::ErrorKind::NotFound, + "clone is severed and file is not cached 
locally", + )) + } else { + Ok((self.allocate_handle(Handle::Remote(rel)), 0)) + } + } + })(); + match result { + Ok((fh, open_flags)) => reply.opened(fh, open_flags), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn read( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: ReplyData, + ) { + let result = match self.handles.get(&fh) { + Some(Handle::Local(file)) => { + let mut buf = vec![0; size as usize]; + if offset < 0 { + Ok(Vec::new()) + } else { + match file.read_at(&mut buf, offset as u64) { + Ok(read) => { + buf.truncate(read); + Ok(buf) + } + Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => Ok(Vec::new()), + Err(err) => Err(err), + } + } + } + Some(Handle::Remote(rel)) => { + let rel = rel.clone(); + if self.offline { + return reply.error(ENOENT); + } + trace_fuse("read-remote", &rel); + let entry = self.remote_stat(&rel).ok(); + self.overlay + .read_cached_or_remote_with_entry( + &self.remote, + &rel, + offset, + size, + self.manifest.cache_max_file_bytes, + entry, + ) + .map_err(anyhow_to_io) + } + None => Err(io::Error::new(io::ErrorKind::NotFound, "unknown handle")), + }; + match result { + Ok(bytes) => reply.data(&bytes), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn write( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + data: &[u8], + _write_flags: u32, + _flags: i32, + _lock_owner: Option, + reply: ReplyWrite, + ) { + let result = match self.handles.get(&fh) { + Some(Handle::Local(file)) => { + if offset < 0 { + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "negative offset", + )) + } else { + file.write_at(data, offset as u64) + .map(|written| written as u32) + } + } + _ => Err(io::Error::new( + io::ErrorKind::Other, + "handle is not writable", + )), + }; + match result { + Ok(written) => reply.written(written), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn create( + &mut self, + _req: 
&Request<'_>, + parent: u64, + name: &OsStr, + mode: u32, + _umask: u32, + flags: i32, + reply: ReplyCreate, + ) { + let result = (|| { + let rel = self.child_path(parent, name)?; + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + if let Some(parent) = upper.parent() { + fs::create_dir_all(parent)?; + } + let mut opts = OpenOptions::new(); + opts.read(true) + .write(true) + .create(true) + .truncate(flags & libc::O_TRUNC != 0) + .mode(mode & 0o7777); + let file = opts.open(&upper)?; + self.overlay.clear_whiteout(&rel).map_err(anyhow_to_io)?; + self.invalidate_remote_cache(&rel); + let ino = self.ino_for_path(&rel); + let attr = self.attr_for_path(&rel, ino)?; + let fh = self.allocate_handle(Handle::Local(file)); + Ok((attr, fh)) + })(); + match result { + Ok((attr, fh)) => reply.created(&self.kernel_cache_ttl, &attr, 0, fh, flags as u32), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn flush( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + _lock_owner: u64, + reply: ReplyEmpty, + ) { + if let Some(Handle::Local(file)) = self.handles.get(&fh) { + if let Err(err) = file.sync_data() { + reply.error(io_errno(&err)); + return; + } + } + reply.ok(); + } + + fn release( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + _flags: i32, + _lock_owner: Option, + _flush: bool, + reply: ReplyEmpty, + ) { + self.handles.remove(&fh); + reply.ok(); + } + + fn readdir( + &mut self, + _req: &Request<'_>, + ino: u64, + _fh: u64, + offset: i64, + mut reply: ReplyDirectory, + ) { + let result = self.collect_dir_entries(ino); + let entries = match result { + Ok(entries) => entries, + Err(err) => { + reply.error(io_errno(&err)); + return; + } + }; + + for (idx, (entry_ino, kind, name)) in entries.into_iter().enumerate().skip(offset as usize) + { + let next_offset = (idx + 1) as i64; + if reply.add(entry_ino, next_offset, kind, name) { + break; + } + } + reply.ok(); + } +} + +impl CowFs { + fn remove_path(&mut self, parent: u64, name: 
&OsStr, reply: ReplyEmpty) { + let result = (|| { + let rel = self.child_path(parent, name)?; + self.overlay.remove_upper(&rel).map_err(anyhow_to_io)?; + if self.remote_stat(&rel).is_ok() { + self.overlay.add_whiteout(&rel).map_err(anyhow_to_io)?; + } + self.invalidate_remote_cache(&rel); + Ok(()) + })(); + match result { + Ok(()) => reply.ok(), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn collect_dir_entries(&mut self, ino: u64) -> io::Result> { + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + trace_fuse("readdir", &rel); + + let mut entries = Vec::new(); + entries.push((ino, FileType::Directory, OsString::from("."))); + let parent_rel = rel.parent().unwrap_or_else(|| Path::new("")); + let parent_ino = self.ino_for_path(parent_rel); + entries.push((parent_ino, FileType::Directory, OsString::from(".."))); + + let mut by_name: BTreeMap = BTreeMap::new(); + let opaque = self.is_opaque_dir_active(&rel)?; + if !opaque { + match self.remote_readdir(&rel) { + Ok(remote_entries) => { + for entry in remote_entries { + by_name.insert(entry.name.clone(), entry); + } + } + Err(err) if err.kind() == io::ErrorKind::NotFound => {} + Err(err) => return Err(err), + } + } + + for entry in self.overlay.list_upper(&rel).map_err(anyhow_to_io)? { + by_name.insert(entry.name.clone(), entry); + } + if !opaque { + for entry in self.overlay.list_mirror(&rel).map_err(anyhow_to_io)? { + by_name.insert(entry.name.clone(), entry); + } + } + + for (name, entry) in by_name { + let child_rel = rel.join(&name); + if self.overlay.is_whiteout(&child_rel).map_err(anyhow_to_io)? 
{ + continue; + } + let child_ino = self.ino_for_path(&child_rel); + entries.push(( + child_ino, + file_type_from_kind(&entry.kind), + OsString::from(name), + )); + } + + Ok(entries) + } + + fn copy_up_for_write(&self, rel: &Path) -> io::Result { + if self.offline { + self.overlay + .copy_up_cached_only(rel) + .map_err(|err| io::Error::new(io::ErrorKind::NotFound, err.to_string())) + } else { + self.overlay + .copy_up(&self.remote, rel) + .map_err(anyhow_to_io) + } + } + + fn is_opaque_dir_active(&self, rel: &Path) -> io::Result { + let is_opaque = self.overlay.is_opaque_dir(rel).map_err(anyhow_to_io)?; + if !is_opaque { + return Ok(false); + } + if rel.starts_with(Path::new("wp-content/plugins")) + && !env_is_explicit_false("WPCOW_ENABLE_PLUGINS") + { + return Ok(false); + } + if rel.starts_with(Path::new("wp-content/languages")) { + return Ok(false); + } + Ok(true) + } + + fn has_opaque_ancestor_active(&self, rel: &Path) -> io::Result { + let mut current = rel.parent(); + while let Some(parent) = current { + if self.is_opaque_dir_active(parent)? 
{ + return Ok(true); + } + current = parent.parent(); + } + Ok(false) + } +} + +pub fn mount_foreground(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> Result<()> { + fs::create_dir_all(mountpoint)?; + let control_path = config::ssh_control_path(&paths); + let remote = RemoteClient::new(manifest.clone(), Some(control_path)); + if !config::is_offline(&paths) { + remote.ensure_master()?; + } + let fs = CowFs::new(manifest.clone(), &paths, remote); + let options = vec![ + MountOption::FSName(format!("wp-cow-{}", manifest.name)), + MountOption::Subtype("wp-cow".to_string()), + ]; + fuser::mount2(fs, mountpoint, &options)?; + Ok(()) +} + +fn root_attr(uid: u32, gid: u32) -> FileAttr { + FileAttr { + ino: ROOT_INO, + size: 0, + blocks: 0, + atime: SystemTime::now(), + mtime: SystemTime::now(), + ctime: SystemTime::now(), + crtime: SystemTime::now(), + kind: FileType::Directory, + perm: 0o755, + nlink: 2, + uid, + gid, + rdev: 0, + blksize: 4096, + flags: 0, + } +} + +fn file_type_from_kind(kind: &str) -> FileType { + match kind { + "dir" => FileType::Directory, + "symlink" => FileType::Symlink, + _ => FileType::RegularFile, + } +} + +fn unix_time(secs: u64) -> SystemTime { + UNIX_EPOCH + Duration::from_secs(secs) +} + +fn trace_fuse(op: &str, rel: &Path) { + if std::env::var("WPCOW_TRACE_FUSE").ok().as_deref() == Some("1") { + eprintln!("wp-cow fuse {op} {}", OverlayStore::rel_string(rel)); + } +} + +fn wants_write(flags: i32) -> bool { + (flags & libc::O_ACCMODE) != libc::O_RDONLY + || flags & libc::O_TRUNC != 0 + || flags & libc::O_APPEND != 0 +} + +fn env_is_explicit_false(name: &str) -> bool { + std::env::var(name) + .ok() + .map(|raw| { + matches!( + raw.to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" + ) + }) + .unwrap_or(false) +} + +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + +fn remote_stat_prefetch_max_bytes() -> u64 { + 
env_u64("WPCOW_REMOTE_STAT_PREFETCH_MAX_KB", 0).saturating_mul(1024) +} + +fn runtime_sibling_prefetch_max_bytes() -> u64 { + env_u64("WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB", 0).saturating_mul(1024 * 1024) +} + +fn should_prefetch_bytes_during_stat(rel: &Path) -> bool { + matches!( + rel.extension().and_then(|ext| ext.to_str()), + Some("php" | "json" | "mo") + ) +} + +fn should_prefetch_runtime_sibling_dir(dir: &Path) -> bool { + !(dir.as_os_str().is_empty() + || dir == Path::new("wp-includes") + || dir == Path::new("wp-admin") + || dir == Path::new("wp-admin/includes")) +} + +fn io_errno(err: &io::Error) -> i32 { + match err.kind() { + io::ErrorKind::NotFound => ENOENT, + io::ErrorKind::Unsupported => ENOTSUP, + _ => err.raw_os_error().unwrap_or(EIO), + } +} + +fn anyhow_to_io(err: anyhow::Error) -> io::Error { + io::Error::new(io::ErrorKind::Other, err.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{ensure_clone_dirs, write_offline_marker, Manifest, OfflineMarker, Probe}; + use std::sync::{Mutex, OnceLock}; + + fn test_manifest() -> Manifest { + Manifest::new( + "example".to_string(), + "unreachable-host".to_string(), + "/remote/wp".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ) + } + + #[test] + fn offline_write_copy_up_uses_cached_lower_without_remote() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: Vec::new(), + admin_user: None, + }, + ) + .unwrap(); + + let manifest = test_manifest(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-admin/index.php"); + let cache_path = store.cache_path(rel); + fs::create_dir_all(cache_path.parent().unwrap()).unwrap(); + fs::write(&cache_path, b"cached admin 
runtime\n").unwrap(); + store + .put_cached_entry( + rel, + &RemoteEntry { + name: "index.php".to_string(), + kind: "file".to_string(), + size: 21, + mode: 0o100644, + mtime: 42, + }, + ) + .unwrap(); + + let fs = CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + let upper = fs.copy_up_for_write(rel).unwrap(); + assert_eq!(std::fs::read(&upper).unwrap(), b"cached admin runtime\n"); + + let err = fs + .copy_up_for_write(Path::new("wp-admin/missing.php")) + .unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::NotFound); + assert!( + err.to_string() + .contains("clone is severed and writable lower file is not cached locally"), + "unexpected error: {err}" + ); + } + + #[test] + fn offline_readdir_uses_cached_remote_metadata_without_remote() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: Vec::new(), + admin_user: None, + }, + ) + .unwrap(); + + let manifest = test_manifest(); + let store = OverlayStore::new(&paths); + store + .put_cached_entry( + Path::new("wp-content/plugins/hello.php"), + &RemoteEntry { + name: "hello.php".to_string(), + kind: "file".to_string(), + size: 18, + mode: 0o100644, + mtime: 42, + }, + ) + .unwrap(); + store + .put_cached_entry( + Path::new("wp-content/plugins/sample"), + &RemoteEntry { + name: "sample".to_string(), + kind: "dir".to_string(), + size: 0, + mode: 0o40755, + mtime: 42, + }, + ) + .unwrap(); + + let mut fs = CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + let entries = fs.remote_readdir(Path::new("wp-content/plugins")).unwrap(); + let names = entries + .into_iter() + .map(|entry| entry.name) + .collect::>(); + + assert_eq!( + names, + vec!["hello.php".to_string(), "sample".to_string()], + "offline readdir should use cached remote metadata without touching SSH" + ); + } + + 
#[test] + fn remote_stat_metadata_survives_severed_mode_without_remote() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let fake_bin = temp.path().join("bin"); + fs::create_dir_all(remote_root.join("wp-content/themes/neve/assets/js/build/modern")) + .unwrap(); + fs::create_dir_all(&fake_bin).unwrap(); + fs::write( + remote_root.join("wp-content/themes/neve/assets/js/build/modern/frontend.js"), + b"/* theme build asset */", + ) + .unwrap(); + let fake_ssh = fake_bin.join("ssh"); + fs::write( + &fake_ssh, + r#"#!/usr/bin/env bash +set -euo pipefail +cmd="${@: -1}" +exec bash -lc "$cmd" +"#, + ) + .unwrap(); + let mut perms = fs::metadata(&fake_ssh).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&fake_ssh, perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", fake_bin.display(), old.to_string_lossy()), + None => fake_bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let rel = Path::new("wp-content/themes/neve/assets/js/build/modern/frontend.js"); + + let mut fs = CowFs::new( + manifest.clone(), + &paths, + RemoteClient::new(manifest.clone(), None), + ); + let entry = fs.remote_stat(rel).unwrap(); + assert_eq!(entry.size, 23); + assert_eq!( + 
OverlayStore::new(&paths) + .cached_entry(rel) + .unwrap() + .unwrap() + .size, + 23, + "successful stat-only lookups must persist metadata for later offline theme checks" + ); + + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: Vec::new(), + admin_user: None, + }, + ) + .unwrap(); + fs::remove_file(remote_root.join(rel)).unwrap(); + let mut offline_fs = + CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + assert_eq!( + offline_fs.remote_stat(rel).unwrap().size, + 23, + "severed clones need stat-only metadata without consulting SSH" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } + + #[test] + fn remote_missing_metadata_survives_daemon_restart() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_path = std::env::var_os("PATH"); + let old_log = std::env::var_os("WPCOW_FAKE_SSH_LOG"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let fake_bin = temp.path().join("bin"); + let fake_ssh_log = temp.path().join("fake-ssh.log"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&fake_bin).unwrap(); + fs::write( + fake_bin.join("ssh"), + r#"#!/usr/bin/env bash +set -euo pipefail +printf 'CALL\n' >> "$WPCOW_FAKE_SSH_LOG" +cmd="${@: -1}" +exec bash -lc "$cmd" +"#, + ) + .unwrap(); + let mut perms = fs::metadata(fake_bin.join("ssh")).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(fake_bin.join("ssh"), perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", fake_bin.display(), old.to_string_lossy()), + None => 
fake_bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_FAKE_SSH_LOG", &fake_ssh_log); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let mut manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + manifest.remote_metadata_cache_ttl_secs = 3600; + let rel = Path::new("wp-content/missing-plugin"); + + let mut fs = CowFs::new( + manifest.clone(), + &paths, + RemoteClient::new(manifest.clone(), None), + ); + assert_eq!( + fs.remote_stat(rel).unwrap_err().kind(), + io::ErrorKind::NotFound + ); + let ssh_lines_after_first = fs::read_to_string(&fake_ssh_log).unwrap().lines().count(); + + let mut reloaded_fs = + CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + assert_eq!( + reloaded_fs.remote_stat(rel).unwrap_err().kind(), + io::ErrorKind::NotFound + ); + assert_eq!( + fs::read_to_string(&fake_ssh_log).unwrap().lines().count(), + ssh_lines_after_first, + "cached missing metadata should avoid repeated remote stats after restart" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_log { + Some(value) => std::env::set_var("WPCOW_FAKE_SSH_LOG", value), + None => std::env::remove_var("WPCOW_FAKE_SSH_LOG"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } + + #[test] + fn legacy_opaque_runtime_markers_stay_transparent_by_default() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old = 
std::env::var_os("WPCOW_ENABLE_PLUGINS"); + std::env::remove_var("WPCOW_ENABLE_PLUGINS"); + + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + for rel in ["wp-content/plugins", "wp-content/languages"] { + let dir = paths.upper.join(rel); + fs::create_dir_all(&dir).unwrap(); + fs::write(dir.join(crate::overlay::OPAQUE_MARKER), b"legacy marker\n").unwrap(); + } + + let manifest = test_manifest(); + let fs = CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + assert!(!fs + .is_opaque_dir_active(Path::new("wp-content/plugins")) + .unwrap()); + assert!(!fs + .is_opaque_dir_active(Path::new("wp-content/languages")) + .unwrap()); + + std::env::set_var("WPCOW_ENABLE_PLUGINS", "0"); + assert!(fs + .is_opaque_dir_active(Path::new("wp-content/plugins")) + .unwrap()); + assert!(!fs + .is_opaque_dir_active(Path::new("wp-content/languages")) + .unwrap()); + + match old { + Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), + None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), + } + } + + #[test] + fn stat_prefetch_is_limited_to_runtime_read_files() { + assert!(should_prefetch_bytes_during_stat(Path::new("wp-load.php"))); + assert!(should_prefetch_bytes_during_stat(Path::new( + "wp-includes/theme.json" + ))); + assert!(!should_prefetch_bytes_during_stat(Path::new( + "wp-content/themes/neve/style-main-new.min.css" + ))); + assert!(!should_prefetch_bytes_during_stat(Path::new( + "wp-content/uploads/2026/05/hero.jpg" + ))); + assert!(!should_prefetch_runtime_sibling_dir(Path::new( + "wp-includes" + ))); + assert!(should_prefetch_runtime_sibling_dir(Path::new( + "wp-includes/rest-api/endpoints" + ))); + } +} diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs new file mode 100644 index 00000000..86ab50fc --- /dev/null +++ b/experiments/remote-wp-cow/src/generate.rs @@ -0,0 +1,3720 @@ +use 
anyhow::Result; +use std::fs; +use std::path::Path; + +use crate::config::{ClonePaths, Manifest}; +use crate::overlay::OPAQUE_MARKER; +use crate::plugin_policy; + +pub const ROUTER_BASENAME: &str = ".wp-cow-router.php"; + +pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { + fs::create_dir_all(paths.upper.join("wp-content/mu-plugins"))?; + remove_opaque_marker(paths.upper.join("wp-content/plugins"))?; + remove_opaque_marker(paths.upper.join("wp-content/languages"))?; + let router = router_php(paths, manifest); + fs::write( + paths.upper.join("wp-config.php"), + wp_config_php(manifest, paths), + )?; + fs::write(paths.upper.join("wp-content/db.php"), db_dropin_php())?; + fs::write(paths.upper.join(ROUTER_BASENAME), &router)?; + fs::write( + paths.upper.join("wp-content/mu-plugins/wp-cow-safety.php"), + safety_mu_plugin_php(), + )?; + fs::write(paths.generated.join("router.php"), router)?; + plugin_policy::write_initial_policy(paths, manifest)?; + Ok(()) +} + +fn remove_opaque_marker(path: impl AsRef) -> Result<()> { + let marker = path.as_ref().join(OPAQUE_MARKER); + if marker.exists() { + fs::remove_file(marker)?; + } + Ok(()) +} + +pub fn wp_config_php(manifest: &Manifest, paths: &ClonePaths) -> String { + format!( + r#" &'static str { + r#"wp-cow DB/runtime error'; + echo ''; + echo '

wp-cow DB/runtime error

'; + echo '

The remote database lower layer is unavailable, so this clone will not fall back to the empty local schema or show the WordPress installer as success.

'; + echo '
' . htmlspecialchars( $message, ENT_QUOTES, 'UTF-8' ) . '
'; + echo '
'; + exit( 1 ); +} + +function cow_control_request( $path, $payload ) { + $payload['clone'] = WPCOW_CLONE; + $url = rtrim( WPCOW_CONTROL_URL, '/' ) . $path; + $body = json_encode( $payload ); + $timeout = cow_control_timeout_secs(); + + if ( function_exists( 'curl_init' ) && function_exists( 'curl_exec' ) ) { + $ch = curl_init( $url ); + curl_setopt( $ch, CURLOPT_POST, true ); + curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'Content-Type: application/json' ) ); + curl_setopt( $ch, CURLOPT_POSTFIELDS, $body ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, min( 3, $timeout ) ); + curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout ); + $raw = curl_exec( $ch ); + $error = curl_error( $ch ); + $errno = curl_errno( $ch ); + if ( PHP_VERSION_ID < 80000 ) { + curl_close( $ch ); + } + if ( false === $raw ) { + return array( 'ok' => false, 'error' => 'curl error ' . $errno . ' calling ' . $url . ': ' . $error ); + } + } else { + $context = stream_context_create( + array( + 'http' => array( + 'method' => 'POST', + 'header' => "Content-Type: application/json\r\n", + 'content' => $body, + 'timeout' => $timeout, + 'ignore_errors' => true, + ), + ) + ); + $raw = @file_get_contents( $url, false, $context ); + if ( false === $raw ) { + $error = error_get_last(); + $error = isset( $error['message'] ) ? $error['message'] : 'unknown stream error'; + return array( 'ok' => false, 'error' => 'stream error calling ' . $url . ' after ' . $timeout . 's: ' . $error ); + } + } + + $decoded = json_decode( $raw, true ); + if ( ! is_array( $decoded ) ) { + return array( 'ok' => false, 'error' => 'invalid wp-cow control response from ' . $url . ': ' . 
substr( $raw, 0, 500 ) ); + } + return $decoded; +} + +function cow_remote_query_cache_enabled() { + return '0' !== getenv( 'WPCOW_REMOTE_QUERY_CACHE' ) && defined( 'WPCOW_QUERY_CACHE_DIR' ) && '' !== WPCOW_QUERY_CACHE_DIR; +} + +function cow_row_cow_enabled() { + return '0' !== getenv( 'WPCOW_ROW_COW' ); +} + +function cow_offline() { + $value = strtolower( (string) getenv( 'WPCOW_OFFLINE' ) ); + return in_array( $value, array( '1', 'true', 'yes', 'on' ), true ); +} + +function cow_remote_query_cache_file( $query ) { + if ( ! cow_remote_query_cache_enabled() ) { + return ''; + } + return rtrim( WPCOW_QUERY_CACHE_DIR, '/' ) . '/' . hash( 'sha256', $query ) . '.json'; +} + +function cow_remote_query_cache_max_rows() { + $max = (int) getenv( 'WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS' ); + return $max > 0 ? $max : 5000; +} + +function cow_remote_query_cache_get( $query ) { + $file = cow_remote_query_cache_file( $query ); + if ( '' === $file || ! is_file( $file ) ) { + return null; + } + $decoded = json_decode( file_get_contents( $file ), true ); + if ( ! is_array( $decoded ) || ! isset( $decoded['sql'], $decoded['result'] ) || $decoded['sql'] !== $query || ! is_array( $decoded['result'] ) ) { + return null; + } + return $decoded['result']; +} + +function cow_remote_query_cache_set( $query, $result ) { + $file = cow_remote_query_cache_file( $query ); + if ( '' === $file || empty( $result['ok'] ) || ! isset( $result['rows'] ) || ! is_array( $result['rows'] ) ) { + return; + } + if ( count( $result['rows'] ) > cow_remote_query_cache_max_rows() ) { + return; + } + if ( ! is_dir( dirname( $file ) ) && ! mkdir( dirname( $file ), 0777, true ) && ! is_dir( dirname( $file ) ) ) { + return; + } + $tmp = $file . '.' . getmypid() . '.' . str_replace( array( ' ', '.' ), '', microtime() . uniqid( '', true ) ) . 
'.tmp'; + file_put_contents( $tmp, json_encode( array( 'sql' => $query, 'result' => $result ) ) ); + @rename( $tmp, $file ); +} + +function cow_local_state() { + static $cached = null; + static $cached_mtime = null; + + if ( ! defined( 'WPCOW_DB_STATE_FILE' ) || '' === WPCOW_DB_STATE_FILE || ! is_file( WPCOW_DB_STATE_FILE ) ) { + return array( + 'materialized_tables' => array(), + 'dirty_tables' => array(), + 'option_bootstrap_tables' => array(), + 'option_rows' => array(), + 'dirty_option_rows' => array(), + ); + } + + $mtime = @filemtime( WPCOW_DB_STATE_FILE ); + if ( is_array( $cached ) && $cached_mtime === $mtime ) { + return $cached; + } + + $decoded = json_decode( file_get_contents( WPCOW_DB_STATE_FILE ), true ); + $state = array( + 'materialized_tables' => array(), + 'dirty_tables' => array(), + 'option_bootstrap_tables' => array(), + 'option_rows' => array(), + 'dirty_option_rows' => array(), + ); + if ( is_array( $decoded ) ) { + foreach ( array( 'materialized_tables', 'dirty_tables', 'option_bootstrap_tables', 'option_rows', 'dirty_option_rows' ) as $key ) { + if ( isset( $decoded[ $key ] ) && is_array( $decoded[ $key ] ) ) { + foreach ( $decoded[ $key ] as $value ) { + $state[ $key ][ strtolower( (string) $value ) ] = true; + } + } + } + } + + $cached = $state; + $cached_mtime = $mtime; + return $cached; +} + +function cow_local_state_tables() { + $state = cow_local_state(); + $tables = array(); + foreach ( array( 'materialized_tables', 'dirty_tables', 'option_bootstrap_tables' ) as $key ) { + foreach ( $state[ $key ] as $table => $_present ) { + $tables[ $table ] = true; + } + } + foreach ( array( 'option_rows', 'dirty_option_rows' ) as $key ) { + foreach ( $state[ $key ] as $row_key => $_present ) { + $parts = explode( ':', (string) $row_key, 2 ); + if ( '' !== $parts[0] ) { + $tables[ strtolower( $parts[0] ) ] = true; + } + } + } + return $tables; +} + +function cow_all_tables_in_state_set( $tables, $state_key ) { + if ( empty( $tables ) ) { + return 
false; + } + $state = cow_local_state(); + foreach ( $tables as $table ) { + if ( ! isset( $state[ $state_key ][ strtolower( (string) $table ) ] ) ) { + return false; + } + } + return true; +} + +function cow_option_bootstrap_names() { + return array( + 'siteurl', + 'home', + 'blogname', + 'blogdescription', + 'admin_email', + 'active_plugins', + 'template', + 'stylesheet', + 'current_theme', + 'permalink_structure', + 'rewrite_rules', + 'sidebars_widgets', + 'stylesheet_root', + 'template_root', + 'upload_path', + 'upload_url_path', + ); +} + +function cow_query_matches_option_bootstrap( $query, $tables, $options_table ) { + if ( ! in_array( $options_table, $tables, true ) ) { + return false; + } + $state = cow_local_state(); + if ( ! isset( $state['option_bootstrap_tables'][ strtolower( $options_table ) ] ) ) { + return false; + } + + $lower = strtolower( $query ); + if ( false !== strpos( $lower, 'autoload' ) ) { + return true; + } + if ( false !== strpos( $lower, 'option_name' ) ) { + foreach ( cow_option_bootstrap_names() as $name ) { + if ( false !== strpos( $lower, "'" . $name . 
"'" ) ) { + return true; + } + } + } + return false; +} + +function cow_safe_local_read_without_control( $query, $tables, $options_table ) { + if ( empty( $tables ) ) { + return false; + } + if ( cow_all_tables_in_state_set( $tables, 'materialized_tables' ) ) { + return true; + } + if ( cow_query_matches_option_bootstrap( $query, $tables, $options_table ) ) { + return true; + } + return false; +} + +function cow_table_has_dirty_state( $table ) { + $table = strtolower( (string) $table ); + $state = cow_local_state(); + if ( isset( $state['dirty_tables'][ $table ] ) ) { + return true; + } + foreach ( $state['dirty_option_rows'] as $row_key => $_present ) { + $parts = explode( ':', (string) $row_key, 2 ); + if ( $table === strtolower( $parts[0] ) ) { + return true; + } + } + return false; +} + +function cow_cached_remote_read_is_safe_without_control( $tables ) { + if ( empty( $tables ) ) { + return false; + } + foreach ( $tables as $table ) { + if ( cow_table_has_dirty_state( $table ) ) { + return false; + } + } + return true; +} + +function cow_options_table_name( $wpdb ) { + if ( isset( $wpdb->options ) && '' !== $wpdb->options ) { + return $wpdb->options; + } + global $table_prefix; + return (string) $table_prefix . 'options'; +} + +class Cow_DB extends wpdb { + public function query( $query ) { + if ( ! $query ) { + return false; + } + + $this->flush(); + $this->last_query = $query; + + $tables = cow_tables_from_sql( $query ); + $options_table = cow_options_table_name( $this ); + + if ( cow_is_write_sql( $query ) ) { + if ( in_array( $options_table, $tables, true ) && cow_is_protected_theme_option_write( $query ) ) { + $this->rows_affected = 0; + $this->last_error = ''; + return 0; + } + if ( cow_offline() ) { + return parent::query( $query ); + } + if ( cow_row_cow_enabled() ) { + $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); + if ( empty( $row_cow['ok'] ) ) { + $this->last_error = isset( $row_cow['error'] ) ? 
$row_cow['error'] : 'wp-cow row COW failed'; + cow_db_runtime_fail( 'control /row-cow failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + if ( ! empty( $row_cow['handled'] ) || ( isset( $row_cow['backend'] ) && 'local' === $row_cow['backend'] ) ) { + return parent::query( $query ); + } + } + $result = cow_control_request( '/materialize', array( 'tables' => $tables ) ); + if ( empty( $result['ok'] ) ) { + $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow materialization failed'; + cow_db_runtime_fail( 'control /materialize failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + return parent::query( $query ); + } + + if ( cow_is_safe_read_sql( $query ) ) { + if ( cow_offline() ) { + return parent::query( $query ); + } + if ( cow_safe_local_read_without_control( $query, $tables, $options_table ) ) { + return parent::query( $query ); + } + if ( cow_cached_remote_read_is_safe_without_control( $tables ) ) { + $cached = cow_remote_query_cache_get( $query ); + if ( is_array( $cached ) ) { + return $this->cow_apply_remote_result( $cached ); + } + } + if ( cow_row_cow_enabled() ) { + $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); + if ( empty( $row_cow['ok'] ) ) { + $this->last_error = isset( $row_cow['error'] ) ? $row_cow['error'] : 'wp-cow row COW failed'; + cow_db_runtime_fail( 'control /row-cow failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + if ( ! empty( $row_cow['handled'] ) && isset( $row_cow['result'] ) && is_array( $row_cow['result'] ) ) { + return $this->cow_apply_remote_result( $row_cow['result'] ); + } + if ( isset( $row_cow['backend'] ) && 'local' === $row_cow['backend'] ) { + return parent::query( $query ); + } + } + $route = cow_control_request( '/route', array( 'tables' => $tables, 'sql' => $query ) ); + if ( ! empty( $route['ok'] ) && isset( $route['backend'] ) && 'remote' === $route['backend'] ) { + return $this->cow_remote_query( $query ); + } + if ( ! 
empty( $route['ok'] ) && isset( $route['backend'] ) && 'local' === $route['backend'] ) { + return parent::query( $query ); + } + $this->last_error = isset( $route['error'] ) ? $route['error'] : 'wp-cow route decision failed'; + cow_db_runtime_fail( 'control /route failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + + return parent::query( $query ); + } + + private function cow_remote_query( $query ) { + $cached = cow_remote_query_cache_get( $query ); + if ( is_array( $cached ) ) { + return $this->cow_apply_remote_result( $cached ); + } + + $result = cow_control_request( '/query', array( 'sql' => $query ) ); + if ( empty( $result['ok'] ) ) { + $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow remote query failed'; + cow_db_runtime_fail( 'control /query failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + + cow_remote_query_cache_set( $query, $result ); + return $this->cow_apply_remote_result( $result ); + } + + private function cow_apply_remote_result( $result ) { + $this->last_result = array(); + if ( isset( $result['rows'] ) && is_array( $result['rows'] ) ) { + foreach ( $result['rows'] as $row ) { + $this->last_result[] = (object) $row; + } + } + + $this->col_info = array(); + if ( isset( $result['fields'] ) && is_array( $result['fields'] ) ) { + foreach ( $result['fields'] as $field ) { + $this->col_info[] = (object) array( 'name' => $field ); + } + } + + $this->num_rows = count( $this->last_result ); + $this->rows_affected = isset( $result['affected'] ) ? (int) $result['affected'] : $this->num_rows; + $this->insert_id = 0; + $this->last_error = ''; + + return $this->num_rows; + } + +} + +$wpdb = new Cow_DB( DB_USER, DB_PASSWORD, DB_NAME, defined( 'WPCOW_LOCAL_DB_HOST' ) ? WPCOW_LOCAL_DB_HOST : DB_HOST ); +"# +} + +pub fn safety_mu_plugin_php() -> &'static str { + r#" array() ); + $file = wp_cow_plugin_policy_file(); + if ( '' === $file || ! 
is_readable( $file ) ) { + return $policy; + } + + $decoded = json_decode( (string) file_get_contents( $file ), true ); + if ( is_array( $decoded ) ) { + $policy = array_merge( $policy, $decoded ); + } + + return $policy; +} + +function wp_cow_allowed_plugins() { + $policy = wp_cow_plugin_policy(); + $allowed = isset( $policy['allow'] ) && is_array( $policy['allow'] ) ? $policy['allow'] : array(); + $quarantine = isset( $policy['quarantine'] ) && is_array( $policy['quarantine'] ) ? $policy['quarantine'] : array(); + $quarantined = array(); + foreach ( $quarantine as $plugin => $reason ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( '' !== $plugin ) { + $quarantined[ $plugin ] = true; + } + } + + $out = array(); + foreach ( $allowed as $plugin ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( '' !== $plugin && ! isset( $quarantined[ $plugin ] ) ) { + $out[ $plugin ] = true; + } + } + return $out; +} + +function wp_cow_local_home_url() { + if ( defined( 'WP_HOME' ) ) { + return rtrim( (string) WP_HOME, '/' ); + } + if ( function_exists( 'home_url' ) ) { + return rtrim( (string) home_url( '/' ), '/' ); + } + return ''; +} + +function wp_cow_remote_home_url() { + if ( defined( 'WPCOW_REMOTE_URL' ) ) { + return rtrim( (string) WPCOW_REMOTE_URL, '/' ); + } + return ''; +} + +function wp_cow_url_variants( $url ) { + $url = rtrim( (string) $url, '/' ); + if ( '' === $url ) { + return array(); + } + $variants = array( $url ); + if ( 0 === strpos( $url, 'https://' ) ) { + $variants[] = 'http://' . substr( $url, 8 ); + } elseif ( 0 === strpos( $url, 'http://' ) ) { + $variants[] = 'https://' . 
substr( $url, 7 ); + } + return array_values( array_unique( $variants ) ); +} + +function wp_cow_rewrite_remote_url_to_local( $url ) { + $url = (string) $url; + $remote = wp_cow_remote_home_url(); + $local = wp_cow_local_home_url(); + if ( '' === $url || '' === $remote || '' === $local ) { + return $url; + } + + foreach ( wp_cow_url_variants( $remote ) as $variant ) { + if ( 0 === strpos( rtrim( $url, '/' ), $variant ) ) { + return $local . substr( $url, strlen( $variant ) ); + } + } + return $url; +} + +function wp_cow_is_remote_or_local_home_url( $url ) { + $url = rtrim( (string) $url, '/' ); + if ( '' === $url ) { + return false; + } + foreach ( array_merge( wp_cow_url_variants( wp_cow_remote_home_url() ), wp_cow_url_variants( wp_cow_local_home_url() ) ) as $variant ) { + if ( $url === $variant ) { + return true; + } + } + return false; +} + +function wp_cow_localize_remote_urls_in_text( $text ) { + $remote = wp_cow_remote_home_url(); + $local = wp_cow_local_home_url(); + if ( '' === $remote || '' === $local || false === strpos( (string) $text, '://' ) ) { + return $text; + } + foreach ( wp_cow_url_variants( $remote ) as $variant ) { + $text = str_replace( $variant, $local, (string) $text ); + } + return $text; +} + +function wp_cow_filter_active_plugins( $plugins ) { + $mode = wp_cow_plugin_mode(); + if ( in_array( $mode, array( 'full', 'on', 'enabled', '1', 'true', 'yes' ), true ) ) { + return $plugins; + } + if ( in_array( $mode, array( 'off', 'none', 'disabled', 'disable', '0', 'false', 'no' ), true ) ) { + return array(); + } + if ( ! 
is_array( $plugins ) ) { + return array(); + } + + $allowed = wp_cow_allowed_plugins(); + if ( empty( $allowed ) ) { + return array(); + } + + $filtered = array(); + foreach ( $plugins as $plugin ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( isset( $allowed[ $plugin ] ) ) { + $filtered[] = $plugin; + } + } + return $filtered; +} + +function wp_cow_filter_sitewide_plugins( $plugins ) { + $mode = wp_cow_plugin_mode(); + if ( in_array( $mode, array( 'full', 'on', 'enabled', '1', 'true', 'yes' ), true ) ) { + return $plugins; + } + if ( in_array( $mode, array( 'off', 'none', 'disabled', 'disable', '0', 'false', 'no' ), true ) ) { + return array(); + } + if ( ! is_array( $plugins ) ) { + return array(); + } + + $allowed = wp_cow_allowed_plugins(); + if ( empty( $allowed ) ) { + return array(); + } + + $filtered = array(); + foreach ( $plugins as $plugin => $value ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( isset( $allowed[ $plugin ] ) ) { + $filtered[ $plugin ] = $value; + } + } + return $filtered; +} + +add_filter( 'option_active_plugins', 'wp_cow_filter_active_plugins', PHP_INT_MAX ); +add_filter( 'site_option_active_sitewide_plugins', 'wp_cow_filter_sitewide_plugins', PHP_INT_MAX ); + +add_action( + 'init', + static function () { + remove_action( 'wp_head', 'print_emoji_detection_script', 7 ); + remove_action( 'wp_enqueue_scripts', 'wp_enqueue_emoji_styles' ); + remove_action( 'wp_print_styles', 'print_emoji_styles' ); + remove_action( 'admin_print_scripts', 'print_emoji_detection_script' ); + remove_action( 'admin_print_styles', 'print_emoji_styles' ); + remove_filter( 'the_content_feed', 'wp_staticize_emoji' ); + remove_filter( 'comment_text_rss', 'wp_staticize_emoji' ); + remove_filter( 'wp_mail', 'wp_staticize_emoji_for_email' ); + }, + 0 +); +add_filter( 'emoji_svg_url', '__return_false', PHP_INT_MAX ); + +add_filter( + 'nav_menu_link_attributes', + static function ( $atts, $item = null ) { + if ( isset( $atts['href'] ) ) { + $atts['href'] = 
wp_cow_rewrite_remote_url_to_local( $atts['href'] ); + } + if ( + is_object( $item ) && + isset( $item->url ) && + function_exists( 'is_front_page' ) && + is_front_page() && + wp_cow_is_remote_or_local_home_url( $item->url ) + ) { + $atts['aria-current'] = 'page'; + } + return $atts; + }, + PHP_INT_MAX, + 2 +); + +add_filter( + 'nav_menu_css_class', + static function ( $classes, $item = null ) { + if ( + is_object( $item ) && + isset( $item->url ) && + function_exists( 'is_front_page' ) && + is_front_page() && + wp_cow_is_remote_or_local_home_url( $item->url ) + ) { + $classes = is_array( $classes ) ? $classes : array(); + $classes = array_merge( $classes, array( 'current-menu-item', 'current_page_item', 'menu-item-home', 'nv-active' ) ); + $classes = array_values( array_unique( $classes ) ); + } + return $classes; + }, + PHP_INT_MAX, + 2 +); + +add_filter( 'validate_current_theme', '__return_false', PHP_INT_MAX ); +add_filter( 'should_load_block_assets_on_demand', '__return_false', PHP_INT_MAX ); +add_filter( 'should_load_separate_core_block_assets', '__return_false', PHP_INT_MAX ); + +function wp_cow_disable_local_cache_generation( $value = false ) { + return 0; +} + +foreach ( + array( + 'siteground_optimizer_combine_css', + 'siteground_optimizer_combine_javascript', + 'siteground_optimizer_dns_prefetch', + 'siteground_optimizer_file_caching', + 'siteground_optimizer_fix_insecure_content', + 'siteground_optimizer_optimize_css', + 'siteground_optimizer_optimize_html', + 'siteground_optimizer_optimize_javascript', + 'siteground_optimizer_optimize_javascript_async', + 'siteground_optimizer_optimize_web_fonts', + 'siteground_optimizer_preload_combined_css', + ) as $wp_cow_cache_option +) { + add_filter( 'pre_option_' . $wp_cow_cache_option, 'wp_cow_disable_local_cache_generation', PHP_INT_MAX ); +} +unset( $wp_cow_cache_option ); + +function wp_cow_siteground_combined_css_markers() { + $post_id = function_exists( 'get_queried_object_id' ) ? 
(int) get_queried_object_id() : 0; + if ( $post_id <= 0 || ! function_exists( 'get_post' ) ) { + return array(); + } + + $post = get_post( $post_id ); + if ( ! is_object( $post ) || empty( $post->post_content ) ) { + return array(); + } + + preg_match_all( '/wp-block-themeisle-blocks-[a-z0-9-]+-[a-f0-9]{8}/i', (string) $post->post_content, $matches ); + if ( empty( $matches[0] ) ) { + return array(); + } + + return array_values( array_unique( array_slice( $matches[0], 0, 20 ) ) ); +} + +function wp_cow_configured_siteground_combined_css() { + $basename = basename( (string) getenv( 'WPCOW_SITEGROUND_COMBINED_CSS' ) ); + if ( '' === $basename || ! preg_match( '/^siteground-optimizer-combined-css-[a-f0-9]+\.css$/', $basename ) ) { + return false; + } + + $file = ABSPATH . 'wp-content/uploads/siteground-optimizer-assets/' . $basename; + if ( ! is_file( $file ) || ! is_readable( $file ) ) { + return false; + } + + $size = filesize( $file ); + if ( false === $size || $size < 1024 || $size > 2 * 1024 * 1024 ) { + return false; + } + + return $file; +} + +function wp_cow_find_siteground_combined_css() { + static $asset = null; + if ( null !== $asset ) { + return $asset; + } + + $asset = false; + $configured = wp_cow_configured_siteground_combined_css(); + if ( false !== $configured ) { + $asset = $configured; + return $asset; + } + + $scan = strtolower( trim( (string) getenv( 'WPCOW_SITEGROUND_COMBINED_CSS_SCAN' ) ) ); + if ( ! in_array( $scan, array( '1', 'true', 'yes', 'on' ), true ) ) { + return false; + } + + $dir = ABSPATH . 'wp-content/uploads/siteground-optimizer-assets'; + if ( ! is_dir( $dir ) || ! is_readable( $dir ) ) { + return false; + } + + $files = glob( $dir . '/siteground-optimizer-combined-css-*.css' ); + if ( ! 
is_array( $files ) || empty( $files ) ) { + return false; + } + + usort( + $files, + static function ( $a, $b ) { + return (int) @filemtime( $b ) <=> (int) @filemtime( $a ); + } + ); + + $markers = wp_cow_siteground_combined_css_markers(); + $fallback = false; + + foreach ( $files as $file ) { + if ( ! is_file( $file ) || ! is_readable( $file ) ) { + continue; + } + + $size = filesize( $file ); + if ( false === $size || $size < 1024 || $size > 2 * 1024 * 1024 ) { + continue; + } + + if ( false === $fallback ) { + $fallback = $file; + } + + if ( empty( $markers ) ) { + continue; + } + + $css = file_get_contents( $file ); + if ( false === $css ) { + continue; + } + + foreach ( $markers as $marker ) { + if ( false !== strpos( $css, $marker ) ) { + $asset = $file; + return $asset; + } + } + } + + $asset = $fallback; + return $asset; +} + +function wp_cow_siteground_localized_css_file( $file ) { + $file = (string) $file; + $remote = wp_cow_remote_home_url(); + $local = wp_cow_local_home_url(); + if ( '' === $remote || '' === $local || ! is_file( $file ) || ! is_readable( $file ) ) { + return $file; + } + + $dir = dirname( $file ); + $target = $dir . '/wp-cow-localized-' . basename( $file ); + $source_mtime = (int) @filemtime( $file ); + if ( is_file( $target ) && is_readable( $target ) && (int) @filemtime( $target ) >= $source_mtime ) { + return $target; + } + + $css = file_get_contents( $file ); + if ( false === $css ) { + return $file; + } + + $localized = $css; + foreach ( wp_cow_url_variants( $remote ) as $variant ) { + $localized = str_replace( $variant, $local, $localized ); + } + if ( $localized === $css ) { + return $file; + } + + $tmp = $target . '.tmp.' . getmypid(); + if ( false === @file_put_contents( $tmp, $localized ) ) { + return $file; + } + @rename( $tmp, $target ); + @unlink( $tmp ); + + return is_file( $target ) && is_readable( $target ) ? $target : $file; +} + +add_action( + 'wp_enqueue_scripts', + static function () { + if ( ! 
function_exists( 'wp_enqueue_style' ) || ! function_exists( 'content_url' ) ) { + return; + } + + $file = wp_cow_find_siteground_combined_css(); + if ( false === $file ) { + return; + } + + $basename = basename( $file ); + $handle = preg_replace( '/\.css$/', '', $basename ); + $asset = wp_cow_siteground_localized_css_file( $file ); + $url = content_url( 'uploads/siteground-optimizer-assets/' . basename( $asset ) ); + if ( function_exists( 'wp_dequeue_style' ) ) { + wp_dequeue_style( $handle ); + } + if ( function_exists( 'wp_deregister_style' ) ) { + wp_deregister_style( $handle ); + } + wp_enqueue_style( $handle, $url, array(), null ); + }, + 0 +); + +function wp_cow_siteground_lazyload_images_enabled() { + $enabled = strtolower( trim( (string) getenv( 'WPCOW_SITEGROUND_LAZYLOAD_IMAGES' ) ) ); + if ( in_array( $enabled, array( '0', 'false', 'no', 'off', 'disabled' ), true ) ) { + return false; + } + if ( in_array( $enabled, array( '1', 'true', 'yes', 'on', 'enabled' ), true ) ) { + return true; + } + return false !== wp_cow_find_siteground_combined_css() && false !== wp_cow_siteground_lazysizes_path(); +} + +function wp_cow_siteground_lazysizes_path() { + static $path = null; + if ( null !== $path ) { + return $path; + } + + $content_dir = defined( 'WP_CONTENT_DIR' ) ? WP_CONTENT_DIR : ABSPATH . 'wp-content'; + $candidate = rtrim( $content_dir, '/' ) . '/plugins/sg-cachepress/assets/js/lazysizes.min.js'; + if ( is_file( $candidate ) && is_readable( $candidate ) ) { + $path = $candidate; + return $path; + } + + $path = false; + return $path; +} + +function wp_cow_enqueue_siteground_lazysizes() { + if ( ! wp_cow_siteground_lazyload_images_enabled() || ! function_exists( 'wp_enqueue_script' ) || ! 
function_exists( 'content_url' ) ) { + return; + } + + $handle = 'siteground-optimizer-lazy-sizes-js'; + $url = content_url( 'plugins/sg-cachepress/assets/js/lazysizes.min.js' ); + wp_enqueue_script( $handle, $url, array(), null, true ); + + if ( function_exists( 'wp_script_add_data' ) ) { + wp_script_add_data( $handle, 'strategy', 'defer' ); + } +} + +function wp_cow_siteground_img_placeholder() { + return 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7'; +} + +function wp_cow_siteground_lazyload_img_tag( $tag ) { + $tag = (string) $tag; + if ( false === stripos( $tag, 'wp-content/uploads/' ) || false !== stripos( $tag, 'data-src=' ) ) { + return $tag; + } + if ( ! preg_match( '/\ssrc=([\'"])(.*?)\1/i', $tag, $matches ) ) { + return $tag; + } + + $src = html_entity_decode( $matches[2], ENT_QUOTES, 'UTF-8' ); + if ( false === stripos( $src, '/wp-content/uploads/' ) ) { + return $tag; + } + + $tag = preg_replace( '/\s(?:width|height|srcset|sizes|fetchpriority)=([\'"]).*?\1/i', '', $tag ); + $local_src = wp_cow_rewrite_remote_url_to_local( $src ); + $tag = preg_replace( + '/\ssrc=([\'"]).*?\1/i', + ' src="' . wp_cow_siteground_img_placeholder() . '" data-src="' . htmlspecialchars( $local_src, ENT_QUOTES, 'UTF-8' ) . '"', + $tag, + 1 + ); + + if ( preg_match( '/\sclass=([\'"])(.*?)\1/i', $tag, $class_matches ) ) { + if ( false === strpos( ' ' . $class_matches[2] . ' ', ' lazyload ' ) ) { + $classes = trim( $class_matches[2] . ' lazyload' ); + $tag = preg_replace( '/\sclass=([\'"]).*?\1/i', ' class="' . htmlspecialchars( $classes, ENT_QUOTES, 'UTF-8' ) . '"', $tag, 1 ); + } + } else { + $tag = preg_replace( '/\/?>$/', ' class="lazyload"$0', $tag, 1 ); + } + + return $tag; +} + +function wp_cow_siteground_lazyload_content_images( $html ) { + if ( ! 
wp_cow_siteground_lazyload_images_enabled() || false === stripos( (string) $html, ']*\bwp-image-\d+[^>]*>/i', + static function ( $matches ) { + return wp_cow_siteground_lazyload_img_tag( $matches[0] ); + }, + (string) $html + ); +} + +add_filter( 'the_content', 'wp_cow_siteground_lazyload_content_images', PHP_INT_MAX ); +add_filter( 'the_content', 'wp_cow_localize_remote_urls_in_text', PHP_INT_MAX - 1 ); +add_action( 'wp_enqueue_scripts', 'wp_cow_enqueue_siteground_lazysizes', 1 ); + +function wp_cow_local_asset_http_response( $url ) { + $parts = parse_url( (string) $url ); + if ( ! is_array( $parts ) || empty( $parts['path'] ) ) { + return false; + } + + $path = rawurldecode( (string) $parts['path'] ); + if ( false !== strpos( $path, "\0" ) || false !== strpos( $path, '..' ) ) { + return false; + } + if ( 0 !== strpos( $path, '/wp-content/' ) && 0 !== strpos( $path, '/wp-includes/' ) ) { + return false; + } + + $file = ABSPATH . ltrim( $path, '/' ); + $real_base = realpath( ABSPATH ); + $real_file = realpath( $file ); + if ( false === $real_base || false === $real_file || 0 !== strpos( $real_file, rtrim( $real_base, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR ) ) { + return false; + } + if ( ! is_file( $real_file ) || ! 
is_readable( $real_file ) ) { + return false; + } + + $max_mb = (int) getenv( 'WPCOW_LOCAL_HTTP_ASSET_MAX_MB' ); + if ( $max_mb < 1 ) { + $max_mb = 8; + } + $size = filesize( $real_file ); + if ( false === $size || $size > $max_mb * 1024 * 1024 ) { + return false; + } + + $body = file_get_contents( $real_file ); + if ( false === $body ) { + return false; + } + + return array( + 'headers' => array(), + 'body' => $body, + 'response' => array( + 'code' => 200, + 'message' => 'OK', + ), + 'cookies' => array(), + 'filename' => null, + ); +} + +add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { + if ( defined( 'WPCOW_ALLOW_OUTBOUND_HTTP' ) && WPCOW_ALLOW_OUTBOUND_HTTP ) { + return $preempt; + } + + $local_asset = wp_cow_local_asset_http_response( $url ); + if ( false !== $local_asset ) { + return $local_asset; + } + + return new WP_Error( 'wp_cow_blocked_http', 'Outbound HTTP is blocked in this wp-cow clone.' ); +}, 10, 3 ); +"# +} + +pub fn router_php(paths: &ClonePaths, manifest: &Manifest) -> String { + r#" 'idle', + 'active_path' => '', + 'active_bytes' => 0, + 'active_total' => 0, + 'files_cached' => 0, + 'bytes_cached' => 0, + 'last_cached_path' => '', + 'updated_at_unix_ms' => 0, + ); + if ( is_file( $wp_cow_progress_file ) ) { + $decoded = json_decode( file_get_contents( $wp_cow_progress_file ), true ); + if ( is_array( $decoded ) ) { + $progress = array_merge( $progress, $decoded ); + } + } + $progress['ready'] = is_file( $wp_cow_ready_file ); + echo json_encode( $progress ); + return true; +} + +$path = parse_url( $_SERVER['REQUEST_URI'], PHP_URL_PATH ); +$file = rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ) . 
$path; + +function wp_cow_looks_like_installer( $html ) { + $html = (string) $html; + return ( + false !== stripos( $html, 'wp-admin/install.php' ) || + false !== stripos( $html, 'wp-admin/setup-config.php' ) || + false !== stripos( $html, 'WordPress › Installation' ) || + false !== stripos( $html, 'Welcome to the famous five-minute WordPress installation' ) + ); +} + +function wp_cow_runtime_error_page( $title, $message, $details = '' ) { + if ( ! headers_sent() ) { + http_response_code( 500 ); + header( 'Content-Type: text/html; charset=utf-8' ); + header( 'Cache-Control: no-store' ); + } + echo '' . htmlspecialchars( $title, ENT_QUOTES, 'UTF-8' ) . ''; + echo ''; + echo '

' . htmlspecialchars( $title, ENT_QUOTES, 'UTF-8' ) . '

'; + echo '

' . htmlspecialchars( $message, ENT_QUOTES, 'UTF-8' ) . '

'; + if ( '' !== $details ) { + echo '
' . htmlspecialchars( $details, ENT_QUOTES, 'UTF-8' ) . '
'; + } + echo '
'; +} + +function wp_cow_is_frontend_get( $path ) { + if ( ! in_array( $_SERVER['REQUEST_METHOD'], array( 'GET', 'HEAD' ), true ) ) { + return false; + } + if ( 0 === strpos( $path, '/wp-admin' ) || 0 === strpos( $path, '/wp-login.php' ) || 0 === strpos( $path, '/wp-json' ) ) { + return false; + } + return true; +} + +function wp_cow_proxy_remote_frontend( $remote_url, $local_url, $path ) { + if ( '1' !== getenv( 'WPCOW_PROXY_FRONTEND' ) || isset( $_GET['__wp_cow_local'] ) || ! wp_cow_is_frontend_get( $path ) ) { + return false; + } + + $query = $_GET; + unset( $query['__wp_cow_bypass_splash'], $query['__wp_cow_local'] ); + $target = rtrim( $remote_url, '/' ) . ( '/' === $path ? '/' : $path ); + if ( ! empty( $query ) ) { + $target .= '?' . http_build_query( $query ); + } + + $timeout = (int) getenv( 'WPCOW_PROXY_TIMEOUT_SECS' ); + if ( $timeout < 1 ) { + $timeout = 20; + } + + $body = false; + $status = 0; + $content_type = 'text/html; charset=utf-8'; + if ( function_exists( 'curl_init' ) ) { + $ch = curl_init( $target ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); + curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, min( 5, $timeout ) ); + curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout ); + curl_setopt( $ch, CURLOPT_USERAGENT, 'wp-cow frontend proxy' ); + curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'X-WP-COW-Proxy: 1' ) ); + $body = curl_exec( $ch ); + $status = (int) curl_getinfo( $ch, CURLINFO_RESPONSE_CODE ); + $type = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE ); + if ( is_string( $type ) && '' !== $type ) { + $content_type = $type; + } + if ( PHP_VERSION_ID < 80000 ) { + curl_close( $ch ); + } + } else { + $context = stream_context_create( + array( + 'http' => array( + 'timeout' => $timeout, + 'ignore_errors' => true, + 'header' => "User-Agent: wp-cow frontend proxy\r\nX-WP-COW-Proxy: 1\r\n", + ), + ) + ); + $body = @file_get_contents( $target, false, $context ); + $status = 200; + } + + if ( false === $body || 
'' === $body || $status >= 500 ) { + return false; + } + + if ( ! headers_sent() ) { + http_response_code( $status >= 300 ? 200 : max( 200, $status ) ); + header( 'Content-Type: ' . $content_type ); + header( 'Cache-Control: no-store' ); + header( 'X-WP-COW-Frontend-Proxy: 1' ); + } + if ( false !== stripos( $content_type, 'text/html' ) ) { + $body = str_replace( $remote_url, rtrim( $local_url, '/' ), $body ); + $body = str_replace( preg_replace( '/^https:/', 'http:', $remote_url ), rtrim( $local_url, '/' ), $body ); + } + if ( 'HEAD' !== $_SERVER['REQUEST_METHOD'] ) { + echo $body; + } + return true; +} + +function wp_cow_render_wordpress( $ready_file ) { + ob_start(); + require rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ) . '/index.php'; + $html = ob_get_clean(); + + if ( wp_cow_looks_like_installer( $html ) ) { + wp_cow_runtime_error_page( + 'wp-cow did not load the remote site', + 'WordPress tried to show the installation wizard. This clone refuses to treat an empty or unavailable database lower layer as a successful site load.', + 'Check the remote database probe, SSH connectivity, and wp-content/db.php drop-in before retrying.' + ); + return true; + } + + if ( ! is_dir( dirname( $ready_file ) ) ) { + mkdir( dirname( $ready_file ), 0777, true ); + } + file_put_contents( $ready_file, json_encode( array( 'ready_at' => time() ) ) ); + echo $html; + return true; +} + +if ( isset( $_GET['__wp_cow_installer_guard'] ) || in_array( $path, array( '/wp-admin/install.php', '/wp-admin/setup-config.php' ), true ) ) { + wp_cow_runtime_error_page( + 'wp-cow did not load the remote site', + 'WordPress tried to show an installation/setup path. This clone refuses to treat an empty or unavailable database lower layer as a successful site load.', + 'Check the remote database probe, SSH connectivity, and wp-content/db.php drop-in before retrying.' 
+ ); + return true; +} + +$wp_cow_docroot = rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ); +if ( in_array( $path, array( '/wp-admin', '/wp-admin/' ), true ) && is_file( $wp_cow_docroot . '/wp-admin/index.php' ) ) { + $_SERVER['SCRIPT_NAME'] = '/wp-admin/index.php'; + $_SERVER['SCRIPT_FILENAME'] = $wp_cow_docroot . '/wp-admin/index.php'; + require $wp_cow_docroot . '/wp-admin/index.php'; + return true; +} + +if ( '/' !== $path && is_file( $file ) ) { + return false; +} + +if ( wp_cow_proxy_remote_frontend( $wp_cow_remote_url, $wp_cow_local_url, $path ) ) { + return true; +} + +$should_show_splash = ( + '0' !== getenv( 'WPCOW_SPLASH' ) && + ! isset( $_GET['__wp_cow_bypass_splash'] ) && + ! is_file( $wp_cow_ready_file ) && + in_array( $_SERVER['REQUEST_METHOD'], array( 'GET', 'HEAD' ), true ) && + ( '/' === $path || false === strpos( basename( $path ), '.' ) ) +); + +if ( $should_show_splash ) { + header( 'Content-Type: text/html; charset=utf-8' ); + header( 'Cache-Control: no-store' ); + echo <<<'HTML' + + + + + + wp-cow is warming this page + + + +
+

Preparing local WordPress

+

Fetching only the remote files this request needs. Cached files will be reused on later requests.

+
+
Starting...
+
+ + + +HTML; + return true; +} + +if ( isset( $_GET['__wp_cow_bypass_splash'] ) ) { + return wp_cow_render_wordpress( $wp_cow_ready_file ); +} + +return wp_cow_render_wordpress( $wp_cow_ready_file ); +"# + .replace( + "__WPCOW_PROGRESS_FILE__", + &php_string(&paths.file_cache.join("progress.json").to_string_lossy()), + ) + .replace( + "__WPCOW_READY_FILE__", + &php_string(&paths.run.join("first-request-ready.json").to_string_lossy()), + ) + .replace("__WPCOW_REMOTE_URL__", &php_string(&manifest.remote_url)) + .replace("__WPCOW_LOCAL_URL__", &php_string(&manifest.local_url)) +} + +fn php_string(value: &str) -> String { + format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")) +} + +#[allow(dead_code)] +pub fn generated_file_paths(root: &Path) -> Vec { + vec![ + root.join("wp-config.php"), + root.join("wp-content/db.php"), + root.join("wp-content/mu-plugins/wp-cow-safety.php"), + ] +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{ + clone_paths, ensure_clone_dirs, write_manifest, write_offline_marker, DbProxy, LocalDb, + Manifest, OfflineMarker, Probe, RemoteDbTunnel, MANIFEST_VERSION, + }; + use std::ffi::OsString; + use std::io::{Read, Write}; + use std::net::{TcpListener, TcpStream}; + use std::os::unix::fs::PermissionsExt; + use std::path::{Path, PathBuf}; + use std::process::{Child, Command, Stdio}; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::{Arc, Mutex, OnceLock}; + use std::thread; + use std::time::{Duration, Instant}; + + fn manifest() -> Manifest { + Manifest { + version: MANIFEST_VERSION, + name: "example".to_string(), + ssh: "user@example.com".to_string(), + remote_path: "/srv/www".to_string(), + remote_url: "https://example.com".to_string(), + local_url: "http://example.test".to_string(), + created_at_unix: 1, + probe: Probe { + table_prefix: "wp_".to_string(), + db_name: "example_wp".to_string(), + db_host: "localhost".to_string(), + db_user: "example_wp".to_string(), + ..Probe::default() + }, + 
local_db: LocalDb { + name: "cow_example".to_string(), + user: "cow_example".to_string(), + password: "secret".to_string(), + host: "127.0.0.1".to_string(), + port: 33071, + }, + db_proxy: DbProxy { + host: "127.0.0.1".to_string(), + port: 33070, + }, + remote_db_tunnel: RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: 33072, + }, + control_url: "http://127.0.0.1:39070".to_string(), + cache_max_file_bytes: 1024, + remote_metadata_cache_ttl_secs: 30, + } + } + + fn php_single_quoted_path(path: &Path) -> String { + let mut value = path.to_string_lossy().into_owned(); + if path.is_dir() && !value.ends_with('/') { + value.push('/'); + } + value.replace('\\', "\\\\").replace('\'', "\\'") + } + + #[test] + fn generated_config_shadows_urls_and_database() { + let temp = tempfile::tempdir().unwrap(); + let paths = clone_paths(temp.path(), "example"); + let php = wp_config_php(&manifest(), &paths); + assert!(php.contains("define( 'DB_NAME', 'cow_example' );")); + assert!(php.contains("define( 'DB_HOST', '127.0.0.1:33070' );")); + assert!(php.contains("define( 'WPCOW_LOCAL_DB_HOST', '127.0.0.1:33071' );")); + assert!(php.contains("$wp_cow_local_url = 'http://example.test';")); + assert!(php.contains("define( 'WP_HOME', $wp_cow_local_url );")); + assert!(php.contains("HTTP_X_FORWARDED_PROTO")); + assert!(php.contains("$_SERVER['HTTPS'] = 'on';")); + assert!(php.contains("$wp_cow_local_host === $wp_cow_request_host")); + assert!(php.contains("HTTP_X_FORWARDED_HOST")); + assert!(php.contains("define( 'WPCOW_REMOTE_URL', 'https://example.com' );")); + assert!(php.contains("$table_prefix = 'wp_';")); + assert!(php.contains("WPCOW_CONTROL_URL")); + assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); + assert!(php.contains("WPCOW_DB_STATE_FILE")); + assert!(php.contains("WPCOW_PLUGIN_POLICY_FILE")); + assert!(!php.contains("WPCOW_REMOTE_DB_NAME")); + assert!(!php.contains("WPCOW_REMOTE_DB_USER")); + assert!(!php.contains("WPCOW_REMOTE_DB_PASSWORD")); + 
assert!(!php.contains("WPCOW_REMOTE_DB_HOST")); + assert!(php.contains("wp-cow DB/runtime error")); + assert!(php.contains("wp-content/db.php")); + assert!(db_dropin_php().contains("WPCOW_LOCAL_DB_HOST")); + } + + #[test] + fn db_dropin_blocks_write_classes() { + let php = db_dropin_php(); + assert!(php.contains("cow_is_write_sql")); + assert!(php.contains("cow_select_has_remote_side_effect_clause")); + assert!(php.contains("/materialize")); + assert!(php.contains("cow_remote_query_cache_get")); + assert!(php.contains("cow_remote_query_cache_set")); + assert!(!php.contains("cow_remote_mysqli")); + assert!(!php.contains("WPCOW_REMOTE_DB_PASSWORD")); + assert!(php.contains("cow_cached_remote_read_is_safe_without_control")); + assert!(php.contains("cow_safe_local_read_without_control")); + assert!(php.contains("cow_query_matches_option_bootstrap")); + assert!(php.contains("dirty_tables")); + assert!(!php.contains("cow_remote_query_cache_clear")); + assert!(php.contains("cow_is_protected_theme_option_write")); + assert!(php.contains("WPCOW_PROTECT_THEME_OPTIONS")); + assert!(php.contains("cow_db_runtime_fail")); + assert!(php.contains("will not fall back to the empty local schema")); + assert!(php.contains("'sql' => $query")); + assert!(php.contains("INSERT|REPLACE")); + } + + #[test] + fn db_dropin_rejects_read_shaped_remote_writes() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP SQL safety test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let wp_includes = temp.path().join("wp-includes"); + fs::create_dir_all(&wp_includes).unwrap(); + fs::write( + wp_includes.join("class-wpdb.php"), + " true, + "SELECT * FROM wp_posts WHERE post_title = 'FOR UPDATE'" => true, + 'SELECT * FROM wp_posts /* FOR UPDATE */ WHERE ID = 1' => true, + "SELECT * FROM wp_posts INTO OUTFILE '/tmp/wp-cow-leak'" => false, + 'SELECT * FROM wp_posts WHERE ID = 1 FOR UPDATE' => false, + 'SELECT * FROM wp_posts 
WHERE ID = 1 LOCK IN SHARE MODE' => false, +); +foreach ( $cases as $sql => $expected ) {{ + $actual = cow_is_safe_read_sql( $sql ); + if ( $actual !== $expected ) {{ + fwrite( STDERR, $sql . ' expected ' . ( $expected ? 'safe' : 'unsafe' ) . ' got ' . ( $actual ? 'safe' : 'unsafe' ) . PHP_EOL ); + exit( 1 ); + }} +}} +file_put_contents( + WPCOW_DB_STATE_FILE, + json_encode( + array( + 'option_rows' => array( 'wp_options:siteurl' ), + 'dirty_tables' => array(), + 'dirty_option_rows' => array(), + ) + ) +); +if ( ! cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) {{ + fwrite( STDERR, 'clean copied rows should not block the remote query cache' . PHP_EOL ); + exit( 1 ); +}} +file_put_contents( + WPCOW_DB_STATE_FILE, + json_encode( + array( + 'option_rows' => array( 'wp_options:siteurl' ), + 'dirty_tables' => array(), + 'dirty_option_rows' => array( 'wp_options:siteurl' ), + ) + ) +); +touch( WPCOW_DB_STATE_FILE, time() + 2 ); +clearstatcache( true, WPCOW_DB_STATE_FILE ); +if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) {{ + fwrite( STDERR, 'dirty copied rows must block the remote query cache' . 
PHP_EOL ); + exit( 1 ); +}} +"#, + php_single_quoted_path(temp.path()), + php_single_quoted_path(&state_file), + php_single_quoted_path(&db_dropin) + ); + fs::write(&check, script).unwrap(); + + let output = Command::new("php") + .arg(&check) + .output() + .unwrap_or_else(|err| panic!("run PHP SQL safety check: {err}")); + assert!( + output.status.success(), + "PHP SQL safety check failed: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + + #[test] + fn safety_plugin_blocks_side_effects() { + let php = safety_mu_plugin_php(); + assert!(php.contains("pre_wp_mail")); + assert!(php.contains("X-Robots-Tag")); + assert!(php.contains("pre_http_request")); + assert!(php.contains("wp_cow_local_asset_http_response")); + assert!(php.contains("validate_current_theme")); + assert!(php.contains("WPCOW_ENABLE_PLUGINS")); + assert!(php.contains("WPCOW_PLUGIN_POLICY_FILE")); + assert!(php.contains("wp_cow_filter_active_plugins")); + assert!(php.contains("wp_cow_allowed_plugins")); + assert!(php.contains("$quarantined")); + assert!(php.contains("wp_cow_rewrite_remote_url_to_local")); + assert!(php.contains("wp_cow_localize_remote_urls_in_text")); + assert!(php.contains("nav_menu_link_attributes")); + assert!(php.contains("nav_menu_css_class")); + assert!(php.contains("wp_enqueue_emoji_styles")); + assert!(php.contains("print_emoji_detection_script")); + assert!(php.contains("option_active_plugins")); + assert!(php.contains("siteground_optimizer_combine_css")); + assert!(php.contains("siteground_optimizer_file_caching")); + assert!(php.contains("siteground_optimizer_optimize_css")); + assert!(php.contains("wp_cow_find_siteground_combined_css")); + assert!(php.contains("WPCOW_SITEGROUND_COMBINED_CSS")); + assert!(php.contains("wp_cow_siteground_lazyload_content_images")); + assert!(php.contains("wp_cow_enqueue_siteground_lazysizes")); + assert!(php.contains("lazysizes.min.js")); + 
assert!(php.contains("WPCOW_SITEGROUND_LAZYLOAD_IMAGES")); + assert!(php.contains("should_load_block_assets_on_demand")); + assert!(php.contains("should_load_separate_core_block_assets")); + } + + #[test] + fn safety_plugin_serves_local_assets_to_wp_http_without_network() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP local asset HTTP test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let safety = temp.path().join("wp-cow-safety.php"); + let docroot = temp.path().join("site"); + let asset = docroot.join("wp-content/themes/neve/style.css"); + let sg_asset = docroot + .join("wp-content/uploads/siteground-optimizer-assets") + .join("siteground-optimizer-combined-css-abc123.css"); + let lazy_asset = docroot + .join("wp-content/plugins/sg-cachepress/assets/js") + .join("lazysizes.min.js"); + let check = temp.path().join("check.php"); + fs::create_dir_all(asset.parent().unwrap()).unwrap(); + fs::write(&asset, b"body{color:#123}").unwrap(); + fs::create_dir_all(sg_asset.parent().unwrap()).unwrap(); + fs::write( + &sg_asset, + [ + b"#wp-block-themeisle-blocks-advanced-columns-a241f2a5{min-height:800px;--background:url(https://example.test/wp-content/uploads/hero.jpg)}" + .as_slice(), + vec![b' '; 2048].as_slice(), + ] + .concat(), + ) + .unwrap(); + fs::create_dir_all(lazy_asset.parent().unwrap()).unwrap(); + fs::write(&lazy_asset, b"/*! 
lazysizes */").unwrap(); + fs::write(&safety, safety_mu_plugin_php()).unwrap(); + fs::write( + &check, + format!( + r#" '', + ); +}} + function wp_enqueue_style( $handle, $src, $deps = array(), $ver = false ) {{ + global $enqueued; + $enqueued[ $handle ] = $src; + }} + function wp_enqueue_script( $handle, $src, $deps = array(), $ver = false, $in_footer = false ) {{ + global $enqueued_scripts; + $enqueued_scripts[ $handle ] = $src; + }} + function wp_script_add_data( $handle, $key, $value ) {{ + global $script_data; + $script_data[ $handle ][ $key ] = $value; + }} +class WP_Error {{ + public $code; + public $message; + public function __construct( $code, $message ) {{ + $this->code = $code; + $this->message = $message; + }} +}} +define( 'ABSPATH', '{docroot}' . '/' ); + putenv( 'WPCOW_SITEGROUND_COMBINED_CSS=siteground-optimizer-combined-css-abc123.css' ); + require '{safety}'; + $enqueued = array(); + $enqueued_scripts = array(); + $script_data = array(); + do_test_action( 'wp_enqueue_scripts' ); + if ( + empty( $enqueued['siteground-optimizer-combined-css-abc123'] ) || + false === strpos( $enqueued['siteground-optimizer-combined-css-abc123'], '/wp-content/uploads/siteground-optimizer-assets/wp-cow-localized-siteground-optimizer-combined-css-abc123.css' ) + ) {{ + fwrite( STDERR, 'existing SG combined CSS was not preserved: ' . json_encode( $enqueued ) . PHP_EOL ); + exit( 1 ); + }} + $localized_css = '{docroot}' . '/wp-content/uploads/siteground-optimizer-assets/wp-cow-localized-siteground-optimizer-combined-css-abc123.css'; + if ( ! is_file( $localized_css ) || false === strpos( file_get_contents( $localized_css ), 'https://local.test/wp-content/uploads/hero.jpg' ) ) {{ + fwrite( STDERR, 'SG combined CSS URLs were not localized' . 
PHP_EOL ); + exit( 1 ); + }} + if ( + empty( $enqueued_scripts['siteground-optimizer-lazy-sizes-js'] ) || + false === strpos( $enqueued_scripts['siteground-optimizer-lazy-sizes-js'], '/wp-content/plugins/sg-cachepress/assets/js/lazysizes.min.js' ) || + 'defer' !== $script_data['siteground-optimizer-lazy-sizes-js']['strategy'] + ) {{ + fwrite( STDERR, 'SG lazysizes runtime was not enqueued: ' . json_encode( array( $enqueued_scripts, $script_data ) ) . PHP_EOL ); + exit( 1 ); + }} + $cache_option = apply_test_filter( 'pre_option_siteground_optimizer_combine_css', 1 ); + if ( 0 !== $cache_option ) {{ + fwrite( STDERR, 'local SG cache generation was not disabled' . PHP_EOL ); + exit( 1 ); + }} + $content = '

Browse

'; + $lazy_content = apply_test_filter( 'the_content', $content ); + if ( + false === strpos( $lazy_content, 'src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"' ) || + false === strpos( $lazy_content, 'data-src="https://local.test/wp-content/uploads/2019/12/photo.jpg"' ) || + false === strpos( $lazy_content, 'href="https://local.test/adventures/"' ) || + false === strpos( $lazy_content, 'class="wp-image-45 lazyload"' ) || + false !== strpos( $lazy_content, 'width="600"' ) || + false !== strpos( $lazy_content, 'srcset=' ) +) {{ + fwrite( STDERR, 'SG lazyload placeholder shape was not preserved: ' . $lazy_content . PHP_EOL ); + exit( 1 ); +}} + $response = apply_test_filter( + 'pre_http_request', + false, + array( 'method' => 'GET' ), + 'https://local.test/wp-content/themes/neve/style.css?ver=1' + ); +if ( ! is_array( $response ) || 'body{{color:#123}}' !== $response['body'] || 200 !== $response['response']['code'] ) {{ + fwrite( STDERR, 'local asset response failed: ' . json_encode( $response ) . PHP_EOL ); + exit( 1 ); +}} + $blocked = apply_test_filter( + 'pre_http_request', + false, + array( 'method' => 'GET' ), + 'https://api.example.test/side-effect' + ); +if ( ! $blocked instanceof WP_Error ) {{ + fwrite( STDERR, 'external request was not blocked' . 
PHP_EOL ); + exit( 1 ); +}} +"#, + docroot = php_single_quoted_path(&docroot), + safety = php_single_quoted_path(&safety) + ), + ) + .unwrap(); + + let output = Command::new("php").arg(&check).output().unwrap(); + assert!( + output.status.success(), + "local asset HTTP shim failed: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + + #[test] + fn safety_plugin_auto_mode_allows_only_policy_admitted_plugins() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP plugin policy test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let safety = temp.path().join("wp-cow-safety.php"); + let policy = temp.path().join("plugin-policy.json"); + let check = temp.path().join("check.php"); + fs::write(&safety, safety_mu_plugin_php()).unwrap(); + fs::write( + &policy, + r#"{"version":1,"mode":"auto","active":["akismet/akismet.php","woocommerce/woocommerce.php"],"allow":["akismet/akismet.php","woocommerce/woocommerce.php"],"quarantine":{"akismet/akismet.php":"timeout"}}"#, + ) + .unwrap(); + fs::write( + &check, + format!( + r#" 1, 'woocommerce/woocommerce.php' => 2 ) +); +if ( $sitewide !== array( 'woocommerce/woocommerce.php' => 2 ) ) {{ + fwrite( STDERR, 'unexpected sitewide plugin filter: ' . json_encode( $sitewide ) . PHP_EOL ); + exit( 1 ); +}} +$item = (object) array( 'url' => 'http://remote.test/' ); +$classes = call_user_func( $filters['nav_menu_css_class'], array( 'menu-item' ), $item ); +foreach ( array( 'current-menu-item', 'current_page_item', 'menu-item-home', 'nv-active' ) as $expected_class ) {{ + if ( ! in_array( $expected_class, $classes, true ) ) {{ + fwrite( STDERR, 'missing active nav class: ' . $expected_class . ' from ' . json_encode( $classes ) . 
PHP_EOL );
+        exit( 1 );
+    }}
+}}
+$atts = call_user_func( $filters['nav_menu_link_attributes'], array( 'href' => 'http://remote.test/' ), $item );
+if ( 'http://local.test/' !== $atts['href'] || 'page' !== $atts['aria-current'] ) {{
+    fwrite( STDERR, 'remote home nav link was not localized/current: ' . json_encode( $atts ) . PHP_EOL );
+    exit( 1 );
+}}
+putenv( 'WPCOW_PLUGIN_MODE=full' );
+$full = call_user_func(
+    $filters['option_active_plugins'],
+    array( 'akismet/akismet.php', 'woocommerce/woocommerce.php' )
+);
+if ( array_values( $full ) !== array( 'akismet/akismet.php', 'woocommerce/woocommerce.php' ) ) {{
+    fwrite( STDERR, 'full mode did not preserve plugins: ' . json_encode( $full ) . PHP_EOL );
+    exit( 1 );
+}}
+"#,
+                policy = php_single_quoted_path(&policy),
+                safety = php_single_quoted_path(&safety)
+            ),
+        )
+        .unwrap();
+
+        let output = Command::new("php").arg(&check).output().unwrap();
+        assert!(
+            output.status.success(),
+            "PHP plugin policy check failed: {}{}",
+            String::from_utf8_lossy(&output.stdout),
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+
+    /// Legacy opaque markers left in the upper layer by older clones must be
+    /// removed by `write_wordpress_overrides` so plugin and language files
+    /// stay backed by the lazy remote lower layer.
+    #[test]
+    fn generated_overrides_keep_remote_runtime_dirs_visible_by_default() {
+        let temp = tempfile::tempdir().unwrap();
+        let paths = clone_paths(temp.path(), "example");
+        ensure_clone_dirs(&paths).unwrap();
+        let plugins = paths.upper.join("wp-content/plugins");
+        let languages = paths.upper.join("wp-content/languages");
+        fs::create_dir_all(&plugins).unwrap();
+        fs::create_dir_all(&languages).unwrap();
+        // Seed stale markers that a previous clone layout would have written.
+        fs::write(
+            plugins.join(crate::overlay::OPAQUE_MARKER),
+            b"legacy opaque marker\n",
+        )
+        .unwrap();
+        fs::write(
+            languages.join(crate::overlay::OPAQUE_MARKER),
+            b"legacy opaque marker\n",
+        )
+        .unwrap();
+
+        write_wordpress_overrides(&paths, &manifest()).unwrap();
+
+        assert!(
+            crate::plugin_policy::policy_path(&paths).is_file(),
+            "generated overrides should create the initial plugin admission policy"
+        );
+        assert!(
+            !plugins.join(crate::overlay::OPAQUE_MARKER).exists(),
+            "plugin files should remain backed by the lazy remote lower layer by default"
+        );
+        assert!(
+            !languages.join(crate::overlay::OPAQUE_MARKER).exists(),
+            "language files should remain backed by the lazy remote lower layer by default"
+        );
+    }
+
+    /// The generated router PHP must contain the splash/progress plumbing and
+    /// must not leak any unsubstituted `__WPCOW_*__` placeholder tokens.
+    #[test]
+    fn router_exposes_splash_and_progress_endpoint() {
+        let temp = tempfile::tempdir().unwrap();
+        let paths = clone_paths(temp.path(), "example");
+        let php = router_php(&paths, &manifest());
+        assert!(php.contains("/__wp-cow/progress"));
+        assert!(php.contains("__wp_cow_bypass_splash"));
+        assert!(php.contains("wp_cow_looks_like_installer"));
+        assert!(php.contains("wp_cow_proxy_remote_frontend"));
+        assert!(php.contains("X-WP-COW-Frontend-Proxy"));
+        assert!(php.contains("WordPress tried to show the installation wizard"));
+        assert!(php.contains("__wp_cow_installer_guard"));
+        assert!(php.contains("Cache-Control: no-store"));
+        assert!(!php.contains("__WPCOW_PROGRESS_FILE__"));
+        assert!(!php.contains("__WPCOW_READY_FILE__"));
+        assert!(!php.contains("__WPCOW_REMOTE_URL__"));
+        assert!(!php.contains("__WPCOW_LOCAL_URL__"));
+    }
+
+    /// Runs `php -l` over every generated PHP artifact (skipped when `php`
+    /// is not on PATH) so syntax errors in the templates fail fast.
+    #[test]
+    fn generated_php_lints() {
+        if Command::new("php").arg("-v").output().is_err() {
+            eprintln!("skipping generated PHP lint because php is not on PATH");
+            return;
+        }
+
+        let temp = tempfile::tempdir().unwrap();
+        let paths = clone_paths(temp.path(), "example");
+        let files = [
+            ("wp-config.php", wp_config_php(&manifest(), &paths)),
+            ("db.php", db_dropin_php().to_string()),
+            ("wp-cow-safety.php", safety_mu_plugin_php().to_string()),
+            ("router.php", router_php(&paths, &manifest())),
+        ];
+
+        for (name, php) in files {
+            let path = temp.path().join(name);
+            std::fs::write(&path, php).unwrap();
+            let output = Command::new("php")
+                .arg("-l")
+                .arg(&path)
+                .output()
+                .unwrap_or_else(|err| panic!("run php -l for {name}: {err}"));
+            assert!(
+                output.status.success(),
+                "php -l failed for {name}: {}{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            );
+        }
+    }
+
+    /// Boots the router under `php -S` and checks that the progress endpoint
+    /// and splash page respond quickly while the clone is still preparing.
+    #[test]
+    fn router_splash_and_progress_smoke_responds_quickly() {
+        if Command::new("php").arg("-v").output().is_err() {
+            eprintln!("skipping router smoke test because php is not on PATH");
+            return;
+        }
+
+        let temp = tempfile::tempdir().unwrap();
+        let paths = clone_paths(temp.path(), "example");
+        fs::create_dir_all(&paths.generated).unwrap();
+        fs::create_dir_all(&paths.file_cache).unwrap();
+        fs::create_dir_all(&paths.run).unwrap();
+
+        let docroot = temp.path().join("docroot");
+        fs::create_dir_all(&docroot).unwrap();
+        let router = paths.generated.join("router.php");
+        fs::write(&router, router_php(&paths, &manifest())).unwrap();
+
+        let port = free_tcp_port();
+        let mut child = Command::new("php")
+            .env("WPCOW_SPLASH", "1")
+            .env("WPCOW_PROXY_FRONTEND", "0")
+            .env("PHP_CLI_SERVER_WORKERS", "4")
+            .arg("-S")
+            .arg(format!("127.0.0.1:{port}"))
+            .arg("-t")
+            .arg(&docroot)
+            .arg(&router)
+            .stdout(std::process::Stdio::null())
+            .stderr(std::process::Stdio::null())
+            .spawn()
+            .unwrap_or_else(|err| panic!("start php server: {err}"));
+
+        // Poll the progress endpoint until the server is up; bail out if the
+        // child exits early or five seconds pass.
+        let started = Instant::now();
+        let progress = loop {
+            if started.elapsed() > Duration::from_secs(5) {
+                let _ = child.kill();
+                let _ = child.wait();
+                panic!("php router did not start within 5s");
+            }
+            if let Some(status) = child
+                .try_wait()
+                .unwrap_or_else(|err| panic!("poll php server: {err}"))
+            {
+                panic!("php router exited early with status {status}");
+            }
+            match http_get(port, "/__wp-cow/progress", Duration::from_secs(1)) {
+                Ok(response) if response.contains("\"phase\":\"idle\"") => break response,
+                Err(_) => thread::sleep(Duration::from_millis(50)),
+                Ok(_) => thread::sleep(Duration::from_millis(50)),
+            }
+        };
+        assert!(
+            progress.contains("\"phase\":\"idle\""),
+            "unexpected progress response: {}",
+            progress
+        );
+        assert!(
+            progress.contains("\"ready\":false"),
+            "unexpected progress response: {}",
+            progress
+        );
+
+        let request_started = 
Instant::now(); + let splash = http_get_nonempty(port, "/wp-cow-smoke", Duration::from_secs(2)); + assert!( + request_started.elapsed() < Duration::from_secs(2), + "splash took {:?}", + request_started.elapsed() + ); + assert!( + splash.contains("Preparing local WordPress"), + "unexpected splash response: {}", + splash + ); + assert!( + splash.contains("__wp_cow_bypass_splash"), + "unexpected splash response: {}", + splash + ); + + fs::write( + docroot.join("index.php"), + "WordPress › Installationinstall';", + ) + .unwrap(); + let installer = http_get_nonempty( + port, + "/wp-cow-smoke?__wp_cow_bypass_splash=1", + Duration::from_secs(2), + ); + assert!( + installer.starts_with("HTTP/1.1 500"), + "unexpected installer response: {}", + installer + ); + assert!( + installer.contains("wp-cow did not load the remote site"), + "unexpected installer response: {}", + installer + ); + fs::create_dir_all(docroot.join("wp-admin")).unwrap(); + fs::write( + docroot.join("wp-admin/install.php"), + "> = OnceLock::new(); + let _lock = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + require_command("php"); + require_command("mariadbd"); + require_command("mariadb-install-db"); + require_command("mysql"); + require_command("fusermount3"); + assert!( + Path::new("/dev/fuse").exists(), + "strict production harness requires /dev/fuse" + ); + + let temp = tempfile::tempdir().unwrap(); + let mysql_port = free_tcp_port(); + let mysql = ChildGuard::new(start_mariadb(temp.path(), mysql_port)); + wait_for_mysql(mysql_port); + create_harness_databases(mysql_port); + + let state_dir = temp.path().join("state"); + let paths = clone_paths(&state_dir, "example"); + ensure_clone_dirs(&paths).unwrap(); + + let remote_public = HarnessHttpServer::start("REMOTE PUBLIC BYPASS"); + let control_port = free_tcp_port(); + let db_proxy_port = free_tcp_port(); + let site_port = free_tcp_port(); + let mut harness_manifest = manifest(); + harness_manifest.ssh = "fake-host".to_string(); + 
harness_manifest.remote_url = format!("http://127.0.0.1:{}", remote_public.port); + harness_manifest.local_url = format!("http://127.0.0.1:{site_port}"); + harness_manifest.control_url = format!("http://127.0.0.1:{control_port}"); + harness_manifest.probe.db_name = "remote_wp".to_string(); + harness_manifest.probe.db_host = format!("127.0.0.1:{mysql_port}"); + harness_manifest.probe.db_user = "root".to_string(); + harness_manifest.probe.db_password = String::new(); + harness_manifest.local_db = LocalDb { + name: "local_wp".to_string(), + user: "root".to_string(), + password: String::new(), + host: "127.0.0.1".to_string(), + port: mysql_port, + }; + harness_manifest.remote_db_tunnel = RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: mysql_port, + }; + harness_manifest.db_proxy = DbProxy { + host: "127.0.0.1".to_string(), + port: db_proxy_port, + }; + harness_manifest.cache_max_file_bytes = 1024 * 1024; + harness_manifest.remote_metadata_cache_ttl_secs = 60; + + let remote_docroot = temp.path().join("remote-docroot"); + harness_manifest.remote_path = remote_docroot.to_string_lossy().to_string(); + write_runtime_harness_docroot(&remote_docroot, &paths, &harness_manifest); + fs::create_dir_all(remote_docroot.join("wp-content/uploads/2026")).unwrap(); + fs::write( + remote_docroot.join("wp-content/uploads/2026/huge-file.txt"), + b"uploads must stay lazy", + ) + .unwrap(); + write_manifest(&paths.manifest, &harness_manifest).unwrap(); + write_wordpress_overrides(&paths, &harness_manifest).unwrap(); + crate::db::set_local_admin_password(&harness_manifest, Some("admin"), "local-pass") + .unwrap(); + + let fake_bin = temp.path().join("fake-bin"); + let fake_ssh_log = temp.path().join("fake-ssh.log"); + install_fake_ssh(&fake_bin, &fake_ssh_log); + let _env = EnvVarGuard::set(&[ + ( + "PATH", + prepend_path(&fake_bin, std::env::var_os("PATH").as_ref()), + ), + ( + "WPCOW_FAKE_SSH_LOG", + fake_ssh_log.to_string_lossy().into_owned(), + ), + ("WPCOW_WEB_SERVER", 
"php".to_string()), + ("WPCOW_SPLASH", "0".to_string()), + ("WPCOW_PROXY_FRONTEND", "0".to_string()), + ("WPCOW_REMOTE_DB_TUNNEL", "0".to_string()), + ("WPCOW_REMOTE_FILE_HELPER", "0".to_string()), + ("WPCOW_CONTROL_REQUEST_TIMEOUT_SECS", "10".to_string()), + ("WPCOW_FUSE_TTL_SECS", "1".to_string()), + ("WPCOW_PHP_WORKERS", "1".to_string()), + ]); + + let mountpoint = temp.path().join("mount"); + let shutdown = Arc::new(AtomicBool::new(false)); + let run_manifest = harness_manifest.clone(); + let run_paths = paths.clone(); + let run_mountpoint = mountpoint.clone(); + let run_shutdown = shutdown.clone(); + let run_thread = thread::spawn(move || { + crate::run::run_site_with_shutdown( + run_manifest, + run_paths, + crate::run::RunOptions { + mountpoint: run_mountpoint, + http_addr: format!("127.0.0.1:{site_port}"), + skip_php: false, + }, + run_shutdown, + ) + }); + wait_for_port(site_port); + + let first = http_get_nonempty(site_port, "/", Duration::from_secs(10)); + assert!( + first.contains("Remote Harness Page"), + "production run did not render the remote page through local WordPress: {}", + first + ); + assert!( + !remote_public.was_hit(), + "production run must not proxy the frontend public URL by default" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT post_title FROM local_wp.wp_posts WHERE ID=1;" + ), + "Remote Harness Page", + "Rust control row-COW should materialize the rendered page locally" + ); + assert!( + cached_file_count(&paths) > 0, + "FUSE run should cache requested runtime files" + ); + assert!( + !paths.file_cache.join("mirror/wp-content/uploads").exists(), + "uploads must not be mirrored or prefetched" + ); + let ssh_log = fs::read_to_string(&fake_ssh_log).unwrap_or_default(); + let remote_uploads_path = format!( + "{}/wp-content/uploads", + remote_docroot.to_string_lossy().trim_end_matches('/') + ); + assert!( + !ssh_log.contains(&remote_uploads_path), + "production run should not touch uploads unless requested:\n{}", + ssh_log + 
); + assert!( + !ssh_log.contains("tar -cf -"), + "production run must not recursively tar runtime files:\n{}", + ssh_log + ); + + let installer = + http_get_nonempty(site_port, "/wp-admin/install.php", Duration::from_secs(5)); + assert!( + installer.starts_with("HTTP/1.1 500"), + "installer path must be reported as a runtime failure: {}", + installer + ); + assert!( + installer.contains("wp-cow did not load the remote site"), + "installer path did not use the wp-cow runtime guard: {}", + installer + ); + + let login = http_post( + site_port, + "/wp-login.php", + "log=admin&pwd=local-pass", + Duration::from_secs(10), + ); + assert!( + login.contains("LOGIN OK"), + "local-only admin password did not authenticate through production run: {}", + login + ); + let cookie = response_cookie(&login).expect("login response should set an auth cookie"); + let admin = http_get_with_headers( + site_port, + "/wp-admin/", + &[("Cookie", cookie.as_str())], + Duration::from_secs(10), + ); + assert!( + admin.contains("WP ADMIN LOCAL DASHBOARD"), + "wp-admin did not render after local login through production run: {}", + admin + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT user_pass = MD5('remote-pass') FROM remote_wp.wp_users WHERE ID=1;" + ), + "1", + "production local admin override must not update the remote password" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT user_pass = MD5('local-pass') FROM local_wp.wp_users WHERE ID=1;" + ), + "1", + "production local admin override must update only the local DB" + ); + + let created = http_post( + site_port, + "/wp-admin/post-new.php", + "title=Local+Only", + Duration::from_secs(10), + ); + assert!( + created.contains("LOCAL POST CREATED"), + "production local post creation failed: {}", + created + ); + let local_only = http_get_nonempty(site_port, "/local-only-page", Duration::from_secs(10)); + assert!( + local_only.contains("Local Only Harness Page"), + "production local-only page was not visible locally: {}", + 
local_only + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT COUNT(*) FROM remote_wp.wp_posts WHERE ID=99;" + ), + "0", + "production local page creation must not write to remote" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT COUNT(*) FROM local_wp.wp_posts WHERE ID=99;" + ), + "1", + "production local page creation must write to the local overlay" + ); + + let edited = http_post( + site_port, + "/wp-admin/post.php", + "post_ID=1", + Duration::from_secs(10), + ); + assert!( + edited.contains("LOCAL POST EDITED"), + "production local post edit failed: {}", + edited + ); + let edited_page = http_get_nonempty(site_port, "/", Duration::from_secs(10)); + assert!( + edited_page.contains("Locally Edited Harness Page"), + "production edited local page did not render: {}", + edited_page + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT post_title FROM remote_wp.wp_posts WHERE ID=1;" + ), + "Remote Harness Page", + "production local edit must not change the remote row" + ); + + shutdown.store(true, Ordering::SeqCst); + match run_thread.join() { + Ok(result) => result.unwrap(), + Err(_) => panic!("production run thread panicked"), + } + wait_for_port_closed(site_port); + + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: vec![ + "wp_posts".to_string(), + "wp_users".to_string(), + "wp_usermeta".to_string(), + ], + admin_user: Some("admin".to_string()), + }, + ) + .unwrap(); + mysql_exec( + mysql_port, + "UPDATE remote_wp.wp_posts SET post_title='Remote Changed After Sever' WHERE ID=1;", + ); + fs::rename(&remote_docroot, temp.path().join("remote-docroot-gone")).unwrap(); + let ssh_lines_before_offline = read_line_count(&fake_ssh_log); + + let offline_shutdown = Arc::new(AtomicBool::new(false)); + let offline_manifest = harness_manifest.clone(); + let offline_paths = paths.clone(); + let offline_mountpoint = mountpoint.clone(); + let offline_thread_shutdown = offline_shutdown.clone(); + let 
offline_thread = thread::spawn(move || {
+            crate::run::run_site_with_shutdown(
+                offline_manifest,
+                offline_paths,
+                crate::run::RunOptions {
+                    mountpoint: offline_mountpoint,
+                    http_addr: format!("127.0.0.1:{site_port}"),
+                    skip_php: false,
+                },
+                offline_thread_shutdown,
+            )
+        });
+        wait_for_port(site_port);
+        let offline = http_get_nonempty(site_port, "/", Duration::from_secs(10));
+        assert!(
+            offline.contains("Locally Edited Harness Page"),
+            "offline production refresh did not use local materialized state: {}",
+            offline
+        );
+        assert!(
+            !offline.contains("Remote Changed After Sever"),
+            "offline production refresh read from the remote lower layer: {}",
+            offline
+        );
+        assert_eq!(
+            read_line_count(&fake_ssh_log),
+            ssh_lines_before_offline,
+            "offline production run must not invoke SSH"
+        );
+
+        offline_shutdown.store(true, Ordering::SeqCst);
+        match offline_thread.join() {
+            Ok(result) => result.unwrap(),
+            Err(_) => panic!("offline production run thread panicked"),
+        }
+        wait_for_port_closed(site_port);
+        drop(mysql);
+    }
+
+    /// Binds 127.0.0.1:0 and returns the kernel-assigned port so each
+    /// harness component can listen on a currently-free port.
+    fn free_tcp_port() -> u16 {
+        let listener = TcpListener::bind(("127.0.0.1", 0)).unwrap();
+        listener.local_addr().unwrap().port()
+    }
+
+    /// Issues a minimal HTTP/1.1 GET against 127.0.0.1:`port` and returns the
+    /// raw response (status line, headers and body) as one string.
+    fn http_get(port: u16, path: &str, timeout: Duration) -> std::io::Result<String> {
+        let mut stream = TcpStream::connect(("127.0.0.1", port))?;
+        stream.set_read_timeout(Some(timeout))?;
+        stream.set_write_timeout(Some(timeout))?;
+        write!(
+            stream,
+            "GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n"
+        )?;
+
+        let mut response = String::new();
+        stream.read_to_string(&mut response)?;
+        Ok(response)
+    }
+
+    /// Retries `http_get` until a non-empty response arrives or `timeout`
+    /// elapses; panics (failing the test) on a final empty reply or error.
+    fn http_get_nonempty(port: u16, path: &str, timeout: Duration) -> String {
+        let started = Instant::now();
+        loop {
+            match http_get(port, path, Duration::from_millis(500)) {
+                Ok(response) if !response.is_empty() => return response,
+                Ok(_) | Err(_) if started.elapsed() < timeout => {
+                    thread::sleep(Duration::from_millis(50));
+                }
+                Ok(response) => panic!("empty response from 
{path}: {response}"), + Err(err) => panic!("request {path} failed: {err}"), + } + } + } + + fn http_get_with_headers( + port: u16, + path: &str, + headers: &[(&str, &str)], + timeout: Duration, + ) -> String { + http_request(port, "GET", path, headers, "", timeout) + .unwrap_or_else(|err| panic!("GET {path} failed: {err}")) + } + + fn http_post(port: u16, path: &str, body: &str, timeout: Duration) -> String { + http_request( + port, + "POST", + path, + &[("Content-Type", "application/x-www-form-urlencoded")], + body, + timeout, + ) + .unwrap_or_else(|err| panic!("POST {path} failed: {err}")) + } + + fn http_request( + port: u16, + method: &str, + path: &str, + headers: &[(&str, &str)], + body: &str, + timeout: Duration, + ) -> std::io::Result { + let mut stream = TcpStream::connect(("127.0.0.1", port))?; + stream.set_read_timeout(Some(timeout))?; + stream.set_write_timeout(Some(timeout))?; + let mut request = format!( + "{method} {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\nContent-Length: {}\r\n", + body.len() + ); + for (name, value) in headers { + request.push_str(&format!("{name}: {value}\r\n")); + } + request.push_str("\r\n"); + request.push_str(body); + stream.write_all(request.as_bytes())?; + + let mut response = String::new(); + stream.read_to_string(&mut response)?; + Ok(response) + } + + fn response_cookie(response: &str) -> Option { + response.lines().find_map(|line| { + let (name, value) = line.split_once(':')?; + if !name.eq_ignore_ascii_case("set-cookie") { + return None; + } + value.trim().split(';').next().map(str::to_string) + }) + } + + fn prepend_path(dir: &Path, old_path: Option<&OsString>) -> String { + match old_path { + Some(old) if !old.is_empty() => format!("{}:{}", dir.display(), old.to_string_lossy()), + _ => dir.display().to_string(), + } + } + + fn install_fake_ssh(bin: &Path, log: &Path) { + fs::create_dir_all(bin).unwrap(); + let script = bin.join("ssh"); + fs::write( + &script, + r#"#!/usr/bin/env bash +set -euo 
pipefail
+printf '%s\n' "$*" >> "${WPCOW_FAKE_SSH_LOG:?}"
+
+for arg in "$@"; do
+  if [ "$arg" = "-O" ]; then
+    exit 0
+  fi
+done
+
+for arg in "$@"; do
+  if [ "$arg" = "-MNf" ]; then
+    exit 0
+  fi
+  if [ "$arg" = "-N" ]; then
+    while true; do sleep 60; done
+  fi
+done
+
+last="${!#}"
+if [ "$last" = "fake-host" ]; then
+  exit 0
+fi
+exec bash -lc "$last"
+"#,
+        )
+        .unwrap();
+        let mut perms = fs::metadata(&script).unwrap().permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&script, perms).unwrap();
+        fs::write(log, b"").unwrap();
+    }
+
+    /// Counts the lines currently recorded in the fake-ssh invocation log.
+    fn read_line_count(path: &Path) -> usize {
+        fs::read_to_string(path).unwrap_or_default().lines().count()
+    }
+
+    /// Recursively counts regular files under the clone's file cache.
+    fn cached_file_count(paths: &crate::config::ClonePaths) -> usize {
+        fn visit(path: &Path, count: &mut usize) {
+            let Ok(entries) = fs::read_dir(path) else {
+                return;
+            };
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.is_dir() {
+                    visit(&path, count);
+                } else if path.is_file() {
+                    *count += 1;
+                }
+            }
+        }
+
+        let mut count = 0;
+        visit(&paths.file_cache, &mut count);
+        count
+    }
+
+    /// RAII guard that sets process environment variables and restores (or
+    /// unsets) the previous values in reverse order on drop.
+    struct EnvVarGuard {
+        old: Vec<(&'static str, Option<OsString>)>,
+    }
+
+    impl EnvVarGuard {
+        fn set(vars: &[(&'static str, String)]) -> Self {
+            // Snapshot the old values before mutating the environment.
+            let old = vars
+                .iter()
+                .map(|(name, _)| (*name, std::env::var_os(name)))
+                .collect::<Vec<_>>();
+            for (name, value) in vars {
+                std::env::set_var(name, value);
+            }
+            Self { old }
+        }
+    }
+
+    impl Drop for EnvVarGuard {
+        fn drop(&mut self) {
+            for (name, value) in self.old.drain(..).rev() {
+                match value {
+                    Some(value) => std::env::set_var(name, value),
+                    None => std::env::remove_var(name),
+                }
+            }
+        }
+    }
+
+    /// Kills and reaps a spawned child process when dropped so failed tests
+    /// do not leak helper daemons.
+    struct ChildGuard {
+        child: Option<Child>,
+    }
+
+    impl ChildGuard {
+        fn new(child: Child) -> Self {
+            Self { child: Some(child) }
+        }
+
+        fn kill_wait(&mut self) {
+            if let Some(mut child) = self.child.take() {
+                let _ = child.kill();
+                let _ = child.wait();
+            }
+        }
+    }
+
+    impl Drop for ChildGuard {
+        fn drop(&mut self) {
+            self.kill_wait();
+        }
+    }
+
+    struct 
HarnessHttpServer {
+        port: u16,
+        hit: Arc<AtomicBool>,
+        shutdown: Arc<AtomicBool>,
+        handle: Option<thread::JoinHandle<()>>,
+    }
+
+    impl HarnessHttpServer {
+        /// Starts a tiny HTTP responder on an ephemeral 127.0.0.1 port that
+        /// answers every request with `body` and records whether it was ever
+        /// contacted (used to assert the frontend is NOT proxied).
+        fn start(body: &'static str) -> Self {
+            let listener = TcpListener::bind(("127.0.0.1", 0)).unwrap();
+            let port = listener.local_addr().unwrap().port();
+            // Non-blocking accept lets the loop observe the shutdown flag.
+            listener.set_nonblocking(true).unwrap();
+            let hit = Arc::new(AtomicBool::new(false));
+            let shutdown = Arc::new(AtomicBool::new(false));
+            let thread_hit = hit.clone();
+            let thread_shutdown = shutdown.clone();
+            let handle = thread::spawn(move || {
+                while !thread_shutdown.load(Ordering::SeqCst) {
+                    match listener.accept() {
+                        Ok((mut stream, _)) => {
+                            thread_hit.store(true, Ordering::SeqCst);
+                            let _ = stream.set_read_timeout(Some(Duration::from_millis(200)));
+                            let mut buf = [0_u8; 1024];
+                            let _ = stream.read(&mut buf);
+                            let response = format!(
+                                "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
+                                body.len(),
+                                body
+                            );
+                            let _ = stream.write_all(response.as_bytes());
+                        }
+                        Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
+                            thread::sleep(Duration::from_millis(20));
+                        }
+                        Err(_) => break,
+                    }
+                }
+            });
+            Self {
+                port,
+                hit,
+                shutdown,
+                handle: Some(handle),
+            }
+        }
+
+        /// True once any client has connected to this server.
+        fn was_hit(&self) -> bool {
+            self.hit.load(Ordering::SeqCst)
+        }
+    }
+
+    impl Drop for HarnessHttpServer {
+        fn drop(&mut self) {
+            self.shutdown.store(true, Ordering::SeqCst);
+            // Poke the listener so the accept loop wakes and sees the flag.
+            let _ = TcpStream::connect(("127.0.0.1", self.port));
+            if let Some(handle) = self.handle.take() {
+                let _ = handle.join();
+            }
+        }
+    }
+
+    /// Asserts that `name` resolves to an executable; the strict harness
+    /// fails rather than silently skipping when a tool is missing.
+    fn require_command(name: &str) {
+        assert!(
+            command_path(name).is_some(),
+            "strict runtime harness requires {name} on PATH"
+        );
+    }
+
+    /// Resolves `name` to an executable path: names containing '/' are
+    /// checked literally, otherwise each PATH entry is searched.
+    fn command_path(name: &str) -> Option<PathBuf> {
+        if name.contains('/') {
+            let path = PathBuf::from(name);
+            return path.is_file().then_some(path);
+        }
+        std::env::var_os("PATH").and_then(|paths| {
+            std::env::split_paths(&paths)
+                .map(|dir| dir.join(name))
+                .find(|path| path.is_file())
+        })
+    }
+
+    fn 
start_mariadb(temp: &Path, port: u16) -> Child {
+        let datadir = temp.join("mysql-data");
+        // Derive --basedir from the resolved mariadbd binary so the server's
+        // support files are found regardless of the install prefix.
+        let basedir = command_path("mariadbd")
+            .and_then(|path| fs::canonicalize(path).ok())
+            .and_then(|path| {
+                path.parent()
+                    .and_then(|bin| bin.parent())
+                    .map(Path::to_path_buf)
+            })
+            .expect("resolve mariadbd basedir");
+
+        // Initialize a throwaway data directory for this test run.
+        let mut install = Command::new("mariadb-install-db");
+        install
+            .arg(format!("--basedir={}", basedir.display()))
+            .arg(format!("--datadir={}", datadir.display()))
+            .arg("--auth-root-authentication-method=normal")
+            .arg("--skip-test-db");
+        if let Ok(user) = std::env::var("USER") {
+            install.arg(format!("--user={user}"));
+        }
+        let output = install
+            .output()
+            .unwrap_or_else(|err| panic!("run mariadb-install-db: {err}"));
+        assert!(
+            output.status.success(),
+            "mariadb-install-db failed: {}{}",
+            String::from_utf8_lossy(&output.stdout),
+            String::from_utf8_lossy(&output.stderr)
+        );
+
+        // --skip-grant-tables keeps the harness passwordless on 127.0.0.1.
+        Command::new("mariadbd")
+            .arg("--no-defaults")
+            .arg(format!("--basedir={}", basedir.display()))
+            .arg(format!("--datadir={}", datadir.display()))
+            .arg(format!("--socket={}", temp.join("mysql.sock").display()))
+            .arg(format!("--port={port}"))
+            .arg("--bind-address=127.0.0.1")
+            .arg(format!("--pid-file={}", temp.join("mysql.pid").display()))
+            .arg("--skip-networking=0")
+            .arg("--skip-grant-tables")
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .spawn()
+            .unwrap_or_else(|err| panic!("start mariadbd: {err}"))
+    }
+
+    /// Polls `SELECT 1` over TCP until the throwaway MariaDB accepts
+    /// connections, panicking after ten seconds.
+    fn wait_for_mysql(port: u16) {
+        let started = Instant::now();
+        while started.elapsed() < Duration::from_secs(10) {
+            let output = Command::new("mysql")
+                .arg("--protocol=TCP")
+                .arg("-h127.0.0.1")
+                .arg(format!("-P{port}"))
+                .arg("-uroot")
+                .arg("--execute")
+                .arg("SELECT 1;")
+                .output();
+            if matches!(output, Ok(output) if output.status.success()) {
+                return;
+            }
+            thread::sleep(Duration::from_millis(100));
+        }
+        panic!("temporary MariaDB did not become ready on port {port}");
+    }
+
+    /// Runs `sql` against the harness MariaDB as root and asserts success.
+    fn mysql_exec(port: u16, sql: &str) {
+        let output = Command::new("mysql")
+            .arg("--protocol=TCP")
+            .arg("-h127.0.0.1")
+            .arg(format!("-P{port}"))
+            .arg("-uroot")
+            .arg("--execute")
+            .arg(sql)
+            .output()
+            .unwrap_or_else(|err| panic!("run mysql: {err}"));
+        assert!(
+            output.status.success(),
+            "mysql failed for SQL:\n{sql}\n{}{}",
+            String::from_utf8_lossy(&output.stdout),
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+
+    /// Runs `sql` and returns the single raw value it produces (batch mode,
+    /// no column names), with trailing CR/LF stripped.
+    fn mysql_scalar(port: u16, sql: &str) -> String {
+        let output = Command::new("mysql")
+            .arg("--protocol=TCP")
+            .arg("-h127.0.0.1")
+            .arg(format!("-P{port}"))
+            .arg("-uroot")
+            .arg("--batch")
+            .arg("--raw")
+            .arg("--skip-column-names")
+            .arg("--execute")
+            .arg(sql)
+            .output()
+            .unwrap_or_else(|err| panic!("run mysql scalar: {err}"));
+        assert!(
+            output.status.success(),
+            "mysql scalar failed for SQL:\n{sql}\n{}{}",
+            String::from_utf8_lossy(&output.stdout),
+            String::from_utf8_lossy(&output.stderr)
+        );
+        String::from_utf8_lossy(&output.stdout)
+            .trim_end_matches(['\r', '\n'])
+            .to_string()
+    }
+
+    /// Seeds the paired remote/local WordPress schemas: remote_wp holds the
+    /// "production" rows, local_wp is the overlay the clone writes to; the
+    /// admin user rows are pre-copied into local_wp.
+    fn create_harness_databases(port: u16) {
+        mysql_exec(
+            port,
+            r#"
+CREATE DATABASE remote_wp DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+CREATE DATABASE local_wp DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+CREATE TABLE remote_wp.wp_posts (
+  ID bigint unsigned NOT NULL,
+  post_title text NOT NULL,
+  post_content longtext NOT NULL,
+  post_name varchar(200) NOT NULL DEFAULT '',
+  post_status varchar(20) NOT NULL DEFAULT 'publish',
+  post_type varchar(20) NOT NULL DEFAULT 'post',
+  PRIMARY KEY (ID)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+CREATE TABLE local_wp.wp_posts LIKE remote_wp.wp_posts;
+CREATE TABLE remote_wp.wp_users (
+  ID bigint unsigned NOT NULL,
+  user_login varchar(60) NOT NULL,
+  user_pass varchar(255) NOT NULL,
+  user_nicename varchar(50) NOT NULL DEFAULT '',
+  user_email varchar(100) NOT NULL DEFAULT '',
+  user_registered datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  user_activation_key varchar(255) NOT NULL DEFAULT '',
+  user_status int NOT NULL DEFAULT 0,
+  display_name varchar(250) NOT NULL DEFAULT '',
+  PRIMARY KEY (ID),
+  KEY user_login_key (user_login)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+CREATE TABLE local_wp.wp_users LIKE remote_wp.wp_users;
+CREATE TABLE remote_wp.wp_usermeta (
+  umeta_id bigint unsigned NOT NULL,
+  user_id bigint unsigned NOT NULL DEFAULT 0,
+  meta_key varchar(255) DEFAULT NULL,
+  meta_value longtext,
+  PRIMARY KEY (umeta_id),
+  KEY user_id (user_id)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+CREATE TABLE local_wp.wp_usermeta LIKE remote_wp.wp_usermeta;
+INSERT INTO remote_wp.wp_posts (ID, post_title, post_content, post_name, post_status, post_type)
+VALUES (1, 'Remote Harness Page', 'remote lower content', 'remote-harness-page', 'publish', 'page');
+INSERT INTO remote_wp.wp_users (ID, user_login, user_pass, user_nicename, user_email, display_name)
+VALUES (1, 'admin', MD5('remote-pass'), 'admin', 'admin@example.test', 'Admin');
+INSERT INTO remote_wp.wp_usermeta (umeta_id, user_id, meta_key, meta_value)
+VALUES (1, 1, 'wp_capabilities', 'a:1:{s:13:"administrator";b:1;}');
+INSERT INTO local_wp.wp_users SELECT * FROM remote_wp.wp_users WHERE ID=1;
+INSERT INTO local_wp.wp_usermeta SELECT * FROM remote_wp.wp_usermeta WHERE user_id=1;
+"#,
+        );
+    }
+
+    /// Launches `php -S` serving `docroot` through `router` with the given
+    /// env overrides; stderr is surfaced only when
+    /// WPCOW_HARNESS_PHP_STDERR is set.
+    fn start_php_server(
+        docroot: &Path,
+        router: &Path,
+        port: u16,
+        envs: &[(&str, String)],
+    ) -> Child {
+        let mut command = Command::new("php");
+        command
+            .env("PHP_CLI_SERVER_WORKERS", "4")
+            .current_dir(docroot)
+            .arg("-S")
+            .arg(format!("127.0.0.1:{port}"))
+            .arg("-t")
+            .arg(docroot)
+            .arg(router)
+            .stdout(Stdio::null())
+            .stderr(if std::env::var_os("WPCOW_HARNESS_PHP_STDERR").is_some() {
+                Stdio::inherit()
+            } else {
+                Stdio::null()
+            });
+        for (name, value) in envs {
+            command.env(name, value);
+        }
+        command
+            .spawn()
+            .unwrap_or_else(|err| panic!("start php server on {port}: {err}"))
+    }
+
+    /// Waits up to five seconds for `port` to accept TCP connections.
+    fn wait_for_port(port: u16) {
+        let started = Instant::now();
+        while 
started.elapsed() < Duration::from_secs(5) { + if TcpStream::connect(("127.0.0.1", port)).is_ok() { + return; + } + thread::sleep(Duration::from_millis(50)); + } + panic!("port {port} did not open"); + } + + fn wait_for_port_closed(port: u16) { + let started = Instant::now(); + while started.elapsed() < Duration::from_secs(5) { + if TcpStream::connect(("127.0.0.1", port)).is_err() { + return; + } + thread::sleep(Duration::from_millis(50)); + } + panic!("port {port} did not close"); + } + + fn write_runtime_harness_docroot( + docroot: &Path, + paths: &crate::config::ClonePaths, + manifest: &Manifest, + ) { + fs::create_dir_all(docroot.join("wp-includes")).unwrap(); + fs::create_dir_all(docroot.join("wp-content")).unwrap(); + fs::create_dir_all(docroot.join("wp-admin")).unwrap(); + fs::write( + docroot.join("wp-config.php"), + wp_config_php(manifest, paths), + ) + .unwrap(); + fs::write(docroot.join("wp-content/db.php"), db_dropin_php()).unwrap(); + fs::write( + docroot.join("wp-includes/class-wpdb.php"), + runtime_harness_wpdb_php(), + ) + .unwrap(); + fs::write( + docroot.join("wp-settings.php"), + runtime_harness_settings_php(), + ) + .unwrap(); + fs::write( + docroot.join("index.php"), + " &'static str { + r#"dbh = mysqli_connect( $host, $dbuser, $dbpassword, $dbname, $port ); + if ( ! $this->dbh ) { + throw new RuntimeException( 'mysqli connect failed: ' . 
mysqli_connect_error() ); + } + mysqli_set_charset( $this->dbh, $this->charset ); + } + + public function flush() { + $this->last_error = ''; + $this->last_result = array(); + $this->col_info = array(); + $this->num_rows = 0; + $this->rows_affected = 0; + $this->insert_id = 0; + } + + public function query( $query ) { + $this->flush(); + $this->last_query = $query; + $result = mysqli_query( $this->dbh, $query ); + if ( false === $result ) { + $this->last_error = mysqli_error( $this->dbh ); + return false; + } + if ( true === $result ) { + $this->rows_affected = mysqli_affected_rows( $this->dbh ); + $this->insert_id = mysqli_insert_id( $this->dbh ); + return $this->rows_affected; + } + foreach ( mysqli_fetch_fields( $result ) as $field ) { + $this->col_info[] = (object) array( 'name' => $field->name ); + } + while ( $row = mysqli_fetch_object( $result ) ) { + $this->last_result[] = $row; + } + $this->num_rows = count( $this->last_result ); + mysqli_free_result( $result ); + return $this->num_rows; + } +} +"# + } + + fn runtime_harness_settings_php() -> &'static str { + r#"query( 'SELECT * FROM wp_posts WHERE ID = ' . (int) $id ); + return $wpdb->last_result ? $wpdb->last_result[0] : null; +} + +function wpcow_harness_user() { + global $wpdb; + $wpdb->query( 'SELECT * FROM wp_users WHERE ID = 1' ); + return $wpdb->last_result ? 
$wpdb->last_result[0] : null; +} + +$path = parse_url( $_SERVER['REQUEST_URI'], PHP_URL_PATH ); + +if ( '/wp-login.php' === $path ) { + if ( 'POST' === $_SERVER['REQUEST_METHOD'] ) { + $user = wpcow_harness_user(); + if ( $user && isset( $_POST['log'], $_POST['pwd'] ) && 'admin' === $_POST['log'] && md5( $_POST['pwd'] ) === $user->user_pass ) { + setcookie( 'wp_cow_harness_auth', '1', 0, '/' ); + echo "LOGIN OK\n"; + return; + } + http_response_code( 403 ); + echo "LOGIN FAILED\n"; + return; + } + echo "LOGIN FORM\n"; + return; +} + +if ( in_array( $path, array( '/wp-admin', '/wp-admin/', '/wp-admin/index.php' ), true ) ) { + if ( empty( $_COOKIE['wp_cow_harness_auth'] ) ) { + http_response_code( 403 ); + echo "AUTH REQUIRED\n"; + return; + } + echo "WP ADMIN LOCAL DASHBOARD\n"; + return; +} + +if ( '/wp-admin/post-new.php' === $path ) { + global $wpdb; + $wpdb->query( "INSERT INTO wp_posts (ID, post_title, post_content, post_name, post_status, post_type) VALUES (99, 'Local Only Harness Page', 'local only content', 'local-only-page', 'publish', 'page')" ); + echo "LOCAL POST CREATED\n"; + return; +} + +if ( '/wp-admin/post.php' === $path ) { + global $wpdb; + $wpdb->query( "UPDATE wp_posts SET post_title = 'Locally Edited Harness Page', post_content = 'edited local content' WHERE ID = 1" ); + echo "LOCAL POST EDITED\n"; + return; +} + +if ( '/local-only-page' === $path ) { + $post = wpcow_harness_post( 99 ); + if ( ! $post ) { + http_response_code( 404 ); + echo "LOCAL ONLY MISSING\n"; + return; + } + echo "

" . htmlspecialchars( $post->post_title, ENT_QUOTES, 'UTF-8' ) . "

\n"; + echo "
" . htmlspecialchars( $post->post_content, ENT_QUOTES, 'UTF-8' ) . "
\n"; + return; +} + +$post = wpcow_harness_post( 1 ); +if ( ! $post ) { + http_response_code( 500 ); + echo "REMOTE PAGE MISSING\n"; + return; +} +echo "

" . htmlspecialchars( $post->post_title, ENT_QUOTES, 'UTF-8' ) . "

\n"; +echo "
" . htmlspecialchars( $post->post_content, ENT_QUOTES, 'UTF-8' ) . "
\n"; +"# + } + + fn runtime_harness_control_php() -> &'static str { + r#" false, 'error' => mysqli_connect_error() ) ); + exit; + } + mysqli_set_charset( $db, 'utf8mb4' ); + $dbs[ $name ] = $db; + return $db; +} + +function hrows( $db, $sql ) { + $result = mysqli_query( $db, $sql ); + if ( false === $result ) { + return array(); + } + $rows = array(); + while ( $row = mysqli_fetch_assoc( $result ) ) { + $rows[] = $row; + } + return $rows; +} + +function hresult( $rows ) { + $fields = array(); + foreach ( $rows as $row ) { + foreach ( array_keys( $row ) as $field ) { + if ( ! in_array( $field, $fields, true ) ) { + $fields[] = $field; + } + } + } + return array( + 'ok' => true, + 'error' => '', + 'rows' => array_values( $rows ), + 'fields' => $fields, + 'affected' => count( $rows ), + ); +} + +function hrespond( $payload ) { + header( 'Content-Type: application/json' ); + echo json_encode( $payload ); + exit; +} + +function hselect_id( $table, $id ) { + $remote = hrows( hdb( 'remote_wp' ), "SELECT * FROM `$table` WHERE ID = " . (int) $id ); + $local = hrows( hdb( 'local_wp' ), "SELECT * FROM `$table` WHERE ID = " . (int) $id ); + if ( $remote && ! $local ) { + mysqli_query( hdb( 'local_wp' ), "REPLACE INTO local_wp.`$table` SELECT * FROM remote_wp.`$table` WHERE ID = " . (int) $id ); + $local = hrows( hdb( 'local_wp' ), "SELECT * FROM `$table` WHERE ID = " . (int) $id ); + } + $merged = array(); + foreach ( $remote as $row ) { + $merged[ $row['ID'] ] = $row; + } + foreach ( $local as $row ) { + $merged[ $row['ID'] ] = $row; + } + return array_values( $merged ); +} + +$path = parse_url( $_SERVER['REQUEST_URI'], PHP_URL_PATH ); +$payload = json_decode( file_get_contents( 'php://input' ), true ); +$sql = is_array( $payload ) && isset( $payload['sql'] ) ? 
$payload['sql'] : ''; + +if ( '/row-cow' === $path ) { + if ( preg_match( '/^SELECT\s+\*\s+FROM\s+`?(wp_posts|wp_users)`?\s+WHERE\s+`?ID`?\s*=\s*([0-9]+)/i', $sql, $matches ) ) { + hrespond( array( 'ok' => true, 'handled' => true, 'backend' => 'cow', 'result' => hresult( hselect_id( $matches[1], (int) $matches[2] ) ) ) ); + } + if ( preg_match( '/^UPDATE\s+`?(wp_posts|wp_users)`?.*WHERE\s+`?ID`?\s*=\s*([0-9]+)/i', $sql, $matches ) ) { + mysqli_query( hdb( 'local_wp' ), "REPLACE INTO local_wp.`{$matches[1]}` SELECT * FROM remote_wp.`{$matches[1]}` WHERE ID = " . (int) $matches[2] ); + hrespond( array( 'ok' => true, 'handled' => true, 'backend' => 'local' ) ); + } + if ( preg_match( '/^(INSERT|REPLACE)\s+(?:IGNORE\s+)?INTO\s+`?(wp_posts|wp_users)`?/i', $sql ) ) { + hrespond( array( 'ok' => true, 'handled' => true, 'backend' => 'local' ) ); + } + hrespond( array( 'ok' => true, 'handled' => false, 'backend' => 'local' ) ); +} + +if ( '/route' === $path || '/materialize' === $path ) { + hrespond( array( 'ok' => true, 'backend' => 'local', 'materialized' => array() ) ); +} + +if ( '/query' === $path ) { + hrespond( array( 'ok' => false, 'error' => 'strict harness does not allow fallback remote queries', 'rows' => array(), 'fields' => array(), 'affected' => 0 ) ); +} + +hrespond( array( 'ok' => false, 'error' => 'unknown harness control path ' . 
$path ) ); +"# + } +} diff --git a/experiments/remote-wp-cow/src/main.rs b/experiments/remote-wp-cow/src/main.rs new file mode 100644 index 00000000..0be6ffb1 --- /dev/null +++ b/experiments/remote-wp-cow/src/main.rs @@ -0,0 +1,18 @@ +mod cli; +mod config; +mod control; +mod db; +mod fusefs; +mod generate; +mod mysql_proxy; +mod overlay; +mod plugin_policy; +mod remote; +mod row_cow; +mod run; +mod runtime_cache; +mod sql; + +fn main() -> anyhow::Result<()> { + cli::run() +} diff --git a/experiments/remote-wp-cow/src/mysql_proxy.rs b/experiments/remote-wp-cow/src/mysql_proxy.rs new file mode 100644 index 00000000..4bc85729 --- /dev/null +++ b/experiments/remote-wp-cow/src/mysql_proxy.rs @@ -0,0 +1,522 @@ +use anyhow::{anyhow, Result}; +use msql_srv::{ + Column, ColumnFlags, ColumnType, ErrorKind, InitWriter, MysqlIntermediary, MysqlShim, + ParamParser, QueryResultWriter, StatementMetaWriter, ValueInner, +}; +use mysql::prelude::Queryable; +use serde_json::Value as JsonValue; +use std::collections::BTreeMap; +use std::io; +use std::net::TcpListener; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use crate::config::{self, ClonePaths, Manifest}; +use crate::db; +use crate::remote::RemoteClient; +use crate::row_cow::CowQueryResult; +use crate::sql; + +pub fn serve_proxy( + addr: &str, + manifest: Manifest, + paths: ClonePaths, + remote: RemoteClient, + shutdown: Arc, +) -> Result<()> { + let listener = TcpListener::bind(addr).with_context(|| format!("bind MySQL proxy {addr}"))?; + listener + .set_nonblocking(true) + .context("set MySQL proxy nonblocking")?; + + while !shutdown.load(Ordering::SeqCst) { + match listener.accept() { + Ok((stream, _peer)) => { + let backend = ProxyBackend::new(manifest.clone(), paths.clone(), remote.clone()); + thread::spawn(move || { + if let Err(err) = MysqlIntermediary::run_on_tcp(backend, stream) { + eprintln!("wp-cow MySQL proxy connection ended: {err:?}"); + } + }); 
+ } + Err(err) if err.kind() == io::ErrorKind::WouldBlock => { + thread::sleep(Duration::from_millis(50)); + } + Err(err) => return Err(err).context("accept MySQL proxy connection"), + } + } + + Ok(()) +} + +struct ProxyBackend { + manifest: Manifest, + paths: ClonePaths, + remote: RemoteClient, + local: Option, + prepared: BTreeMap, + next_statement_id: u32, +} + +impl ProxyBackend { + fn new(manifest: Manifest, paths: ClonePaths, remote: RemoteClient) -> Self { + Self { + manifest, + paths, + remote, + local: None, + prepared: BTreeMap::new(), + next_statement_id: 1, + } + } + + fn dispatch(&mut self, query: &str) -> Result { + if is_local_session_sql(query) { + return self.local_query(query); + } + + if sql::is_write_sql(query) { + if !config::is_offline(&self.paths) { + let tables = sql::extract_tables(query); + let response = + db::row_cow_query(&self.remote, &self.manifest, &self.paths, query, &tables)?; + if response.backend != "local" && !response.handled { + return Err(anyhow!("write SQL did not resolve to local backend")); + } + } + return self.local_query(query); + } + + if sql::is_safe_read_sql(query) { + if config::is_offline(&self.paths) { + return self.local_query(query); + } + + let tables = sql::extract_tables(query); + let row_cow = + db::row_cow_query(&self.remote, &self.manifest, &self.paths, query, &tables)?; + if let Some(result) = row_cow.result { + return Ok(ProxyReply::Result(result)); + } + if row_cow.backend == "local" { + return self.local_query(query); + } + + let route = + db::route_for_query(&self.remote, &self.manifest, &self.paths, query, &tables)?; + if route.backend == "local" { + self.local_query(query) + } else { + let result = db::cached_remote_readonly_query(&self.remote, &self.paths, query)?; + Ok(ProxyReply::Result(CowQueryResult { + ok: result.ok, + error: result.error, + rows: result.rows, + fields: result.fields, + affected: result.affected, + })) + } + } else { + self.local_query(query) + } + } + + fn local_conn(&mut 
self) -> Result<&mut mysql::Conn> { + if self.local.is_none() { + let mut builder = mysql::OptsBuilder::new() + .ip_or_hostname(Some(self.manifest.local_db.host.clone())) + .tcp_port(self.manifest.local_db.port) + .user(Some(self.manifest.local_db.user.clone())) + .db_name(Some(self.manifest.local_db.name.clone())); + if !self.manifest.local_db.password.is_empty() { + builder = builder.pass(Some(self.manifest.local_db.password.clone())); + } + self.local = Some(mysql::Conn::new(builder)?); + } + self.local + .as_mut() + .ok_or_else(|| anyhow!("local MySQL connection was not initialized")) + } + + fn local_query(&mut self, query: &str) -> Result { + let result = self.local_conn()?.query_iter(query)?; + let fields = result + .columns() + .as_ref() + .iter() + .map(|column| column.name_str().to_string()) + .collect::>(); + let affected = result.affected_rows(); + let last_insert_id = result.last_insert_id().unwrap_or(0); + + if fields.is_empty() { + drop(result); + return Ok(ProxyReply::Completed { + affected_rows: affected, + last_insert_id, + }); + } + + let mut rows = Vec::new(); + for row in result { + let row = row?; + let values = row.unwrap(); + let mut out = serde_json::Map::new(); + for (idx, field) in fields.iter().enumerate() { + let value = values.get(idx).cloned().unwrap_or(mysql::Value::NULL); + out.insert(field.clone(), mysql_value_to_json(value)); + } + rows.push(out); + } + + Ok(ProxyReply::Result(CowQueryResult { + ok: true, + error: String::new(), + affected: rows.len() as i64, + rows, + fields, + })) + } +} + +enum ProxyReply { + Result(CowQueryResult), + Completed { + affected_rows: u64, + last_insert_id: u64, + }, +} + +impl MysqlShim for ProxyBackend { + type Error = io::Error; + + fn on_prepare( + &mut self, + query: &str, + info: StatementMetaWriter<'_, W>, + ) -> Result<(), Self::Error> { + let id = self.next_statement_id; + self.next_statement_id = self.next_statement_id.saturating_add(1); + self.prepared.insert(id, query.to_string()); + let 
params = (0..count_placeholders(query)) + .map(|idx| Column { + table: String::new(), + column: format!("param{}", idx + 1), + coltype: ColumnType::MYSQL_TYPE_STRING, + colflags: ColumnFlags::empty(), + }) + .collect::>(); + info.reply(id, ¶ms, &[]) + } + + fn on_execute( + &mut self, + id: u32, + params: ParamParser<'_>, + results: QueryResultWriter<'_, W>, + ) -> Result<(), Self::Error> { + let Some(query) = self.prepared.get(&id).cloned() else { + return Ok(results.error(ErrorKind::ER_UNKNOWN_STMT_HANDLER, b"unknown statement")?); + }; + let params = params + .into_iter() + .map(|param| mysql_param_literal(param.value.into_inner())) + .collect::>(); + let Ok(query) = substitute_placeholders(&query, ¶ms) else { + return Ok(results.error( + ErrorKind::ER_PARSE_ERROR, + b"prepared statement parameter count does not match placeholders", + )?); + }; + write_proxy_reply(self.dispatch(&query), results) + } + + fn on_close(&mut self, stmt: u32) { + self.prepared.remove(&stmt); + } + + fn on_query( + &mut self, + query: &str, + results: QueryResultWriter<'_, W>, + ) -> Result<(), Self::Error> { + write_proxy_reply(self.dispatch(query), results) + } + + fn on_init(&mut self, _schema: &str, writer: InitWriter<'_, W>) -> Result<(), Self::Error> { + writer.ok() + } +} + +fn write_proxy_reply( + reply: Result, + results: QueryResultWriter<'_, W>, +) -> Result<(), io::Error> { + match reply { + Ok(ProxyReply::Result(result)) if result.ok => write_result(result, results), + Ok(ProxyReply::Result(result)) => { + Ok(results.error(ErrorKind::ER_UNKNOWN_ERROR, result.error.as_bytes())?) 
+ } + Ok(ProxyReply::Completed { + affected_rows, + last_insert_id, + }) => results.completed(affected_rows, last_insert_id), + Err(err) => Ok(results.error(ErrorKind::ER_UNKNOWN_ERROR, err.to_string().as_bytes())?), + } +} + +fn write_result( + result: CowQueryResult, + results: QueryResultWriter<'_, W>, +) -> Result<(), io::Error> { + let columns = result + .fields + .iter() + .map(|field| Column { + table: String::new(), + column: field.clone(), + coltype: ColumnType::MYSQL_TYPE_STRING, + colflags: ColumnFlags::empty(), + }) + .collect::>(); + let mut writer = results.start(&columns)?; + for row in result.rows { + for field in &result.fields { + match row.get(field) { + None | Some(JsonValue::Null) => writer.write_col(None::<&str>)?, + Some(JsonValue::String(value)) => writer.write_col(value.as_str())?, + Some(value) => writer.write_col(value.to_string())?, + } + } + writer.end_row()?; + } + writer.finish() +} + +fn is_local_session_sql(query: &str) -> bool { + let normalized = query.trim_start().to_ascii_uppercase(); + normalized.starts_with("SET ") + || normalized.starts_with("START TRANSACTION") + || normalized.starts_with("BEGIN") + || normalized.starts_with("COMMIT") + || normalized.starts_with("ROLLBACK") +} + +fn count_placeholders(query: &str) -> usize { + scan_placeholders(query, None).0 +} + +fn substitute_placeholders(query: &str, params: &[String]) -> Result { + let (used, out) = scan_placeholders(query, Some(params)); + if used != params.len() { + return Err(anyhow!("too many prepared statement parameters")); + } + out.ok_or_else(|| anyhow!("missing prepared statement parameter")) +} + +fn scan_placeholders(query: &str, params: Option<&[String]>) -> (usize, Option) { + let chars = query.chars().collect::>(); + let mut out = params.map(|_| String::with_capacity(query.len())); + let mut idx = 0; + let mut used = 0; + + while idx < chars.len() { + let ch = chars[idx]; + + if ch == '\'' || ch == '"' || ch == '`' { + push_char(&mut out, ch); + idx += 1; 
+ while idx < chars.len() { + let inner = chars[idx]; + push_char(&mut out, inner); + idx += 1; + if inner == '\\' && idx < chars.len() { + push_char(&mut out, chars[idx]); + idx += 1; + continue; + } + if inner == ch { + if idx < chars.len() && chars[idx] == ch { + push_char(&mut out, chars[idx]); + idx += 1; + continue; + } + break; + } + } + continue; + } + + if ch == '-' && idx + 1 < chars.len() && chars[idx + 1] == '-' { + push_char(&mut out, ch); + push_char(&mut out, chars[idx + 1]); + idx += 2; + while idx < chars.len() { + let comment = chars[idx]; + push_char(&mut out, comment); + idx += 1; + if comment == '\n' { + break; + } + } + continue; + } + + if ch == '#' { + push_char(&mut out, ch); + idx += 1; + while idx < chars.len() { + let comment = chars[idx]; + push_char(&mut out, comment); + idx += 1; + if comment == '\n' { + break; + } + } + continue; + } + + if ch == '/' && idx + 1 < chars.len() && chars[idx + 1] == '*' { + push_char(&mut out, ch); + push_char(&mut out, chars[idx + 1]); + idx += 2; + while idx < chars.len() { + let comment = chars[idx]; + push_char(&mut out, comment); + idx += 1; + if comment == '*' && idx < chars.len() && chars[idx] == '/' { + push_char(&mut out, chars[idx]); + idx += 1; + break; + } + } + continue; + } + + if ch == '?' 
{ + if let Some(params) = params { + let Some(param) = params.get(used) else { + return (used, None); + }; + push_str(&mut out, param); + } + used += 1; + idx += 1; + continue; + } + + push_char(&mut out, ch); + idx += 1; + } + + (used, out) +} + +fn push_char(out: &mut Option, ch: char) { + if let Some(out) = out { + out.push(ch); + } +} + +fn push_str(out: &mut Option, value: &str) { + if let Some(out) = out { + out.push_str(value); + } +} + +fn mysql_param_literal(value: ValueInner<'_>) -> String { + match value { + ValueInner::NULL => "NULL".to_string(), + ValueInner::Bytes(bytes) => format!("'{}'", mysql_string_literal_bytes(bytes)), + ValueInner::Int(value) => value.to_string(), + ValueInner::UInt(value) => value.to_string(), + ValueInner::Double(value) => value.to_string(), + ValueInner::Date(bytes) | ValueInner::Time(bytes) | ValueInner::Datetime(bytes) => { + format!("X'{}'", hex::encode(bytes)) + } + } +} + +fn mysql_value_to_json(value: mysql::Value) -> JsonValue { + match value { + mysql::Value::NULL => JsonValue::Null, + mysql::Value::Bytes(bytes) => JsonValue::String(String::from_utf8_lossy(&bytes).into()), + mysql::Value::Int(value) => JsonValue::String(value.to_string()), + mysql::Value::UInt(value) => JsonValue::String(value.to_string()), + mysql::Value::Float(value) => JsonValue::String(value.to_string()), + mysql::Value::Double(value) => JsonValue::String(value.to_string()), + mysql::Value::Date(year, month, day, hour, minute, second, micros) => { + JsonValue::String(format!( + "{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}.{:06}", + micros + )) + } + mysql::Value::Time(negative, days, hours, minutes, seconds, micros) => { + let sign = if negative { "-" } else { "" }; + JsonValue::String(format!( + "{sign}{days} {hours:02}:{minutes:02}:{seconds:02}.{:06}", + micros + )) + } + } +} + +fn mysql_string_literal_bytes(bytes: &[u8]) -> String { + String::from_utf8_lossy(bytes) + .replace('\\', "\\\\") + .replace('\'', "\\'") +} + 
+trait Context { + fn context(self, msg: &'static str) -> Result; + fn with_context String>(self, f: F) -> Result; +} + +impl Context for io::Result { + fn context(self, msg: &'static str) -> Result { + self.map_err(|err| anyhow!("{msg}: {err}")) + } + + fn with_context String>(self, f: F) -> Result { + self.map_err(|err| anyhow!("{}: {err}", f())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn recognizes_session_sql_as_local_only() { + assert!(is_local_session_sql("SET NAMES utf8mb4")); + assert!(is_local_session_sql("BEGIN")); + assert!(is_local_session_sql("COMMIT")); + assert!(!is_local_session_sql("SELECT * FROM wp_posts")); + } + + #[test] + fn substitutes_prepared_placeholders_outside_literals_and_comments() { + let sql = + "SELECT '?' AS literal, col FROM wp_posts WHERE ID = ? AND post_title = ? /* ? */"; + let substituted = + substitute_placeholders(sql, &["123".to_string(), "'local \\' title'".to_string()]) + .unwrap(); + assert_eq!( + substituted, + "SELECT '?' AS literal, col FROM wp_posts WHERE ID = 123 AND post_title = 'local \\' title' /* ? 
*/" + ); + assert_eq!(count_placeholders(sql), 2); + } + + #[test] + fn quotes_prepared_parameter_literals() { + assert_eq!( + mysql_param_literal(ValueInner::Bytes(b"a'b\\c")), + "'a\\'b\\\\c'" + ); + assert_eq!(mysql_param_literal(ValueInner::NULL), "NULL"); + assert_eq!(mysql_param_literal(ValueInner::UInt(42)), "42"); + } +} diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs new file mode 100644 index 00000000..531518d6 --- /dev/null +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -0,0 +1,1352 @@ +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::cell::RefCell; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs::{self, File, OpenOptions}; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::{Component, Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::config::ClonePaths; +use crate::remote::{RemoteClient, RemoteEntry}; + +pub const OPAQUE_MARKER: &str = ".wp-cow-opaque"; + +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +struct WhiteoutFile { + deleted: BTreeSet, +} + +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +struct MetadataFile { + entries: BTreeMap, +} + +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +struct MissingFile { + expires_at_unix: BTreeMap, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +struct CacheProgress { + phase: String, + active_path: String, + active_bytes: u64, + active_total: u64, + files_cached: u64, + bytes_cached: u64, + last_cached_path: String, + updated_at_unix_ms: u128, +} + +#[derive(Debug, Clone)] +pub struct OverlayStore { + pub upper: PathBuf, + pub file_cache: PathBuf, + whiteouts_path: PathBuf, + whiteouts: RefCell>, + metadata: RefCell>, + metadata_journal_len: RefCell, + missing: RefCell>, +} + +impl OverlayStore { + pub fn new(paths: &ClonePaths) -> Self { + Self { + upper: paths.upper.clone(), + file_cache: 
paths.file_cache.clone(), + whiteouts_path: paths.whiteouts.clone(), + whiteouts: RefCell::new(None), + metadata: RefCell::new(None), + metadata_journal_len: RefCell::new(0), + missing: RefCell::new(None), + } + } + + pub fn clean_rel(input: impl AsRef) -> Result { + let mut out = PathBuf::new(); + for component in input.as_ref().components() { + match component { + Component::Normal(part) => out.push(part), + Component::CurDir => {} + Component::RootDir | Component::Prefix(_) | Component::ParentDir => { + return Err(anyhow!( + "invalid clone-relative path {}", + input.as_ref().display() + )); + } + } + } + Ok(out) + } + + pub fn rel_string(rel: &Path) -> String { + rel.to_string_lossy().replace('\\', "/") + } + + pub fn upper_path(&self, rel: &Path) -> Result { + Ok(self.upper.join(Self::clean_rel(rel)?)) + } + + pub fn mirror_path(&self, rel: &Path) -> Result { + Ok(self.file_cache.join("mirror").join(Self::clean_rel(rel)?)) + } + + pub fn cache_path(&self, rel: &Path) -> PathBuf { + let mut hasher = Sha256::new(); + hasher.update(Self::rel_string(rel)); + let hex = hex::encode(hasher.finalize()); + self.file_cache.join(&hex[0..2]).join(hex) + } + + pub fn cached_file_path(&self, rel: &Path) -> Option { + let path = self.cache_path(rel); + if path.is_file() { + return Some(path); + } + self.mirror_path(rel).ok().filter(|path| path.is_file()) + } + + pub fn cached_entry(&self, rel: &Path) -> Result> { + let metadata = self.load_metadata()?; + Ok(metadata.entries.get(&Self::rel_string(rel)).cloned()) + } + + pub fn cached_missing(&self, rel: &Path) -> Result { + let mut missing = self.load_missing()?; + let rel_string = Self::rel_string(&Self::clean_rel(rel)?); + let now = now_unix_secs(); + let Some(expires_at) = missing.expires_at_unix.get(&rel_string).copied() else { + return Ok(false); + }; + if expires_at > now { + return Ok(true); + } + missing.expires_at_unix.remove(&rel_string); + self.write_missing(&missing)?; + Ok(false) + } + + pub fn 
put_cached_missing(&self, rel: &Path, ttl_secs: u64) -> Result<()> { + let mut missing = self.load_missing()?; + let rel_string = Self::rel_string(&Self::clean_rel(rel)?); + let expires_at = now_unix_secs().saturating_add(ttl_secs.max(1)); + missing.expires_at_unix.insert(rel_string, expires_at); + self.write_missing(&missing) + } + + pub fn remove_cached_missing(&self, rel: &Path) -> Result<()> { + let mut missing = self.load_missing()?; + let rel_string = Self::rel_string(&Self::clean_rel(rel)?); + if missing.expires_at_unix.remove(&rel_string).is_some() { + self.write_missing(&missing)?; + } + Ok(()) + } + + pub fn list_cached_metadata_dir(&self, rel: &Path) -> Result> { + let metadata = self.load_metadata()?; + let rel = Self::clean_rel(rel)?; + let mut out = Vec::new(); + + for (entry_rel, entry) in metadata.entries { + let entry_path = PathBuf::from(&entry_rel); + let parent = entry_path.parent().unwrap_or_else(|| Path::new("")); + if parent == rel { + out.push(entry); + } + } + + out.sort_by(|a, b| a.name.cmp(&b.name)); + Ok(out) + } + + pub fn put_cached_entry(&self, rel: &Path, entry: &RemoteEntry) -> Result<()> { + let mut metadata = self.load_metadata()?; + let rel = Self::clean_rel(rel)?; + let mut journal_entries = Vec::new(); + let rel_string = Self::rel_string(&rel); + let _ = self.remove_cached_missing(&rel); + metadata.entries.insert(rel_string.clone(), entry.clone()); + journal_entries.push((rel_string, Some(entry.clone()))); + let mut current = rel.parent(); + while let Some(parent) = current { + if parent.as_os_str().is_empty() { + break; + } + let Some(name) = parent.file_name() else { + break; + }; + let parent_string = Self::rel_string(parent); + if !metadata.entries.contains_key(&parent_string) { + let parent_entry = RemoteEntry { + name: name.to_string_lossy().to_string(), + kind: "dir".to_string(), + size: 0, + mode: 0o40755, + mtime: entry.mtime, + }; + metadata + .entries + .insert(parent_string.clone(), parent_entry.clone()); + 
journal_entries.push((parent_string, Some(parent_entry))); + } + current = parent.parent(); + } + *self.metadata.borrow_mut() = Some(metadata); + self.append_metadata_journal(&journal_entries) + } + + pub fn put_cached_file_bytes( + &self, + rel: &Path, + entry: &RemoteEntry, + bytes: &[u8], + ) -> Result<()> { + self.put_cached_file_bytes_inner(rel, entry, bytes, true) + } + + pub fn put_cached_file_bytes_without_progress( + &self, + rel: &Path, + entry: &RemoteEntry, + bytes: &[u8], + ) -> Result<()> { + self.put_cached_file_bytes_inner(rel, entry, bytes, false) + } + + fn put_cached_file_bytes_inner( + &self, + rel: &Path, + entry: &RemoteEntry, + bytes: &[u8], + update_progress: bool, + ) -> Result<()> { + if entry.kind != "file" { + return self.put_cached_entry(rel, entry); + } + let rel = Self::clean_rel(rel)?; + let rel_string = Self::rel_string(&rel); + let actual_size = bytes.len() as u64; + if actual_size != entry.size { + return Err(anyhow!( + "remote file changed while prefetching {}: stat size {}, read size {}", + rel_string, + entry.size, + actual_size + )); + } + + let cache_path = self.cache_path(&rel); + if !cache_path.exists() { + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent)?; + } + let tmp = self.cache_tmp_path(&cache_path); + let mut out = File::create(&tmp)?; + out.write_all(bytes)?; + drop(out); + fs::rename(tmp, &cache_path)?; + if update_progress { + let _ = self.finish_cache_progress(&rel_string, entry.size); + } + } + + self.put_cached_entry(&rel, entry) + } + + pub fn note_cache_fetch( + &self, + rel: &Path, + phase: &str, + active_bytes: u64, + active_total: u64, + ) -> Result<()> { + self.write_cache_progress( + &Self::rel_string(&Self::clean_rel(rel)?), + phase, + active_bytes, + active_total, + ) + } + + pub fn note_cache_file_finished(&self, rel: &Path, phase: &str, size: u64) -> Result<()> { + self.finish_cache_progress_with_phase( + &Self::rel_string(&Self::clean_rel(rel)?), + phase, + size, + ) + } + + 
pub fn remove_cached(&self, rel: &Path) -> Result<()> { + let path = self.cache_path(rel); + if path.exists() { + fs::remove_file(path)?; + } + let _ = self.remove_cached_missing(rel); + let mut metadata = self.load_metadata()?; + let rel_string = Self::rel_string(rel); + metadata.entries.remove(&rel_string); + *self.metadata.borrow_mut() = Some(metadata); + self.append_metadata_journal(&[(rel_string, None)]) + } + + pub fn is_whiteout(&self, rel: &Path) -> Result { + let whiteouts = self.load_whiteouts()?; + Ok(whiteouts.deleted.contains(&Self::rel_string(rel))) + } + + pub fn add_whiteout(&self, rel: &Path) -> Result<()> { + let mut whiteouts = self.load_whiteouts()?; + whiteouts.deleted.insert(Self::rel_string(rel)); + self.write_whiteouts(&whiteouts) + } + + pub fn clear_whiteout(&self, rel: &Path) -> Result<()> { + let mut whiteouts = self.load_whiteouts()?; + whiteouts.deleted.remove(&Self::rel_string(rel)); + self.write_whiteouts(&whiteouts) + } + + pub fn remove_upper(&self, rel: &Path) -> Result<()> { + let path = self.upper_path(rel)?; + if path.is_dir() { + fs::remove_dir_all(path)?; + } else if path.exists() { + fs::remove_file(path)?; + } + Ok(()) + } + + pub fn copy_up(&self, remote: &RemoteClient, rel: &Path) -> Result { + let upper = self.upper_path(rel)?; + if upper.exists() { + return Ok(upper); + } + + if let Some(parent) = upper.parent() { + fs::create_dir_all(parent)?; + } + + let entry = match self.cached_entry(rel)? 
{ + Some(entry) => entry, + None => remote.stat(rel)?, + }; + if entry.kind == "dir" { + fs::create_dir_all(&upper)?; + return Ok(upper); + } + if entry.kind != "file" { + return Err(anyhow!( + "copy-up only supports regular files and directories" + )); + } + + if let Some(cached) = self.cached_file_path(rel) { + fs::copy(cached, &upper)?; + return Ok(upper); + } + + let mut out = File::create(&upper)?; + let mut offset = 0_u64; + let chunk = 1024 * 1024; + while offset < entry.size { + let wanted = chunk.min((entry.size - offset) as usize); + let bytes = remote.read_range(rel, offset, wanted)?; + if bytes.is_empty() { + break; + } + out.write_all(&bytes)?; + offset += bytes.len() as u64; + } + Ok(upper) + } + + pub fn copy_up_cached_only(&self, rel: &Path) -> Result { + let upper = self.upper_path(rel)?; + if upper.exists() { + return Ok(upper); + } + + if let Some(parent) = upper.parent() { + fs::create_dir_all(parent)?; + } + + if let Some(cached) = self.cached_file_path(rel) { + fs::copy(cached, &upper)?; + return Ok(upper); + } + + let mirror = self.mirror_path(rel)?; + if let Ok(metadata) = fs::symlink_metadata(&mirror) { + if metadata.file_type().is_dir() { + fs::create_dir_all(&upper)?; + return Ok(upper); + } + if metadata.file_type().is_symlink() { + let target = fs::read_link(&mirror)?; + std::os::unix::fs::symlink(target, &upper)?; + return Ok(upper); + } + } + + if let Some(entry) = self.cached_entry(rel)? 
{ + if entry.kind == "dir" { + fs::create_dir_all(&upper)?; + return Ok(upper); + } + } + + Err(anyhow!( + "clone is severed and writable lower file is not cached locally: {}", + Self::rel_string(rel) + )) + } + + #[cfg(test)] + pub fn read_cached_or_remote( + &self, + remote: &RemoteClient, + rel: &Path, + offset: i64, + size: u32, + cache_limit: u64, + ) -> Result> { + self.read_cached_or_remote_with_entry(remote, rel, offset, size, cache_limit, None) + } + + pub fn read_cached_or_remote_with_entry( + &self, + remote: &RemoteClient, + rel: &Path, + offset: i64, + size: u32, + cache_limit: u64, + entry: Option, + ) -> Result> { + if offset < 0 { + return Ok(Vec::new()); + } + + let cache_path = self.cache_path(rel); + if cache_path.exists() { + return read_range_from_file(&cache_path, offset as u64, size as usize); + } + + let entry = match entry { + Some(entry) => entry, + None => match self.cached_entry(rel)? { + Some(entry) => entry, + None => remote.stat(rel)?, + }, + }; + if entry.kind == "file" && entry.size <= cache_limit { + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent)?; + } + if cache_path.exists() { + return read_range_from_file(&cache_path, offset as u64, size as usize); + } + let tmp = self.cache_tmp_path(&cache_path); + let mut out = File::create(&tmp)?; + let rel_string = Self::rel_string(rel); + let _ = self.write_cache_progress(&rel_string, "fetching", 0, entry.size); + let bytes = remote + .read_file(rel) + .with_context(|| format!("remote cache fetch {}", rel_string))?; + let actual_size = bytes.len() as u64; + if actual_size != entry.size { + let _ = fs::remove_file(&tmp); + return Err(anyhow!( + "remote file changed while caching {}: stat size {}, read size {}", + rel_string, + entry.size, + actual_size + )); + } + out.write_all(&bytes)?; + let _ = self.write_cache_progress(&rel_string, "fetching", actual_size, entry.size); + drop(out); + fs::rename(tmp, &cache_path)?; + self.put_cached_entry(rel, &entry)?; + let _ = 
self.finish_cache_progress(&rel_string, entry.size); + return read_range_from_file(&cache_path, offset as u64, size as usize); + } + + let _ = self.write_cache_progress( + &Self::rel_string(rel), + "streaming", + offset as u64, + offset as u64 + size as u64, + ); + remote + .read_range(rel, offset as u64, size as usize) + .with_context(|| format!("remote read {}", Self::rel_string(rel))) + } + + pub fn list_upper(&self, rel: &Path) -> Result> { + self.list_local_layer(&self.upper_path(rel)?) + } + + pub fn list_mirror(&self, rel: &Path) -> Result> { + self.list_local_layer(&self.mirror_path(rel)?) + } + + pub fn is_opaque_dir(&self, rel: &Path) -> Result { + Ok(self.upper_path(rel)?.join(OPAQUE_MARKER).is_file()) + } + + fn list_local_layer(&self, path: &Path) -> Result> { + if !path.is_dir() { + return Ok(Vec::new()); + } + let mut out = Vec::new(); + for entry in fs::read_dir(path)? { + let entry = entry?; + if entry.file_name() == OPAQUE_MARKER { + continue; + } + let metadata = fs::symlink_metadata(entry.path())?; + let file_type = metadata.file_type(); + out.push(RemoteEntry { + name: entry.file_name().to_string_lossy().to_string(), + kind: if file_type.is_dir() { + "dir".to_string() + } else if file_type.is_symlink() { + "symlink".to_string() + } else { + "file".to_string() + }, + size: metadata.len(), + mode: metadata.mode(), + mtime: metadata + .modified() + .ok() + .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or_default(), + }); + } + Ok(out) + } + + fn load_whiteouts(&self) -> Result { + if let Some(whiteouts) = self.whiteouts.borrow().as_ref() { + return Ok(whiteouts.clone()); + } + if !self.whiteouts_path.exists() { + let whiteouts = WhiteoutFile::default(); + *self.whiteouts.borrow_mut() = Some(whiteouts.clone()); + return Ok(whiteouts); + } + let mut json = String::new(); + File::open(&self.whiteouts_path)?.read_to_string(&mut json)?; + let whiteouts: WhiteoutFile = serde_json::from_str(&json)?; + 
*self.whiteouts.borrow_mut() = Some(whiteouts.clone()); + Ok(whiteouts) + } + + fn write_whiteouts(&self, whiteouts: &WhiteoutFile) -> Result<()> { + if let Some(parent) = self.whiteouts_path.parent() { + fs::create_dir_all(parent)?; + } + let json = serde_json::to_vec_pretty(whiteouts)?; + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&self.whiteouts_path)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + *self.whiteouts.borrow_mut() = Some(whiteouts.clone()); + Ok(()) + } + + fn metadata_path(&self) -> PathBuf { + self.file_cache.join("metadata.json") + } + + fn metadata_journal_path(&self) -> PathBuf { + self.file_cache.join("metadata.jsonl") + } + + fn missing_path(&self) -> PathBuf { + self.file_cache.join("missing.json") + } + + fn progress_path(&self) -> PathBuf { + self.file_cache.join("progress.json") + } + + fn load_metadata(&self) -> Result { + let journal_len = self.metadata_journal_len_on_disk(); + let cached_metadata = { self.metadata.borrow().clone() }; + if let Some(metadata) = cached_metadata { + if *self.metadata_journal_len.borrow() == journal_len { + return Ok(metadata); + } + let mut metadata = metadata; + self.apply_metadata_journal(&mut metadata)?; + *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = journal_len; + return Ok(metadata.clone()); + } + let path = self.metadata_path(); + if !path.exists() { + let mut metadata = MetadataFile::default(); + self.apply_metadata_journal(&mut metadata)?; + *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = journal_len; + return Ok(metadata); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + let mut metadata: MetadataFile = serde_json::from_str(&json)?; + self.apply_metadata_journal(&mut metadata)?; + *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = journal_len; + Ok(metadata) 
+ } + + fn load_missing(&self) -> Result { + if let Some(missing) = self.missing.borrow().as_ref() { + return Ok(missing.clone()); + } + let path = self.missing_path(); + if !path.exists() { + let missing = MissingFile::default(); + *self.missing.borrow_mut() = Some(missing.clone()); + return Ok(missing); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + let missing: MissingFile = serde_json::from_str(&json)?; + *self.missing.borrow_mut() = Some(missing.clone()); + Ok(missing) + } + + fn write_missing(&self, missing: &MissingFile) -> Result<()> { + fs::create_dir_all(&self.file_cache)?; + let json = serde_json::to_vec_pretty(missing)?; + let tmp = self.missing_path().with_extension("json.tmp"); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + drop(file); + fs::rename(tmp, self.missing_path())?; + *self.missing.borrow_mut() = Some(missing.clone()); + Ok(()) + } + + #[allow(dead_code)] + fn write_metadata(&self, metadata: &MetadataFile) -> Result<()> { + fs::create_dir_all(&self.file_cache)?; + let json = serde_json::to_vec_pretty(metadata)?; + let tmp = self.metadata_path().with_extension("json.tmp"); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + drop(file); + fs::rename(tmp, self.metadata_path())?; + let _ = fs::remove_file(self.metadata_journal_path()); + *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = 0; + Ok(()) + } + + fn metadata_journal_len_on_disk(&self) -> u64 { + fs::metadata(self.metadata_journal_path()) + .map(|metadata| metadata.len()) + .unwrap_or(0) + } + + fn apply_metadata_journal(&self, metadata: &mut MetadataFile) -> Result<()> { + let path = self.metadata_journal_path(); + if !path.exists() { + return Ok(()); + } + + let mut jsonl = String::new(); + 
File::open(&path)?.read_to_string(&mut jsonl)?; + for line in jsonl.lines().filter(|line| !line.trim().is_empty()) { + let value: serde_json::Value = match serde_json::from_str(line) { + Ok(value) => value, + Err(err) => { + eprintln!( + "wp-cow ignoring truncated cache metadata journal entry at {}: {err}", + path.display() + ); + continue; + } + }; + let Some(path) = value.get("path").and_then(|value| value.as_str()) else { + continue; + }; + match value.get("op").and_then(|value| value.as_str()) { + Some("put") => { + let Some(entry) = value.get("entry") else { + continue; + }; + metadata + .entries + .insert(path.to_string(), serde_json::from_value(entry.clone())?); + } + Some("delete") => { + metadata.entries.remove(path); + } + _ => {} + } + } + Ok(()) + } + + fn append_metadata_journal(&self, entries: &[(String, Option)]) -> Result<()> { + if entries.is_empty() { + return Ok(()); + } + + fs::create_dir_all(&self.file_cache)?; + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(self.metadata_journal_path())?; + for (path, entry) in entries { + let value = match entry { + Some(entry) => { + serde_json::json!({ "op": "put", "path": path, "entry": entry }) + } + None => serde_json::json!({ "op": "delete", "path": path }), + }; + serde_json::to_writer(&mut file, &value)?; + file.write_all(b"\n")?; + } + drop(file); + *self.metadata_journal_len.borrow_mut() = self.metadata_journal_len_on_disk(); + Ok(()) + } + + fn load_progress(&self) -> Result { + let path = self.progress_path(); + if !path.exists() { + return Ok(CacheProgress { + phase: "idle".to_string(), + updated_at_unix_ms: now_unix_ms(), + ..CacheProgress::default() + }); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + Ok( + serde_json::from_str(&json).unwrap_or_else(|_| CacheProgress { + phase: "idle".to_string(), + updated_at_unix_ms: now_unix_ms(), + ..CacheProgress::default() + }), + ) + } + + fn write_progress(&self, progress: 
&CacheProgress) -> Result<()> { + fs::create_dir_all(&self.file_cache)?; + let json = serde_json::to_vec_pretty(progress)?; + let tmp = self.progress_tmp_path(); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + drop(file); + fs::rename(tmp, self.progress_path())?; + Ok(()) + } + + fn progress_tmp_path(&self) -> PathBuf { + self.file_cache.join(format!( + "progress.json.tmp.{}.{}", + std::process::id(), + now_unix_ms() + )) + } + + fn cache_tmp_path(&self, cache_path: &Path) -> PathBuf { + let name = cache_path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("remote-file"); + cache_path.with_file_name(format!( + "{}.tmp.{}.{}", + name, + std::process::id(), + now_unix_ms() + )) + } + + fn write_cache_progress( + &self, + rel: &str, + phase: &str, + active_bytes: u64, + active_total: u64, + ) -> Result<()> { + let mut progress = self.load_progress()?; + progress.phase = phase.to_string(); + progress.active_path = rel.to_string(); + progress.active_bytes = active_bytes; + progress.active_total = active_total; + progress.updated_at_unix_ms = now_unix_ms(); + self.write_progress(&progress) + } + + fn finish_cache_progress(&self, rel: &str, size: u64) -> Result<()> { + self.finish_cache_progress_with_phase(rel, "cached", size) + } + + fn finish_cache_progress_with_phase(&self, rel: &str, phase: &str, size: u64) -> Result<()> { + let mut progress = self.load_progress()?; + progress.phase = phase.to_string(); + progress.active_path.clear(); + progress.active_bytes = 0; + progress.active_total = 0; + progress.files_cached = progress.files_cached.saturating_add(1); + progress.bytes_cached = progress.bytes_cached.saturating_add(size); + progress.last_cached_path = rel.to_string(); + progress.updated_at_unix_ms = now_unix_ms(); + self.write_progress(&progress) + } +} + +fn now_unix_ms() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + 
.map(|duration| duration.as_millis()) + .unwrap_or_default() +} + +fn now_unix_secs() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_secs()) + .unwrap_or_default() +} + +fn read_range_from_file(path: &Path, offset: u64, size: usize) -> Result> { + let mut file = File::open(path)?; + file.seek(SeekFrom::Start(offset))?; + let mut buf = vec![0; size]; + let read = file.read(&mut buf)?; + buf.truncate(read); + Ok(buf) +} + +#[cfg(unix)] +trait MetadataMode { + fn mode(&self) -> u32; +} + +#[cfg(unix)] +impl MetadataMode for std::fs::Metadata { + fn mode(&self) -> u32 { + use std::os::unix::fs::MetadataExt; + MetadataExt::mode(self) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{ensure_clone_dirs, ClonePaths, Manifest, Probe}; + use crate::remote::RemoteClient; + use std::os::unix::fs::PermissionsExt; + use std::sync::{Mutex, OnceLock}; + + static ENV_LOCK: OnceLock> = OnceLock::new(); + + #[test] + fn stores_whiteouts() { + let temp = tempfile::tempdir().unwrap(); + let paths = ClonePaths { + root: temp.path().to_path_buf(), + manifest: temp.path().join("manifest.json"), + upper: temp.path().join("upper"), + file_cache: temp.path().join("file-cache"), + db: temp.path().join("db"), + generated: temp.path().join("generated"), + run: temp.path().join("run"), + whiteouts: temp.path().join("whiteouts.json"), + }; + fs::create_dir_all(&paths.upper).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-content/uploads/a.jpg"); + assert!(!store.is_whiteout(rel).unwrap()); + store.add_whiteout(rel).unwrap(); + assert!(store.is_whiteout(rel).unwrap()); + store.clear_whiteout(rel).unwrap(); + assert!(!store.is_whiteout(rel).unwrap()); + } + + #[test] + fn rejects_path_traversal() { + assert!(OverlayStore::clean_rel("../wp-config.php").is_err()); + assert!(OverlayStore::clean_rel("/wp-config.php").is_err()); + assert_eq!( + OverlayStore::clean_rel("./wp-config.php").unwrap(), + 
PathBuf::from("wp-config.php") + ); + } + + #[test] + fn stores_cached_remote_metadata() { + let temp = tempfile::tempdir().unwrap(); + let paths = ClonePaths { + root: temp.path().to_path_buf(), + manifest: temp.path().join("manifest.json"), + upper: temp.path().join("upper"), + file_cache: temp.path().join("file-cache"), + db: temp.path().join("db"), + generated: temp.path().join("generated"), + run: temp.path().join("run"), + whiteouts: temp.path().join("whiteouts.json"), + }; + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-includes/version.php"); + let entry = RemoteEntry { + name: "version.php".to_string(), + kind: "file".to_string(), + size: 123, + mode: 0o100644, + mtime: 42, + }; + + store.put_cached_entry(rel, &entry).unwrap(); + assert_eq!(store.cached_entry(rel).unwrap().unwrap().size, 123); + let reloaded = OverlayStore::new(&paths); + assert_eq!( + reloaded.cached_entry(rel).unwrap().unwrap().size, + 123, + "metadata journal must be enough to rebuild cached entries after restart" + ); + assert_eq!( + store + .cached_entry(Path::new("wp-includes")) + .unwrap() + .unwrap() + .kind, + "dir", + "offline lookups need cached parent directory metadata" + ); + store.remove_cached(rel).unwrap(); + assert!(store.cached_entry(rel).unwrap().is_none()); + let reloaded = OverlayStore::new(&paths); + assert!(reloaded.cached_entry(rel).unwrap().is_none()); + } + + #[test] + fn ignores_truncated_final_metadata_journal_line() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("index.php"); + let entry = RemoteEntry { + name: "index.php".to_string(), + kind: "file".to_string(), + size: 405, + mode: 0o100644, + mtime: 42, + }; + + store.put_cached_entry(rel, &entry).unwrap(); + let mut journal = OpenOptions::new() + .append(true) + .open(store.metadata_journal_path()) + .unwrap(); + journal + 
.write_all(b"{\"entry\":{\"kind\":\"file\",\"mode\":33188") + .unwrap(); + drop(journal); + + let reloaded = OverlayStore::new(&paths); + assert_eq!( + reloaded.cached_entry(rel).unwrap().unwrap().size, + 405, + "a crash during cache metadata append must not poison the whole lazy filesystem" + ); + } + + #[test] + fn ignores_malformed_cache_progress_file() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + fs::write(store.progress_path(), b"{\"phase\":\"fetching\"\ntrailing").unwrap(); + + store + .note_cache_file_finished(Path::new("wp-settings.php"), "runtime-code-pack", 123) + .unwrap(); + + let progress = store.load_progress().unwrap(); + assert_eq!(progress.phase, "runtime-code-pack"); + assert_eq!(progress.files_cached, 1); + assert_eq!(progress.bytes_cached, 123); + assert_eq!(progress.last_cached_path, "wp-settings.php"); + } + + #[test] + fn cached_metadata_refreshes_when_another_overlay_appends_journal() { + let temp = tempfile::tempdir().unwrap(); + let paths = ClonePaths { + root: temp.path().to_path_buf(), + manifest: temp.path().join("manifest.json"), + upper: temp.path().join("upper"), + file_cache: temp.path().join("file-cache"), + db: temp.path().join("db"), + generated: temp.path().join("generated"), + run: temp.path().join("run"), + whiteouts: temp.path().join("whiteouts.json"), + }; + let mounted_view = OverlayStore::new(&paths); + assert!(mounted_view + .cached_entry(Path::new("wp-includes/load.php")) + .unwrap() + .is_none()); + + let pack_writer = OverlayStore::new(&paths); + let entry = RemoteEntry { + name: "load.php".to_string(), + kind: "file".to_string(), + size: 12, + mode: 0o100644, + mtime: 123, + }; + pack_writer + .put_cached_file_bytes_without_progress( + Path::new("wp-includes/load.php"), + &entry, + b"> "$WPCOW_FAKE_SSH_LOG" +cmd="${@: -1}" +exec bash -lc "$cmd" +"#, + ) + .unwrap(); + let 
mut perms = fs::metadata(&fake_ssh).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&fake_ssh, perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_FAKE_SSH_LOG", &log); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let mut manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + manifest.cache_max_file_bytes = 1024; + let remote = RemoteClient::new(manifest, None); + let store = OverlayStore::new(&paths); + let rel = Path::new("index.php"); + + let first = store + .read_cached_or_remote(&remote, rel, 0, 1024, 1024) + .unwrap(); + assert_eq!(first, b"remote wordpress"); + fs::remove_file(remote_root.join("index.php")).unwrap(); + let ssh_after_first = fs::read_to_string(&log).unwrap().lines().count(); + + let second = store + .read_cached_or_remote(&remote, rel, 0, 1024, 1024) + .unwrap(); + assert_eq!(second, b"remote wordpress"); + let ssh_after_second = fs::read_to_string(&log).unwrap().lines().count(); + assert_eq!( + ssh_after_second, ssh_after_first, + "cached read must not invoke ssh after the remote file disappears" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_log { + Some(value) => std::env::set_var("WPCOW_FAKE_SSH_LOG", value), + None => std::env::remove_var("WPCOW_FAKE_SSH_LOG"), + } + } + + #[test] + fn supplied_metadata_skips_remote_stat_before_caching_file() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = 
std::env::var_os("PATH"); + let old_log = std::env::var_os("WPCOW_FAKE_SSH_LOG"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + let log = temp.path().join("ssh.log"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b"remote wordpress").unwrap(); + + let fake_ssh = bin.join("ssh"); + fs::write( + &fake_ssh, + r#"#!/usr/bin/env bash + set -euo pipefail + printf 'CALL\n' >> "$WPCOW_FAKE_SSH_LOG" + cmd="${@: -1}" + exec bash -lc "$cmd" + "#, + ) + .unwrap(); + let mut perms = fs::metadata(&fake_ssh).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&fake_ssh, perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_FAKE_SSH_LOG", &log); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + let store = OverlayStore::new(&paths); + let rel = Path::new("index.php"); + let entry = RemoteEntry { + name: "index.php".to_string(), + kind: "file".to_string(), + size: 16, + mode: 0o100644, + mtime: 42, + }; + + let first = store + .read_cached_or_remote_with_entry(&remote, rel, 0, 1024, 1024, Some(entry)) + .unwrap(); + assert_eq!(first, b"remote wordpress"); + let ssh_lines = fs::read_to_string(&log) + 
.unwrap() + .lines() + .filter(|line| *line == "CALL") + .count(); + assert_eq!( + ssh_lines, 1, + "FUSE lookup metadata should let the first read fetch file bytes without a second remote stat command" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_log { + Some(value) => std::env::set_var("WPCOW_FAKE_SSH_LOG", value), + None => std::env::remove_var("WPCOW_FAKE_SSH_LOG"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } + + #[test] + fn stat_prefetched_bytes_are_reused_without_remote_read() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-content/themes/example/style.css"); + let entry = RemoteEntry { + name: "style.css".to_string(), + kind: "file".to_string(), + size: 17, + mode: 0o100644, + mtime: 42, + }; + store + .put_cached_file_bytes(rel, &entry, b"body{color:black}") + .unwrap(); + + std::env::set_var("PATH", temp.path().join("missing-bin")); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + let remote = RemoteClient::new( + Manifest::new( + "example".to_string(), + "unreachable-host".to_string(), + "/remote/wp".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ), + None, + ); + let bytes = store + .read_cached_or_remote_with_entry(&remote, rel, 0, 1024, 1024, Some(entry.clone())) + .unwrap(); + assert_eq!(bytes, b"body{color:black}"); + 
assert_eq!(store.cached_entry(rel).unwrap().unwrap().size, 17); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } +} diff --git a/experiments/remote-wp-cow/src/plugin_policy.rs b/experiments/remote-wp-cow/src/plugin_policy.rs new file mode 100644 index 00000000..639d91e0 --- /dev/null +++ b/experiments/remote-wp-cow/src/plugin_policy.rs @@ -0,0 +1,264 @@ +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::config::{ClonePaths, Manifest}; + +pub const POLICY_VERSION: u32 = 1; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PluginPolicy { + pub version: u32, + pub mode: String, + #[serde(default)] + pub active: Vec, + #[serde(default)] + pub allow: Vec, + #[serde(default)] + pub quarantine: BTreeMap, +} + +impl PluginPolicy { + pub fn new(active: &[String]) -> Self { + Self { + version: POLICY_VERSION, + mode: "auto".to_string(), + active: normalized_plugins(active.iter().cloned()), + allow: Vec::new(), + quarantine: BTreeMap::new(), + } + } + + pub fn normalize(mut self, active: &[String]) -> Self { + self.version = POLICY_VERSION; + if self.mode.trim().is_empty() { + self.mode = "auto".to_string(); + } + self.active = normalized_plugins(active.iter().cloned()); + let active_set: BTreeSet<_> = self.active.iter().cloned().collect(); + self.allow = normalized_plugins( + self.allow + .into_iter() + .filter(|plugin| active_set.contains(plugin)), + ); + self.quarantine + .retain(|plugin, _| active_set.contains(plugin)); + let quarantined: BTreeSet<_> = self.quarantine.keys().cloned().collect(); + self.allow.retain(|allowed| !quarantined.contains(allowed)); + self + } + + pub fn allows(&self, plugin: 
&str) -> bool { + self.allow.iter().any(|allowed| allowed == plugin) + } + + pub fn allow_plugin(&mut self, plugin: &str) { + if !self.allows(plugin) { + self.allow.push(plugin.to_string()); + self.allow.sort(); + } + self.quarantine.remove(plugin); + } + + pub fn quarantine_plugin(&mut self, plugin: &str, reason: impl Into) { + self.quarantine.insert(plugin.to_string(), reason.into()); + self.allow.retain(|allowed| allowed != plugin); + } +} + +pub fn policy_path(paths: &ClonePaths) -> PathBuf { + paths.run.join("plugin-policy.json") +} + +pub fn candidate_policy_path(paths: &ClonePaths, plugin: &str) -> PathBuf { + paths.run.join(format!( + "plugin-policy-candidate-{}.json", + sanitize_plugin_name(plugin) + )) +} + +pub fn write_initial_policy(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { + let path = policy_path(paths); + let active = active_plugins_for_policy(manifest); + let policy = load_policy_or_new(&path, &active)?; + write_policy_atomic(&path, &policy) +} + +pub fn active_plugins_for_policy(manifest: &Manifest) -> Vec { + manifest + .probe + .active_plugins + .iter() + .chain(manifest.probe.active_sitewide_plugins.iter()) + .cloned() + .collect() +} + +pub fn load_policy_or_new(path: &Path, active: &[String]) -> Result { + if !path.is_file() { + return Ok(PluginPolicy::new(active)); + } + + let bytes = fs::read(path).with_context(|| format!("read {}", path.display()))?; + let policy = serde_json::from_slice::(&bytes) + .with_context(|| format!("parse {}", path.display()))?; + Ok(policy.normalize(active)) +} + +pub fn write_policy_atomic(path: &Path, policy: &PluginPolicy) -> Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + let tmp = path.with_extension(format!( + "{}.tmp", + path.extension() + .and_then(|extension| extension.to_str()) + .unwrap_or("json") + )); + let json = serde_json::to_vec_pretty(policy)?; + fs::write(&tmp, [json, b"\n".to_vec()].concat()) + .with_context(|| format!("write {}", 
tmp.display()))?; + fs::rename(&tmp, path).with_context(|| { + format!( + "replace {} with {}", + path.display(), + tmp.file_name() + .map(|name| name.to_string_lossy()) + .unwrap_or_default() + ) + })?; + Ok(()) +} + +pub fn policy_with_candidate(base: &PluginPolicy, plugin: &str) -> PluginPolicy { + let mut policy = base.clone(); + policy.allow_plugin(plugin); + policy +} + +fn normalized_plugins(plugins: impl IntoIterator) -> Vec { + plugins + .into_iter() + .map(|plugin| plugin.trim().trim_start_matches('/').to_string()) + .filter(|plugin| { + !plugin.is_empty() + && !plugin.contains("..") + && !plugin.starts_with('/') + && plugin.ends_with(".php") + }) + .collect::>() + .into_iter() + .collect() +} + +fn sanitize_plugin_name(plugin: &str) -> String { + let mut out = String::new(); + for ch in plugin.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if matches!(ch, '/' | '-' | '_' | '.') { + out.push('-'); + } + } + let out = out.trim_matches('-').to_string(); + if out.is_empty() { + "plugin".to_string() + } else { + out + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn policy_starts_auto_with_no_allowed_plugins() { + let active = vec![ + "woocommerce/woocommerce.php".to_string(), + "/bad-prefix/plugin.php".to_string(), + "../escape.php".to_string(), + ]; + + let policy = PluginPolicy::new(&active); + + assert_eq!(policy.mode, "auto"); + assert_eq!( + policy.active, + vec!["bad-prefix/plugin.php", "woocommerce/woocommerce.php"] + ); + assert!(policy.allow.is_empty()); + } + + #[test] + fn existing_policy_preserves_allowed_active_plugins_only() { + let temp = tempfile::tempdir().unwrap(); + let path = temp.path().join("plugin-policy.json"); + let mut policy = PluginPolicy::new(&[ + "akismet/akismet.php".to_string(), + "woocommerce/woocommerce.php".to_string(), + ]); + policy.allow_plugin("woocommerce/woocommerce.php"); + policy.allow_plugin("missing/missing.php"); + 
policy.quarantine_plugin("akismet/akismet.php", "timeout"); + write_policy_atomic(&path, &policy).unwrap(); + + let loaded = load_policy_or_new( + &path, + &[ + "akismet/akismet.php".to_string(), + "hello/hello.php".to_string(), + ], + ) + .unwrap(); + + assert_eq!( + loaded.active, + vec!["akismet/akismet.php", "hello/hello.php"] + ); + assert!(loaded.allow.is_empty()); + assert_eq!( + loaded + .quarantine + .get("akismet/akismet.php") + .map(String::as_str), + Some("timeout") + ); + } + + #[test] + fn existing_policy_never_allows_quarantined_plugins() { + let temp = tempfile::tempdir().unwrap(); + let path = temp.path().join("plugin-policy.json"); + let mut policy = + PluginPolicy::new(&["seo/seo.php".to_string(), "visual/visual.php".to_string()]); + policy.allow_plugin("seo/seo.php"); + policy.allow_plugin("visual/visual.php"); + policy + .quarantine + .insert("seo/seo.php".to_string(), "timed out".to_string()); + write_policy_atomic(&path, &policy).unwrap(); + + let loaded = load_policy_or_new( + &path, + &["seo/seo.php".to_string(), "visual/visual.php".to_string()], + ) + .unwrap(); + + assert_eq!(loaded.allow, vec!["visual/visual.php"]); + assert_eq!( + loaded.quarantine.get("seo/seo.php").map(String::as_str), + Some("timed out") + ); + } + + #[test] + fn candidate_policy_allows_one_extra_plugin() { + let base = PluginPolicy::new(&["woocommerce/woocommerce.php".to_string()]); + let candidate = policy_with_candidate(&base, "woocommerce/woocommerce.php"); + + assert_eq!(candidate.allow, vec!["woocommerce/woocommerce.php"]); + } +} diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs new file mode 100644 index 00000000..972eac55 --- /dev/null +++ b/experiments/remote-wp-cow/src/remote.rs @@ -0,0 +1,1866 @@ +use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; +use serde::{Deserialize, Serialize}; +use std::ffi::OsStr; +use std::io::{self, BufRead, BufReader, 
Read, Write}; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::{Duration, Instant}; + +use crate::config::{Manifest, Probe}; +use crate::overlay::OverlayStore; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteEntry { + pub name: String, + pub kind: String, + pub size: u64, + pub mode: u32, + pub mtime: u64, +} + +#[derive(Debug, Clone)] +pub struct RemoteStat { + pub entry: RemoteEntry, + pub data: Option>, +} + +#[derive(Debug, Clone)] +pub struct RuntimeCodePackLimits { + pub max_file_bytes: u64, + pub max_total_bytes: u64, + pub max_files: u64, +} + +#[derive(Debug, Clone)] +pub struct RuntimeCodePackFile { + pub rel: PathBuf, + pub entry: RemoteEntry, + pub bytes: Vec, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RuntimeCodePackSummary { + pub files: u64, + pub bytes: u64, + pub skipped: u64, + pub capped: bool, +} + +#[derive(Debug, Clone)] +pub struct RemoteClient { + manifest: Manifest, + control_path: Option, + file_helper: Arc>>, + db_helper: Arc>>, +} + +impl RemoteClient { + pub fn new(manifest: Manifest, control_path: Option) -> Self { + Self { + manifest, + control_path, + file_helper: Arc::new(Mutex::new(None)), + db_helper: Arc::new(Mutex::new(None)), + } + } + + pub fn manifest(&self) -> &Manifest { + &self.manifest + } + + pub fn ensure_master(&self) -> Result<()> { + let Some(control_path) = &self.control_path else { + return Ok(()); + }; + if control_path.exists() { + return Ok(()); + } + if let Some(parent) = control_path.parent() { + std::fs::create_dir_all(parent)?; + } + let mut command = Command::new("timeout"); + command + .arg("--kill-after=2s") + .arg(format!("{}s", ssh_connect_timeout_secs() + 5)) + .arg("ssh") + .arg("-MNf") + .arg("-S") + .arg(control_path) + .arg("-o") + .arg("ControlMaster=yes") + .arg("-o") + .arg("ControlPersist=600"); 
+ self.add_ssh_safety_options(&mut command); + let status = command + .arg(&self.manifest.ssh) + .status() + .context("start SSH control master")?; + if !status.success() { + return Err(anyhow!( + "failed to start SSH control master for {}", + self.manifest.ssh + )); + } + Ok(()) + } + + pub fn stop_master(&self) -> Result<()> { + let Some(control_path) = &self.control_path else { + return Ok(()); + }; + if !control_path.exists() { + return Ok(()); + } + + let mut command = Command::new("ssh"); + command.arg("-S").arg(control_path); + command.arg("-O").arg("exit"); + self.add_ssh_safety_options(&mut command); + let status = command + .arg(&self.manifest.ssh) + .status() + .context("stop SSH control master")?; + if !status.success() { + return Err(anyhow!( + "failed to stop SSH control master for {}", + self.manifest.ssh + )); + } + Ok(()) + } + + pub fn command(&self, remote_command: &str) -> Command { + self.ssh_command(remote_command, 0) + } + + fn ssh_command(&self, remote_command: &str, timeout_secs: u64) -> Command { + let mut command = if timeout_secs > 0 { + let mut command = Command::new("timeout"); + command + .arg("--kill-after=2s") + .arg(format!("{}s", timeout_secs)) + .arg("ssh"); + command + } else { + Command::new("ssh") + }; + + if let Some(control_path) = &self.control_path { + command.arg("-S").arg(control_path); + command.arg("-o").arg("ControlMaster=auto"); + command.arg("-o").arg("ControlPersist=600"); + } + self.add_ssh_safety_options(&mut command); + command.arg(&self.manifest.ssh); + command.arg(remote_command); + command + } + + pub fn exec_capture(&self, remote_command: &str, stdin: Option<&[u8]>) -> io::Result> { + let timeout_secs = remote_command_timeout_secs(); + let mut command = self.ssh_command(remote_command, timeout_secs); + + let mut child = command + .stdin(if stdin.is_some() { + Stdio::piped() + } else { + Stdio::null() + }) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + + if let Some(input) = stdin { + if 
let Some(mut child_stdin) = child.stdin.take() { + child_stdin.write_all(input)?; + } + } + + let output = child.wait_with_output()?; + if output.status.success() { + return Ok(output.stdout); + } + let stderr = String::from_utf8_lossy(&output.stderr); + if matches!(output.status.code(), Some(124) | Some(137)) { + return Err(io::Error::new( + io::ErrorKind::TimedOut, + format!( + "remote command timed out after {} seconds: {}", + timeout_secs, stderr + ), + )); + } + if output.status.code() == Some(2) || stderr.contains("WPCOW_ENOENT") { + return Err(io::Error::new(io::ErrorKind::NotFound, stderr.to_string())); + } + Err(io::Error::new(io::ErrorKind::Other, stderr.to_string())) + } + + pub fn start_db_tunnel(&self) -> Result> { + if env_bool("WPCOW_REMOTE_DB_TUNNEL", false) != Some(true) { + return Ok(None); + } + if self.manifest.probe.db_host.is_empty() + || self.manifest.probe.db_name.is_empty() + || self.manifest.probe.db_user.is_empty() + { + return Ok(None); + } + + let Some((remote_host, remote_port)) = remote_db_tcp_target(&self.manifest.probe.db_host) + else { + return Ok(None); + }; + + let bind = format!( + "{}:{}:{}:{}", + self.manifest.remote_db_tunnel.host, + self.manifest.remote_db_tunnel.port, + remote_host, + remote_port + ); + let mut command = Command::new("ssh"); + self.add_ssh_safety_options(&mut command); + command + .arg("-o") + .arg("ExitOnForwardFailure=yes") + .arg("-N") + .arg("-L") + .arg(bind) + .arg(&self.manifest.ssh) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()); + + let mut child = command.spawn().context("start remote DB SSH tunnel")?; + for _ in 0..20 { + if let Some(status) = child.try_wait()? 
{ + let mut stderr = String::new(); + if let Some(mut err) = child.stderr.take() { + use std::io::Read; + let _ = err.read_to_string(&mut stderr); + } + return Err(anyhow!( + "remote DB SSH tunnel exited with status {}: {}", + status, + stderr + )); + } + thread::sleep(Duration::from_millis(50)); + } + + Ok(Some(child)) + } + + pub fn stat(&self, rel: &Path) -> io::Result { + let started = Instant::now(); + let result = self.stat_inner(rel); + trace_remote_result("stat", &OverlayStore::rel_string(rel), started, &result); + result + } + + fn stat_inner(&self, rel: &Path) -> io::Result { + self.stat_prefetch_inner(rel, 0).map(|stat| stat.entry) + } + + pub fn stat_prefetch(&self, rel: &Path, max_file_bytes: u64) -> io::Result { + let started = Instant::now(); + let result = self.stat_prefetch_inner(rel, max_file_bytes); + trace_remote_result( + "stat_prefetch", + &format!("{}<= {}", OverlayStore::rel_string(rel), max_file_bytes), + started, + &result, + ); + result + } + + fn stat_prefetch_inner(&self, rel: &Path, max_file_bytes: u64) -> io::Result { + let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "stat", + "path": full, + "max_file_bytes": max_file_bytes, + }); + if let Ok(response) = self.file_helper_request(request) { + if let Some(entry) = response.get("entry") { + let entry: RemoteEntry = serde_json::from_value(entry.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + let data = if response.get("data").is_some() { + match decode_helper_data(response) { + Ok(bytes) + if entry.kind == "file" && bytes.len() as u64 == entry.size => + { + Some(bytes) + } + _ => None, + } + } else { + None + }; + return Ok(RemoteStat { entry, data }); + } + } + } + + let code = r#" +$p=$argv[1]; +clearstatcache(true,$p); +$s=@lstat($p); +if($s===false){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +$kind=is_link($p)?"symlink":(is_dir($p)?"dir":(is_file($p)?"file":"other")); +echo 
json_encode(array( + "name"=>basename($p), + "kind"=>$kind, + "size"=>(int)$s["size"], + "mode"=>(int)$s["mode"], + "mtime"=>(int)$s["mtime"] +)); +"#; + let bytes = self.php_eval(code, &[full])?; + let entry = serde_json::from_slice(&bytes) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + Ok(RemoteStat { entry, data: None }) + } + + pub fn readdir(&self, rel: &Path) -> io::Result> { + let started = Instant::now(); + let result = self.readdir_inner(rel); + trace_remote_result("readdir", &OverlayStore::rel_string(rel), started, &result); + result + } + + fn readdir_inner(&self, rel: &Path) -> io::Result> { + let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "readdir", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + if let Some(entries) = response.get("entries") { + return serde_json::from_value(entries.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)); + } + } + } + + let code = r#" +$p=$argv[1]; +if(!is_dir($p)){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +$out=array(); +foreach(scandir($p) as $name){ + if($name==="."||$name===".."){continue;} + $child=$p.DIRECTORY_SEPARATOR.$name; + $s=@lstat($child); + if($s===false){continue;} + $kind=is_link($child)?"symlink":(is_dir($child)?"dir":(is_file($child)?"file":"other")); + $out[]=array("name"=>$name,"kind"=>$kind,"size"=>(int)$s["size"],"mode"=>(int)$s["mode"],"mtime"=>(int)$s["mtime"]); +} +echo json_encode($out); +"#; + let bytes = self.php_eval(code, &[full])?; + serde_json::from_slice(&bytes) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) + } + + pub fn prefetch_dir( + &self, + rel: &Path, + max_file_bytes: u64, + max_total_bytes: u64, + ) -> io::Result> { + let started = Instant::now(); + let result = self.prefetch_dir_inner(rel, max_file_bytes, max_total_bytes); + trace_remote_result( + "prefetch_dir", + &format!( + "{}<= file:{} total:{}", + 
OverlayStore::rel_string(rel), + max_file_bytes, + max_total_bytes + ), + started, + &result, + ); + result + } + + fn prefetch_dir_inner( + &self, + rel: &Path, + max_file_bytes: u64, + max_total_bytes: u64, + ) -> io::Result> { + if max_file_bytes == 0 || max_total_bytes == 0 || !remote_file_helper_enabled() { + return Ok(Vec::new()); + } + let full = self.remote_full_path(rel)?; + let request = serde_json::json!({ + "op": "prefetch_dir", + "path": full, + "max_file_bytes": max_file_bytes, + "max_total_bytes": max_total_bytes, + }); + let response = self.file_helper_request(request)?; + let mut out = Vec::new(); + let Some(files) = response.get("files").and_then(|value| value.as_array()) else { + return Ok(out); + }; + for file in files { + let Some(entry_value) = file.get("entry") else { + continue; + }; + let entry: RemoteEntry = serde_json::from_value(entry_value.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + let data = if file.get("data").is_some() { + match decode_helper_data(file.clone()) { + Ok(bytes) if entry.kind == "file" && bytes.len() as u64 == entry.size => { + Some(bytes) + } + _ => None, + } + } else { + None + }; + out.push(RemoteStat { entry, data }); + } + Ok(out) + } + + pub fn read_range(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { + let started = Instant::now(); + let result = self.read_range_inner(rel, offset, length); + trace_remote_result( + "read_range", + &format!("{}@{}+{}", OverlayStore::rel_string(rel), offset, length), + started, + &result, + ); + result + } + + fn read_range_inner(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { + let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "read_range", + "path": full, + "offset": offset, + "length": length, + }); + if let Ok(response) = self.file_helper_request(request) { + return decode_helper_data(response); + } + } + + let code = r#" 
+$p=$argv[1];$offset=(int)$argv[2];$length=(int)$argv[3]; +$f=@fopen($p,"rb"); +if(!$f){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +if($offset>0){fseek($f,$offset);} +echo fread($f,$length); +"#; + self.php_eval(code, &[full, offset.to_string(), length.to_string()]) + } + + pub fn read_file(&self, rel: &Path) -> io::Result> { + let started = Instant::now(); + let result = self.read_file_inner(rel); + trace_remote_result( + "read_file", + &OverlayStore::rel_string(rel), + started, + &result, + ); + result + } + + fn read_file_inner(&self, rel: &Path) -> io::Result> { + let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "read_file", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + return decode_helper_data(response); + } + } + + let code = r#" +$p=$argv[1]; +$f=@fopen($p,"rb"); +if(!$f){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +while(!feof($f)){ + echo fread($f,1048576); +} +"#; + self.php_eval(code, &[full]) + } + + pub fn readlink(&self, rel: &Path) -> io::Result { + let started = Instant::now(); + let result = self.readlink_inner(rel); + trace_remote_result("readlink", &OverlayStore::rel_string(rel), started, &result); + result + } + + fn readlink_inner(&self, rel: &Path) -> io::Result { + let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "readlink", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + if let Some(target) = response.get("target").and_then(|value| value.as_str()) { + return Ok(target.to_string()); + } + } + } + + let code = r#" +$p=$argv[1]; +$target=@readlink($p); +if($target===false){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +echo $target; +"#; + let bytes = self.php_eval(code, &[full])?; + Ok(String::from_utf8_lossy(&bytes).to_string()) + } + + pub fn runtime_code_pack( + &self, + roots: &[PathBuf], + limits: RuntimeCodePackLimits, + mut 
on_file: F, + ) -> Result + where + F: FnMut(RuntimeCodePackFile) -> Result<()>, + { + let started = Instant::now(); + let result = self.runtime_code_pack_inner(roots, limits, &mut on_file); + trace_remote_result( + "runtime_code_pack", + &format!("{} roots", roots.len()), + started, + &result, + ); + result + } + + fn runtime_code_pack_inner( + &self, + roots: &[PathBuf], + limits: RuntimeCodePackLimits, + on_file: &mut F, + ) -> Result + where + F: FnMut(RuntimeCodePackFile) -> Result<()>, + { + if limits.max_file_bytes == 0 || limits.max_total_bytes == 0 || limits.max_files == 0 { + return Ok(RuntimeCodePackSummary::default()); + } + + let roots = roots + .iter() + .map(|root| { + OverlayStore::clean_rel(root) + .map(|clean| OverlayStore::rel_string(&clean)) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string())) + }) + .collect::>>()?; + if roots.is_empty() { + return Ok(RuntimeCodePackSummary::default()); + } + + let mut remote_command = format!("php -r {} --", shell_quote(runtime_code_pack_php())); + for arg in [ + self.manifest.remote_path.clone(), + serde_json::to_string(&roots)?, + limits.max_file_bytes.to_string(), + limits.max_total_bytes.to_string(), + limits.max_files.to_string(), + ] { + remote_command.push(' '); + remote_command.push_str(&shell_quote(arg)); + } + + let mut command = self.ssh_command(&remote_command, runtime_code_pack_timeout_secs()); + let mut child = command + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote runtime code pack")?; + let stdout = child + .stdout + .take() + .ok_or_else(|| anyhow!("runtime code pack stdout"))?; + + let mut summary = RuntimeCodePackSummary::default(); + let mut partial_stream = false; + for line in BufReader::new(stdout).lines() { + let line = line.context("read remote runtime code pack")?; + if line.trim().is_empty() { + continue; + } + let value: serde_json::Value = match serde_json::from_str(&line) { + Ok(value) => 
value, + Err(err) => { + if summary.files > 0 { + summary.capped = true; + partial_stream = true; + trace_remote_result::<(), _>( + "runtime_code_pack_partial_line", + &format!("{} cached files", summary.files), + Instant::now(), + &Err(err), + ); + break; + } + return Err(err) + .with_context(|| format!("decode remote runtime code pack line: {line}")); + } + }; + match value.get("type").and_then(|value| value.as_str()) { + Some("file") => { + let rel = value + .get("path") + .and_then(|value| value.as_str()) + .ok_or_else(|| anyhow!("runtime code pack file missing path"))?; + let entry: RemoteEntry = serde_json::from_value( + value + .get("entry") + .cloned() + .ok_or_else(|| anyhow!("runtime code pack file missing entry"))?, + )?; + let bytes = decode_helper_data(value.clone())?; + if entry.kind == "file" && bytes.len() as u64 == entry.size { + summary.files = summary.files.saturating_add(1); + summary.bytes = summary.bytes.saturating_add(entry.size); + on_file(RuntimeCodePackFile { + rel: PathBuf::from(rel), + entry, + bytes, + })?; + } + } + Some("summary") => { + summary = serde_json::from_value(value.clone())?; + } + Some("error") => { + let error = value + .get("error") + .and_then(|value| value.as_str()) + .unwrap_or("remote runtime code pack failed"); + return Err(anyhow!(error.to_string())); + } + _ => {} + } + } + + let output = child.wait_with_output()?; + if !output.status.success() && !(partial_stream && summary.files > 0) { + return Err(anyhow!( + "remote runtime code pack exited with status {}: {}", + output.status, + String::from_utf8_lossy(&output.stderr) + )); + } + + Ok(summary) + } + + pub fn remote_query_readonly(&self, sql: &str) -> Result { + let started = Instant::now(); + let result = self.remote_query_readonly_inner(sql); + trace_remote_result("query", sql, started, &result); + result + } + + fn remote_query_readonly_inner(&self, sql: &str) -> Result { + if remote_db_helper_enabled() { + if let Ok(result) = self.db_helper_query(sql) { + 
if result.ok || !is_remote_db_connection_lost(&result.error) { + return Ok(result); + } + if let Ok(retry) = self.reset_db_helper_and_retry(sql) { + return Ok(retry); + } + } + } + + self.remote_query_readonly_oneshot(sql) + } + + fn remote_query_readonly_oneshot(&self, sql: &str) -> Result { + let probe = &self.manifest.probe; + let code = r#" +$host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5];$timeout=(int)$argv[6]; +if($timeout<1){$timeout=10;} +@set_time_limit($timeout); +if(function_exists("mysqli_report")){mysqli_report(MYSQLI_REPORT_OFF);} +if(!preg_match('/^\s*(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i',$sql)){ + fwrite(STDERR,"WPCOW_REFUSED_WRITE\n");exit(3); +} +$port=null;$socket=null; +if(preg_match('/^(.+):([0-9]+)$/',$host,$m)){ + $host=$m[1];$port=(int)$m[2]; +} elseif(preg_match('/^([^:]+):(\/.*)$/',$host,$m)){ + $host=$m[1];$socket=$m[2]; +} +$mysqli=mysqli_init(); +@$mysqli->options(MYSQLI_OPT_CONNECT_TIMEOUT, min(5,$timeout)); +if(!@$mysqli->real_connect($host,$user,$pass,$db,$port,$socket)){ + fwrite(STDERR,mysqli_connect_error()."\n");exit(1); +} +@$mysqli->set_charset("utf8mb4"); +@$mysqli->query("SET SESSION max_execution_time=".max(1,$timeout * 1000)); +@$mysqli->query("SET SESSION max_statement_time=".max(1,$timeout)); +$res=$mysqli->query($sql, MYSQLI_STORE_RESULT); +if($res===false){ + echo json_encode(array("ok"=>false,"error"=>$mysqli->error,"rows"=>array(),"fields"=>array(),"affected"=>0)); + exit(0); +} +if($res===true){ + echo json_encode(array("ok"=>true,"error"=>"","rows"=>array(),"fields"=>array(),"affected"=>$mysqli->affected_rows)); + exit(0); +} +$fields=array(); +foreach($res->fetch_fields() as $field){$fields[]=$field->name;} +$rows=array(); +while($row=$res->fetch_assoc()){$rows[]=$row;} +echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"affected"=>count($rows))); +"#; + let bytes = self + .php_eval( + code, + &[ + probe.db_host.clone(), + probe.db_user.clone(), + 
probe.db_password.clone(), + probe.db_name.clone(), + sql.to_string(), + remote_db_query_timeout_secs().to_string(), + ], + ) + .context("remote readonly query")?; + let result: RemoteQueryResult = serde_json::from_slice(&bytes)?; + Ok(result) + } + + fn db_helper_query(&self, sql: &str) -> Result { + let mut last_error = None; + for _ in 0..2 { + match self.db_helper_query_once(sql) { + Ok(response) => return Ok(response), + Err(err) => { + last_error = Some(err); + let mut helper = self + .db_helper + .lock() + .map_err(|_| anyhow!("remote DB helper lock"))?; + reset_db_helper(&mut helper); + } + } + } + Err(last_error.unwrap_or_else(|| anyhow!("remote DB helper failed"))) + } + + fn reset_db_helper_and_retry(&self, sql: &str) -> Result { + { + let mut helper = self + .db_helper + .lock() + .map_err(|_| anyhow!("remote DB helper lock"))?; + reset_db_helper(&mut helper); + } + self.db_helper_query(sql) + } + + fn db_helper_query_once(&self, sql: &str) -> Result { + let mut helper = self + .db_helper + .lock() + .map_err(|_| anyhow!("remote DB helper lock"))?; + if helper.is_none() { + *helper = Some(self.start_db_helper()?); + } + let helper = helper + .as_mut() + .ok_or_else(|| anyhow!("remote DB helper missing"))?; + let request = serde_json::to_vec(&serde_json::json!({ "sql": sql }))?; + helper.stdin.write_all(&request)?; + helper.stdin.write_all(b"\n")?; + helper.stdin.flush()?; + + let timeout = Duration::from_secs(remote_db_query_timeout_secs().saturating_add(2)); + let line = read_helper_line(&mut helper.stdout, timeout, "remote DB helper")?; + let response: serde_json::Value = serde_json::from_str(&line)?; + if response + .get("ok") + .and_then(|value| value.as_bool()) + .is_none() + { + let error = response + .get("error") + .and_then(|value| value.as_str()) + .unwrap_or("remote DB helper response missing ok"); + return Err(anyhow!(error.to_string())); + } + Ok(serde_json::from_value(response)?) 
+ } + + fn file_helper_request(&self, request: serde_json::Value) -> io::Result { + let mut last_error = None; + for _ in 0..2 { + match self.file_helper_request_once(&request) { + Ok(response) => return Ok(response), + Err(err) => { + last_error = Some(err); + let mut helper = self + .file_helper + .lock() + .map_err(|_| io::Error::new(io::ErrorKind::Other, "file helper lock"))?; + reset_file_helper(&mut helper); + } + } + } + Err(last_error + .unwrap_or_else(|| io::Error::new(io::ErrorKind::Other, "remote file helper failed"))) + } + + fn file_helper_request_once( + &self, + request: &serde_json::Value, + ) -> io::Result { + let mut helper = self + .file_helper + .lock() + .map_err(|_| io::Error::new(io::ErrorKind::Other, "file helper lock"))?; + if helper.is_none() { + *helper = Some(self.start_file_helper()?); + } + let helper = helper + .as_mut() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "file helper missing"))?; + let request = serde_json::to_vec(request) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err))?; + helper.stdin.write_all(&request)?; + helper.stdin.write_all(b"\n")?; + helper.stdin.flush()?; + + let line = read_helper_line( + &mut helper.stdout, + Duration::from_secs(remote_file_helper_timeout_secs()), + "remote file helper", + )?; + let response: serde_json::Value = serde_json::from_str(&line) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + if response.get("ok").and_then(|value| value.as_bool()) == Some(true) { + return Ok(response); + } + let error = response + .get("error") + .and_then(|value| value.as_str()) + .unwrap_or("remote file helper error") + .to_string(); + let kind = if response.get("kind").and_then(|value| value.as_str()) == Some("not_found") { + io::ErrorKind::NotFound + } else { + io::ErrorKind::Other + }; + Err(io::Error::new(kind, error)) + } + + fn start_file_helper(&self) -> io::Result { + let remote_command = format!("php -r {}", shell_quote(remote_file_helper_php())); + 
let mut command = self.ssh_command(&remote_command, 0); + let mut child = command + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn()?; + let stdin = child + .stdin + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "file helper stdin"))?; + let stdout = child + .stdout + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "file helper stdout"))?; + Ok(RemoteFileHelper { + child, + stdin, + stdout, + }) + } + + fn start_db_helper(&self) -> io::Result { + let probe = &self.manifest.probe; + let mut remote_command = format!("php -r {} --", shell_quote(remote_db_helper_php())); + for arg in [ + probe.db_host.clone(), + probe.db_user.clone(), + probe.db_password.clone(), + probe.db_name.clone(), + remote_db_query_timeout_secs().to_string(), + ] { + remote_command.push(' '); + remote_command.push_str(&shell_quote(arg)); + } + let mut command = self.ssh_command(&remote_command, 0); + let mut child = command + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn()?; + let stdin = child + .stdin + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "DB helper stdin"))?; + let stdout = child + .stdout + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "DB helper stdout"))?; + Ok(RemoteDbHelper { + child, + stdin, + stdout, + }) + } + + fn php_eval(&self, code: &str, args: &[String]) -> io::Result> { + let mut command = format!("php -r {} --", shell_quote(code)); + for arg in args { + command.push(' '); + command.push_str(&shell_quote(arg)); + } + self.exec_capture(&command, None) + } + + fn remote_full_path(&self, rel: &Path) -> io::Result { + let rel = OverlayStore::clean_rel(rel) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string()))?; + let rel = OverlayStore::rel_string(&rel); + if rel.is_empty() { + Ok(self.manifest.remote_path.clone()) + } else { + Ok(format!( + "{}/{}", + 
self.manifest.remote_path.trim_end_matches('/'), + rel + )) + } + } + + fn add_ssh_safety_options(&self, command: &mut Command) { + let connect_timeout = ssh_connect_timeout_secs(); + command + .arg("-o") + .arg(format!("ConnectTimeout={connect_timeout}")); + command.arg("-o").arg("ServerAliveInterval=5"); + command.arg("-o").arg("ServerAliveCountMax=1"); + command.arg("-o").arg("BatchMode=yes"); + } +} + +#[derive(Debug)] +struct RemoteFileHelper { + child: Child, + stdin: ChildStdin, + stdout: ChildStdout, +} + +#[derive(Debug)] +struct RemoteDbHelper { + child: Child, + stdin: ChildStdin, + stdout: ChildStdout, +} + +impl Drop for RemoteFileHelper { + fn drop(&mut self) { + let _ = self.child.kill(); + let _ = self.child.wait(); + } +} + +impl Drop for RemoteDbHelper { + fn drop(&mut self) { + let _ = self.child.kill(); + let _ = self.child.wait(); + } +} + +fn reset_file_helper(helper: &mut Option) { + if let Some(mut helper) = helper.take() { + let _ = helper.child.kill(); + let _ = helper.child.wait(); + } +} + +fn reset_db_helper(helper: &mut Option) { + if let Some(mut helper) = helper.take() { + let _ = helper.child.kill(); + let _ = helper.child.wait(); + } +} + +fn decode_helper_data(response: serde_json::Value) -> io::Result> { + let data = response + .get("data") + .and_then(|value| value.as_str()) + .ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidData, "helper response missing data") + })?; + BASE64 + .decode(data) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) +} + +fn read_helper_line( + stdout: &mut ChildStdout, + timeout: Duration, + label: &str, +) -> io::Result { + let deadline = Instant::now() + timeout; + let fd = stdout.as_raw_fd(); + let mut out = Vec::new(); + + loop { + let now = Instant::now(); + if now >= deadline { + return Err(io::Error::new( + io::ErrorKind::TimedOut, + format!( + "{} did not respond within {} seconds", + label, + timeout.as_secs() + ), + )); + } + + let remaining = 
deadline.saturating_duration_since(now); + let timeout_ms = remaining.as_millis().min(i32::MAX as u128) as i32; + let mut fd_set = libc::pollfd { + fd, + events: libc::POLLIN, + revents: 0, + }; + let ready = unsafe { libc::poll(&mut fd_set, 1, timeout_ms) }; + if ready < 0 { + return Err(io::Error::last_os_error()); + } + if ready == 0 { + continue; + } + if fd_set.revents & libc::POLLIN == 0 { + return Err(io::Error::new( + io::ErrorKind::BrokenPipe, + format!("{label} pipe closed"), + )); + } + + let mut chunk = [0_u8; 8192]; + let read = stdout.read(&mut chunk)?; + if read == 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + format!("{label} closed"), + )); + } + out.extend_from_slice(&chunk[..read]); + if out.last() == Some(&b'\n') || out.contains(&b'\n') { + return String::from_utf8(out) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)); + } + } +} + +fn remote_file_helper_enabled() -> bool { + env_bool("WPCOW_REMOTE_FILE_HELPER", true).unwrap_or(true) +} + +fn remote_db_helper_enabled() -> bool { + env_bool("WPCOW_REMOTE_DB_HELPER", true).unwrap_or(true) +} + +fn is_remote_db_connection_lost(error: &str) -> bool { + let error = error.to_ascii_lowercase(); + error.contains("server has gone away") + || error.contains("lost connection") + || error.contains("error while sending") + || error.contains("connection was killed") +} + +fn trace_remote_result( + op: &str, + target: &str, + started: Instant, + result: &std::result::Result, +) { + if std::env::var("WPCOW_TRACE_REMOTE").ok().as_deref() != Some("1") { + return; + } + let elapsed_ms = started.elapsed().as_millis(); + match result { + Ok(_) => eprintln!("wp-cow remote {op} ok {elapsed_ms}ms {target}"), + Err(err) => eprintln!("wp-cow remote {op} err {elapsed_ms}ms {target}: {err}"), + } +} + +fn remote_file_helper_php() -> &'static str { + r#" +error_reporting(0); +function wpcow_send($payload) { + echo json_encode($payload), "\n"; + flush(); +} +function wpcow_not_found() { 
+ wpcow_send(array("ok"=>false,"kind"=>"not_found","error"=>"WPCOW_ENOENT")); +} +while (($line = fgets(STDIN)) !== false) { + $request = json_decode($line, true); + if (!is_array($request)) { + wpcow_send(array("ok"=>false,"error"=>"invalid request")); + continue; + } + $op = isset($request["op"]) ? $request["op"] : ""; + $path = isset($request["path"]) ? $request["path"] : ""; + if ($op === "stat") { + $max_file_bytes = isset($request["max_file_bytes"]) ? max(0, (int)$request["max_file_bytes"]) : 0; + clearstatcache(true, $path); + $s = @lstat($path); + if ($s === false) { wpcow_not_found(); continue; } + $kind = is_link($path) ? "symlink" : (is_dir($path) ? "dir" : (is_file($path) ? "file" : "other")); + $entry = array( + "name"=>basename($path), + "kind"=>$kind, + "size"=>(int)$s["size"], + "mode"=>(int)$s["mode"], + "mtime"=>(int)$s["mtime"] + ); + $payload = array("ok"=>true,"entry"=>$entry); + if ($max_file_bytes > 0 && $kind === "file" && (int)$s["size"] <= $max_file_bytes) { + $data = @file_get_contents($path); + if ($data !== false && strlen($data) === (int)$s["size"]) { + $payload["data"] = base64_encode($data); + $payload["size"] = strlen($data); + } + } + wpcow_send($payload); + continue; + } + if ($op === "readdir") { + if (!is_dir($path)) { wpcow_not_found(); continue; } + $out = array(); + foreach (scandir($path) as $name) { + if ($name === "." || $name === "..") { continue; } + $child = $path . DIRECTORY_SEPARATOR . $name; + $s = @lstat($child); + if ($s === false) { continue; } + $kind = is_link($child) ? "symlink" : (is_dir($child) ? "dir" : (is_file($child) ? "file" : "other")); + $out[] = array("name"=>$name,"kind"=>$kind,"size"=>(int)$s["size"],"mode"=>(int)$s["mode"],"mtime"=>(int)$s["mtime"]); + } + wpcow_send(array("ok"=>true,"entries"=>$out)); + continue; + } + if ($op === "prefetch_dir") { + if (!is_dir($path)) { wpcow_not_found(); continue; } + $max_file_bytes = isset($request["max_file_bytes"]) ? 
max(0, (int)$request["max_file_bytes"]) : 0; + $max_total_bytes = isset($request["max_total_bytes"]) ? max(0, (int)$request["max_total_bytes"]) : 0; + $total = 0; + $out = array(); + foreach (scandir($path) as $name) { + if ($name === "." || $name === "..") { continue; } + $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION)); + if ($ext !== "php" && $ext !== "json" && $ext !== "mo") { continue; } + $child = $path . DIRECTORY_SEPARATOR . $name; + $s = @lstat($child); + if ($s === false || !is_file($child)) { continue; } + $size = (int)$s["size"]; + if ($size > $max_file_bytes || $size + $total > $max_total_bytes) { continue; } + $data = @file_get_contents($child); + if ($data === false || strlen($data) !== $size) { continue; } + $total += $size; + $out[] = array("entry"=>array( + "name"=>$name, + "kind"=>"file", + "size"=>$size, + "mode"=>(int)$s["mode"], + "mtime"=>(int)$s["mtime"] + ),"data"=>base64_encode($data)); + } + wpcow_send(array("ok"=>true,"files"=>$out,"bytes"=>$total)); + continue; + } + if ($op === "read_file") { + if (!is_file($path)) { wpcow_not_found(); continue; } + $data = @file_get_contents($path); + if ($data === false) { wpcow_not_found(); continue; } + wpcow_send(array("ok"=>true,"data"=>base64_encode($data),"size"=>strlen($data))); + continue; + } + if ($op === "read_range") { + $offset = isset($request["offset"]) ? max(0, (int)$request["offset"]) : 0; + $length = isset($request["length"]) ? max(0, (int)$request["length"]) : 0; + $f = @fopen($path, "rb"); + if (!$f) { wpcow_not_found(); continue; } + if ($offset > 0) { @fseek($f, $offset); } + $data = $length > 0 ? 
fread($f, $length) : ""; + if ($data === false) { $data = ""; } + wpcow_send(array("ok"=>true,"data"=>base64_encode($data),"size"=>strlen($data))); + continue; + } + if ($op === "readlink") { + $target = @readlink($path); + if ($target === false) { wpcow_not_found(); continue; } + wpcow_send(array("ok"=>true,"target"=>$target)); + continue; + } + wpcow_send(array("ok"=>false,"error"=>"unknown op")); +} +"# +} + +fn remote_db_helper_php() -> &'static str { + r#" +error_reporting(0); +$host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$timeout=(int)$argv[5]; +if($timeout<1){$timeout=10;} +@set_time_limit(0); +if(function_exists("mysqli_report")){mysqli_report(MYSQLI_REPORT_OFF);} +$port=null;$socket=null; +if(preg_match('/^(.+):([0-9]+)$/',$host,$m)){ + $host=$m[1];$port=(int)$m[2]; +} elseif(preg_match('/^([^:]+):(\/.*)$/',$host,$m)){ + $host=$m[1];$socket=$m[2]; +} +$mysqli=mysqli_init(); +@$mysqli->options(MYSQLI_OPT_CONNECT_TIMEOUT, min(5,$timeout)); +if(!@$mysqli->real_connect($host,$user,$pass,$db,$port,$socket)){ + echo json_encode(array("ok"=>false,"error"=>mysqli_connect_error(),"rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + exit(0); +} +@$mysqli->set_charset("utf8mb4"); +@$mysqli->query("SET SESSION max_execution_time=".max(1,$timeout * 1000)); +@$mysqli->query("SET SESSION max_statement_time=".max(1,$timeout)); +while (($line = fgets(STDIN)) !== false) { + $request = json_decode($line, true); + if (!is_array($request) || !isset($request["sql"])) { + echo json_encode(array("ok"=>false,"error"=>"invalid request","rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + continue; + } + $sql = $request["sql"]; + if(!preg_match('/^\s*(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i',$sql)){ + echo json_encode(array("ok"=>false,"error"=>"WPCOW_REFUSED_WRITE","rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + continue; + } + $res=$mysqli->query($sql, MYSQLI_STORE_RESULT); + if($res===false){ + echo 
json_encode(array("ok"=>false,"error"=>$mysqli->error,"rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + continue; + } + if($res===true){ + echo json_encode(array("ok"=>true,"error"=>"","rows"=>array(),"fields"=>array(),"affected"=>$mysqli->affected_rows)), "\n"; + flush(); + continue; + } + $fields=array(); + foreach($res->fetch_fields() as $field){$fields[]=$field->name;} + $rows=array(); + while($row=$res->fetch_assoc()){$rows[]=$row;} + echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"affected"=>count($rows))), "\n"; +flush(); +} +"# +} + +fn runtime_code_pack_php() -> &'static str { + r#" +error_reporting(0); +$base = rtrim($argv[1], "/"); +$roots = json_decode($argv[2], true); +$max_file_bytes = max(0, (int)$argv[3]); +$max_total_bytes = max(0, (int)$argv[4]); +$max_files = max(0, (int)$argv[5]); +$total = 0; +$files = 0; +$skipped = 0; +$capped = false; +if (!is_array($roots)) { $roots = array(); } +function wpcow_pack_send($payload) { + echo json_encode($payload), "\n"; + flush(); +} +function wpcow_pack_clean($rel) { + $rel = str_replace("\\", "/", (string)$rel); + $rel = trim($rel, "/"); + if ($rel === "") { return false; } + $parts = array(); + foreach (explode("/", $rel) as $part) { + if ($part === "" || $part === ".") { continue; } + if ($part === "..") { return false; } + $parts[] = $part; + } + return implode("/", $parts); +} +function wpcow_pack_allowed_ext($rel) { + $ext = strtolower(pathinfo($rel, PATHINFO_EXTENSION)); + return in_array($ext, array("php", "inc", "phtml", "json", "mo"), true); +} +function wpcow_pack_excluded($rel) { + return $rel === "wp-config.php" || strpos($rel . 
"/", "wp-content/uploads/") === 0; +} +function wpcow_pack_entry($path, $name, $size, $mtime) { + $mode = 0100644; + $stat = @lstat($path); + if (is_array($stat)) { $mode = (int)$stat["mode"]; } + return array("name"=>$name,"kind"=>"file","size"=>$size,"mode"=>$mode,"mtime"=>$mtime); +} +function wpcow_pack_file($rel, $path) { + global $max_file_bytes, $max_total_bytes, $max_files, $total, $files, $skipped, $capped; + if ($capped) { return; } + if (wpcow_pack_excluded($rel) || !wpcow_pack_allowed_ext($rel)) { $skipped++; return; } + clearstatcache(true, $path); + if (!is_file($path)) { $skipped++; return; } + $size = filesize($path); + if ($size === false) { $skipped++; return; } + $size = (int)$size; + if ($size > $max_file_bytes) { $skipped++; return; } + if ($files >= $max_files || $total + $size > $max_total_bytes) { $capped = true; return; } + $data = @file_get_contents($path); + if ($data === false || strlen($data) !== $size) { $skipped++; return; } + $mtime = @filemtime($path); + if ($mtime === false) { $mtime = 0; } + $files++; + $total += $size; + wpcow_pack_send(array( + "type"=>"file", + "path"=>$rel, + "entry"=>wpcow_pack_entry($path, basename($path), $size, (int)$mtime), + "data"=>base64_encode($data) + )); +} +function wpcow_pack_dir($rel, $path) { + global $capped; + $stack = array(array($rel, $path)); + while (!$capped && !empty($stack)) { + $item = array_pop($stack); + $dir_rel = $item[0]; + $dir_path = $item[1]; + if (wpcow_pack_excluded($dir_rel) || !is_dir($dir_path)) { continue; } + $names = @scandir($dir_path); + if (!is_array($names)) { continue; } + rsort($names, SORT_STRING); + foreach ($names as $name) { + if ($name === "." || $name === "..") { continue; } + $child_rel = $dir_rel === "" ? $name : $dir_rel . "/" . $name; + $child_path = $dir_path . DIRECTORY_SEPARATOR . 
$name; + if (wpcow_pack_excluded($child_rel)) { continue; } + if (is_dir($child_path) && !is_link($child_path)) { + $stack[] = array($child_rel, $child_path); + } elseif (is_file($child_path)) { + wpcow_pack_file($child_rel, $child_path); + if ($capped) { break; } + } + } + } +} +foreach ($roots as $root) { + if ($capped) { break; } + $rel = wpcow_pack_clean($root); + if ($rel === false) { $skipped++; continue; } + $path = $base . "/" . $rel; + if (is_file($path)) { + wpcow_pack_file($rel, $path); + } elseif (is_dir($path)) { + wpcow_pack_dir($rel, $path); + } else { + $skipped++; + } +} +wpcow_pack_send(array("type"=>"summary","files"=>$files,"bytes"=>$total,"skipped"=>$skipped,"capped"=>$capped)); +"# +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteQueryResult { + pub ok: bool, + pub error: String, + pub rows: Vec>, + pub fields: Vec, + pub affected: i64, +} + +pub fn probe_wordpress(ssh: &str, remote_path: &str) -> Result { + let script = r#" + WP_CONTENT_DIR . '/uploads'); +$out = array( + 'abspath' => defined('ABSPATH') ? ABSPATH : getcwd(), + 'wp_content_dir' => defined('WP_CONTENT_DIR') ? WP_CONTENT_DIR : getcwd() . '/wp-content', + 'uploads_dir' => isset($uploads['basedir']) ? $uploads['basedir'] : '', + 'table_prefix' => isset($wpdb) ? $wpdb->prefix : (isset($table_prefix) ? $table_prefix : 'wp_'), + 'db_name' => defined('DB_NAME') ? DB_NAME : '', + 'db_host' => defined('DB_HOST') ? DB_HOST : '', + 'db_user' => defined('DB_USER') ? DB_USER : '', + 'db_password' => defined('DB_PASSWORD') ? DB_PASSWORD : '', + 'siteurl' => function_exists('get_option') ? get_option('siteurl') : '', + 'home' => function_exists('get_option') ? get_option('home') : '', + 'template' => function_exists('get_template') ? get_template() : (function_exists('get_option') ? get_option('template') : ''), + 'stylesheet' => function_exists('get_stylesheet') ? get_stylesheet() : (function_exists('get_option') ? 
get_option('stylesheet') : ''), + 'active_plugins' => function_exists('get_option') && is_array(get_option('active_plugins')) ? array_values(get_option('active_plugins')) : array(), + 'active_sitewide_plugins' => function_exists('get_site_option') && is_array(get_site_option('active_sitewide_plugins')) ? array_keys(get_site_option('active_sitewide_plugins')) : array() +); +echo json_encode($out); +"#; + + let remote_command = format!("cd {} && php", shell_quote(remote_path)); + let output = Command::new("timeout") + .arg("--kill-after=2s") + .arg(format!("{}s", remote_command_timeout_secs())) + .arg("ssh") + .arg("-o") + .arg(format!("ConnectTimeout={}", ssh_connect_timeout_secs())) + .arg("-o") + .arg("ServerAliveInterval=5") + .arg("-o") + .arg("ServerAliveCountMax=1") + .arg("-o") + .arg("BatchMode=yes") + .arg(ssh) + .arg(remote_command) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .and_then(|mut child| { + child + .stdin + .as_mut() + .expect("stdin is piped") + .write_all(script.as_bytes())?; + child.wait_with_output() + }) + .context("run remote WordPress probe")?; + + if !output.status.success() { + return Err(anyhow!( + "remote probe failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + let probe: Probe = serde_json::from_slice(&output.stdout) + .with_context(|| String::from_utf8_lossy(&output.stdout).to_string())?; + Ok(probe) +} + +pub fn shell_quote(value: impl AsRef) -> String { + let value = value.as_ref().to_string_lossy(); + if value.is_empty() { + return "''".to_string(); + } + let escaped = value.replace('\'', "'\"'\"'"); + format!("'{}'", escaped) +} + +fn remote_command_timeout_secs() -> u64 { + env_u64("WPCOW_REMOTE_COMMAND_TIMEOUT_SECS", 20) +} + +fn remote_file_helper_timeout_secs() -> u64 { + env_u64( + "WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", + remote_command_timeout_secs(), + ) +} + +fn remote_db_query_timeout_secs() -> u64 { + env_u64("WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS", 10) +} 
+ +fn runtime_code_pack_timeout_secs() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS", 180) +} + +fn ssh_connect_timeout_secs() -> u64 { + env_u64("WPCOW_SSH_CONNECT_TIMEOUT_SECS", 8) +} + +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + +fn env_bool(name: &str, default: bool) -> Option { + let raw = std::env::var(name).ok()?; + match raw.to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => Some(true), + "0" | "false" | "no" | "off" => Some(false), + _ => Some(default), + } +} + +fn remote_db_tcp_target(db_host: &str) -> Option<(String, u16)> { + if db_host.contains(":/") { + return None; + } + + let (host, port) = if let Some((host, port)) = db_host.rsplit_once(':') { + if let Ok(port) = port.parse::() { + (host, port) + } else { + (db_host, 3306) + } + } else { + (db_host, 3306) + }; + + let host = match host { + "" | "localhost" => "127.0.0.1", + other => other, + }; + + Some((host.to_string(), port)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::os::unix::fs::PermissionsExt; + use std::path::{Path, PathBuf}; + use std::sync::{Mutex, OnceLock}; + + static ENV_LOCK: OnceLock> = OnceLock::new(); + + #[test] + fn quotes_shell_strings() { + assert_eq!(shell_quote("abc"), "'abc'"); + assert_eq!(shell_quote("a'b"), "'a'\"'\"'b'"); + assert_eq!(shell_quote(""), "''"); + } + + #[test] + fn parses_remote_db_tcp_targets() { + assert_eq!( + remote_db_tcp_target("localhost"), + Some(("127.0.0.1".to_string(), 3306)) + ); + assert_eq!( + remote_db_tcp_target("db.example.com:3307"), + Some(("db.example.com".to_string(), 3307)) + ); + assert_eq!(remote_db_tcp_target("localhost:/tmp/mysql.sock"), None); + } + + #[test] + fn classifies_remote_db_connection_loss_errors() { + assert!(is_remote_db_connection_lost("MySQL server has gone away")); + assert!(is_remote_db_connection_lost( + "Lost connection to MySQL server during query" + )); + 
assert!(!is_remote_db_connection_lost("Unknown column 'x'")); + } + + #[test] + #[ignore = "strict harness only: mutates process SSH helper env"] + fn stat_prefetch_returns_small_file_bytes_from_helper() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + let old_timeout = std::env::var_os("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b" format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "1"); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", "5"); + + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + + let prefetched = remote + .stat_prefetch(Path::new("index.php"), 1024) + .expect("stat prefetch"); + assert_eq!(prefetched.entry.size, 20); + assert_eq!( + prefetched.data.as_deref(), + Some(&b" std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + match old_timeout { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"), + } + } + + #[test] + #[ignore = "strict harness only: mutates process SSH 
helper env"] + fn prefetch_dir_batches_only_runtime_file_types() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + let old_timeout = std::env::var_os("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let runtime_dir = remote_root.join("wp-content/plugins/example/includes"); + let bin = temp.path().join("bin"); + fs::create_dir_all(&runtime_dir).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(runtime_dir.join("a.php"), b" format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "1"); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", "5"); + + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + let files = remote + .prefetch_dir(Path::new("wp-content/plugins/example/includes"), 1024, 4096) + .expect("prefetch dir"); + let names = files + .iter() + .map(|stat| stat.entry.name.as_str()) + .collect::>(); + assert!(names.contains(&"a.php")); + assert!(names.contains(&"b.json")); + assert!(!names.contains(&"style.css")); + assert_eq!(files.iter().filter(|stat| stat.data.is_some()).count(), 2); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + match old_timeout { + Some(value) => 
std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"), + } + } + + #[test] + #[ignore = "strict harness only: mutates process SSH helper env"] + fn runtime_code_pack_streams_bounded_runtime_files() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_timeout = std::env::var_os("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + fs::create_dir_all(remote_root.join("wp-includes")).unwrap(); + fs::create_dir_all(remote_root.join("wp-content/uploads/2026/05")).unwrap(); + fs::create_dir_all(remote_root.join("wp-content/plugins/example/assets")).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b" format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS", "10"); + + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + let mut files = Vec::new(); + let summary = remote + .runtime_code_pack( + &[ + PathBuf::from("index.php"), + PathBuf::from("wp-config.php"), + PathBuf::from("wp-includes"), + PathBuf::from("wp-content/plugins/example"), + PathBuf::from("wp-content/uploads"), + ], + RuntimeCodePackLimits { + max_file_bytes: 1024, + max_total_bytes: 8192, + max_files: 100, + }, + |file| { + files.push((file.rel, file.entry.name, file.bytes)); + Ok(()) + }, + ) + .expect("runtime code pack"); + + let paths = files + .iter() + .map(|(rel, _, _)| 
rel.to_string_lossy().to_string()) + .collect::>(); + assert!(paths.contains(&"index.php".to_string())); + assert!(paths.contains(&"wp-includes/load.php".to_string())); + assert!(paths.contains(&"wp-includes/blocks.json".to_string())); + assert!(paths.contains(&"wp-content/plugins/example/example.php".to_string())); + assert!(!paths.contains(&"wp-config.php".to_string())); + assert!(!paths + .iter() + .any(|path| path.starts_with("wp-content/uploads/"))); + assert!(!paths.iter().any(|path| path.ends_with(".css"))); + assert_eq!(summary.files as usize, files.len()); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_timeout { + Some(value) => std::env::set_var("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS", value), + None => std::env::remove_var("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS"), + } + } +} diff --git a/experiments/remote-wp-cow/src/row_cow.rs b/experiments/remote-wp-cow/src/row_cow.rs new file mode 100644 index 00000000..2d2e7f71 --- /dev/null +++ b/experiments/remote-wp-cow/src/row_cow.rs @@ -0,0 +1,2050 @@ +use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; +use std::collections::{BTreeMap, BTreeSet}; + +pub type Row = Map; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +pub struct PkValue(pub String); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RowCowPlan { + RowLevel(RowCowOp), + PromoteTable { tables: Vec, reason: String }, + Unsupported { reason: String }, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RowCowOp { + Select(RowSelect), + Update(RowWrite), + Delete(RowWrite), + Insert(RowInsert), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RowSelect { + pub table: String, + pub pk_column: String, + pub pk_values: Vec, + pub projection: Projection, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RowWrite { + pub table: String, + pub pk_column: String, + pub 
pk_values: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RowInsert { + pub table: String, + pub pk_column: Option, + pub pk_values: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Projection { + All, + Columns(Vec), +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CowQueryResult { + pub ok: bool, + pub error: String, + pub rows: Vec, + pub fields: Vec, + pub affected: i64, +} + +impl CowQueryResult { + pub fn ok(rows: Vec, fields: Vec) -> Self { + Self { + affected: rows.len() as i64, + ok: true, + error: String::new(), + rows, + fields, + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum RowCowExecution { + Select(CowQueryResult), + PreparedLocalWrite { + table: String, + pk_column: Option, + pk_values: Vec, + copied_rows: usize, + }, + LocalOnlyInsert { + table: String, + }, + Fallback(RowCowPlan), +} + +pub trait RowCowBackend { + fn remote_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result; + + fn local_delete_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_clear_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_reserve_insert_pk(&mut self, _table: &str, _pk_column: Option<&str>) -> Result<()> { + Ok(()) + } + + fn local_tombstones_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result>; +} + +pub fn execute_row_cow( + backend: &mut B, + sql_text: &str, +) -> Result { + match plan_sql(sql_text) { + RowCowPlan::RowLevel(RowCowOp::Select(select)) => { + 
Ok(RowCowExecution::Select(execute_select(backend, &select)?)) + } + RowCowPlan::RowLevel(RowCowOp::Update(write)) => { + let tombstones = + backend.local_tombstones_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + let local = + backend.local_select_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + let local_pks = local + .rows + .iter() + .filter_map(|row| row_pk_value(row, &write.pk_column)) + .collect::>(); + let copy_values = write + .pk_values + .iter() + .filter(|value| !tombstones.contains(*value) && !local_pks.contains(*value)) + .cloned() + .collect::>(); + let remote_rows = + backend.remote_select_by_pk(&write.table, &write.pk_column, ©_values)?; + let rows = rows_matching_pks(remote_rows.rows, &write.pk_column, ©_values); + let copied_rows = backend.local_upsert_rows(&write.table, &rows)?; + Ok(RowCowExecution::PreparedLocalWrite { + table: write.table, + pk_column: Some(write.pk_column), + pk_values: write.pk_values, + copied_rows, + }) + } + RowCowPlan::RowLevel(RowCowOp::Delete(write)) => { + backend.local_tombstone_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + backend.local_delete_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + Ok(RowCowExecution::PreparedLocalWrite { + table: write.table, + pk_column: Some(write.pk_column), + pk_values: write.pk_values, + copied_rows: 0, + }) + } + RowCowPlan::RowLevel(RowCowOp::Insert(insert)) => { + if insert.pk_values.is_empty() { + backend.local_reserve_insert_pk(&insert.table, insert.pk_column.as_deref())?; + } + if let Some(pk_column) = &insert.pk_column { + backend.local_clear_tombstone_by_pk(&insert.table, pk_column, &insert.pk_values)?; + } + Ok(RowCowExecution::LocalOnlyInsert { + table: insert.table, + }) + } + fallback => Ok(RowCowExecution::Fallback(fallback)), + } +} + +fn execute_select(backend: &mut B, select: &RowSelect) -> Result { + let tombstones = + backend.local_tombstones_by_pk(&select.table, &select.pk_column, &select.pk_values)?; + let local = 
backend.local_select_by_pk(&select.table, &select.pk_column, &select.pk_values)?; + let local_pks = local + .rows + .iter() + .filter_map(|row| row_pk_value(row, &select.pk_column)) + .collect::>(); + let missing_values = select + .pk_values + .iter() + .filter(|value| !tombstones.contains(*value) && !local_pks.contains(*value)) + .cloned() + .collect::>(); + + let remote = if missing_values.is_empty() { + CowQueryResult::ok(Vec::new(), Vec::new()) + } else { + let remote = + backend.remote_select_by_pk(&select.table, &select.pk_column, &missing_values)?; + let rows_to_materialize = remote + .rows + .iter() + .filter(|row| { + row_pk_value(row, &select.pk_column) + .map(|pk| !tombstones.contains(&pk) && !local_pks.contains(&pk)) + .unwrap_or(false) + }) + .cloned() + .collect::>(); + backend.local_upsert_rows(&select.table, &rows_to_materialize)?; + remote + }; + + let mut merged = BTreeMap::::new(); + for row in remote.rows { + if let Some(pk) = row_pk_value(&row, &select.pk_column) { + if !tombstones.contains(&pk) { + merged.insert(pk, row); + } + } + } + for row in local.rows { + if let Some(pk) = row_pk_value(&row, &select.pk_column) { + merged.insert(pk, row); + } + } + + let mut rows = Vec::new(); + for value in &select.pk_values { + if let Some(row) = merged.remove(value) { + rows.push(row); + } + } + rows.extend(merged.into_values()); + + Ok(project_rows(rows, &select.projection)) +} + +fn rows_matching_pks(rows: Vec, pk_column: &str, pk_values: &[PkValue]) -> Vec { + let allowed = pk_values.iter().collect::>(); + rows.into_iter() + .filter(|row| { + row_pk_value(row, pk_column) + .as_ref() + .map(|value| allowed.contains(value)) + .unwrap_or(false) + }) + .collect() +} + +fn project_rows(rows: Vec, projection: &Projection) -> CowQueryResult { + match projection { + Projection::All => { + let mut fields = Vec::new(); + for row in &rows { + for key in row.keys() { + if !fields.iter().any(|field| field == key) { + fields.push(key.clone()); + } + } + } + 
CowQueryResult::ok(rows, fields) + } + Projection::Columns(columns) => { + let rows = rows + .into_iter() + .map(|row| { + let mut projected = Row::new(); + for column in columns { + if let Some(value) = row_value_ci(&row, column) { + projected.insert(column.clone(), value.clone()); + } + } + projected + }) + .collect::>(); + CowQueryResult::ok(rows, columns.clone()) + } + } +} + +pub fn plan_sql(sql_text: &str) -> RowCowPlan { + let Some(tokens) = lex(sql_text) else { + return RowCowPlan::Unsupported { + reason: "malformed SQL".to_string(), + }; + }; + if tokens.is_empty() { + return RowCowPlan::Unsupported { + reason: "empty SQL".to_string(), + }; + } + + if token_is(&tokens[0], "SELECT") { + return plan_select(&tokens); + } + if token_is(&tokens[0], "UPDATE") { + return plan_update(&tokens); + } + if token_is(&tokens[0], "DELETE") { + return plan_delete(&tokens); + } + if token_is(&tokens[0], "INSERT") { + return plan_insert(&tokens); + } + + RowCowPlan::Unsupported { + reason: format!("{} is not a row-level COW statement", tokens[0].text), + } +} + +fn plan_select(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "UNION") { + return promote(tables, "UNION reads need table promotion"); + } + if contains_keyword(tokens, "JOIN") { + return promote(tables, "join reads need table promotion"); + } + if contains_keyword(tokens, "GROUP") || contains_keyword(tokens, "HAVING") { + return promote(tables, "grouped reads need table promotion"); + } + if contains_keyword(tokens, "DISTINCT") { + return promote(tables, "distinct reads need table promotion"); + } + + let Some(from_idx) = find_keyword(tokens, "FROM") else { + return unsupported("SELECT without FROM"); + }; + if select_has_aggregate(&tokens[1..from_idx]) { + return promote(tables, "aggregate reads need table promotion"); + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, from_idx + 1) else { + return unsupported("could not parse SELECT 
table"); + }; + if next_idx < tokens.len() + && !token_is(&tokens[next_idx], "WHERE") + && !is_statement_end(&tokens[next_idx]) + { + return promote(tables, "multi-table SELECT needs table promotion"); + } + let Some(where_idx) = find_keyword(tokens, "WHERE") else { + return promote(vec![table], "SELECT without primary-key predicate"); + }; + let (predicate_tokens, trailing_tokens) = + split_select_predicate_and_trailing(&tokens[where_idx + 1..]); + let Some(predicate) = parse_pk_predicate(predicate_tokens) else { + return promote( + vec![table], + "SELECT predicate is not primary-key equality or IN", + ); + }; + if !qualifier_matches_table(predicate.qualifier.as_deref(), &table, alias.as_deref()) { + return promote(vec![table], "SELECT predicate qualifier is ambiguous"); + } + let Some(pk_column) = canonical_pk_column(&table, &predicate.column) else { + return promote( + vec![table], + "SELECT predicate does not use a supported primary key", + ); + }; + let Some(projection) = parse_projection(&tokens[1..from_idx], &table, alias.as_deref()) else { + return promote(vec![table], "SELECT projection cannot be row-merged safely"); + }; + if !select_trailing_clauses_are_row_safe(trailing_tokens, predicate.values.len()) { + return promote(vec![table], "ordered or limited reads need table promotion"); + } + + RowCowPlan::RowLevel(RowCowOp::Select(RowSelect { + table, + pk_column, + pk_values: predicate.values, + projection, + })) +} + +fn split_select_predicate_and_trailing(tokens: &[Token]) -> (&[Token], &[Token]) { + let mut depth = 0_i32; + for (idx, token) in tokens.iter().enumerate() { + match token_symbol(token) { + Some('(') => depth += 1, + Some(')') => depth -= 1, + _ => {} + } + if depth == 0 && (token_is(token, "ORDER") || token_is(token, "LIMIT")) { + return (&tokens[..idx], &tokens[idx..]); + } + } + (tokens, &[]) +} + +fn select_trailing_clauses_are_row_safe(tokens: &[Token], pk_values_len: usize) -> bool { + let tokens = trim_statement_semicolons(tokens); + 
if tokens.is_empty() { + return true; + } + if pk_values_len != 1 { + return false; + } + + let mut idx = 0; + if token_is(&tokens[idx], "ORDER") { + idx += 1; + if !tokens.get(idx).is_some_and(|token| token_is(token, "BY")) { + return false; + } + idx += 1; + while idx < tokens.len() && !token_is(&tokens[idx], "LIMIT") { + idx += 1; + } + } + + if idx == tokens.len() { + return true; + } + if !token_is(&tokens[idx], "LIMIT") { + return false; + } + idx += 1; + + let Some(first) = tokens.get(idx).and_then(token_usize) else { + return false; + }; + idx += 1; + let safe_limit = if tokens.get(idx).and_then(token_symbol) == Some(',') { + idx += 1; + let Some(count) = tokens.get(idx).and_then(token_usize) else { + return false; + }; + idx += 1; + first == 0 && count == 1 + } else { + first == 1 + }; + + safe_limit && trim_statement_semicolons(&tokens[idx..]).is_empty() +} + +fn plan_update(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "JOIN") { + return promote(tables, "join updates need table promotion"); + } + if contains_keyword(tokens, "SELECT") { + return promote(tables, "subquery updates need table promotion"); + } + + let mut table_idx = 1; + while table_idx < tokens.len() + && (token_is(&tokens[table_idx], "LOW_PRIORITY") || token_is(&tokens[table_idx], "IGNORE")) + { + table_idx += 1; + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, table_idx) else { + return unsupported("could not parse UPDATE table"); + }; + let Some(set_idx) = find_keyword(tokens, "SET") else { + return unsupported("UPDATE without SET"); + }; + if next_idx < set_idx { + return promote(vec![table], "multi-table UPDATE needs table promotion"); + } + let Some(where_idx) = find_keyword(tokens, "WHERE") else { + return promote(vec![table], "UPDATE without primary-key predicate"); + }; + let predicate_tokens = &tokens[where_idx + 1..]; + let Some(predicate) = parse_pk_predicate(predicate_tokens) else { + return promote( 
+ vec![table], + "UPDATE predicate is not primary-key equality or IN", + ); + }; + if !qualifier_matches_table(predicate.qualifier.as_deref(), &table, alias.as_deref()) { + return promote(vec![table], "UPDATE predicate qualifier is ambiguous"); + } + let Some(pk_column) = canonical_pk_column(&table, &predicate.column) else { + return promote( + vec![table], + "UPDATE predicate does not use a supported primary key", + ); + }; + + RowCowPlan::RowLevel(RowCowOp::Update(RowWrite { + table, + pk_column, + pk_values: predicate.values, + })) +} + +fn plan_delete(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "JOIN") || contains_keyword(tokens, "USING") { + return promote(tables, "multi-table DELETE needs table promotion"); + } + if tokens.get(1).is_none_or(|token| !token_is(token, "FROM")) { + return promote(tables, "multi-table DELETE needs table promotion"); + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, 2) else { + return unsupported("could not parse DELETE table"); + }; + let Some(where_idx) = find_keyword(tokens, "WHERE") else { + return promote(vec![table], "DELETE without primary-key predicate"); + }; + if next_idx < where_idx { + return promote(vec![table], "multi-table DELETE needs table promotion"); + } + let predicate_tokens = &tokens[where_idx + 1..]; + let Some(predicate) = parse_pk_predicate(predicate_tokens) else { + return promote( + vec![table], + "DELETE predicate is not primary-key equality or IN", + ); + }; + if !qualifier_matches_table(predicate.qualifier.as_deref(), &table, alias.as_deref()) { + return promote(vec![table], "DELETE predicate qualifier is ambiguous"); + } + let Some(pk_column) = canonical_pk_column(&table, &predicate.column) else { + return promote( + vec![table], + "DELETE predicate does not use a supported primary key", + ); + }; + + RowCowPlan::RowLevel(RowCowOp::Delete(RowWrite { + table, + pk_column, + pk_values: predicate.values, + })) +} + +fn 
plan_insert(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "SELECT") { + return promote(tables, "INSERT ... SELECT needs table promotion"); + } + + let mut idx = 1; + while idx < tokens.len() && token_is(&tokens[idx], "IGNORE") { + idx += 1; + } + if idx < tokens.len() && token_is(&tokens[idx], "INTO") { + idx += 1; + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, idx) else { + return unsupported("could not parse INSERT table"); + }; + if alias.is_some() { + return unsupported("INSERT aliases are not row-level safe"); + } + if !insert_has_values_clause(tokens, next_idx) { + return unsupported("INSERT without VALUES is not row-level safe"); + } + let (pk_column, pk_values) = expected_pk_for_table(&table) + .map(|pk_column| { + ( + Some(pk_column.to_string()), + parse_insert_pk_values(tokens, next_idx, pk_column), + ) + }) + .unwrap_or((None, Vec::new())); + RowCowPlan::RowLevel(RowCowOp::Insert(RowInsert { + table, + pk_column, + pk_values, + })) +} + +fn insert_has_values_clause(tokens: &[Token], mut idx: usize) -> bool { + if idx >= tokens.len() { + return false; + } + if tokens.get(idx).and_then(token_symbol) == Some('(') { + let mut depth = 0_i32; + while idx < tokens.len() { + match token_symbol(&tokens[idx]) { + Some('(') => { + depth += 1; + idx += 1; + } + Some(')') => { + depth -= 1; + idx += 1; + if depth == 0 { + break; + } + } + _ => idx += 1, + } + } + } + while idx < tokens.len() && token_symbol(&tokens[idx]) == Some(';') { + idx += 1; + } + idx < tokens.len() && (token_is(&tokens[idx], "VALUES") || token_is(&tokens[idx], "VALUE")) +} + +fn parse_insert_pk_values(tokens: &[Token], mut idx: usize, pk_column: &str) -> Vec { + if tokens.get(idx).and_then(token_symbol) != Some('(') { + return Vec::new(); + } + idx += 1; + + let mut columns = Vec::new(); + loop { + let Some(column) = tokens.get(idx).and_then(token_identifier) else { + return Vec::new(); + }; + 
columns.push(column.to_string()); + idx += 1; + match tokens.get(idx).and_then(token_symbol) { + Some(',') => idx += 1, + Some(')') => { + idx += 1; + break; + } + _ => return Vec::new(), + } + } + + let Some(pk_idx) = columns + .iter() + .position(|column| column.eq_ignore_ascii_case(pk_column)) + else { + return Vec::new(); + }; + + while idx < tokens.len() + && !token_is(&tokens[idx], "VALUES") + && !token_is(&tokens[idx], "VALUE") + { + idx += 1; + } + if idx >= tokens.len() { + return Vec::new(); + } + idx += 1; + + let mut pk_values = Vec::new(); + while idx < tokens.len() { + if token_symbol(&tokens[idx]) != Some('(') { + break; + } + idx += 1; + let mut value_idx = 0; + loop { + if value_idx == pk_idx { + if let Some((value, _next_idx)) = parse_pk_value(tokens, idx) { + pk_values.push(value); + } + } + + idx = skip_insert_value(tokens, idx); + match tokens.get(idx).and_then(token_symbol) { + Some(',') => { + value_idx += 1; + idx += 1; + } + Some(')') => { + idx += 1; + break; + } + _ => return Vec::new(), + } + } + match tokens.get(idx).and_then(token_symbol) { + Some(',') => idx += 1, + _ => break, + } + } + + dedupe_pk_values(pk_values) +} + +fn skip_insert_value(tokens: &[Token], mut idx: usize) -> usize { + let mut depth = 0_i32; + while idx < tokens.len() { + match token_symbol(&tokens[idx]) { + Some('(') => { + depth += 1; + idx += 1; + } + Some(')') if depth == 0 => break, + Some(')') => { + depth -= 1; + idx += 1; + } + Some(',') if depth == 0 => break, + _ => idx += 1, + } + } + idx +} + +fn parse_projection(tokens: &[Token], table: &str, alias: Option<&str>) -> Option { + if tokens.len() == 1 && token_symbol(&tokens[0]) == Some('*') { + return Some(Projection::All); + } + if tokens + .iter() + .any(|token| token_symbol(token) == Some('(') || token_symbol(token) == Some(')')) + { + return None; + } + + let parts = split_top_level_commas(tokens); + let mut columns = Vec::new(); + for part in &parts { + if part.len() == 3 + && token_symbol(&part[1]) 
== Some('.') + && token_symbol(&part[2]) == Some('*') + { + let qualifier = token_identifier(&part[0])?; + if parts.len() == 1 && qualifier_matches_table(Some(qualifier), table, alias) { + return Some(Projection::All); + } + return None; + } + let mut idx = 0; + let Some((column, next_idx)) = parse_column_ref(part, idx) else { + return None; + }; + if !qualifier_matches_table(column.qualifier.as_deref(), table, alias) { + return None; + } + idx = next_idx; + if idx < part.len() { + return None; + } + columns.push(column.name); + } + if columns.is_empty() { + None + } else { + Some(Projection::Columns(columns)) + } +} + +fn select_has_aggregate(tokens: &[Token]) -> bool { + const AGGREGATES: &[&str] = &["COUNT", "SUM", "AVG", "MIN", "MAX", "GROUP_CONCAT"]; + tokens.windows(2).any(|window| { + AGGREGATES.iter().any(|kw| token_is(&window[0], kw)) + && token_symbol(&window[1]) == Some('(') + }) +} + +#[derive(Debug)] +struct PkPredicate { + qualifier: Option, + column: String, + values: Vec, +} + +fn parse_pk_predicate(tokens: &[Token]) -> Option { + let tokens = trim_outer_parens(tokens); + let (column, mut idx) = parse_column_ref(tokens, 0)?; + if idx >= tokens.len() { + return None; + } + + let values = if token_symbol(&tokens[idx]) == Some('=') { + idx += 1; + let (value, next_idx) = parse_pk_value(tokens, idx)?; + idx = next_idx; + vec![value] + } else if token_is(&tokens[idx], "IN") { + idx += 1; + if tokens.get(idx).and_then(token_symbol) != Some('(') { + return None; + } + idx += 1; + let mut values = Vec::new(); + loop { + let (value, next_idx) = parse_pk_value(tokens, idx)?; + values.push(value); + idx = next_idx; + match tokens.get(idx).and_then(token_symbol) { + Some(',') => idx += 1, + Some(')') => { + idx += 1; + break; + } + _ => return None, + } + } + values + } else { + return None; + }; + + while idx < tokens.len() && token_symbol(&tokens[idx]) == Some(';') { + idx += 1; + } + if idx != tokens.len() || values.is_empty() { + return None; + } + + 
Some(PkPredicate { + qualifier: column.qualifier, + column: column.name, + values: dedupe_pk_values(values), + }) +} + +fn dedupe_pk_values(values: Vec) -> Vec { + let mut deduped = Vec::new(); + let mut seen = BTreeSet::new(); + for value in values { + if seen.insert(value.clone()) { + deduped.push(value); + } + } + deduped +} + +#[derive(Debug)] +struct ColumnRef { + qualifier: Option, + name: String, +} + +fn parse_column_ref(tokens: &[Token], idx: usize) -> Option<(ColumnRef, usize)> { + let first = token_identifier(tokens.get(idx)?)?; + let next_idx = idx + 1; + if next_idx + 1 < tokens.len() && token_symbol(&tokens[next_idx]) == Some('.') { + let second = token_identifier(&tokens[next_idx + 1])?; + if next_idx + 2 < tokens.len() && token_symbol(&tokens[next_idx + 2]) == Some('.') { + return None; + } + return Some(( + ColumnRef { + qualifier: Some(first.to_string()), + name: second.to_string(), + }, + next_idx + 2, + )); + } + Some(( + ColumnRef { + qualifier: None, + name: first.to_string(), + }, + next_idx, + )) +} + +fn qualifier_matches_table(qualifier: Option<&str>, table: &str, alias: Option<&str>) -> bool { + let Some(qualifier) = qualifier else { + return true; + }; + qualifier.eq_ignore_ascii_case(table) + || alias + .map(|alias| qualifier.eq_ignore_ascii_case(alias)) + .unwrap_or(false) +} + +fn parse_pk_value(tokens: &[Token], idx: usize) -> Option<(PkValue, usize)> { + let token = tokens.get(idx)?; + match &token.kind { + TokenKind::Number | TokenKind::String => Some((PkValue(token.text.clone()), idx + 1)), + _ => None, + } +} + +fn trim_outer_parens(mut tokens: &[Token]) -> &[Token] { + loop { + if tokens.len() < 2 + || token_symbol(&tokens[0]) != Some('(') + || token_symbol(&tokens[tokens.len() - 1]) != Some(')') + { + return tokens; + } + let mut depth = 0_i32; + let mut wraps = true; + for (idx, token) in tokens.iter().enumerate() { + match token_symbol(token) { + Some('(') => depth += 1, + Some(')') => { + depth -= 1; + if depth == 0 && idx 
!= tokens.len() - 1 { + wraps = false; + break; + } + } + _ => {} + } + if depth < 0 { + wraps = false; + break; + } + } + if !wraps || depth != 0 { + return tokens; + } + tokens = &tokens[1..tokens.len() - 1]; + } +} + +fn parse_table_ref(tokens: &[Token], idx: usize) -> Option<(String, Option, usize)> { + let first = token_identifier(tokens.get(idx)?)?; + let mut table = first.to_string(); + let mut next_idx = idx + 1; + if next_idx + 1 < tokens.len() && token_symbol(&tokens[next_idx]) == Some('.') { + let second = token_identifier(&tokens[next_idx + 1])?; + table = second.to_string(); + next_idx += 2; + } + + let mut alias = None; + if next_idx < tokens.len() && token_is(&tokens[next_idx], "AS") { + next_idx += 1; + if let Some(value) = tokens.get(next_idx).and_then(token_identifier) { + alias = Some(value.to_string()); + next_idx += 1; + } + } else if next_idx < tokens.len() { + let token = &tokens[next_idx]; + if token_identifier(token).is_some() + && !is_table_boundary_keyword(token) + && !token_is(token, "SET") + { + alias = Some(token.text.clone()); + next_idx += 1; + } + } + + Some((table, alias, next_idx)) +} + +fn is_table_boundary_keyword(token: &Token) -> bool { + [ + "WHERE", "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS", "ON", "USING", "ORDER", + "GROUP", "HAVING", "LIMIT", "SET", "VALUES", "VALUE", + ] + .iter() + .any(|kw| token_is(token, kw)) +} + +fn split_top_level_commas(tokens: &[Token]) -> Vec<&[Token]> { + let mut out = Vec::new(); + let mut start = 0; + let mut depth = 0_i32; + for (idx, token) in tokens.iter().enumerate() { + match token_symbol(token) { + Some('(') => depth += 1, + Some(')') => depth -= 1, + Some(',') if depth == 0 => { + out.push(&tokens[start..idx]); + start = idx + 1; + } + _ => {} + } + } + out.push(&tokens[start..]); + out.into_iter().filter(|part| !part.is_empty()).collect() +} + +fn extract_table_refs(tokens: &[Token]) -> Vec { + let mut tables = Vec::new(); + let mut idx = 0; + while idx < tokens.len() { + if 
token_is(&tokens[idx], "FROM") { + idx = collect_comma_table_refs(tokens, idx + 1, &mut tables); + continue; + } + + let table_idx = if token_is(&tokens[idx], "JOIN") + || token_is(&tokens[idx], "INTO") + || token_is(&tokens[idx], "TABLE") + { + Some(idx + 1) + } else if token_is(&tokens[idx], "UPDATE") { + let mut next = idx + 1; + while next < tokens.len() + && (token_is(&tokens[next], "LOW_PRIORITY") || token_is(&tokens[next], "IGNORE")) + { + next += 1; + } + Some(next) + } else { + None + }; + + if let Some(table_idx) = table_idx { + if let Some((table, _alias, _next_idx)) = parse_table_ref(tokens, table_idx) { + push_table_ref(&mut tables, table); + } + } + idx += 1; + } + tables +} + +fn collect_comma_table_refs(tokens: &[Token], mut idx: usize, tables: &mut Vec) -> usize { + while let Some((table, _alias, next_idx)) = parse_table_ref(tokens, idx) { + push_table_ref(tables, table); + idx = next_idx; + if tokens.get(idx).and_then(token_symbol) == Some(',') { + idx += 1; + continue; + } + break; + } + idx +} + +fn push_table_ref(tables: &mut Vec, table: String) { + if !tables.iter().any(|existing| existing == &table) { + tables.push(table); + } +} + +pub fn is_supported_pk_column(column: &str) -> bool { + [ + "ID", + "option_id", + "option_name", + "umeta_id", + "meta_id", + "term_id", + "term_taxonomy_id", + "object_id", + "comment_ID", + "link_id", + ] + .iter() + .any(|candidate| candidate.eq_ignore_ascii_case(column)) +} + +pub fn expected_pk_for_table(table: &str) -> Option<&'static str> { + let lower = table.to_ascii_lowercase(); + if lower == "posts" || lower.ends_with("_posts") { + return Some("ID"); + } + if lower == "users" || lower.ends_with("_users") { + return Some("ID"); + } + if lower == "options" || lower.ends_with("_options") { + return Some("option_id"); + } + if lower == "usermeta" || lower.ends_with("_usermeta") { + return Some("umeta_id"); + } + if lower == "postmeta" || lower.ends_with("_postmeta") { + return Some("meta_id"); + } + if 
lower == "commentmeta" || lower.ends_with("_commentmeta") { + return Some("meta_id"); + } + if lower == "termmeta" || lower.ends_with("_termmeta") { + return Some("meta_id"); + } + if lower == "terms" || lower.ends_with("_terms") { + return Some("term_id"); + } + if lower == "term_taxonomy" || lower.ends_with("_term_taxonomy") { + return Some("term_taxonomy_id"); + } + if lower == "term_relationships" || lower.ends_with("_term_relationships") { + return Some("object_id"); + } + if lower == "comments" || lower.ends_with("_comments") { + return Some("comment_ID"); + } + if lower == "links" || lower.ends_with("_links") { + return Some("link_id"); + } + None +} + +pub fn auto_increment_pk_for_table(table: &str) -> Option<&'static str> { + let pk = expected_pk_for_table(table)?; + let lower = table.to_ascii_lowercase(); + if lower == "term_relationships" || lower.ends_with("_term_relationships") { + return None; + } + Some(pk) +} + +pub fn is_auto_increment_pk_for_table(table: &str, pk_column: &str) -> bool { + auto_increment_pk_for_table(table) + .map(|expected| expected.eq_ignore_ascii_case(pk_column)) + .unwrap_or(false) +} + +fn canonical_pk_column(table: &str, column: &str) -> Option { + let lower = table.to_ascii_lowercase(); + if (lower == "options" || lower.ends_with("_options")) + && column.eq_ignore_ascii_case("option_name") + { + return Some("option_name".to_string()); + } + + if let Some(expected) = expected_pk_for_table(table) { + if expected.eq_ignore_ascii_case(column) { + return Some(expected.to_string()); + } + return None; + } + + if !is_supported_pk_column(column) { + return None; + } + + [ + "ID", + "option_id", + "umeta_id", + "meta_id", + "term_id", + "term_taxonomy_id", + "object_id", + "comment_ID", + "link_id", + ] + .iter() + .find(|candidate| candidate.eq_ignore_ascii_case(column)) + .map(|candidate| (*candidate).to_string()) +} + +pub fn row_pk_value(row: &Row, pk_column: &str) -> Option { + row_value_ci(row, pk_column).and_then(value_to_pk) 
+} + +fn row_value_ci<'a>(row: &'a Row, column: &str) -> Option<&'a Value> { + row.get(column).or_else(|| { + row.iter() + .find(|(key, _value)| key.eq_ignore_ascii_case(column)) + .map(|(_key, value)| value) + }) +} + +fn value_to_pk(value: &Value) -> Option { + match value { + Value::String(value) => Some(PkValue(value.clone())), + Value::Number(value) => Some(PkValue(value.to_string())), + _ => None, + } +} + +fn promote(tables: Vec, reason: &str) -> RowCowPlan { + RowCowPlan::PromoteTable { + tables, + reason: reason.to_string(), + } +} + +fn unsupported(reason: &str) -> RowCowPlan { + RowCowPlan::Unsupported { + reason: reason.to_string(), + } +} + +fn find_keyword(tokens: &[Token], keyword: &str) -> Option { + tokens.iter().position(|token| token_is(token, keyword)) +} + +fn contains_keyword(tokens: &[Token], keyword: &str) -> bool { + find_keyword(tokens, keyword).is_some() +} + +fn is_statement_end(token: &Token) -> bool { + token_symbol(token) == Some(';') +} + +fn trim_statement_semicolons(mut tokens: &[Token]) -> &[Token] { + while tokens + .last() + .and_then(token_symbol) + .is_some_and(|symbol| symbol == ';') + { + tokens = &tokens[..tokens.len() - 1]; + } + tokens +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum TokenKind { + Word, + Number, + String, + Symbol(char), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct Token { + text: String, + kind: TokenKind, +} + +fn token_is(token: &Token, keyword: &str) -> bool { + matches!(token.kind, TokenKind::Word) && token.text.eq_ignore_ascii_case(keyword) +} + +fn token_identifier(token: &Token) -> Option<&str> { + match token.kind { + TokenKind::Word => Some(token.text.as_str()), + _ => None, + } +} + +fn token_symbol(token: &Token) -> Option { + match token.kind { + TokenKind::Symbol(ch) => Some(ch), + _ => None, + } +} + +fn token_usize(token: &Token) -> Option { + if matches!(token.kind, TokenKind::Number) { + token.text.parse::().ok() + } else { + None + } +} + +fn lex(sql: &str) -> Option> { + let 
chars = sql.chars().collect::>(); + let mut tokens = Vec::new(); + let mut idx = 0; + + while idx < chars.len() { + let ch = chars[idx]; + if ch.is_whitespace() { + idx += 1; + continue; + } + if ch == '-' && chars.get(idx + 1) == Some(&'-') { + idx += 2; + while idx < chars.len() && chars[idx] != '\n' { + idx += 1; + } + continue; + } + if ch == '#' { + idx += 1; + while idx < chars.len() && chars[idx] != '\n' { + idx += 1; + } + continue; + } + if ch == '/' && chars.get(idx + 1) == Some(&'*') { + idx += 2; + while idx + 1 < chars.len() && !(chars[idx] == '*' && chars[idx + 1] == '/') { + idx += 1; + } + if idx + 1 >= chars.len() { + return None; + } + idx = (idx + 2).min(chars.len()); + continue; + } + if ch == '`' { + idx += 1; + let mut text = String::new(); + let mut closed = false; + while idx < chars.len() { + if chars[idx] == '`' { + if chars.get(idx + 1) == Some(&'`') { + text.push('`'); + idx += 2; + continue; + } + idx += 1; + closed = true; + break; + } + text.push(chars[idx]); + idx += 1; + } + if !closed { + return None; + } + tokens.push(Token { + text, + kind: TokenKind::Word, + }); + continue; + } + if ch == '\'' || ch == '"' { + let quote = ch; + idx += 1; + let mut text = String::new(); + let mut closed = false; + while idx < chars.len() { + if chars[idx] == '\\' { + if let Some(next) = chars.get(idx + 1) { + text.push(*next); + idx += 2; + continue; + } + return None; + } + if chars[idx] == quote { + if chars.get(idx + 1) == Some("e) { + text.push(quote); + idx += 2; + continue; + } + idx += 1; + closed = true; + break; + } + text.push(chars[idx]); + idx += 1; + } + if !closed { + return None; + } + tokens.push(Token { + text, + kind: TokenKind::String, + }); + continue; + } + if ch.is_ascii_digit() { + let start = idx; + idx += 1; + while idx < chars.len() && chars[idx].is_ascii_digit() { + idx += 1; + } + tokens.push(Token { + text: chars[start..idx].iter().collect(), + kind: TokenKind::Number, + }); + continue; + } + if 
ch.is_ascii_alphabetic() || ch == '_' || ch == '$' { + let start = idx; + idx += 1; + while idx < chars.len() + && (chars[idx].is_ascii_alphanumeric() || chars[idx] == '_' || chars[idx] == '$') + { + idx += 1; + } + tokens.push(Token { + text: chars[start..idx].iter().collect(), + kind: TokenKind::Word, + }); + continue; + } + + tokens.push(Token { + text: ch.to_string(), + kind: TokenKind::Symbol(ch), + }); + idx += 1; + } + + Some(tokens) +} + +pub fn quote_identifier(identifier: &str) -> Result { + if identifier.is_empty() + || !identifier + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$') + { + return Err(anyhow!("unsafe SQL identifier {identifier}")); + } + Ok(format!("`{}`", identifier.replace('`', "``"))) +} + +pub fn pk_values_where_sql(pk_column: &str, pk_values: &[PkValue]) -> Result { + let column = quote_identifier(pk_column)?; + if pk_values.is_empty() { + return Ok("1=0".to_string()); + } + let values = pk_values + .iter() + .map(|value| format!("'{}'", mysql_string_literal(&value.0))) + .collect::>() + .join(", "); + Ok(format!("{column} IN ({values})")) +} + +pub fn select_all_by_pk_sql(table: &str, pk_column: &str, pk_values: &[PkValue]) -> Result { + Ok(format!( + "SELECT * FROM {} WHERE {};", + quote_identifier(table)?, + pk_values_where_sql(pk_column, pk_values)? 
+ )) +} + +pub fn mysql_string_literal(value: &str) -> String { + value.replace('\\', "\\\\").replace('\'', "\\'") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, Clone, PartialEq, Eq)] + enum RemoteCall { + Select { + table: String, + pk_column: String, + pk_values: Vec, + }, + } + + #[derive(Debug, Default)] + struct FakeCowBackend { + remote: BTreeMap>, + local: BTreeMap>, + tombstones: BTreeSet<(String, String, PkValue)>, + remote_calls: Vec, + reserved_inserts: Vec<(String, Option)>, + } + + impl FakeCowBackend { + fn insert_remote( + &mut self, + table: &str, + pk_column: &str, + pk: &str, + pairs: &[(&str, &str)], + ) { + let row = row(pk_column, pk, pairs); + self.remote + .entry(table.to_string()) + .or_default() + .insert(PkValue(pk.to_string()), row); + } + + fn insert_local(&mut self, table: &str, pk_column: &str, pk: &str, pairs: &[(&str, &str)]) { + let row = row(pk_column, pk, pairs); + self.local + .entry(table.to_string()) + .or_default() + .insert(PkValue(pk.to_string()), row); + } + + fn assert_no_remote_writes(&self) { + assert!(self + .remote_calls + .iter() + .all(|call| matches!(call, RemoteCall::Select { .. }))); + } + + fn remote_select_values(&self) -> Vec> { + self.remote_calls + .iter() + .map(|call| match call { + RemoteCall::Select { pk_values, .. 
} => pk_values.clone(), + }) + .collect() + } + } + + impl RowCowBackend for FakeCowBackend { + fn remote_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + self.remote_calls.push(RemoteCall::Select { + table: table.to_string(), + pk_column: pk_column.to_string(), + pk_values: pk_values.to_vec(), + }); + let rows = select_from_table(self.remote.get(table), pk_values); + Ok(CowQueryResult::ok(rows, Vec::new())) + } + + fn local_select_by_pk( + &mut self, + table: &str, + _pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let rows = select_from_table(self.local.get(table), pk_values); + Ok(CowQueryResult::ok(rows, Vec::new())) + } + + fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result { + let table_rows = self.local.entry(table.to_string()).or_default(); + for row in rows { + let pk_column = if (table == "options" || table.ends_with("_options")) + && row_value_ci(row, "option_name").is_some() + { + "option_name" + } else { + expected_pk_for_table(table).unwrap() + }; + let pk = row_pk_value(row, pk_column).unwrap(); + table_rows.insert(pk, row.clone()); + } + Ok(rows.len()) + } + + fn local_delete_by_pk( + &mut self, + table: &str, + _pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let Some(rows) = self.local.get_mut(table) else { + return Ok(0); + }; + let mut deleted = 0; + for pk in pk_values { + if rows.remove(pk).is_some() { + deleted += 1; + } + } + Ok(deleted) + } + + fn local_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let mut added = 0; + for value in pk_values { + if self + .tombstones + .insert((table.to_string(), pk_column.to_string(), value.clone())) + { + added += 1; + } + } + Ok(added) + } + + fn local_clear_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let mut removed = 0; + for value in pk_values { + if self.tombstones.remove(&( + 
table.to_string(), + pk_column.to_string(), + value.clone(), + )) { + removed += 1; + } + } + Ok(removed) + } + + fn local_reserve_insert_pk(&mut self, table: &str, pk_column: Option<&str>) -> Result<()> { + self.reserved_inserts + .push((table.to_string(), pk_column.map(str::to_string))); + Ok(()) + } + + fn local_tombstones_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result> { + Ok(pk_values + .iter() + .filter(|value| { + self.tombstones.contains(&( + table.to_string(), + pk_column.to_string(), + (*value).clone(), + )) + }) + .cloned() + .collect()) + } + } + + fn select_from_table( + table: Option<&BTreeMap>, + pk_values: &[PkValue], + ) -> Vec { + let Some(table) = table else { + return Vec::new(); + }; + pk_values + .iter() + .filter_map(|value| table.get(value).cloned()) + .collect() + } + + fn row(pk_column: &str, pk: &str, pairs: &[(&str, &str)]) -> Row { + let mut row = Row::new(); + row.insert(pk_column.to_string(), Value::String(pk.to_string())); + for (key, value) in pairs { + row.insert((*key).to_string(), Value::String((*value).to_string())); + } + row + } + + fn assert_not_row_level(sql: &str) { + assert!( + !matches!(plan_sql(sql), RowCowPlan::RowLevel(_)), + "{sql} was incorrectly planned as row-level safe" + ); + } + + #[test] + fn plans_supported_wordpress_primary_keys() { + let cases = [ + ("wp_posts", "ID"), + ("wp_options", "option_id"), + ("wp_usermeta", "umeta_id"), + ("wp_postmeta", "meta_id"), + ("wp_terms", "term_id"), + ("wp_term_taxonomy", "term_taxonomy_id"), + ("wp_term_relationships", "object_id"), + ("wp_comments", "comment_ID"), + ("wp_links", "link_id"), + ]; + + for (table, pk) in cases { + let sql = format!("SELECT * FROM `{table}` WHERE `{pk}` IN (1, 2)"); + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = plan_sql(&sql) else { + panic!("{sql} was not planned as a row-level select"); + }; + assert_eq!(select.table, table); + assert_eq!(select.pk_column, pk); + assert_eq!( + 
select.pk_values, + vec![PkValue("1".to_string()), PkValue("2".to_string())] + ); + } + + assert_eq!(auto_increment_pk_for_table("wp_posts"), Some("ID")); + assert_eq!(auto_increment_pk_for_table("wp_term_relationships"), None); + } + + #[test] + fn plans_wordpress_options_by_unique_option_name() { + let RowCowPlan::RowLevel(RowCowOp::Update(write)) = + plan_sql("UPDATE wp_options SET option_value = 'local' WHERE option_name = 'blogname'") + else { + panic!("options writes by option_name should be row-level safe"); + }; + assert_eq!(write.table, "wp_options"); + assert_eq!(write.pk_column, "option_name"); + assert_eq!(write.pk_values, vec![PkValue("blogname".to_string())]); + + let RowCowPlan::RowLevel(RowCowOp::Delete(write)) = + plan_sql("DELETE FROM wp_options WHERE option_name = '_transient_example'") + else { + panic!("options deletes by option_name should be row-level safe"); + }; + assert_eq!(write.pk_column, "option_name"); + + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = + plan_sql("SELECT option_value FROM wp_options WHERE option_name = 'siteurl'") + else { + panic!("options reads by option_name should be row-level safe"); + }; + assert_eq!(select.pk_column, "option_name"); + assert_eq!( + select.projection, + Projection::Columns(vec!["option_value".to_string()]) + ); + } + + #[test] + fn accepts_matching_table_or_alias_qualified_primary_keys() { + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = + plan_sql("SELECT p.ID FROM wp_posts p WHERE p.ID = 1") + else { + panic!("matching alias-qualified predicate should be row-level safe"); + }; + assert_eq!(select.table, "wp_posts"); + assert_eq!(select.pk_column, "ID"); + assert_eq!( + select.projection, + Projection::Columns(vec!["ID".to_string()]) + ); + + let RowCowPlan::RowLevel(RowCowOp::Update(write)) = + plan_sql("UPDATE wp_posts AS p SET post_title = 'changed' WHERE p.ID = 1") + else { + panic!("matching alias-qualified update should be row-level safe"); + }; + assert_eq!(write.table, 
"wp_posts"); + assert_eq!(write.pk_values, vec![PkValue("1".to_string())]); + } + + #[test] + fn primary_key_single_row_selects_allow_safe_order_and_limit_clauses() { + for sql in [ + "SELECT * FROM wp_posts WHERE ID = 74 LIMIT 1", + "SELECT * FROM wp_posts WHERE ID = 74 LIMIT 0, 1", + "SELECT * FROM wp_posts WHERE ID = 74 ORDER BY post_date DESC LIMIT 1", + ] { + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = plan_sql(sql) else { + panic!("{sql} should be row-level safe"); + }; + assert_eq!(select.table, "wp_posts"); + assert_eq!(select.pk_column, "ID"); + assert_eq!(select.pk_values, vec![PkValue("74".to_string())]); + } + + assert_not_row_level("SELECT * FROM wp_posts WHERE ID = 74 LIMIT 1, 1"); + assert_not_row_level("SELECT * FROM wp_posts WHERE ID IN (74, 75) LIMIT 1"); + } + + #[test] + fn update_copy_up_fetches_only_affected_primary_keys() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "1", &[("post_title", "one")]); + backend.insert_remote("wp_posts", "ID", "2", &[("post_title", "two")]); + backend.insert_remote("wp_posts", "ID", "3", &[("post_title", "three")]); + + let execution = execute_row_cow( + &mut backend, + "UPDATE wp_posts SET post_title = 'changed' WHERE ID IN (1, 3)", + ) + .unwrap(); + + assert!(matches!( + execution, + RowCowExecution::PreparedLocalWrite { copied_rows: 2, .. 
} + )); + backend.assert_no_remote_writes(); + assert_eq!( + backend.remote_select_values(), + vec![vec![PkValue("1".to_string()), PkValue("3".to_string())]] + ); + assert!(backend.local["wp_posts"].contains_key(&PkValue("1".to_string()))); + assert!(!backend.local["wp_posts"].contains_key(&PkValue("2".to_string()))); + assert!(backend.local["wp_posts"].contains_key(&PkValue("3".to_string()))); + } + + #[test] + fn update_copy_up_preserves_existing_local_overlay_rows() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "1", &[("post_title", "remote")]); + backend.insert_remote("wp_posts", "ID", "2", &[("post_title", "remote two")]); + backend.insert_local("wp_posts", "ID", "1", &[("post_title", "local draft")]); + + execute_row_cow( + &mut backend, + "UPDATE wp_posts SET post_status = 'draft' WHERE ID IN (1, 2)", + ) + .unwrap(); + + assert_eq!( + backend.remote_select_values(), + vec![vec![PkValue("2".to_string())]], + "copy-up should fetch only affected rows missing from the local overlay" + ); + assert_eq!( + backend.local["wp_posts"][&PkValue("1".to_string())].get("post_title"), + Some(&Value::String("local draft".to_string())), + "existing local overlay row must not be replaced by the remote lower row" + ); + } + + #[test] + fn options_update_copy_up_fetches_only_named_option() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote( + "wp_options", + "option_name", + "blogname", + &[("option_id", "1"), ("option_value", "Remote Name")], + ); + backend.insert_remote( + "wp_options", + "option_name", + "siteurl", + &[("option_id", "2"), ("option_value", "https://example.com")], + ); + + let execution = execute_row_cow( + &mut backend, + "UPDATE wp_options SET option_value = 'Local Name' WHERE option_name = 'blogname'", + ) + .unwrap(); + + assert!(matches!( + execution, + RowCowExecution::PreparedLocalWrite { + pk_column: Some(pk_column), + copied_rows: 1, + .. 
+ } if pk_column == "option_name" + )); + assert_eq!( + backend.remote_select_values(), + vec![vec![PkValue("blogname".to_string())]] + ); + assert!(backend.local["wp_options"].contains_key(&PkValue("blogname".to_string()))); + assert!(!backend.local["wp_options"].contains_key(&PkValue("siteurl".to_string()))); + backend.assert_no_remote_writes(); + } + + #[test] + fn delete_tombstone_hides_remote_row_from_merged_selects() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + execute_row_cow(&mut backend, "DELETE FROM wp_posts WHERE ID = 42").unwrap(); + assert!( + backend.remote_calls.is_empty(), + "DELETE by primary key must tombstone locally without fetching remote rows" + ); + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + assert!(result.rows.is_empty()); + backend.assert_no_remote_writes(); + } + + #[test] + fn select_materializes_remote_rows_for_later_offline_reads() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!(result.rows.len(), 1); + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("remote".to_string())) + ); + assert_eq!( + backend.local["wp_posts"][&PkValue("42".to_string())].get("post_title"), + Some(&Value::String("remote".to_string())), + "row-level reads must materialize remote rows so offline refresh can use local state" + ); + backend.assert_no_remote_writes(); + } + + #[test] + fn repeated_select_uses_materialized_local_row_without_remote_read() { + let mut backend = FakeCowBackend::default(); + 
backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + backend.remote_calls.clear(); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("remote".to_string())) + ); + assert!( + backend.remote_calls.is_empty(), + "materialized row-level reads should be served from local COW state" + ); + } + + #[test] + fn select_materialization_preserves_local_overlay_rows() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + backend.insert_local("wp_posts", "ID", "42", &[("post_title", "local")]); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("local".to_string())) + ); + assert_eq!( + backend.local["wp_posts"][&PkValue("42".to_string())].get("post_title"), + Some(&Value::String("local".to_string())) + ); + backend.assert_no_remote_writes(); + } + + #[test] + fn insert_after_delete_clears_tombstone_and_shadows_remote_row() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + execute_row_cow(&mut backend, "DELETE FROM wp_posts WHERE ID = 42").unwrap(); + execute_row_cow( + &mut backend, + "INSERT INTO wp_posts (ID, post_title) VALUES (42, 'local replacement')", + ) + .unwrap(); + backend.insert_local( + "wp_posts", + "ID", + "42", + &[("post_title", "local replacement")], + ); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); 
+ let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!(result.rows.len(), 1); + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("local replacement".to_string())) + ); + } + + #[test] + fn local_insert_is_not_sent_to_remote_and_appears_in_merged_select() { + let mut backend = FakeCowBackend::default(); + let execution = execute_row_cow( + &mut backend, + "INSERT INTO wp_posts (ID, post_title) VALUES (9, 'local')", + ) + .unwrap(); + assert!(matches!( + execution, + RowCowExecution::LocalOnlyInsert { table } if table == "wp_posts" + )); + assert!( + backend.remote_calls.is_empty(), + "INSERT must not be sent to or read from remote" + ); + + backend.insert_local("wp_posts", "ID", "9", &[("post_title", "local")]); + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 9").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!(result.rows.len(), 1); + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("local".to_string())) + ); + assert!(!backend + .remote + .get("wp_posts") + .unwrap_or(&BTreeMap::new()) + .contains_key(&PkValue("9".to_string()))); + backend.assert_no_remote_writes(); + } + + #[test] + fn local_insert_without_pk_reserves_auto_increment_before_write() { + let mut backend = FakeCowBackend::default(); + let execution = execute_row_cow( + &mut backend, + "INSERT INTO wp_posts (post_title) VALUES ('local auto id')", + ) + .unwrap(); + assert!(matches!( + execution, + RowCowExecution::LocalOnlyInsert { table } if table == "wp_posts" + )); + assert_eq!( + backend.reserved_inserts, + vec![("wp_posts".to_string(), Some("ID".to_string()))] + ); + backend.assert_no_remote_writes(); + } + + #[test] + fn ambiguous_sql_is_never_row_level_safe() { + assert_not_row_level( + "SELECT p.* FROM wp_posts p JOIN wp_postmeta m ON m.post_id = p.ID WHERE p.ID = 1", + ); + 
assert_not_row_level("SELECT COUNT(*) FROM wp_posts WHERE ID IN (1, 2)"); + assert_not_row_level("UPDATE wp_posts SET post_title = 'x' WHERE post_name = 'hello'"); + assert_not_row_level("DELETE FROM wp_posts WHERE post_title = 'hello'"); + assert_not_row_level( + "SELECT * FROM wp_posts WHERE ID IN (1, 2) ORDER BY post_date DESC LIMIT 1", + ); + assert_not_row_level("SELECT * FROM wp_posts WHERE ID = 1 OR ID = 2"); + assert_not_row_level("SELECT * FROM wp_terms WHERE ID = 1"); + assert_not_row_level("SELECT * FROM wp_posts p WHERE q.ID = 1"); + assert_not_row_level("UPDATE wp_posts p SET post_title = 'x' WHERE q.ID = 1"); + assert_not_row_level( + "UPDATE wp_posts SET post_author = (SELECT ID FROM wp_users LIMIT 1) WHERE ID = 1", + ); + assert_not_row_level("DELETE FROM wp_posts p WHERE q.ID = 1"); + assert_not_row_level("DELETE FROM wp_posts p, wp_users u WHERE p.ID = 1"); + assert_not_row_level("INSERT INTO wp_posts SELECT * FROM wp_users"); + assert_not_row_level("INSERT INTO wp_posts nonsense"); + assert_not_row_level("INSERT INTO wp_posts nonsense VALUES (1)"); + assert_not_row_level("SELECT * FROM wp_posts, wp_users WHERE wp_posts.ID = 1"); + assert_not_row_level("SELECT * FROM wp_posts WHERE ID = 'unterminated"); + assert_not_row_level("SELECT * FROM `wp_posts WHERE ID = 1"); + assert_not_row_level("SELECT * FROM wp_posts /* unterminated comment WHERE ID = 1"); + } + + #[test] + fn complex_reads_make_explicit_promotion_decisions() { + let RowCowPlan::PromoteTable { tables, .. } = + plan_sql("SELECT * FROM wp_posts WHERE ID IN (1, 2) ORDER BY post_date DESC LIMIT 1") + else { + panic!("ordered and limited reads must promote instead of row-merging"); + }; + assert_eq!(tables, vec!["wp_posts".to_string()]); + + let RowCowPlan::PromoteTable { tables, .. 
} = plan_sql( + "SELECT p.* FROM wp_posts p JOIN wp_postmeta m ON m.post_id = p.ID WHERE p.ID = 1", + ) else { + panic!("join reads must promote instead of row-merging"); + }; + assert_eq!( + tables, + vec!["wp_posts".to_string(), "wp_postmeta".to_string()] + ); + + let RowCowPlan::PromoteTable { tables, .. } = + plan_sql("SELECT * FROM wp_posts, wp_users WHERE wp_posts.ID = 1") + else { + panic!("comma-join reads must promote instead of row-merging"); + }; + assert_eq!(tables, vec!["wp_posts".to_string(), "wp_users".to_string()]); + } +} diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs new file mode 100644 index 00000000..6f0bcaef --- /dev/null +++ b/experiments/remote-wp-cow/src/run.rs @@ -0,0 +1,976 @@ +use anyhow::{anyhow, Context, Result}; +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::path::{Path, PathBuf}; +use std::process::{Child, Command, Output, Stdio}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread::{self, JoinHandle}; +use std::time::{Duration, Instant}; + +use crate::config::{self, ClonePaths, Manifest}; +use crate::control; +use crate::fusefs; +use crate::generate::ROUTER_BASENAME; +use crate::mysql_proxy; +use crate::plugin_policy; +use crate::remote::RemoteClient; +use crate::runtime_cache; + +pub struct RunOptions { + pub mountpoint: PathBuf, + pub http_addr: String, + pub skip_php: bool, +} + +pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> Result<()> { + let shutdown = Arc::new(AtomicBool::new(false)); + install_signal_handler(shutdown.clone())?; + run_site_until_shutdown(manifest, paths, options, shutdown) +} + +#[cfg(test)] +pub(crate) fn run_site_with_shutdown( + manifest: Manifest, + paths: ClonePaths, + options: RunOptions, + shutdown: Arc, +) -> Result<()> { + run_site_until_shutdown(manifest, paths, options, shutdown) +} + +fn run_site_until_shutdown( + manifest: Manifest, + paths: ClonePaths, + options: 
RunOptions, + shutdown: Arc, +) -> Result<()> { + let control_addr = control_addr_from_url(&manifest.control_url)?; + let remote = RemoteClient::new(manifest.clone(), Some(config::ssh_control_path(&paths))); + let offline = config::is_offline(&paths); + let mut db_tunnel = if offline { + eprintln!( + "wp-cow clone '{}' is severed; remote filesystem and DB reads are disabled", + manifest.name + ); + None + } else { + remote.ensure_master()?; + match remote.start_db_tunnel() { + Ok(Some(child)) => { + eprintln!( + "wp-cow remote DB tunnel listening at {}:{}", + manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port + ); + Some(child) + } + Ok(None) => { + eprintln!( + "wp-cow remote DB tunnel disabled or unavailable; falling back to control reads" + ); + None + } + Err(err) => { + eprintln!("wp-cow remote DB tunnel failed: {err:#}"); + eprintln!("wp-cow falling back to control reads"); + None + } + } + }; + if !offline && runtime_cache::runtime_code_pack_enabled() { + runtime_cache::mark_runtime_code_cache_starting(&paths); + let warm_manifest = manifest.clone(); + let warm_paths = paths.clone(); + let warm_remote = remote.clone(); + thread::spawn(move || { + match runtime_cache::warm_runtime_code_cache(&warm_remote, &warm_manifest, &warm_paths) + { + Ok(summary) => { + eprintln!( + "wp-cow cached {} bounded runtime code files ({:.1} MB); uploads/media remain lazy", + summary.files, + summary.bytes as f64 / (1024.0 * 1024.0) + ); + if summary.capped { + eprintln!( + "wp-cow runtime code cache hit its configured cap; remaining runtime files stay lazy" + ); + } + } + Err(err) => { + runtime_cache::mark_runtime_code_cache_failed(&warm_paths); + eprintln!("wp-cow runtime code cache failed: {err:#}"); + eprintln!("wp-cow continuing with lazy per-file remote reads"); + } + } + }); + } + + let control_shutdown = shutdown.clone(); + let control_manifest = manifest.clone(); + let control_paths = paths.clone(); + let control_remote = remote.clone(); + let 
control_thread = thread::spawn(move || { + control::serve_control( + &control_addr, + control_manifest, + control_paths, + control_remote, + control_shutdown, + ) + }); + + let proxy_addr = format!("{}:{}", manifest.db_proxy.host, manifest.db_proxy.port); + let proxy_shutdown = shutdown.clone(); + let proxy_manifest = manifest.clone(); + let proxy_paths = paths.clone(); + let proxy_remote = remote.clone(); + let proxy_thread = thread::spawn(move || { + mysql_proxy::serve_proxy( + &proxy_addr, + proxy_manifest, + proxy_paths, + proxy_remote, + proxy_shutdown, + ) + }); + + let mount_manifest = manifest.clone(); + let mount_paths = paths.clone(); + let mountpoint = options.mountpoint.clone(); + let mount_thread = + thread::spawn(move || fusefs::mount_foreground(mount_manifest, mount_paths, &mountpoint)); + + if let Err(wait_err) = wait_for_mount(&options.mountpoint, &mount_thread) { + shutdown.store(true, Ordering::SeqCst); + if mount_thread.is_finished() { + match mount_thread.join() { + Ok(Err(mount_err)) => return Err(mount_err).with_context(|| wait_err.to_string()), + Ok(Ok(())) => return Err(wait_err), + Err(_) => return Err(anyhow!("mount thread panicked")).context(wait_err), + } + } + let _ = unmount(&options.mountpoint); + return Err(wait_err); + } + + let mut web = if options.skip_php { + None + } else { + Some(start_web_server( + &paths, + &options.mountpoint, + &options.http_addr, + )?) + }; + let plugin_admission_thread = spawn_plugin_admission_if_enabled( + manifest.clone(), + paths.clone(), + options.mountpoint.clone(), + shutdown.clone(), + ); + + eprintln!( + "wp-cow running clone '{}' at {} from {}", + manifest.name, + options.http_addr, + options.mountpoint.display() + ); + + while !shutdown.load(Ordering::SeqCst) { + if let Some(child) = web.as_mut() { + if let Some(status) = child.try_wait()? 
{ + shutdown.store(true, Ordering::SeqCst); + return Err(anyhow!("web server exited with status {}", status)); + } + } + thread::sleep(Duration::from_millis(250)); + } + + if let Some(mut child) = web { + let _ = child.kill(); + let _ = child.wait(); + } + if let Some(handle) = plugin_admission_thread { + match handle.join() { + Ok(Ok(())) => {} + Ok(Err(err)) => eprintln!("wp-cow plugin admission stopped: {err:#}"), + Err(_) => eprintln!("wp-cow plugin admission thread panicked"), + } + } + if let Some(child) = db_tunnel.as_mut() { + let _ = child.kill(); + let _ = child.wait(); + } + let _ = unmount(&options.mountpoint); + + match control_thread.join() { + Ok(result) => result?, + Err(_) => return Err(anyhow!("control thread panicked")), + } + + match proxy_thread.join() { + Ok(result) => result?, + Err(_) => return Err(anyhow!("MySQL proxy thread panicked")), + } + + match mount_thread.join() { + Ok(result) => { + if let Err(err) = result { + eprintln!("wp-cow mount stopped: {err:#}"); + } + } + Err(_) => return Err(anyhow!("mount thread panicked")), + } + + Ok(()) +} + +pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> Result<()> { + fusefs::mount_foreground(manifest, paths, mountpoint) +} + +fn start_web_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { + match std::env::var("WPCOW_WEB_SERVER") + .unwrap_or_else(|_| "auto".to_string()) + .to_ascii_lowercase() + .as_str() + { + "php" | "php-dev" | "php-dev-server" => start_php_dev_server(paths, mountpoint, http_addr), + "auto" | "frankenphp" => { + let bin = frankenphp_bin(); + if command_exists(&bin) { + start_frankenphp_server(paths, mountpoint, http_addr) + } else { + eprintln!("wp-cow FrankenPHP binary '{bin}' was not found; falling back to PHP's development server"); + start_php_dev_server(paths, mountpoint, http_addr) + } + } + other => Err(anyhow!( + "unsupported WPCOW_WEB_SERVER={other}; expected auto, frankenphp, or php" + )), + } +} + +fn 
frankenphp_bin() -> String { + std::env::var("WPCOW_FRANKENPHP_BIN").unwrap_or_else(|_| "frankenphp".to_string()) +} + +fn command_exists(bin: &str) -> bool { + let path = Path::new(bin); + if path.components().count() > 1 { + return is_executable_file(path); + } + + std::env::var_os("PATH") + .map(|paths| std::env::split_paths(&paths).any(|dir| is_executable_file(&dir.join(bin)))) + .unwrap_or(false) +} + +fn is_executable_file(path: &Path) -> bool { + fs::metadata(path) + .map(|metadata| metadata.is_file() && metadata.permissions().mode() & 0o111 != 0) + .unwrap_or(false) +} + +fn apply_web_server_env(command: &mut Command, paths: &ClonePaths) { + if config::is_offline(paths) { + command + .env("WPCOW_OFFLINE", "1") + .env("WPCOW_REMOTE_DB_TUNNEL", "0"); + } +} + +fn php_side_effect_guards_enabled() -> bool { + !matches!( + std::env::var("WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS") + .unwrap_or_default() + .to_ascii_lowercase() + .as_str(), + "1" | "true" | "yes" | "on" + ) +} + +fn default_php_disabled_functions() -> &'static str { + "exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec" +} + +fn php_disabled_functions() -> String { + match std::env::var("WPCOW_PHP_DISABLE_FUNCTIONS") { + Ok(raw) => { + let raw = raw.trim(); + if raw.is_empty() + || matches!( + raw.to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" | "none" + ) + { + String::new() + } else { + raw.to_string() + } + } + Err(_) => default_php_disabled_functions().to_string(), + } +} + +fn php_safety_ini_entries() -> Vec<(&'static str, String)> { + if !php_side_effect_guards_enabled() { + return Vec::new(); + } + + let disabled_functions = php_disabled_functions(); + let mut entries = vec![("allow_url_include", "0".to_string())]; + if !disabled_functions.is_empty() { + entries.insert(0, ("disable_functions", disabled_functions)); + } + entries +} + +fn opcache_validate_timestamps() -> u64 { + 
env_u64("WPCOW_OPCACHE_VALIDATE_TIMESTAMPS", 0).min(1) +} + +fn start_frankenphp_server( + paths: &ClonePaths, + mountpoint: &Path, + http_addr: &str, +) -> Result { + let caddyfile = paths.run.join("Caddyfile"); + fs::create_dir_all(&paths.run)?; + fs::write( + &caddyfile, + frankenphp_caddyfile(paths, mountpoint, http_addr), + )?; + let mut command = Command::new(frankenphp_bin()); + apply_web_server_env(&mut command, paths); + command + .arg("run") + .arg("--config") + .arg(&caddyfile) + .arg("--adapter") + .arg("caddyfile") + .stdin(Stdio::null()) + .spawn() + .context("start FrankenPHP server") +} + +fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { + let mut command = Command::new("php"); + apply_web_server_env(&mut command, paths); + let workers = env_u64("WPCOW_PHP_WORKERS", 4); + if workers > 1 { + command.env("PHP_CLI_SERVER_WORKERS", workers.to_string()); + } + command + .arg("-d") + .arg(format!( + "max_execution_time={}", + env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 90) + )) + .arg("-d") + .arg(format!( + "default_socket_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15) + )) + .arg("-d") + .arg(format!( + "mysqlnd.net_read_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15) + )) + .arg("-d") + .arg("opcache.enable_cli=1") + .arg("-d") + .arg("opcache.memory_consumption=192") + .arg("-d") + .arg("opcache.max_accelerated_files=20000") + .arg("-d") + .arg(format!( + "opcache.validate_timestamps={}", + opcache_validate_timestamps() + )) + .stdin(Stdio::null()); + apply_php_safety_ini_args(&mut command); + command + .arg("-S") + .arg(http_addr) + .arg("-t") + .arg(mountpoint) + .arg(paths.generated.join("router.php")) + .spawn() + .context("start php built-in server") +} + +fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> String { + let threads = env_u64("WPCOW_PHP_WORKERS", 4); + let max_execution = env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 90); + let socket_timeout = 
env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15); + let opcache_validate = opcache_validate_timestamps(); + let safety_ini = php_safety_ini_entries() + .into_iter() + .map(|(name, value)| format!("\t\tphp_ini {name} {value}\n")) + .collect::(); + let listen = caddy_listen(http_addr); + let root = caddy_quote(&mountpoint.to_string_lossy()); + let router = format!("/{ROUTER_BASENAME}"); + let bind = listen + .bind + .as_ref() + .map(|host| format!("\n\tbind {}", caddy_quote(host))) + .unwrap_or_default(); + format!( + r#"{{ + admin off + auto_https off + frankenphp {{ + num_threads {threads} + max_threads {threads} + php_ini max_execution_time {max_execution} + php_ini default_socket_timeout {socket_timeout} + php_ini mysqlnd.net_read_timeout {socket_timeout} + php_ini opcache.enable 1 + php_ini opcache.memory_consumption 192 + php_ini opcache.max_accelerated_files 20000 + php_ini opcache.validate_timestamps {opcache_validate} + php_ini opcache.revalidate_freq 2 +{safety_ini} + }} +}} + +{site_addr} {{{bind} + root * {root} + + @wpCowRouter path {router} + handle @wpCowRouter {{ + respond 404 + }} + + @wpAdminIndex path /wp-admin /wp-admin/ + handle @wpAdminIndex {{ + rewrite * /wp-admin/index.php + php + }} + + @wpCowInstaller path /wp-admin/install.php /wp-admin/setup-config.php + handle @wpCowInstaller {{ + rewrite * {router}?__wp_cow_installer_guard=1 + php + }} + + @static {{ + file + not path *.php + }} + handle @static {{ + file_server + }} + + @phpFiles path *.php + handle @phpFiles {{ + php + }} + + handle {{ + rewrite * {router} + php + }} +}} +"#, + site_addr = listen.site_addr, + ) +} + +struct CaddyListen { + site_addr: String, + bind: Option, +} + +fn caddy_listen(http_addr: &str) -> CaddyListen { + let without_scheme = http_addr + .strip_prefix("http://") + .or_else(|| http_addr.strip_prefix("https://")) + .unwrap_or(http_addr); + let authority = without_scheme + .split('/') + .next() + .unwrap_or(without_scheme) + .trim(); + let (host, port) = 
split_host_port(authority); + let port = port.unwrap_or("80"); + let bind = match host { + Some(host) if !matches!(host, "" | "0.0.0.0" | "*" | "::" | "[::]") => { + Some(host.trim_matches(['[', ']']).to_string()) + } + _ => None, + }; + CaddyListen { + site_addr: format!("http://:{port}"), + bind, + } +} + +fn split_host_port(authority: &str) -> (Option<&str>, Option<&str>) { + if let Some(rest) = authority.strip_prefix('[') { + if let Some((host, tail)) = rest.split_once(']') { + return ( + Some(host), + tail.strip_prefix(':').filter(|value| !value.is_empty()), + ); + } + } + if let Some((host, port)) = authority.rsplit_once(':') { + return (Some(host), (!port.is_empty()).then_some(port)); + } + (Some(authority), None) +} + +fn caddy_quote(value: &str) -> String { + format!("\"{}\"", value.replace('\\', "\\\\").replace('"', "\\\"")) +} + +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + +fn spawn_plugin_admission_if_enabled( + manifest: Manifest, + paths: ClonePaths, + mountpoint: PathBuf, + shutdown: Arc, +) -> Option>> { + if !plugin_admission_enabled(&manifest) { + return None; + } + + Some(thread::spawn(move || { + run_plugin_admission(manifest, paths, mountpoint, shutdown) + })) +} + +fn plugin_admission_enabled(manifest: &Manifest) -> bool { + if plugin_policy::active_plugins_for_policy(manifest).is_empty() { + return false; + } + if env_is_false("WPCOW_PLUGIN_ADMISSION") { + return false; + } + + let mode = plugin_mode_from_env(); + !matches!( + mode.as_str(), + "full" + | "on" + | "enabled" + | "1" + | "true" + | "yes" + | "off" + | "none" + | "disabled" + | "disable" + | "0" + | "false" + | "no" + ) +} + +fn plugin_mode_from_env() -> String { + let mode = std::env::var("WPCOW_PLUGIN_MODE") + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + if !mode.is_empty() { + return mode; + } + + let legacy = std::env::var("WPCOW_ENABLE_PLUGINS") + 
.unwrap_or_default() + .trim() + .to_ascii_lowercase(); + if matches!( + legacy.as_str(), + "1" | "true" | "yes" | "on" | "full" | "enabled" + ) { + "full".to_string() + } else if matches!( + legacy.as_str(), + "0" | "false" | "no" | "off" | "none" | "disabled" | "disable" + ) { + "off".to_string() + } else { + "auto".to_string() + } +} + +fn env_is_false(name: &str) -> bool { + std::env::var(name) + .ok() + .map(|raw| { + matches!( + raw.trim().to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" | "disabled" + ) + }) + .unwrap_or(false) +} + +fn run_plugin_admission( + manifest: Manifest, + paths: ClonePaths, + mountpoint: PathBuf, + shutdown: Arc, +) -> Result<()> { + if config::is_offline(&paths) { + return Ok(()); + } + if !wait_for_first_request_ready(&paths, &shutdown) { + return Ok(()); + } + sleep_shutdown_aware( + Duration::from_secs(env_u64("WPCOW_PLUGIN_ADMISSION_DELAY_SECS", 20)), + &shutdown, + ); + if shutdown.load(Ordering::SeqCst) || config::is_offline(&paths) { + return Ok(()); + } + + let policy_path = plugin_policy::policy_path(&paths); + let timeout = Duration::from_secs(env_u64("WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS", 15).max(1)); + let active_plugins = plugin_policy::active_plugins_for_policy(&manifest); + let mut policy = plugin_policy::load_policy_or_new(&policy_path, &active_plugins)?; + + for plugin in &policy.active.clone() { + if shutdown.load(Ordering::SeqCst) || config::is_offline(&paths) { + break; + } + policy = plugin_policy::load_policy_or_new(&policy_path, &active_plugins)?; + if policy.allows(plugin) || policy.quarantine.contains_key(plugin) { + continue; + } + + eprintln!("wp-cow admitting plugin candidate '{plugin}'"); + let candidate = plugin_policy::policy_with_candidate(&policy, plugin); + let candidate_path = plugin_policy::candidate_policy_path(&paths, plugin); + plugin_policy::write_policy_atomic(&candidate_path, &candidate)?; + + let admission = run_plugin_smoke(&mountpoint, &candidate_path, timeout); + policy = 
plugin_policy::load_policy_or_new(&policy_path, &active_plugins)?; + match admission { + Ok(()) => { + policy.allow_plugin(plugin); + eprintln!("wp-cow admitted plugin '{plugin}'"); + } + Err(err) => { + let reason = trim_reason(&format!("{err:#}")); + policy.quarantine_plugin(plugin, reason.clone()); + eprintln!("wp-cow quarantined plugin '{plugin}': {reason}"); + } + } + plugin_policy::write_policy_atomic(&policy_path, &policy)?; + let _ = fs::remove_file(candidate_path); + } + + Ok(()) +} + +fn wait_for_first_request_ready(paths: &ClonePaths, shutdown: &AtomicBool) -> bool { + let ready_file = paths.run.join("first-request-ready.json"); + let deadline = Instant::now() + + Duration::from_secs(env_u64("WPCOW_PLUGIN_ADMISSION_READY_TIMEOUT_SECS", 600)); + while Instant::now() < deadline { + if shutdown.load(Ordering::SeqCst) { + return false; + } + if ready_file.is_file() { + return true; + } + thread::sleep(Duration::from_millis(250)); + } + false +} + +fn sleep_shutdown_aware(duration: Duration, shutdown: &AtomicBool) { + let deadline = Instant::now() + duration; + while Instant::now() < deadline { + if shutdown.load(Ordering::SeqCst) { + return; + } + thread::sleep(Duration::from_millis(250)); + } +} + +fn run_plugin_smoke( + mountpoint: &Path, + candidate_policy_path: &Path, + timeout: Duration, +) -> Result<()> { + let mut command = Command::new("php"); + command + .current_dir(mountpoint) + .env("WPCOW_PLUGIN_MODE", "auto") + .env("WPCOW_PLUGIN_POLICY_FILE", candidate_policy_path) + .env("WPCOW_SPLASH", "0") + .env("WPCOW_PROXY_FRONTEND", "0") + .env("WPCOW_ACTIVE_WARM_WAIT", "0") + .env("WPCOW_PLUGIN_ADMISSION_SMOKE", "1") + .arg("-d") + .arg(format!( + "max_execution_time={}", + timeout.as_secs().saturating_add(2) + )) + .arg("-d") + .arg(format!( + "default_socket_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15).min(timeout.as_secs().max(1)) + )) + .arg("-d") + .arg(format!( + "mysqlnd.net_read_timeout={}", + 
env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15).min(timeout.as_secs().max(1)) + )); + apply_php_safety_ini_args(&mut command); + command + .arg("-r") + .arg(plugin_smoke_php()) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let output = run_command_with_timeout(command, timeout) + .with_context(|| format!("plugin smoke timed out after {}s", timeout.as_secs()))?; + if !output.status.success() { + return Err(anyhow!( + "plugin smoke exited with status {}{}", + output.status, + output_tail(&output) + )); + } + if !String::from_utf8_lossy(&output.stdout).contains("WPCOW_PLUGIN_SMOKE_OK") { + return Err(anyhow!( + "plugin smoke did not finish cleanly{}", + output_tail(&output) + )); + } + Ok(()) +} + +fn plugin_smoke_php() -> &'static str { + r#" +$_SERVER['HTTP_HOST'] = '127.0.0.1'; +$_SERVER['REQUEST_METHOD'] = 'GET'; +$_SERVER['REQUEST_URI'] = '/?__wp_cow_bypass_splash=1&__wp_cow_plugin_smoke=1'; +$_SERVER['SERVER_PROTOCOL'] = 'HTTP/1.1'; +$_SERVER['DOCUMENT_ROOT'] = getcwd(); +$_SERVER['SCRIPT_NAME'] = '/index.php'; +$_SERVER['SCRIPT_FILENAME'] = getcwd() . '/index.php'; +$_GET['__wp_cow_bypass_splash'] = '1'; +ob_start(); +require getcwd() . 
'/index.php'; +$html = ob_get_clean(); +if ( + false !== stripos( $html, 'wp-cow DB/runtime error' ) || + false !== stripos( $html, 'wp-cow did not load the remote site' ) || + false !== stripos( $html, 'wp-admin/install.php' ) || + false !== stripos( $html, 'WordPress › Installation' ) +) { + fwrite( STDERR, $html ); + exit( 3 ); +} +echo "\nWPCOW_PLUGIN_SMOKE_OK\n"; +"# +} + +fn run_command_with_timeout(mut command: Command, timeout: Duration) -> Result { + let mut child = command.spawn().context("spawn plugin smoke PHP")?; + let started = Instant::now(); + loop { + if child.try_wait()?.is_some() { + return child + .wait_with_output() + .context("collect plugin smoke PHP output"); + } + if started.elapsed() >= timeout { + let _ = child.kill(); + let output = child + .wait_with_output() + .context("collect timed-out plugin smoke PHP output")?; + return Err(anyhow!("timed out{}", output_tail(&output))); + } + thread::sleep(Duration::from_millis(100)); + } +} + +fn output_tail(output: &Output) -> String { + let mut text = String::new(); + text.push_str(&String::from_utf8_lossy(&output.stdout)); + text.push_str(&String::from_utf8_lossy(&output.stderr)); + let text = text.trim(); + if text.is_empty() { + return String::new(); + } + let tail = text + .lines() + .rev() + .take(12) + .collect::>() + .into_iter() + .rev() + .collect::>() + .join("\n"); + format!(": {tail}") +} + +fn trim_reason(reason: &str) -> String { + let reason = reason.replace('\n', " "); + if reason.len() <= 500 { + reason + } else { + format!("{}...", reason.chars().take(500).collect::()) + } +} + +fn apply_php_safety_ini_args(command: &mut Command) { + for (name, value) in php_safety_ini_entries() { + command.arg("-d").arg(format!("{name}={value}")); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn caddy_listen_accepts_localhost_host_headers() { + let listen = caddy_listen("127.0.0.1:9481"); + assert_eq!(listen.site_addr, "http://:9481"); + assert_eq!(listen.bind.as_deref(), 
Some("127.0.0.1")); + + let listen = caddy_listen("0.0.0.0:8080"); + assert_eq!(listen.site_addr, "http://:8080"); + assert_eq!(listen.bind, None); + } + + #[test] + fn frankenphp_routes_wp_admin_directory_to_index() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile.contains("@wpAdminIndex path /wp-admin /wp-admin/")); + assert!(caddyfile.contains("rewrite * /wp-admin/index.php")); + } + + #[test] + fn frankenphp_routes_installer_paths_through_runtime_guard() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile + .contains("@wpCowInstaller path /wp-admin/install.php /wp-admin/setup-config.php")); + assert!(caddyfile.contains("rewrite * /.wp-cow-router.php?__wp_cow_installer_guard=1")); + assert!( + caddyfile.find("@wpCowInstaller").unwrap() < caddyfile.find("@phpFiles").unwrap(), + "installer guard must run before the generic PHP file handler" + ); + } + + #[test] + fn web_runtime_disables_common_plugin_side_effect_primitives() { + assert!(php_disabled_functions().contains("stream_socket_client")); + assert!(php_disabled_functions().contains("curl_exec")); + assert!(php_disabled_functions().contains("proc_open")); + assert!(php_disabled_functions().contains("mail")); + + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile.contains("php_ini disable_functions")); + assert!(caddyfile.contains("stream_socket_client")); + assert!(caddyfile.contains("php_ini allow_url_include 0")); + } + + #[test] + fn web_runtime_defaults_to_no_opcache_timestamp_revalidation() { + let temp 
= tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile.contains("php_ini opcache.validate_timestamps 0")); + } + + #[test] + fn command_exists_requires_an_executable_file() { + let temp = tempfile::tempdir().unwrap(); + let fake = temp.path().join("frankenphp"); + fs::write(&fake, b"#!/bin/sh\nexit 0\n").unwrap(); + + let mut permissions = fs::metadata(&fake).unwrap().permissions(); + permissions.set_mode(0o644); + fs::set_permissions(&fake, permissions).unwrap(); + assert!( + !command_exists(fake.to_str().unwrap()), + "a non-executable FrankenPHP file must not suppress the PHP fallback" + ); + + let mut permissions = fs::metadata(&fake).unwrap().permissions(); + permissions.set_mode(0o755); + fs::set_permissions(&fake, permissions).unwrap(); + assert!(command_exists(fake.to_str().unwrap())); + } +} + +fn wait_for_mount(mountpoint: &Path, mount_thread: &JoinHandle>) -> Result<()> { + for _ in 0..100 { + if mountpoint.join("wp-config.php").exists() { + return Ok(()); + } + if mount_thread.is_finished() { + return Err(anyhow!( + "FUSE mount exited before generated WordPress files became visible at {}", + mountpoint.display() + )); + } + thread::sleep(Duration::from_millis(200)); + } + Err(anyhow!( + "timed out waiting for FUSE mount at {}", + mountpoint.display() + )) +} + +fn control_addr_from_url(url: &str) -> Result { + let parsed = url::Url::parse(url)?; + let host = parsed + .host_str() + .ok_or_else(|| anyhow!("control URL missing host"))?; + let port = parsed + .port() + .ok_or_else(|| anyhow!("control URL missing port"))?; + Ok(format!("{}:{}", host, port)) +} + +fn install_signal_handler(shutdown: Arc) -> Result<()> { + ctrlc::set_handler(move || { + shutdown.store(true, Ordering::SeqCst); + }) + .context("install Ctrl-C handler") +} + +fn unmount(mountpoint: &Path) -> Result<()> { + let status = 
Command::new("fusermount3") + .arg("-u") + .arg(mountpoint) + .status() + .or_else(|_| { + Command::new("fusermount") + .arg("-u") + .arg(mountpoint) + .status() + }) + .context("run fusermount")?; + if !status.success() { + return Err(anyhow!("fusermount failed with status {}", status)); + } + Ok(()) +} diff --git a/experiments/remote-wp-cow/src/runtime_cache.rs b/experiments/remote-wp-cow/src/runtime_cache.rs new file mode 100644 index 00000000..68040df6 --- /dev/null +++ b/experiments/remote-wp-cow/src/runtime_cache.rs @@ -0,0 +1,350 @@ +use anyhow::{Context, Result}; +use std::collections::BTreeSet; +use std::path::{Path, PathBuf}; + +use crate::config::{ClonePaths, Manifest}; +use crate::overlay::OverlayStore; +use crate::remote::{RemoteClient, RuntimeCodePackLimits, RuntimeCodePackSummary}; + +const ROOT_RUNTIME_FILES: &[&str] = &[ + "index.php", + "wp-activate.php", + "wp-blog-header.php", + "wp-comments-post.php", + "wp-cron.php", + "wp-links-opml.php", + "wp-load.php", + "wp-login.php", + "wp-mail.php", + "wp-settings.php", + "wp-signup.php", + "wp-trackback.php", + "xmlrpc.php", +]; + +pub fn warm_runtime_code_cache( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, +) -> Result { + warm_runtime_code_cache_inner(remote, manifest, paths, runtime_code_pack_include_admin()) +} + +pub fn warm_runtime_code_cache_with_admin( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, +) -> Result { + warm_runtime_code_cache_inner(remote, manifest, paths, true) +} + +fn warm_runtime_code_cache_inner( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + include_admin: bool, +) -> Result { + if !runtime_code_pack_enabled() { + return Ok(RuntimeCodePackSummary::default()); + } + + let roots = runtime_code_pack_roots_with_admin(manifest, include_admin); + if roots.is_empty() { + return Ok(RuntimeCodePackSummary::default()); + } + + let overlay = OverlayStore::new(paths); + let limits = RuntimeCodePackLimits { + 
max_file_bytes: runtime_code_pack_max_file_bytes().min(manifest.cache_max_file_bytes), + max_total_bytes: runtime_code_pack_max_bytes(), + max_files: runtime_code_pack_max_files(), + }; + + let summary = remote + .runtime_code_pack(&roots, limits, |file| { + overlay + .put_cached_file_bytes_without_progress(&file.rel, &file.entry, &file.bytes) + .with_context(|| { + format!( + "cache remote runtime code file {}", + OverlayStore::rel_string(&file.rel) + ) + })?; + let _ = overlay.note_cache_fetch( + &file.rel, + "runtime-code-pack", + file.entry.size, + file.entry.size, + ); + let _ = + overlay.note_cache_file_finished(&file.rel, "runtime-code-pack", file.entry.size); + Ok(()) + }) + .context("cache remote runtime code pack")?; + let _ = overlay.note_cache_fetch( + Path::new(".wp-cow-runtime-code-pack"), + "runtime-code-pack-done", + summary.bytes, + summary.bytes, + ); + Ok(summary) +} + +pub fn mark_runtime_code_cache_starting(paths: &ClonePaths) { + let overlay = OverlayStore::new(paths); + let _ = overlay.note_cache_fetch( + Path::new(".wp-cow-runtime-code-pack"), + "runtime-code-pack-starting", + 0, + 0, + ); +} + +pub fn mark_runtime_code_cache_failed(paths: &ClonePaths) { + let overlay = OverlayStore::new(paths); + let _ = overlay.note_cache_fetch( + Path::new(".wp-cow-runtime-code-pack"), + "runtime-code-pack-error", + 0, + 0, + ); +} + +#[cfg(test)] +pub fn runtime_code_pack_roots(manifest: &Manifest) -> Vec { + runtime_code_pack_roots_with_admin(manifest, runtime_code_pack_include_admin()) +} + +fn runtime_code_pack_roots_with_admin(manifest: &Manifest, include_admin: bool) -> Vec { + let mut roots = BTreeSet::new(); + for file in ROOT_RUNTIME_FILES { + roots.insert(PathBuf::from(file)); + } + + roots.insert(PathBuf::from("wp-includes")); + if include_admin { + roots.insert(PathBuf::from("wp-admin")); + } + roots.insert(PathBuf::from("wp-content/mu-plugins")); + roots.insert(PathBuf::from("wp-content/languages")); + + for theme in 
[&manifest.probe.template, &manifest.probe.stylesheet] { + if let Some(root) = theme_runtime_root(theme) { + roots.insert(root); + } + } + + if plugins_enabled_for_runtime() { + for plugin in manifest + .probe + .active_plugins + .iter() + .chain(manifest.probe.active_sitewide_plugins.iter()) + { + if let Some(root) = plugin_runtime_root(plugin) { + roots.insert(root); + } + } + } + + roots + .into_iter() + .filter(|root| !is_upload_path(root) && root != Path::new("wp-config.php")) + .collect() +} + +fn theme_runtime_root(theme: &str) -> Option { + let theme = clean_segment(theme)?; + Some(PathBuf::from("wp-content/themes").join(theme)) +} + +fn plugin_runtime_root(plugin: &str) -> Option { + let clean = clean_rel(plugin)?; + if clean.as_os_str().is_empty() || is_upload_path(&clean) { + return None; + } + + let mut components = clean.components(); + let first = components.next()?; + if components.next().is_some() { + Some(PathBuf::from("wp-content/plugins").join(first.as_os_str())) + } else { + Some(PathBuf::from("wp-content/plugins").join(clean)) + } +} + +fn clean_segment(value: &str) -> Option { + if value.is_empty() + || value.contains('/') + || value.contains('\\') + || value == "." + || value == ".." 
+ { + return None; + } + Some(value.to_string()) +} + +fn clean_rel(value: &str) -> Option { + OverlayStore::clean_rel(value).ok() +} + +fn is_upload_path(path: &Path) -> bool { + path.starts_with(Path::new("wp-content/uploads")) +} + +pub fn runtime_code_pack_enabled() -> bool { + env_bool("WPCOW_RUNTIME_CODE_PACK", true) +} + +fn runtime_code_pack_include_admin() -> bool { + env_bool("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", false) +} + +fn plugins_enabled_for_runtime() -> bool { + let mode = std::env::var("WPCOW_PLUGIN_MODE") + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + if !mode.is_empty() { + return matches!( + mode.as_str(), + "full" | "on" | "enabled" | "1" | "true" | "yes" + ); + } + + env_bool("WPCOW_ENABLE_PLUGINS", false) +} + +fn runtime_code_pack_max_bytes() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_MAX_MB", 256).saturating_mul(1024 * 1024) +} + +fn runtime_code_pack_max_file_bytes() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB", 8).saturating_mul(1024 * 1024) +} + +fn runtime_code_pack_max_files() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_MAX_FILES", 20_000) +} + +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + +fn env_bool(name: &str, default: bool) -> bool { + std::env::var(name) + .ok() + .map(|raw| match raw.to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => true, + "0" | "false" | "no" | "off" => false, + _ => default, + }) + .unwrap_or(default) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::Probe; + use std::sync::{Mutex, OnceLock}; + + static ENV_LOCK: OnceLock> = OnceLock::new(); + + #[test] + fn runtime_code_roots_are_bounded_to_core_theme_and_active_plugins() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_plugins = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + let old_mode = std::env::var_os("WPCOW_PLUGIN_MODE"); + let old_admin = 
std::env::var_os("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN"); + std::env::set_var("WPCOW_PLUGIN_MODE", "full"); + std::env::set_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", "1"); + + let manifest = Manifest::new( + "example".to_string(), + "example".to_string(), + "/remote".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + template: "parent".to_string(), + stylesheet: "child".to_string(), + active_plugins: vec![ + "woocommerce/woocommerce.php".to_string(), + "hello.php".to_string(), + "../escape/escape.php".to_string(), + ], + active_sitewide_plugins: vec!["network/network.php".to_string()], + ..Probe::default() + }, + ); + + let roots = runtime_code_pack_roots(&manifest); + assert!(roots.contains(&PathBuf::from("wp-includes"))); + assert!(roots.contains(&PathBuf::from("wp-admin"))); + assert!(roots.contains(&PathBuf::from("wp-content/themes/parent"))); + assert!(roots.contains(&PathBuf::from("wp-content/themes/child"))); + assert!(roots.contains(&PathBuf::from("wp-content/plugins/woocommerce"))); + assert!(roots.contains(&PathBuf::from("wp-content/plugins/hello.php"))); + assert!(roots.contains(&PathBuf::from("wp-content/plugins/network"))); + assert!(!roots + .iter() + .any(|root| root.starts_with("wp-content/uploads"))); + assert!(!roots + .iter() + .any(|root| root.to_string_lossy().contains(".."))); + assert!(!roots.contains(&PathBuf::from("wp-config.php"))); + + match old_plugins { + Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), + None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), + } + match old_mode { + Some(value) => std::env::set_var("WPCOW_PLUGIN_MODE", value), + None => std::env::remove_var("WPCOW_PLUGIN_MODE"), + } + match old_admin { + Some(value) => std::env::set_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", value), + None => std::env::remove_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN"), + } + } + + #[test] + fn 
runtime_code_roots_respect_disabled_plugins() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_plugins = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + let old_mode = std::env::var_os("WPCOW_PLUGIN_MODE"); + std::env::remove_var("WPCOW_PLUGIN_MODE"); + std::env::set_var("WPCOW_ENABLE_PLUGINS", "0"); + + let manifest = Manifest::new( + "example".to_string(), + "example".to_string(), + "/remote".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + active_plugins: vec!["woocommerce/woocommerce.php".to_string()], + ..Probe::default() + }, + ); + + let roots = runtime_code_pack_roots(&manifest); + assert!(!roots.contains(&PathBuf::from("wp-content/plugins/woocommerce"))); + assert!(roots.contains(&PathBuf::from("wp-content/mu-plugins"))); + + match old_plugins { + Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), + None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), + } + match old_mode { + Some(value) => std::env::set_var("WPCOW_PLUGIN_MODE", value), + None => std::env::remove_var("WPCOW_PLUGIN_MODE"), + } + } +} diff --git a/experiments/remote-wp-cow/src/sql.rs b/experiments/remote-wp-cow/src/sql.rs new file mode 100644 index 00000000..f7d6a3fd --- /dev/null +++ b/experiments/remote-wp-cow/src/sql.rs @@ -0,0 +1,273 @@ +use std::collections::BTreeSet; + +pub fn is_write_sql(sql: &str) -> bool { + matches!( + first_keyword(sql).as_deref(), + Some("INSERT") + | Some("UPDATE") + | Some("DELETE") + | Some("REPLACE") + | Some("ALTER") + | Some("CREATE") + | Some("DROP") + | Some("TRUNCATE") + | Some("RENAME") + | Some("LOAD") + | Some("LOCK") + | Some("UNLOCK") + | Some("GRANT") + | Some("REVOKE") + | Some("OPTIMIZE") + | Some("ANALYZE") + | Some("REPAIR") + ) +} + +pub fn is_safe_read_sql(sql: &str) -> bool { + match first_keyword(sql).as_deref() { + Some("SELECT") => !select_has_remote_side_effect_clause(sql), + Some("SHOW") | Some("DESCRIBE") 
| Some("DESC") | Some("EXPLAIN") => true, + _ => false, + } +} + +#[allow(dead_code)] +pub fn extract_tables(sql: &str) -> Vec { + let mut tables = BTreeSet::new(); + let tokens = tokenize(sql); + let table_markers = ["FROM", "JOIN", "UPDATE", "INTO", "TABLE"]; + let mut i = 0; + while i < tokens.len() { + if table_markers.contains(&tokens[i].to_ascii_uppercase().as_str()) { + if let Some(next) = tokens.get(i + 1) { + if !is_keyword(next) { + tables.insert(next.trim_matches('`').to_string()); + } + } + } + i += 1; + } + tables.into_iter().collect() +} + +pub fn expand_wordpress_groups(table_prefix: &str, tables: &[String]) -> Vec { + let content_group = [ + "posts", + "postmeta", + "terms", + "term_taxonomy", + "term_relationships", + ]; + let mut out: BTreeSet = tables.iter().cloned().collect(); + + let touches_content_group = tables.iter().any(|table| { + content_group + .iter() + .any(|suffix| table == &format!("{}{}", table_prefix, suffix)) + }); + + if touches_content_group { + for suffix in content_group { + out.insert(format!("{}{}", table_prefix, suffix)); + } + } + + out.into_iter().collect() +} + +fn first_keyword(sql: &str) -> Option { + let stripped = strip_leading_comments(sql); + stripped + .split(|ch: char| ch.is_whitespace() || ch == '(') + .find(|part| !part.is_empty()) + .map(|part| part.trim_matches('`').to_ascii_uppercase()) +} + +fn select_has_remote_side_effect_clause(sql: &str) -> bool { + let tokens = tokenize(strip_leading_comments(sql)) + .into_iter() + .map(|token| token.to_ascii_uppercase()) + .collect::>(); + + tokens.iter().any(|token| token == "INTO") + || tokens.windows(2).any(|window| window == ["FOR", "UPDATE"]) + || tokens + .windows(4) + .any(|window| window == ["LOCK", "IN", "SHARE", "MODE"]) +} + +fn strip_leading_comments(mut sql: &str) -> &str { + loop { + let trimmed = sql.trim_start(); + if let Some(rest) = trimmed.strip_prefix("--") { + if let Some(pos) = rest.find('\n') { + sql = &rest[pos + 1..]; + continue; + } + return 
""; + } + if let Some(rest) = trimmed.strip_prefix('#') { + if let Some(pos) = rest.find('\n') { + sql = &rest[pos + 1..]; + continue; + } + return ""; + } + if let Some(rest) = trimmed.strip_prefix("/*") { + if let Some(pos) = rest.find("*/") { + sql = &rest[pos + 2..]; + continue; + } + return ""; + } + return trimmed; + } +} + +#[allow(dead_code)] +fn tokenize(sql: &str) -> Vec { + let mut tokens = Vec::new(); + let mut current = String::new(); + let mut quote = None; + let mut chars = sql.chars().peekable(); + + while let Some(ch) = chars.next() { + if let Some(q) = quote { + if ch == '\\' { + let _ = chars.next(); + continue; + } + if ch == q { + if q == '\'' && chars.peek() == Some(&'\'') { + let _ = chars.next(); + continue; + } + quote = None; + } + continue; + } + + if ch == '\'' || ch == '"' { + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + quote = Some(ch); + continue; + } + if ch == '-' && chars.peek() == Some(&'-') { + let _ = chars.next(); + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + for skipped in chars.by_ref() { + if skipped == '\n' { + break; + } + } + continue; + } + if ch == '#' { + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + for skipped in chars.by_ref() { + if skipped == '\n' { + break; + } + } + continue; + } + if ch == '/' && chars.peek() == Some(&'*') { + let _ = chars.next(); + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + let mut prev = '\0'; + for skipped in chars.by_ref() { + if prev == '*' && skipped == '/' { + break; + } + prev = skipped; + } + continue; + } + + if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' || ch == '`' { + current.push(ch); + } else if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + } + if !current.is_empty() { + 
tokens.push(current.trim_matches('`').to_string()); + } + tokens +} + +#[allow(dead_code)] +fn is_keyword(token: &str) -> bool { + matches!( + token.to_ascii_uppercase().as_str(), + "SELECT" | "WHERE" | "SET" | "ON" | "USING" | "VALUES" | "INNER" | "LEFT" | "RIGHT" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn classifies_sql() { + assert!(is_safe_read_sql(" /* ok */ SELECT * FROM wp_posts")); + assert!(is_write_sql( + "UPDATE wp_posts SET post_title = 'x' WHERE ID = 1" + )); + assert!(is_write_sql("LOAD DATA INFILE 'x' INTO TABLE wp_posts")); + assert!(!is_safe_read_sql( + "SELECT * FROM wp_posts INTO OUTFILE '/tmp/wp-cow-leak'" + )); + assert!(!is_safe_read_sql( + "SELECT post_title FROM wp_posts WHERE ID = 1 FOR UPDATE" + )); + assert!(!is_safe_read_sql( + "SELECT post_title FROM wp_posts WHERE ID = 1 LOCK IN SHARE MODE" + )); + assert!(is_safe_read_sql( + "SELECT * FROM wp_posts WHERE post_title = 'FOR UPDATE'" + )); + assert!(is_safe_read_sql( + "SELECT * FROM wp_posts /* FOR UPDATE */ WHERE ID = 1" + )); + } + + #[test] + fn expands_wordpress_content_group() { + let tables = vec!["wp_posts".to_string()]; + let expanded = expand_wordpress_groups("wp_", &tables); + assert!(expanded.contains(&"wp_postmeta".to_string())); + assert!(expanded.contains(&"wp_term_relationships".to_string())); + } + + #[test] + fn extract_tables_preserves_wordpress_table_case_for_proxy_cow() { + assert_eq!( + extract_tables("UPDATE wp_posts SET post_title = 'x' WHERE ID = 1"), + vec!["wp_posts".to_string()] + ); + assert_eq!( + extract_tables("INSERT INTO `wp_postmeta` (`post_id`) VALUES (1)"), + vec!["wp_postmeta".to_string()] + ); + assert_eq!( + extract_tables( + "SELECT * FROM wp_posts JOIN wp_postmeta ON wp_postmeta.post_id = wp_posts.ID" + ), + vec!["wp_postmeta".to_string(), "wp_posts".to_string()] + ); + } +} diff --git a/scripts/codex-until-pass.sh b/scripts/codex-until-pass.sh new file mode 100755 index 00000000..dd090dc1 --- /dev/null +++ 
b/scripts/codex-until-pass.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +set -uo pipefail + +usage() { + cat <<'USAGE' +Usage: + scripts/codex-until-pass.sh --task-file TASK.md [--work-dir DIR] + scripts/codex-until-pass.sh --task "Implement ..." [--work-dir DIR] + +Runs Codex in an implement/verify loop until an independent verifier ends with +exactly "VERDICT: PASS". By default MAX_ITERATIONS=0, which means no iteration +limit. Press Ctrl-C to stop. + +Environment: + CODEX_CMD Codex command. Default: codex + MAX_ITERATIONS 0 means unlimited. Default: 0 + SLEEP_SECONDS Delay after structural failures. Default: 5 + IMPLEMENTER_TAIL_BYTES Bytes of implementer output sent to verifier. Default: 24000 + FEEDBACK_TAIL_BYTES Bytes of verifier feedback kept for next iteration. Default: 32000 + CODEX_UNTIL_PASS_BYPASS_SANDBOX 1 passes --dangerously-bypass-approvals-and-sandbox. Default: 1 +USAGE +} + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +TASK_TEXT="" +TASK_FILE="" +WORK_DIR="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --task) + [[ $# -ge 2 ]] || { echo "missing value for --task" >&2; exit 64; } + TASK_TEXT="$2" + shift 2 + ;; + --task-file) + [[ $# -ge 2 ]] || { echo "missing value for --task-file" >&2; exit 64; } + TASK_FILE="$2" + shift 2 + ;; + --work-dir) + [[ $# -ge 2 ]] || { echo "missing value for --work-dir" >&2; exit 64; } + WORK_DIR="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown argument: $1" >&2 + usage >&2 + exit 64 + ;; + esac +done + +if [[ -n "$TASK_TEXT" && -n "$TASK_FILE" ]]; then + echo "pass either --task or --task-file, not both" >&2 + exit 64 +fi +if [[ -z "$TASK_TEXT" && -z "$TASK_FILE" ]]; then + echo "missing --task or --task-file" >&2 + usage >&2 + exit 64 +fi + +CODEX_CMD="${CODEX_CMD:-codex}" +MAX_ITERATIONS="${MAX_ITERATIONS:-0}" +SLEEP_SECONDS="${SLEEP_SECONDS:-5}" +IMPLEMENTER_TAIL_BYTES="${IMPLEMENTER_TAIL_BYTES:-24000}" 
+FEEDBACK_TAIL_BYTES="${FEEDBACK_TAIL_BYTES:-32000}" +CODEX_UNTIL_PASS_BYPASS_SANDBOX="${CODEX_UNTIL_PASS_BYPASS_SANDBOX:-1}" + +if [[ -z "$WORK_DIR" ]]; then + WORK_DIR=".codex-until-pass/$(date +%Y%m%d-%H%M%S)" +fi +mkdir -p "$WORK_DIR" + +RAW_TASK="$WORK_DIR/task.raw.md" +TASK="$WORK_DIR/task.md" +FEEDBACK="$WORK_DIR/feedback.md" +LOG="$WORK_DIR/log.md" +PASS_FILE="$WORK_DIR/passed-on-iteration.txt" + +if [[ -n "$TASK_FILE" ]]; then + cp "$TASK_FILE" "$RAW_TASK" +else + printf '%s\n' "$TASK_TEXT" > "$RAW_TASK" +fi + +# Prevent nested Codex runs from interpreting task text as another request to +# launch this or any adversarial loop. The original text remains in task.raw.md. +sed \ + -e 's/\$adversarial-loop/[adversarial-loop trigger disabled inside codex-until-pass]/g' \ + -e 's#/adversarial-loop#[adversarial-loop trigger disabled inside codex-until-pass]#g' \ + "$RAW_TASK" > "$TASK" + +: > "$FEEDBACK" +: > "$LOG" + +codex_args=() +if [[ "$CODEX_UNTIL_PASS_BYPASS_SANDBOX" == "1" ]]; then + codex_args+=(--dangerously-bypass-approvals-and-sandbox) +fi + +iteration=0 +while true; do + iteration=$((iteration + 1)) + if [[ "$MAX_ITERATIONS" != "0" && "$iteration" -gt "$MAX_ITERATIONS" ]]; then + echo "Did not converge after $MAX_ITERATIONS iterations" | tee -a "$LOG" + exit 1 + fi + + echo "=== Iteration $iteration ===" | tee -a "$LOG" + + impl_prompt="$WORK_DIR/iter-$iteration-impl-prompt.md" + impl_out="$WORK_DIR/iter-$iteration-impl-output.md" + impl_status="$WORK_DIR/iter-$iteration-impl-status" + verify_prompt="$WORK_DIR/iter-$iteration-verify-prompt.md" + verify_out="$WORK_DIR/iter-$iteration-verify-output.md" + verify_status="$WORK_DIR/iter-$iteration-verify-status" + + { + echo "# Task" + cat "$TASK" + echo + echo "# Harness Rules" + echo "- You are already inside scripts/codex-until-pass.sh." + echo "- Do not invoke adversarial-loop, codex-until-pass, or any recursive Codex restart loop." + echo "- Make real edits in this repository. 
Do not stop at a proposal." + echo "- If full completion is impossible, implement the next concrete blocker and explain the remaining blocker precisely." + echo + echo "# Prior Verifier Feedback" + if [[ -s "$FEEDBACK" ]]; then + cat "$FEEDBACK" + else + echo "(none)" + fi + echo + echo "# Final Output Contract" + echo "End with a compact summary of files changed and checks run." + } > "$impl_prompt" + + set +e + "$CODEX_CMD" exec "${codex_args[@]}" < "$impl_prompt" > "$impl_out" 2>&1 + code=$? + set -e + printf '%s\n' "$code" > "$impl_status" + if [[ "$code" -ne 0 ]]; then + echo "Implementer exited $code on iteration $iteration; restarting after ${SLEEP_SECONDS}s" | tee -a "$LOG" + sleep "$SLEEP_SECONDS" + continue + fi + + git_status_file="$WORK_DIR/iter-$iteration-git-status.txt" + git_diff_stat_file="$WORK_DIR/iter-$iteration-git-diff-stat.txt" + git status --short > "$git_status_file" 2>&1 || true + git diff --stat > "$git_diff_stat_file" 2>&1 || true + + { + echo "# Task" + cat "$TASK" + echo + echo "# Verifier Instructions" + echo "Independently inspect the actual working tree. Do not trust the implementer output." + echo "Run whatever checks are needed. Do not make edits." + echo "PASS only when the task is fully complete by code, tests, and docs where relevant." + echo "If not complete, include a concise '## Issues' section with concrete actionable blockers." + echo + echo "End with exactly one final line:" + echo "VERDICT: PASS" + echo "or" + echo "VERDICT: FAIL" + echo + echo "# Current Git Status" + cat "$git_status_file" + echo + echo "# Current Diff Stat" + cat "$git_diff_stat_file" + echo + echo "# Implementer Output Tail" + tail -c "$IMPLEMENTER_TAIL_BYTES" "$impl_out" + } > "$verify_prompt" + + set +e + "$CODEX_CMD" exec "${codex_args[@]}" < "$verify_prompt" > "$verify_out" 2>&1 + code=$? 
+ set -e + printf '%s\n' "$code" > "$verify_status" + if [[ "$code" -ne 0 ]]; then + echo "Verifier exited $code on iteration $iteration; restarting verifier/implementer after ${SLEEP_SECONDS}s" | tee -a "$LOG" + sleep "$SLEEP_SECONDS" + continue + fi + + if tail -n 20 "$verify_out" | grep -qx 'VERDICT: PASS'; then + echo "PASS on iteration $iteration" | tee -a "$LOG" + printf '%s\n' "$iteration" > "$PASS_FILE" + exit 0 + fi + + echo "FAIL on iteration $iteration; feeding capped verifier output back to implementer" | tee -a "$LOG" + { + echo + echo "## Iteration $iteration Verifier Feedback" + tail -c "$FEEDBACK_TAIL_BYTES" "$verify_out" + } >> "$FEEDBACK" +done