From a9ec0c9fed03a0ae4a971cad70c442ef4131067e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 1 May 2026 19:03:41 +0200 Subject: [PATCH 01/39] Add remote WordPress COW mount experiment --- experiments/remote-wp-cow/.gitignore | 1 + experiments/remote-wp-cow/Cargo.lock | 1109 +++++++++++++++++++++ experiments/remote-wp-cow/Cargo.toml | 23 + experiments/remote-wp-cow/README.md | 90 ++ experiments/remote-wp-cow/src/cli.rs | 226 +++++ experiments/remote-wp-cow/src/config.rs | 221 ++++ experiments/remote-wp-cow/src/control.rs | 116 +++ experiments/remote-wp-cow/src/db.rs | 297 ++++++ experiments/remote-wp-cow/src/fusefs.rs | 633 ++++++++++++ experiments/remote-wp-cow/src/generate.rs | 321 ++++++ experiments/remote-wp-cow/src/main.rs | 14 + experiments/remote-wp-cow/src/overlay.rs | 288 ++++++ experiments/remote-wp-cow/src/remote.rs | 334 +++++++ experiments/remote-wp-cow/src/run.rs | 159 +++ experiments/remote-wp-cow/src/sql.rs | 173 ++++ 15 files changed, 4005 insertions(+) create mode 100644 experiments/remote-wp-cow/.gitignore create mode 100644 experiments/remote-wp-cow/Cargo.lock create mode 100644 experiments/remote-wp-cow/Cargo.toml create mode 100644 experiments/remote-wp-cow/README.md create mode 100644 experiments/remote-wp-cow/src/cli.rs create mode 100644 experiments/remote-wp-cow/src/config.rs create mode 100644 experiments/remote-wp-cow/src/control.rs create mode 100644 experiments/remote-wp-cow/src/db.rs create mode 100644 experiments/remote-wp-cow/src/fusefs.rs create mode 100644 experiments/remote-wp-cow/src/generate.rs create mode 100644 experiments/remote-wp-cow/src/main.rs create mode 100644 experiments/remote-wp-cow/src/overlay.rs create mode 100644 experiments/remote-wp-cow/src/remote.rs create mode 100644 experiments/remote-wp-cow/src/run.rs create mode 100644 experiments/remote-wp-cow/src/sql.rs diff --git a/experiments/remote-wp-cow/.gitignore b/experiments/remote-wp-cow/.gitignore new file mode 100644 index 
00000000..b83d2226 --- /dev/null +++ b/experiments/remote-wp-cow/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/experiments/remote-wp-cow/Cargo.lock b/experiments/remote-wp-cow/Cargo.lock new file mode 100644 index 00000000..866aa0f7 --- /dev/null +++ b/experiments/remote-wp-cow/Cargo.lock @@ -0,0 +1,1109 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "ascii" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" +dependencies = [ + "objc2", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chunked_transfer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "ctrlc" +version = "3.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0b1fab2ae45819af2d0731d60f2afe17227ebb1a1538a236da84c93e9a60162" +dependencies = [ + "dispatch2", + "nix 0.31.2", + "windows-sys", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags", + "block2", + "libc", + "objc2", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fuser" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb29a3ae32279fe3e79a958fe01899f5fb23eadccee919cf88e145b54ed9367" +dependencies = [ + "libc", + "log", + "memchr", + "nix 0.29.0", + "page_size", + "smallvec", + "zerocopy", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" 
+dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + 
"icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.0", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", 
+ "libc", +] + +[[package]] +name = "nix" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + 
+[[package]] +name = "tiny_http" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" +dependencies = [ + "ascii", + "chunked_transfer", + "httpdate", + "log", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + 
+[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "wp-cow" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "ctrlc", + "fuser", + "hex", + "libc", + "serde", + "serde_json", + "sha2", + "tempfile", + "tiny_http", + "url", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = 
"0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/experiments/remote-wp-cow/Cargo.toml b/experiments/remote-wp-cow/Cargo.toml new file mode 100644 index 00000000..7c37b780 --- /dev/null +++ b/experiments/remote-wp-cow/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "wp-cow" +version = "0.1.0" +edition = "2021" +license = 
"MIT" + +[workspace] + +[dependencies] +anyhow = "1.0" +clap = { version = "4.5", features = ["derive"] } +ctrlc = "3.4" +fuser = "0.16" +hex = "0.4" +libc = "0.2" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10" +tiny_http = "0.12" +url = "2.5" + +[dev-dependencies] +tempfile = "3.13" diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md new file mode 100644 index 00000000..901c5d7f --- /dev/null +++ b/experiments/remote-wp-cow/README.md @@ -0,0 +1,90 @@ +# wp-cow + +`wp-cow` is a Linux-only prototype for a lazy local WordPress clone runtime. +It creates a local clone description instead of copying a whole site, mounts a +copy-on-write FUSE filesystem over SSH/PHP, and keeps database writes local by +materializing remote tables into a local MySQL database before writes run. + +## ForkPress exploration status + +This directory is an isolated experiment. It is not wired into the ForkPress +workspace, release artifact, runtime, or CI path. The goal is to explore whether +ForkPress should have a remote-site onboarding mode where a very large +production WordPress tree can be made locally usable without first copying every +file and every database row. + +This deliberately violates the current ForkPress product shape in a few ways: +it uses a long-running local helper, FUSE, local MySQL, and SSH/PHP calls to the +remote host. Those choices are useful for proving out the lazy-lower-layer +model, but they should not be read as a proposed final integration shape. 
+ +## Build + +```bash +cargo build +``` + +## Typical flow + +```bash +wp-cow clone \ + --ssh user@example.com \ + --path /home/user/public_html \ + --remote-url https://example.com \ + --local-url http://example.test + +wp-cow init-db example +wp-cow run example --http 127.0.0.1:8080 +``` + +The clone state is stored under `~/.wp-cow/clones/<name>/`: + +```text +manifest.json +upper/ +whiteouts.json +file-cache/ +db/ + schema.sql + state.json +generated/ +run/ +``` + +## What is implemented + +- SSH session reuse through OpenSSH control sockets. +- Remote WordPress probe through an ephemeral PHP script. +- Lazy remote file operations through PHP over SSH. +- Local COW filesystem through FUSE: + - upper layer shadows remote files, + - remote reads are fetched lazily, + - small remote files are cached separately from local mutations, + - deletions are recorded as whiteouts. +- Generated local `wp-config.php`, `wp-content/db.php`, and safety MU plugin. +- Schema import and full-table DB materialization through remote `mysqldump`. +- A local control HTTP server used by the DB drop-in: + - read queries can be served from the remote DB through daemon-mediated PHP, + - write-class SQL is never sent to the remote DB, + - writes materialize affected table groups before executing locally. + +## Requirements + +Local machine: + +- Linux with `/dev/fuse` access. +- `ssh`, `php`, `mysql`, and `mysqldump` on `PATH`. +- A local MySQL/MariaDB server reachable by the generated DB settings. + +Remote host: + +- SSH access. +- PHP CLI. +- `mysqldump`. +- WordPress files at the supplied `--path`. + +## Notes + +This is an MVP. The DB layer uses a WordPress `db.php` drop-in plus daemon +control endpoints; it does not yet implement a transparent MySQL protocol +proxy, row-level overlays, or true point-in-time snapshot support. 
diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs new file mode 100644 index 00000000..996e92bb --- /dev/null +++ b/experiments/remote-wp-cow/src/cli.rs @@ -0,0 +1,226 @@ +use anyhow::{anyhow, Context, Result}; +use clap::{Args, Parser, Subcommand}; +use std::fs; +use std::path::PathBuf; + +use crate::config::{ + clone_paths, default_state_dir, derive_name, ensure_clone_dirs, load_manifest, write_manifest, + Manifest, Probe, +}; +use crate::db; +use crate::generate; +use crate::remote::{probe_wordpress, RemoteClient}; +use crate::run::{self, RunOptions}; + +#[derive(Debug, Parser)] +#[command(name = "wp-cow")] +#[command(about = "Lazy local WordPress clone runtime over SSH")] +pub struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + #[command(name = "clone")] + Clone(CloneArgs), + #[command(name = "init-db")] + InitDb(NameArgs), + #[command(name = "materialize")] + Materialize(MaterializeArgs), + #[command(name = "mount")] + Mount(MountArgs), + #[command(name = "run")] + Run(RunArgs), + #[command(name = "probe")] + Probe(ProbeArgs), +} + +#[derive(Debug, Args)] +struct CloneArgs { + #[arg(long = "ssh")] + ssh: String, + #[arg(long = "path")] + path: String, + #[arg(long = "remote-url")] + remote_url: String, + #[arg(long = "local-url")] + local_url: String, + #[arg(long)] + name: Option, + #[arg(long)] + state_dir: Option, + #[arg(long)] + force: bool, + #[arg(long)] + no_probe: bool, + #[arg(long)] + skip_schema: bool, +} + +#[derive(Debug, Args)] +struct NameArgs { + name: String, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct MaterializeArgs { + name: String, + #[arg(long = "table", required = true)] + tables: Vec, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct MountArgs { + name: String, + #[arg(long)] + mountpoint: Option, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct RunArgs { + name: 
String, + #[arg(long)] + mountpoint: Option, + #[arg(long, default_value = "127.0.0.1:8080")] + http: String, + #[arg(long)] + no_php: bool, + #[arg(long)] + state_dir: Option, +} + +#[derive(Debug, Args)] +struct ProbeArgs { + #[arg(long = "ssh")] + ssh: String, + #[arg(long = "path")] + path: String, +} + +pub fn run() -> Result<()> { + let cli = Cli::parse(); + match cli.command { + Command::Clone(args) => clone_site(args), + Command::InitDb(args) => init_db(args), + Command::Materialize(args) => materialize(args), + Command::Mount(args) => mount(args), + Command::Run(args) => run_clone(args), + Command::Probe(args) => { + let probe = probe_wordpress(&args.ssh, &args.path)?; + println!("{}", serde_json::to_string_pretty(&probe)?); + Ok(()) + } + } +} + +fn clone_site(args: CloneArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let name = args + .name + .unwrap_or_else(|| derive_name(&args.remote_url, &args.local_url)); + let paths = clone_paths(&state_dir, &name); + + if paths.root.exists() { + if !args.force { + return Err(anyhow!( + "{} already exists; pass --force to replace generated clone metadata", + paths.root.display() + )); + } + fs::remove_dir_all(&paths.root)?; + } + + ensure_clone_dirs(&paths)?; + + let probe = if args.no_probe { + Probe { + abspath: args.path.clone(), + wp_content_dir: format!("{}/wp-content", args.path.trim_end_matches('/')), + uploads_dir: format!("{}/wp-content/uploads", args.path.trim_end_matches('/')), + table_prefix: "wp_".to_string(), + siteurl: args.remote_url.clone(), + home: args.remote_url.clone(), + ..Probe::default() + } + } else { + probe_wordpress(&args.ssh, &args.path)? 
+ }; + + let manifest = Manifest::new( + name, + args.ssh, + args.path, + args.remote_url, + args.local_url, + probe, + ); + + write_manifest(&paths.manifest, &manifest)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + db::write_state(&paths, &db::DbState::default())?; + + if !args.skip_schema && !args.no_probe { + let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + remote.ensure_master()?; + db::export_schema(&remote, &paths).context("export schema")?; + } + + println!( + "created clone '{}': {}", + manifest.name, + paths.root.display() + ); + Ok(()) +} + +fn init_db(args: NameArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let paths = clone_paths(&state_dir, &args.name); + let manifest = load_manifest(&paths.manifest)?; + db::init_local_db(&manifest, &paths)?; + println!("initialized local database '{}'", manifest.local_db.name); + Ok(()) +} + +fn materialize(args: MaterializeArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let paths = clone_paths(&state_dir, &args.name); + let manifest = load_manifest(&paths.manifest)?; + let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + remote.ensure_master()?; + let materialized = db::materialize_tables(&remote, &manifest, &paths, &args.tables)?; + println!("{}", serde_json::to_string_pretty(&materialized)?); + Ok(()) +} + +fn mount(args: MountArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let paths = clone_paths(&state_dir, &args.name); + let manifest = load_manifest(&paths.manifest)?; + let mountpoint = args + .mountpoint + .unwrap_or_else(|| PathBuf::from("/mnt/wp-cow").join(&manifest.name)); + run::mount_only(manifest, paths, &mountpoint) +} + +fn run_clone(args: RunArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let paths = clone_paths(&state_dir, 
&args.name); + let manifest = load_manifest(&paths.manifest)?; + let mountpoint = args + .mountpoint + .unwrap_or_else(|| PathBuf::from("/mnt/wp-cow").join(&manifest.name)); + let options = RunOptions { + mountpoint, + http_addr: args.http, + skip_php: args.no_php, + }; + run::run_site(manifest, paths, options) +} diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs new file mode 100644 index 00000000..8f25be30 --- /dev/null +++ b/experiments/remote-wp-cow/src/config.rs @@ -0,0 +1,221 @@ +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; +use std::fs::{self, File, OpenOptions}; +use std::io::{Read, Write}; +use std::os::unix::fs::OpenOptionsExt; +use std::path::{Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; +use url::Url; + +pub const MANIFEST_VERSION: u32 = 1; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Manifest { + pub version: u32, + pub name: String, + pub ssh: String, + pub remote_path: String, + pub remote_url: String, + pub local_url: String, + pub created_at_unix: u64, + pub probe: Probe, + pub local_db: LocalDb, + pub control_url: String, + pub cache_max_file_bytes: u64, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Probe { + pub abspath: String, + pub wp_content_dir: String, + pub uploads_dir: String, + pub table_prefix: String, + pub db_name: String, + pub db_host: String, + pub db_user: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub db_password: String, + pub siteurl: String, + pub home: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LocalDb { + pub name: String, + pub user: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub password: String, + pub host: String, + pub port: u16, +} + +#[derive(Debug, Clone)] +pub struct ClonePaths { + pub root: PathBuf, + pub manifest: PathBuf, + pub upper: PathBuf, + pub file_cache: PathBuf, + pub db: PathBuf, + 
pub generated: PathBuf, + pub run: PathBuf, + pub whiteouts: PathBuf, +} + +impl Manifest { + pub fn new( + name: String, + ssh: String, + remote_path: String, + remote_url: String, + local_url: String, + probe: Probe, + ) -> Self { + let safe_name = sanitize_name(&name); + Self { + version: MANIFEST_VERSION, + name: safe_name.clone(), + ssh, + remote_path, + remote_url, + local_url, + created_at_unix: SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(), + probe, + local_db: LocalDb { + name: format!("cow_{}", safe_name.replace('-', "_")), + user: format!("cow_{}", safe_name.replace('-', "_")), + password: String::new(), + host: "127.0.0.1".to_string(), + port: 33071, + }, + control_url: "http://127.0.0.1:39070".to_string(), + cache_max_file_bytes: 8 * 1024 * 1024, + } + } +} + +pub fn default_state_dir() -> Result { + if let Ok(home) = std::env::var("WPCOW_HOME") { + return Ok(PathBuf::from(home)); + } + let home = std::env::var("HOME").context("HOME is not set; pass --state-dir")?; + Ok(PathBuf::from(home).join(".wp-cow")) +} + +pub fn clone_paths(state_dir: &Path, name: &str) -> ClonePaths { + let root = state_dir.join("clones").join(name); + ClonePaths { + manifest: root.join("manifest.json"), + upper: root.join("upper"), + file_cache: root.join("file-cache"), + db: root.join("db"), + generated: root.join("generated"), + run: root.join("run"), + whiteouts: root.join("whiteouts.json"), + root, + } +} + +pub fn ensure_clone_dirs(paths: &ClonePaths) -> Result<()> { + fs::create_dir_all(&paths.upper)?; + fs::create_dir_all(&paths.file_cache)?; + fs::create_dir_all(&paths.db)?; + fs::create_dir_all(paths.db.join("local-mysql"))?; + fs::create_dir_all(&paths.generated)?; + fs::create_dir_all(&paths.run)?; + Ok(()) +} + +pub fn write_manifest(path: &Path, manifest: &Manifest) -> Result<()> { + let json = serde_json::to_vec_pretty(manifest)?; + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + 
.mode(0o600) + .open(path) + .with_context(|| format!("write {}", path.display()))?; + file.write_all(&json)?; + file.write_all(b"\n")?; + Ok(()) +} + +pub fn load_manifest(path: &Path) -> Result { + let mut json = String::new(); + File::open(path) + .with_context(|| format!("open {}", path.display()))? + .read_to_string(&mut json)?; + let manifest: Manifest = serde_json::from_str(&json)?; + if manifest.version != MANIFEST_VERSION { + return Err(anyhow!( + "unsupported manifest version {} in {}", + manifest.version, + path.display() + )); + } + Ok(manifest) +} + +pub fn derive_name(remote_url: &str, local_url: &str) -> String { + let from_url = |raw: &str| -> Option { + let parsed = Url::parse(raw).ok()?; + let host = parsed.host_str()?; + let host = host.strip_prefix("www.").unwrap_or(host); + let first = host.split('.').next().unwrap_or(host); + Some(sanitize_name(first)) + }; + + from_url(remote_url) + .or_else(|| from_url(local_url)) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "site".to_string()) +} + +pub fn sanitize_name(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + for ch in input.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if ch.is_ascii_whitespace() || ch == '-' || ch == '_' || ch == '.' 
{ + out.push('-'); + } + } + while out.contains("--") { + out = out.replace("--", "-"); + } + out.trim_matches('-').to_string() +} + +pub fn parse_host_port(host: &str, default_port: u16) -> (String, u16) { + if let Some((h, p)) = host.rsplit_once(':') { + if let Ok(port) = p.parse::() { + return (h.to_string(), port); + } + } + (host.to_string(), default_port) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn derives_name_from_remote_url() { + assert_eq!( + derive_name("https://www.example.com", "http://x.test"), + "example" + ); + assert_eq!( + derive_name("not a url", "http://local-site.test"), + "local-site" + ); + } + + #[test] + fn sanitizes_name() { + assert_eq!(sanitize_name("Example Site_1"), "example-site-1"); + assert_eq!(sanitize_name("...Cow!!!"), "cow"); + } +} diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs new file mode 100644 index 00000000..63a2be9e --- /dev/null +++ b/experiments/remote-wp-cow/src/control.rs @@ -0,0 +1,116 @@ +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tiny_http::{Header, Request, Response, Server, StatusCode}; + +use crate::config::{ClonePaths, Manifest}; +use crate::db; +use crate::remote::RemoteClient; + +#[derive(Debug, Deserialize)] +struct ControlRequest { + #[allow(dead_code)] + clone: Option, + tables: Option>, + sql: Option, +} + +#[derive(Debug, Serialize)] +struct BasicResponse<'a> { + ok: bool, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option<&'a str>, +} + +pub fn serve_control( + addr: &str, + manifest: Manifest, + paths: ClonePaths, + remote: RemoteClient, + shutdown: Arc, +) -> Result<()> { + let server = + Server::http(addr).map_err(|err| anyhow!("bind control server {}: {}", addr, err))?; + while !shutdown.load(Ordering::SeqCst) { + match 
server.recv_timeout(Duration::from_millis(250)) { + Ok(Some(request)) => { + if let Err(err) = handle_request(request, &manifest, &paths, &remote) { + eprintln!("wp-cow control error: {err:#}"); + } + } + Ok(None) => {} + Err(err) => return Err(anyhow!("control server receive failed: {}", err)), + } + } + Ok(()) +} + +fn handle_request( + mut request: Request, + manifest: &Manifest, + paths: &ClonePaths, + remote: &RemoteClient, +) -> Result<()> { + if request.method().as_str() != "POST" { + return send_json( + request, + StatusCode(405), + &BasicResponse { + ok: false, + error: Some("method not allowed"), + }, + ); + } + + let mut body = String::new(); + request.as_reader().read_to_string(&mut body)?; + let input: ControlRequest = serde_json::from_str(&body).context("decode control JSON")?; + + let response = match request.url() { + "/materialize" => { + let tables = input.tables.unwrap_or_default(); + let materialized = db::materialize_tables(remote, manifest, paths, &tables)?; + json!({ "ok": true, "backend": "local", "materialized": materialized }) + } + "/route" => { + let tables = input.tables.unwrap_or_default(); + let decision = db::route_for_tables(remote, manifest, paths, &tables)?; + json!({ "ok": true, "backend": decision.backend, "materialized": decision.materialized }) + } + "/query" => { + let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?; + let result = db::remote_readonly_query(remote, &sql)?; + json!({ + "ok": result.ok, + "error": result.error, + "rows": result.rows, + "fields": result.fields, + "affected": result.affected + }) + } + _ => json!({ "ok": false, "error": "not found" }), + }; + + let status = if response.get("ok").and_then(|v| v.as_bool()) == Some(false) { + StatusCode(404) + } else { + StatusCode(200) + }; + send_json(request, status, &response) +} + +fn send_json(request: Request, status: StatusCode, value: &T) -> Result<()> { + let body = serde_json::to_vec(value)?; + let header = Header::from_bytes("Content-Type", 
"application/json") + .map_err(|_| anyhow!("invalid content-type header"))?; + request + .respond( + Response::from_data(body) + .with_status_code(status) + .with_header(header), + ) + .map_err(|err| anyhow!("send control response: {}", err)) +} diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs new file mode 100644 index 00000000..ffbed1f2 --- /dev/null +++ b/experiments/remote-wp-cow/src/db.rs @@ -0,0 +1,297 @@ +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; +use std::fs::{self, File}; +use std::io::{self, Read}; +use std::path::PathBuf; +use std::process::{Command, Stdio}; + +use crate::config::{parse_host_port, ClonePaths, Manifest}; +use crate::remote::{shell_quote, RemoteClient, RemoteQueryResult}; +use crate::sql; + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct DbState { + pub materialized_tables: BTreeSet, +} + +pub fn state_path(paths: &ClonePaths) -> PathBuf { + paths.db.join("state.json") +} + +pub fn load_state(paths: &ClonePaths) -> Result { + let path = state_path(paths); + if !path.exists() { + return Ok(DbState::default()); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + Ok(serde_json::from_str(&json)?) 
+} + +pub fn write_state(paths: &ClonePaths, state: &DbState) -> Result<()> { + fs::create_dir_all(&paths.db)?; + let json = serde_json::to_vec_pretty(state)?; + fs::write(state_path(paths), [json, b"\n".to_vec()].concat())?; + Ok(()) +} + +pub fn export_schema(remote: &RemoteClient, paths: &ClonePaths) -> Result<()> { + let probe = &remote.manifest().probe; + ensure_probe_has_db(probe)?; + fs::create_dir_all(&paths.db)?; + let command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --no-data --skip-lock-tables {}", + shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&probe.db_name) + ); + let schema = remote + .exec_capture(&command, None) + .context("export remote schema with mysqldump")?; + fs::write(paths.db.join("schema.sql"), schema)?; + Ok(()) +} + +pub fn init_local_db(manifest: &Manifest, paths: &ClonePaths) -> Result<()> { + let schema = paths.db.join("schema.sql"); + if !schema.exists() { + return Err(anyhow!( + "{} does not exist; run clone without --skip-schema or materialize a table first", + schema.display() + )); + } + + let create_sql = format!( + "CREATE DATABASE IF NOT EXISTS `{}` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;", + manifest.local_db.name.replace('`', "``") + ); + run_mysql_exec(manifest, &create_sql)?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut child = mysql + .spawn() + .context("start local mysql for schema import")?; + let mut stdin = child.stdin.take().expect("mysql stdin piped"); + let mut schema_file = File::open(&schema)?; + io::copy(&mut schema_file, &mut stdin)?; + drop(stdin); + let status = child.wait()?; + if !status.success() { + return Err(anyhow!( + "local mysql schema import failed with status {}", + status + )); + } + Ok(()) +} + +pub fn materialize_tables( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + tables: &[String], +) -> 
Result> { + let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables); + let mut state = load_state(paths)?; + let mut changed = Vec::new(); + + for table in expanded { + validate_table_name(&table)?; + if state.materialized_tables.contains(&table) { + continue; + } + materialize_one_table(remote, manifest, &table) + .with_context(|| format!("materialize table {}", table))?; + state.materialized_tables.insert(table.clone()); + changed.push(table); + } + + write_state(paths, &state)?; + Ok(changed) +} + +pub fn route_for_tables( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + tables: &[String], +) -> Result { + let state = load_state(paths)?; + let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables); + let touches_local = expanded + .iter() + .any(|table| state.materialized_tables.contains(table)); + + if touches_local { + let materialized = materialize_tables(remote, manifest, paths, &expanded)?; + Ok(RouteDecision { + backend: "local".to_string(), + materialized, + }) + } else { + Ok(RouteDecision { + backend: "remote".to_string(), + materialized: Vec::new(), + }) + } +} + +pub fn remote_readonly_query(remote: &RemoteClient, sql_text: &str) -> Result { + if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) { + return Err(anyhow!("refusing to send non-read SQL to remote")); + } + remote.remote_query_readonly(sql_text) +} + +#[derive(Debug, Serialize)] +pub struct RouteDecision { + pub backend: String, + pub materialized: Vec, +} + +fn materialize_one_table(remote: &RemoteClient, manifest: &Manifest, table: &str) -> Result<()> { + let probe = &manifest.probe; + ensure_probe_has_db(probe)?; + let delete_sql = format!("DELETE FROM `{}`;", table.replace('`', "``")); + run_mysql_exec(manifest, &delete_sql)?; + + let dump_command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --single-transaction --quick --skip-lock-tables --no-create-info --replace {} {}", + 
shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&probe.db_name), + shell_quote(table) + ); + + let mut ssh = remote + .command(&dump_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote mysqldump over ssh")?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut mysql_child = mysql.spawn().context("start local mysql import")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut mysql_stdin = mysql_child.stdin.take().expect("mysql stdin piped"); + io::copy(&mut ssh_stdout, &mut mysql_stdin)?; + } + + let ssh_output = ssh.wait_with_output()?; + let mysql_status = mysql_child.wait()?; + + if !ssh_output.status.success() { + return Err(anyhow!( + "remote mysqldump failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !mysql_status.success() { + return Err(anyhow!( + "local mysql import failed with status {}", + mysql_status + )); + } + Ok(()) +} + +fn local_mysql_command(manifest: &Manifest) -> Command { + let mut command = Command::new("mysql"); + command.arg("--host").arg(&manifest.local_db.host); + command + .arg("--port") + .arg(manifest.local_db.port.to_string()); + command.arg("--user").arg(&manifest.local_db.user); + if !manifest.local_db.password.is_empty() { + command.env("MYSQL_PWD", &manifest.local_db.password); + } + command +} + +fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { + let mut command = local_mysql_command(manifest); + command.arg("--execute").arg(sql_text); + let status = command.status().context("run local mysql")?; + if !status.success() { + return Err(anyhow!("local mysql failed with status {}", status)); + } + Ok(()) +} + +fn validate_table_name(table: &str) -> Result<()> { + if table.is_empty() + || !table + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$') + 
{ + return Err(anyhow!("unsafe table name {}", table)); + } + Ok(()) +} + +fn ensure_probe_has_db(probe: &crate::config::Probe) -> Result<()> { + if probe.db_name.is_empty() || probe.db_user.is_empty() || probe.db_host.is_empty() { + return Err(anyhow!("remote probe did not return database credentials")); + } + Ok(()) +} + +fn remote_mysql_cli_options(db_host: &str) -> String { + if let Some(idx) = db_host.find(":/") { + let host = &db_host[..idx]; + let socket = &db_host[idx + 1..]; + return format!( + "--host={} --socket={}", + shell_quote(host), + shell_quote(socket) + ); + } + + if let Some((host, port)) = db_host.rsplit_once(':') { + if port.parse::().is_ok() { + return format!("--host={} --port={}", shell_quote(host), shell_quote(port)); + } + } + + format!("--host={}", shell_quote(db_host)) +} + +#[allow(dead_code)] +pub fn local_db_host_port(manifest: &Manifest) -> (String, u16) { + parse_host_port( + &format!("{}:{}", manifest.local_db.host, manifest.local_db.port), + manifest.local_db.port, + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rejects_unsafe_table_names() { + assert!(validate_table_name("wp_posts").is_ok()); + assert!(validate_table_name("wp-posts").is_err()); + assert!(validate_table_name("wp_posts;DROP").is_err()); + } + + #[test] + fn formats_remote_mysql_host_variants() { + assert_eq!(remote_mysql_cli_options("localhost"), "--host='localhost'"); + assert_eq!( + remote_mysql_cli_options("db.example.com:3307"), + "--host='db.example.com' --port='3307'" + ); + assert_eq!( + remote_mysql_cli_options("localhost:/tmp/mysql.sock"), + "--host='localhost' --socket='/tmp/mysql.sock'" + ); + } +} diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs new file mode 100644 index 00000000..009ef0d3 --- /dev/null +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -0,0 +1,633 @@ +use anyhow::Result; +use fuser::{ + FileAttr, FileType, Filesystem, KernelConfig, MountOption, ReplyAttr, ReplyCreate, 
ReplyData, + ReplyDirectory, ReplyEmpty, ReplyEntry, ReplyOpen, ReplyWrite, Request, +}; +use libc::{EIO, ENOENT, ENOTSUP}; +use std::collections::{BTreeMap, HashMap}; +use std::ffi::{OsStr, OsString}; +use std::fs::{self, File, OpenOptions}; +use std::io; +use std::os::unix::fs::{FileExt, MetadataExt, OpenOptionsExt, PermissionsExt}; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use crate::config::{ClonePaths, Manifest}; +use crate::overlay::OverlayStore; +use crate::remote::{RemoteClient, RemoteEntry}; + +const ROOT_INO: u64 = 1; +const TTL: Duration = Duration::from_secs(1); + +enum Handle { + Local(File), + Remote(PathBuf), +} + +pub struct CowFs { + manifest: Manifest, + remote: RemoteClient, + overlay: OverlayStore, + ino_to_path: HashMap, + path_to_ino: HashMap, + next_ino: u64, + handles: HashMap, + next_fh: u64, + uid: u32, + gid: u32, +} + +impl CowFs { + pub fn new(manifest: Manifest, paths: &ClonePaths, remote: RemoteClient) -> Self { + let mut ino_to_path = HashMap::new(); + let mut path_to_ino = HashMap::new(); + ino_to_path.insert(ROOT_INO, PathBuf::new()); + path_to_ino.insert(PathBuf::new(), ROOT_INO); + Self { + manifest, + remote, + overlay: OverlayStore::new(paths), + ino_to_path, + path_to_ino, + next_ino: ROOT_INO + 1, + handles: HashMap::new(), + next_fh: 1, + uid: unsafe { libc::getuid() }, + gid: unsafe { libc::getgid() }, + } + } + + fn ino_for_path(&mut self, rel: &Path) -> u64 { + let rel = rel.to_path_buf(); + if let Some(ino) = self.path_to_ino.get(&rel) { + return *ino; + } + let ino = self.next_ino; + self.next_ino += 1; + self.path_to_ino.insert(rel.clone(), ino); + self.ino_to_path.insert(ino, rel); + ino + } + + fn path_for_ino(&self, ino: u64) -> Option { + self.ino_to_path.get(&ino).cloned() + } + + fn child_path(&self, parent: u64, name: &OsStr) -> io::Result { + let parent_path = self + .path_for_ino(parent) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown parent 
inode"))?; + let mut child = parent_path; + child.push(name); + OverlayStore::clean_rel(&child) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string())) + } + + fn attr_for_path(&mut self, rel: &Path, ino: u64) -> io::Result { + if self.overlay.is_whiteout(rel).map_err(anyhow_to_io)? { + return Err(io::Error::new(io::ErrorKind::NotFound, "whiteout")); + } + + let upper = self.overlay.upper_path(rel).map_err(anyhow_to_io)?; + if let Ok(metadata) = fs::symlink_metadata(&upper) { + return Ok(self.attr_from_metadata(ino, &metadata)); + } + + let entry = self.remote.stat(rel)?; + Ok(self.attr_from_remote(ino, &entry)) + } + + fn attr_from_metadata(&self, ino: u64, metadata: &fs::Metadata) -> FileAttr { + let kind = if metadata.file_type().is_dir() { + FileType::Directory + } else if metadata.file_type().is_symlink() { + FileType::Symlink + } else { + FileType::RegularFile + }; + let mtime = unix_time(metadata.mtime() as u64); + FileAttr { + ino, + size: metadata.len(), + blocks: metadata.blocks(), + atime: unix_time(metadata.atime() as u64), + mtime, + ctime: unix_time(metadata.ctime() as u64), + crtime: mtime, + kind, + perm: (metadata.mode() & 0o7777) as u16, + nlink: metadata.nlink() as u32, + uid: metadata.uid(), + gid: metadata.gid(), + rdev: metadata.rdev() as u32, + blksize: metadata.blksize() as u32, + flags: 0, + } + } + + fn attr_from_remote(&self, ino: u64, entry: &RemoteEntry) -> FileAttr { + let kind = match entry.kind.as_str() { + "dir" => FileType::Directory, + "symlink" => FileType::Symlink, + _ => FileType::RegularFile, + }; + let default_perm = match kind { + FileType::Directory => 0o755, + FileType::Symlink => 0o777, + _ => 0o644, + }; + FileAttr { + ino, + size: entry.size, + blocks: entry.size.div_ceil(512), + atime: unix_time(entry.mtime), + mtime: unix_time(entry.mtime), + ctime: unix_time(entry.mtime), + crtime: unix_time(entry.mtime), + kind, + perm: ((entry.mode & 0o7777) as u16).max(default_perm), + nlink: if kind == 
FileType::Directory { 2 } else { 1 }, + uid: self.uid, + gid: self.gid, + rdev: 0, + blksize: 4096, + flags: 0, + } + } + + fn allocate_handle(&mut self, handle: Handle) -> u64 { + let fh = self.next_fh; + self.next_fh += 1; + self.handles.insert(fh, handle); + fh + } +} + +impl Filesystem for CowFs { + fn init( + &mut self, + _req: &Request<'_>, + _config: &mut KernelConfig, + ) -> std::result::Result<(), i32> { + Ok(()) + } + + fn lookup(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEntry) { + let result = (|| { + let rel = self.child_path(parent, name)?; + let ino = self.ino_for_path(&rel); + self.attr_for_path(&rel, ino) + })(); + match result { + Ok(attr) => reply.entry(&TTL, &attr, 0), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn getattr(&mut self, _req: &Request<'_>, ino: u64, _fh: Option, reply: ReplyAttr) { + let result = (|| { + if ino == ROOT_INO { + return Ok(root_attr(self.uid, self.gid)); + } + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + self.attr_for_path(&rel, ino) + })(); + match result { + Ok(attr) => reply.attr(&TTL, &attr), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn readlink(&mut self, _req: &Request<'_>, ino: u64, reply: ReplyData) { + let result = (|| { + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + if upper.exists() { + return fs::read_link(upper).map(|p| p.to_string_lossy().into_owned()); + } + self.remote.readlink(&rel) + })(); + match result { + Ok(target) => reply.data(target.as_bytes()), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn mkdir( + &mut self, + _req: &Request<'_>, + parent: u64, + name: &OsStr, + mode: u32, + _umask: u32, + reply: ReplyEntry, + ) { + let result = (|| { + let rel = self.child_path(parent, name)?; + let upper = 
self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + fs::create_dir_all(&upper)?; + fs::set_permissions(&upper, fs::Permissions::from_mode(mode & 0o7777))?; + self.overlay.clear_whiteout(&rel).map_err(anyhow_to_io)?; + let ino = self.ino_for_path(&rel); + self.attr_for_path(&rel, ino) + })(); + match result { + Ok(attr) => reply.entry(&TTL, &attr, 0), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn unlink(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEmpty) { + self.remove_path(parent, name, reply); + } + + fn rmdir(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEmpty) { + self.remove_path(parent, name, reply); + } + + fn rename( + &mut self, + _req: &Request<'_>, + parent: u64, + name: &OsStr, + newparent: u64, + newname: &OsStr, + flags: u32, + reply: ReplyEmpty, + ) { + let result = (|| { + if flags != 0 { + return Err(io::Error::from_raw_os_error(ENOTSUP)); + } + let old_rel = self.child_path(parent, name)?; + let new_rel = self.child_path(newparent, newname)?; + let old_upper = self.overlay.upper_path(&old_rel).map_err(anyhow_to_io)?; + let new_upper = self.overlay.upper_path(&new_rel).map_err(anyhow_to_io)?; + if let Some(parent) = new_upper.parent() { + fs::create_dir_all(parent)?; + } + + if !old_upper.exists() { + let entry = self.remote.stat(&old_rel)?; + if entry.kind == "dir" { + return Err(io::Error::from_raw_os_error(ENOTSUP)); + } + self.overlay + .copy_up(&self.remote, &old_rel) + .map_err(anyhow_to_io)?; + } + + fs::rename(&old_upper, &new_upper)?; + self.overlay.add_whiteout(&old_rel).map_err(anyhow_to_io)?; + self.overlay + .clear_whiteout(&new_rel) + .map_err(anyhow_to_io)?; + let ino = self.ino_for_path(&new_rel); + self.ino_to_path.insert(ino, new_rel.clone()); + self.path_to_ino.insert(new_rel, ino); + Ok(()) + })(); + match result { + Ok(()) => reply.ok(), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn open(&mut self, _req: &Request<'_>, ino: u64, flags: i32, reply: 
ReplyOpen) { + let result = (|| { + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + if wants_write(flags) { + let upper = self + .overlay + .copy_up(&self.remote, &rel) + .map_err(anyhow_to_io)?; + let mut opts = OpenOptions::new(); + opts.read(true).write(true).create(true); + if flags & libc::O_TRUNC != 0 { + opts.truncate(true); + } + if flags & libc::O_APPEND != 0 { + opts.append(true); + } + let file = opts.open(upper)?; + Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) + } else { + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + if upper.exists() { + let file = File::open(upper)?; + Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) + } else { + Ok((self.allocate_handle(Handle::Remote(rel)), flags as u32)) + } + } + })(); + match result { + Ok((fh, open_flags)) => reply.opened(fh, open_flags), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn read( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: ReplyData, + ) { + let result = match self.handles.get(&fh) { + Some(Handle::Local(file)) => { + let mut buf = vec![0; size as usize]; + if offset < 0 { + Ok(Vec::new()) + } else { + match file.read_at(&mut buf, offset as u64) { + Ok(read) => { + buf.truncate(read); + Ok(buf) + } + Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => Ok(Vec::new()), + Err(err) => Err(err), + } + } + } + Some(Handle::Remote(rel)) => self + .overlay + .read_cached_or_remote( + &self.remote, + rel, + offset, + size, + self.manifest.cache_max_file_bytes, + ) + .map_err(anyhow_to_io), + None => Err(io::Error::new(io::ErrorKind::NotFound, "unknown handle")), + }; + match result { + Ok(bytes) => reply.data(&bytes), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn write( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + data: &[u8], + _write_flags: 
u32, + _flags: i32, + _lock_owner: Option, + reply: ReplyWrite, + ) { + let result = match self.handles.get(&fh) { + Some(Handle::Local(file)) => { + if offset < 0 { + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "negative offset", + )) + } else { + file.write_at(data, offset as u64) + .map(|written| written as u32) + } + } + _ => Err(io::Error::new( + io::ErrorKind::Other, + "handle is not writable", + )), + }; + match result { + Ok(written) => reply.written(written), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn create( + &mut self, + _req: &Request<'_>, + parent: u64, + name: &OsStr, + mode: u32, + _umask: u32, + flags: i32, + reply: ReplyCreate, + ) { + let result = (|| { + let rel = self.child_path(parent, name)?; + let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; + if let Some(parent) = upper.parent() { + fs::create_dir_all(parent)?; + } + let mut opts = OpenOptions::new(); + opts.read(true) + .write(true) + .create(true) + .truncate(flags & libc::O_TRUNC != 0) + .mode(mode & 0o7777); + let file = opts.open(&upper)?; + self.overlay.clear_whiteout(&rel).map_err(anyhow_to_io)?; + let ino = self.ino_for_path(&rel); + let attr = self.attr_for_path(&rel, ino)?; + let fh = self.allocate_handle(Handle::Local(file)); + Ok((attr, fh)) + })(); + match result { + Ok((attr, fh)) => reply.created(&TTL, &attr, 0, fh, flags as u32), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn flush( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + _lock_owner: u64, + reply: ReplyEmpty, + ) { + if let Some(Handle::Local(file)) = self.handles.get(&fh) { + if let Err(err) = file.sync_data() { + reply.error(io_errno(&err)); + return; + } + } + reply.ok(); + } + + fn release( + &mut self, + _req: &Request<'_>, + _ino: u64, + fh: u64, + _flags: i32, + _lock_owner: Option, + _flush: bool, + reply: ReplyEmpty, + ) { + self.handles.remove(&fh); + reply.ok(); + } + + fn readdir( + &mut self, + _req: &Request<'_>, + ino: u64, + _fh: u64, + 
offset: i64, + mut reply: ReplyDirectory, + ) { + let result = self.collect_dir_entries(ino); + let entries = match result { + Ok(entries) => entries, + Err(err) => { + reply.error(io_errno(&err)); + return; + } + }; + + for (idx, (entry_ino, kind, name)) in entries.into_iter().enumerate().skip(offset as usize) + { + let next_offset = (idx + 1) as i64; + if reply.add(entry_ino, next_offset, kind, name) { + break; + } + } + reply.ok(); + } +} + +impl CowFs { + fn remove_path(&mut self, parent: u64, name: &OsStr, reply: ReplyEmpty) { + let result = (|| { + let rel = self.child_path(parent, name)?; + self.overlay.remove_upper(&rel).map_err(anyhow_to_io)?; + if self.remote.stat(&rel).is_ok() { + self.overlay.add_whiteout(&rel).map_err(anyhow_to_io)?; + } + Ok(()) + })(); + match result { + Ok(()) => reply.ok(), + Err(err) => reply.error(io_errno(&err)), + } + } + + fn collect_dir_entries(&mut self, ino: u64) -> io::Result> { + let rel = self + .path_for_ino(ino) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + + let mut entries = Vec::new(); + entries.push((ino, FileType::Directory, OsString::from("."))); + let parent_rel = rel.parent().unwrap_or_else(|| Path::new("")); + let parent_ino = self.ino_for_path(parent_rel); + entries.push((parent_ino, FileType::Directory, OsString::from(".."))); + + let mut by_name: BTreeMap = BTreeMap::new(); + match self.remote.readdir(&rel) { + Ok(remote_entries) => { + for entry in remote_entries { + by_name.insert(entry.name.clone(), entry); + } + } + Err(err) if err.kind() == io::ErrorKind::NotFound => {} + Err(err) => return Err(err), + } + + for entry in self.overlay.list_upper(&rel).map_err(anyhow_to_io)? { + by_name.insert(entry.name.clone(), entry); + } + + for (name, entry) in by_name { + let child_rel = rel.join(&name); + if self.overlay.is_whiteout(&child_rel).map_err(anyhow_to_io)? 
{ + continue; + } + let child_ino = self.ino_for_path(&child_rel); + entries.push(( + child_ino, + file_type_from_kind(&entry.kind), + OsString::from(name), + )); + } + + Ok(entries) + } +} + +pub fn mount_foreground(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> Result<()> { + fs::create_dir_all(mountpoint)?; + let control_path = paths.run.join("ssh-control.sock"); + let remote = RemoteClient::new(manifest.clone(), Some(control_path)); + remote.ensure_master()?; + let fs = CowFs::new(manifest.clone(), &paths, remote); + let options = vec![ + MountOption::FSName(format!("wp-cow-{}", manifest.name)), + MountOption::Subtype("wp-cow".to_string()), + MountOption::AutoUnmount, + MountOption::DefaultPermissions, + ]; + fuser::mount2(fs, mountpoint, &options)?; + Ok(()) +} + +fn root_attr(uid: u32, gid: u32) -> FileAttr { + FileAttr { + ino: ROOT_INO, + size: 0, + blocks: 0, + atime: SystemTime::now(), + mtime: SystemTime::now(), + ctime: SystemTime::now(), + crtime: SystemTime::now(), + kind: FileType::Directory, + perm: 0o755, + nlink: 2, + uid, + gid, + rdev: 0, + blksize: 4096, + flags: 0, + } +} + +fn file_type_from_kind(kind: &str) -> FileType { + match kind { + "dir" => FileType::Directory, + "symlink" => FileType::Symlink, + _ => FileType::RegularFile, + } +} + +fn unix_time(secs: u64) -> SystemTime { + UNIX_EPOCH + Duration::from_secs(secs) +} + +fn wants_write(flags: i32) -> bool { + (flags & libc::O_ACCMODE) != libc::O_RDONLY + || flags & libc::O_TRUNC != 0 + || flags & libc::O_APPEND != 0 +} + +fn io_errno(err: &io::Error) -> i32 { + match err.kind() { + io::ErrorKind::NotFound => ENOENT, + io::ErrorKind::Unsupported => ENOTSUP, + _ => err.raw_os_error().unwrap_or(EIO), + } +} + +fn anyhow_to_io(err: anyhow::Error) -> io::Error { + io::Error::new(io::ErrorKind::Other, err.to_string()) +} diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs new file mode 100644 index 00000000..4e9b8a40 --- /dev/null 
+++ b/experiments/remote-wp-cow/src/generate.rs @@ -0,0 +1,321 @@ +use anyhow::Result; +use std::fs; +use std::path::Path; + +use crate::config::{ClonePaths, Manifest}; + +pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { + fs::create_dir_all(paths.upper.join("wp-content/mu-plugins"))?; + fs::write(paths.upper.join("wp-config.php"), wp_config_php(manifest))?; + fs::write(paths.upper.join("wp-content/db.php"), db_dropin_php())?; + fs::write( + paths.upper.join("wp-content/mu-plugins/wp-cow-safety.php"), + safety_mu_plugin_php(), + )?; + fs::write(paths.generated.join("router.php"), router_php())?; + Ok(()) +} + +pub fn wp_config_php(manifest: &Manifest) -> String { + format!( + r#" &'static str { + r#" false, 'error' => $error ); + } + } else { + $context = stream_context_create( + array( + 'http' => array( + 'method' => 'POST', + 'header' => "Content-Type: application/json\r\n", + 'content' => $body, + 'timeout' => 120, + ), + ) + ); + $raw = @file_get_contents( $url, false, $context ); + if ( false === $raw ) { + return array( 'ok' => false, 'error' => 'wp-cow control request failed' ); + } + } + + $decoded = json_decode( $raw, true ); + if ( ! is_array( $decoded ) ) { + return array( 'ok' => false, 'error' => 'invalid wp-cow control response' ); + } + return $decoded; +} + +class Cow_DB extends wpdb { + public function query( $query ) { + if ( ! $query ) { + return false; + } + + $this->flush(); + $this->last_query = $query; + + $tables = cow_tables_from_sql( $query ); + + if ( cow_is_write_sql( $query ) ) { + $result = cow_control_request( '/materialize', array( 'tables' => $tables ) ); + if ( empty( $result['ok'] ) ) { + $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow materialization failed'; + return false; + } + return parent::query( $query ); + } + + if ( cow_is_safe_read_sql( $query ) ) { + $route = cow_control_request( '/route', array( 'tables' => $tables ) ); + if ( ! 
empty( $route['ok'] ) && isset( $route['backend'] ) && 'remote' === $route['backend'] ) { + return $this->cow_remote_query( $query ); + } + if ( ! empty( $route['ok'] ) && isset( $route['backend'] ) && 'local' === $route['backend'] ) { + return parent::query( $query ); + } + } + + return parent::query( $query ); + } + + private function cow_remote_query( $query ) { + $result = cow_control_request( '/query', array( 'sql' => $query ) ); + if ( empty( $result['ok'] ) ) { + $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow remote query failed'; + return false; + } + + $this->last_result = array(); + if ( isset( $result['rows'] ) && is_array( $result['rows'] ) ) { + foreach ( $result['rows'] as $row ) { + $this->last_result[] = (object) $row; + } + } + + $this->col_info = array(); + if ( isset( $result['fields'] ) && is_array( $result['fields'] ) ) { + foreach ( $result['fields'] as $field ) { + $this->col_info[] = (object) array( 'name' => $field ); + } + } + + $this->num_rows = count( $this->last_result ); + $this->rows_affected = isset( $result['affected'] ) ? 
(int) $result['affected'] : $this->num_rows; + $this->last_error = ''; + + return $this->num_rows; + } +} + +$wpdb = new Cow_DB( DB_USER, DB_PASSWORD, DB_NAME, DB_HOST ); +"# +} + +pub fn safety_mu_plugin_php() -> &'static str { + r#" &'static str { + r#" String { + format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")) +} + +#[allow(dead_code)] +pub fn generated_file_paths(root: &Path) -> Vec { + vec![ + root.join("wp-config.php"), + root.join("wp-content/db.php"), + root.join("wp-content/mu-plugins/wp-cow-safety.php"), + ] +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{LocalDb, Manifest, Probe, MANIFEST_VERSION}; + + fn manifest() -> Manifest { + Manifest { + version: MANIFEST_VERSION, + name: "example".to_string(), + ssh: "user@example.com".to_string(), + remote_path: "/srv/www".to_string(), + remote_url: "https://example.com".to_string(), + local_url: "http://example.test".to_string(), + created_at_unix: 1, + probe: Probe { + table_prefix: "wp_".to_string(), + db_name: "example_wp".to_string(), + db_host: "localhost".to_string(), + db_user: "example_wp".to_string(), + ..Probe::default() + }, + local_db: LocalDb { + name: "cow_example".to_string(), + user: "cow_example".to_string(), + password: "secret".to_string(), + host: "127.0.0.1".to_string(), + port: 33071, + }, + control_url: "http://127.0.0.1:39070".to_string(), + cache_max_file_bytes: 1024, + } + } + + #[test] + fn generated_config_shadows_urls_and_database() { + let php = wp_config_php(&manifest()); + assert!(php.contains("define( 'DB_NAME', 'cow_example' );")); + assert!(php.contains("define( 'WP_HOME', 'http://example.test' );")); + assert!(php.contains("$table_prefix = 'wp_';")); + assert!(php.contains("WPCOW_CONTROL_URL")); + } + + #[test] + fn db_dropin_blocks_write_classes() { + let php = db_dropin_php(); + assert!(php.contains("cow_is_write_sql")); + assert!(php.contains("/materialize")); + assert!(php.contains("/query")); + } + + #[test] + fn 
safety_plugin_blocks_side_effects() { + let php = safety_mu_plugin_php(); + assert!(php.contains("pre_wp_mail")); + assert!(php.contains("X-Robots-Tag")); + assert!(php.contains("pre_http_request")); + } +} diff --git a/experiments/remote-wp-cow/src/main.rs b/experiments/remote-wp-cow/src/main.rs new file mode 100644 index 00000000..b0c18aea --- /dev/null +++ b/experiments/remote-wp-cow/src/main.rs @@ -0,0 +1,14 @@ +mod cli; +mod config; +mod control; +mod db; +mod fusefs; +mod generate; +mod overlay; +mod remote; +mod run; +mod sql; + +fn main() -> anyhow::Result<()> { + cli::run() +} diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs new file mode 100644 index 00000000..9789fa41 --- /dev/null +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -0,0 +1,288 @@ +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::collections::BTreeSet; +use std::fs::{self, File, OpenOptions}; +use std::io::{Read, Write}; +use std::path::{Component, Path, PathBuf}; + +use crate::config::ClonePaths; +use crate::remote::{RemoteClient, RemoteEntry}; + +#[derive(Debug, Default, Serialize, Deserialize)] +struct WhiteoutFile { + deleted: BTreeSet, +} + +#[derive(Debug, Clone)] +pub struct OverlayStore { + pub upper: PathBuf, + pub file_cache: PathBuf, + whiteouts_path: PathBuf, +} + +impl OverlayStore { + pub fn new(paths: &ClonePaths) -> Self { + Self { + upper: paths.upper.clone(), + file_cache: paths.file_cache.clone(), + whiteouts_path: paths.whiteouts.clone(), + } + } + + pub fn clean_rel(input: impl AsRef) -> Result { + let mut out = PathBuf::new(); + for component in input.as_ref().components() { + match component { + Component::Normal(part) => out.push(part), + Component::CurDir => {} + Component::RootDir | Component::Prefix(_) | Component::ParentDir => { + return Err(anyhow!( + "invalid clone-relative path {}", + input.as_ref().display() + )); + } + } + } + Ok(out) + } + + 
pub fn rel_string(rel: &Path) -> String { + rel.to_string_lossy().replace('\\', "/") + } + + pub fn upper_path(&self, rel: &Path) -> Result { + Ok(self.upper.join(Self::clean_rel(rel)?)) + } + + pub fn cache_path(&self, rel: &Path) -> PathBuf { + let mut hasher = Sha256::new(); + hasher.update(Self::rel_string(rel)); + let hex = hex::encode(hasher.finalize()); + self.file_cache.join(&hex[0..2]).join(hex) + } + + pub fn is_whiteout(&self, rel: &Path) -> Result { + let whiteouts = self.load_whiteouts()?; + Ok(whiteouts.deleted.contains(&Self::rel_string(rel))) + } + + pub fn add_whiteout(&self, rel: &Path) -> Result<()> { + let mut whiteouts = self.load_whiteouts()?; + whiteouts.deleted.insert(Self::rel_string(rel)); + self.write_whiteouts(&whiteouts) + } + + pub fn clear_whiteout(&self, rel: &Path) -> Result<()> { + let mut whiteouts = self.load_whiteouts()?; + whiteouts.deleted.remove(&Self::rel_string(rel)); + self.write_whiteouts(&whiteouts) + } + + pub fn remove_upper(&self, rel: &Path) -> Result<()> { + let path = self.upper_path(rel)?; + if path.is_dir() { + fs::remove_dir_all(path)?; + } else if path.exists() { + fs::remove_file(path)?; + } + Ok(()) + } + + pub fn copy_up(&self, remote: &RemoteClient, rel: &Path) -> Result { + let upper = self.upper_path(rel)?; + if upper.exists() { + return Ok(upper); + } + + if let Some(parent) = upper.parent() { + fs::create_dir_all(parent)?; + } + + let entry = remote.stat(rel)?; + if entry.kind == "dir" { + fs::create_dir_all(&upper)?; + return Ok(upper); + } + if entry.kind != "file" { + return Err(anyhow!( + "copy-up only supports regular files and directories" + )); + } + + let mut out = File::create(&upper)?; + let mut offset = 0_u64; + let chunk = 1024 * 1024; + while offset < entry.size { + let wanted = chunk.min((entry.size - offset) as usize); + let bytes = remote.read_range(rel, offset, wanted)?; + if bytes.is_empty() { + break; + } + out.write_all(&bytes)?; + offset += bytes.len() as u64; + } + Ok(upper) + } + 
+ pub fn read_cached_or_remote( + &self, + remote: &RemoteClient, + rel: &Path, + offset: i64, + size: u32, + cache_limit: u64, + ) -> Result> { + if offset < 0 { + return Ok(Vec::new()); + } + + let cache_path = self.cache_path(rel); + if cache_path.exists() { + return read_range_from_file(&cache_path, offset as u64, size as usize); + } + + let entry = remote.stat(rel)?; + if entry.kind == "file" && entry.size <= cache_limit { + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent)?; + } + let tmp = cache_path.with_extension("tmp"); + let mut out = File::create(&tmp)?; + let mut cursor = 0_u64; + let chunk = 1024 * 1024; + while cursor < entry.size { + let wanted = chunk.min((entry.size - cursor) as usize); + let bytes = remote.read_range(rel, cursor, wanted)?; + if bytes.is_empty() { + break; + } + out.write_all(&bytes)?; + cursor += bytes.len() as u64; + } + fs::rename(tmp, &cache_path)?; + return read_range_from_file(&cache_path, offset as u64, size as usize); + } + + remote + .read_range(rel, offset as u64, size as usize) + .with_context(|| format!("remote read {}", Self::rel_string(rel))) + } + + pub fn list_upper(&self, rel: &Path) -> Result> { + let path = self.upper_path(rel)?; + if !path.is_dir() { + return Ok(Vec::new()); + } + let mut out = Vec::new(); + for entry in fs::read_dir(path)? 
{ + let entry = entry?; + let metadata = fs::symlink_metadata(entry.path())?; + let file_type = metadata.file_type(); + out.push(RemoteEntry { + name: entry.file_name().to_string_lossy().to_string(), + kind: if file_type.is_dir() { + "dir".to_string() + } else if file_type.is_symlink() { + "symlink".to_string() + } else { + "file".to_string() + }, + size: metadata.len(), + mode: metadata.mode(), + mtime: metadata + .modified() + .ok() + .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or_default(), + }); + } + Ok(out) + } + + fn load_whiteouts(&self) -> Result { + if !self.whiteouts_path.exists() { + return Ok(WhiteoutFile::default()); + } + let mut json = String::new(); + File::open(&self.whiteouts_path)?.read_to_string(&mut json)?; + Ok(serde_json::from_str(&json)?) + } + + fn write_whiteouts(&self, whiteouts: &WhiteoutFile) -> Result<()> { + if let Some(parent) = self.whiteouts_path.parent() { + fs::create_dir_all(parent)?; + } + let json = serde_json::to_vec_pretty(whiteouts)?; + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&self.whiteouts_path)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + Ok(()) + } +} + +fn read_range_from_file(path: &Path, offset: u64, size: usize) -> Result> { + let mut file = File::open(path)?; + let mut buf = Vec::new(); + file.read_to_end(&mut buf)?; + let start = offset.min(buf.len() as u64) as usize; + let end = (start + size).min(buf.len()); + Ok(buf[start..end].to_vec()) +} + +#[cfg(unix)] +trait MetadataMode { + fn mode(&self) -> u32; +} + +#[cfg(unix)] +impl MetadataMode for std::fs::Metadata { + fn mode(&self) -> u32 { + use std::os::unix::fs::MetadataExt; + MetadataExt::mode(self) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::ClonePaths; + + #[test] + fn stores_whiteouts() { + let temp = tempfile::tempdir().unwrap(); + let paths = ClonePaths { + root: temp.path().to_path_buf(), + manifest: 
temp.path().join("manifest.json"), + upper: temp.path().join("upper"), + file_cache: temp.path().join("file-cache"), + db: temp.path().join("db"), + generated: temp.path().join("generated"), + run: temp.path().join("run"), + whiteouts: temp.path().join("whiteouts.json"), + }; + fs::create_dir_all(&paths.upper).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-content/uploads/a.jpg"); + assert!(!store.is_whiteout(rel).unwrap()); + store.add_whiteout(rel).unwrap(); + assert!(store.is_whiteout(rel).unwrap()); + store.clear_whiteout(rel).unwrap(); + assert!(!store.is_whiteout(rel).unwrap()); + } + + #[test] + fn rejects_path_traversal() { + assert!(OverlayStore::clean_rel("../wp-config.php").is_err()); + assert!(OverlayStore::clean_rel("/wp-config.php").is_err()); + assert_eq!( + OverlayStore::clean_rel("./wp-config.php").unwrap(), + PathBuf::from("wp-config.php") + ); + } +} diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs new file mode 100644 index 00000000..03c14d36 --- /dev/null +++ b/experiments/remote-wp-cow/src/remote.rs @@ -0,0 +1,334 @@ +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; +use std::ffi::OsStr; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +use crate::config::{Manifest, Probe}; +use crate::overlay::OverlayStore; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteEntry { + pub name: String, + pub kind: String, + pub size: u64, + pub mode: u32, + pub mtime: u64, +} + +#[derive(Debug, Clone)] +pub struct RemoteClient { + manifest: Manifest, + control_path: Option, +} + +impl RemoteClient { + pub fn new(manifest: Manifest, control_path: Option) -> Self { + Self { + manifest, + control_path, + } + } + + pub fn manifest(&self) -> &Manifest { + &self.manifest + } + + pub fn ensure_master(&self) -> Result<()> { + let Some(control_path) = &self.control_path else { + return Ok(()); + }; 
+ if control_path.exists() { + return Ok(()); + } + if let Some(parent) = control_path.parent() { + std::fs::create_dir_all(parent)?; + } + let status = Command::new("ssh") + .arg("-MNf") + .arg("-S") + .arg(control_path) + .arg("-o") + .arg("ControlMaster=yes") + .arg("-o") + .arg("ControlPersist=600") + .arg(&self.manifest.ssh) + .status() + .context("start SSH control master")?; + if !status.success() { + return Err(anyhow!( + "failed to start SSH control master for {}", + self.manifest.ssh + )); + } + Ok(()) + } + + pub fn command(&self, remote_command: &str) -> Command { + let mut command = Command::new("ssh"); + if let Some(control_path) = &self.control_path { + command.arg("-S").arg(control_path); + command.arg("-o").arg("ControlMaster=auto"); + command.arg("-o").arg("ControlPersist=600"); + } + command.arg(&self.manifest.ssh); + command.arg(remote_command); + command + } + + pub fn exec_capture(&self, remote_command: &str, stdin: Option<&[u8]>) -> io::Result> { + let mut child = self + .command(remote_command) + .stdin(if stdin.is_some() { + Stdio::piped() + } else { + Stdio::null() + }) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + + if let Some(input) = stdin { + if let Some(mut child_stdin) = child.stdin.take() { + child_stdin.write_all(input)?; + } + } + + let output = child.wait_with_output()?; + if output.status.success() { + return Ok(output.stdout); + } + let stderr = String::from_utf8_lossy(&output.stderr); + if output.status.code() == Some(2) || stderr.contains("WPCOW_ENOENT") { + return Err(io::Error::new(io::ErrorKind::NotFound, stderr.to_string())); + } + Err(io::Error::new(io::ErrorKind::Other, stderr.to_string())) + } + + pub fn stat(&self, rel: &Path) -> io::Result { + let full = self.remote_full_path(rel)?; + let code = r#" +$p=$argv[1]; +clearstatcache(true,$p); +$s=@lstat($p); +if($s===false){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +$kind=is_link($p)?"symlink":(is_dir($p)?"dir":(is_file($p)?"file":"other")); +echo 
json_encode(array( + "name"=>basename($p), + "kind"=>$kind, + "size"=>(int)$s["size"], + "mode"=>(int)$s["mode"], + "mtime"=>(int)$s["mtime"] +)); +"#; + let bytes = self.php_eval(code, &[full])?; + serde_json::from_slice(&bytes) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) + } + + pub fn readdir(&self, rel: &Path) -> io::Result> { + let full = self.remote_full_path(rel)?; + let code = r#" +$p=$argv[1]; +if(!is_dir($p)){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +$out=array(); +foreach(scandir($p) as $name){ + if($name==="."||$name===".."){continue;} + $child=$p.DIRECTORY_SEPARATOR.$name; + $s=@lstat($child); + if($s===false){continue;} + $kind=is_link($child)?"symlink":(is_dir($child)?"dir":(is_file($child)?"file":"other")); + $out[]=array("name"=>$name,"kind"=>$kind,"size"=>(int)$s["size"],"mode"=>(int)$s["mode"],"mtime"=>(int)$s["mtime"]); +} +echo json_encode($out); +"#; + let bytes = self.php_eval(code, &[full])?; + serde_json::from_slice(&bytes) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) + } + + pub fn read_range(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { + let full = self.remote_full_path(rel)?; + let code = r#" +$p=$argv[1];$offset=(int)$argv[2];$length=(int)$argv[3]; +$f=@fopen($p,"rb"); +if(!$f){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +if($offset>0){fseek($f,$offset);} +echo fread($f,$length); +"#; + self.php_eval(code, &[full, offset.to_string(), length.to_string()]) + } + + pub fn readlink(&self, rel: &Path) -> io::Result { + let full = self.remote_full_path(rel)?; + let code = r#" +$p=$argv[1]; +$target=@readlink($p); +if($target===false){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} +echo $target; +"#; + let bytes = self.php_eval(code, &[full])?; + Ok(String::from_utf8_lossy(&bytes).to_string()) + } + + pub fn remote_query_readonly(&self, sql: &str) -> Result { + let probe = &self.manifest.probe; + let code = r#" +$host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5]; 
+if(!preg_match('/^\s*(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i',$sql)){ + fwrite(STDERR,"WPCOW_REFUSED_WRITE\n");exit(3); +} +$port=null;$socket=null; +if(preg_match('/^(.+):([0-9]+)$/',$host,$m)){ + $host=$m[1];$port=(int)$m[2]; +} elseif(preg_match('/^([^:]+):(\/.*)$/',$host,$m)){ + $host=$m[1];$socket=$m[2]; +} +$mysqli=mysqli_init(); +if(!@$mysqli->real_connect($host,$user,$pass,$db,$port,$socket)){ + fwrite(STDERR,mysqli_connect_error()."\n");exit(1); +} +@$mysqli->set_charset("utf8mb4"); +$res=$mysqli->query($sql, MYSQLI_STORE_RESULT); +if($res===false){ + echo json_encode(array("ok"=>false,"error"=>$mysqli->error,"rows"=>array(),"fields"=>array(),"affected"=>0)); + exit(0); +} +if($res===true){ + echo json_encode(array("ok"=>true,"error"=>"","rows"=>array(),"fields"=>array(),"affected"=>$mysqli->affected_rows)); + exit(0); +} +$fields=array(); +foreach($res->fetch_fields() as $field){$fields[]=$field->name;} +$rows=array(); +while($row=$res->fetch_assoc()){$rows[]=$row;} +echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"affected"=>count($rows))); +"#; + let bytes = self + .php_eval( + code, + &[ + probe.db_host.clone(), + probe.db_user.clone(), + probe.db_password.clone(), + probe.db_name.clone(), + sql.to_string(), + ], + ) + .context("remote readonly query")?; + let result: RemoteQueryResult = serde_json::from_slice(&bytes)?; + Ok(result) + } + + fn php_eval(&self, code: &str, args: &[String]) -> io::Result> { + let mut command = format!("php -r {} --", shell_quote(code)); + for arg in args { + command.push(' '); + command.push_str(&shell_quote(arg)); + } + self.exec_capture(&command, None) + } + + fn remote_full_path(&self, rel: &Path) -> io::Result { + let rel = OverlayStore::clean_rel(rel) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string()))?; + let rel = OverlayStore::rel_string(&rel); + if rel.is_empty() { + Ok(self.manifest.remote_path.clone()) + } else { + Ok(format!( + "{}/{}", + 
self.manifest.remote_path.trim_end_matches('/'), + rel + )) + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteQueryResult { + pub ok: bool, + pub error: String, + pub rows: Vec>, + pub fields: Vec, + pub affected: i64, +} + +pub fn probe_wordpress(ssh: &str, remote_path: &str) -> Result { + let script = r#" + WP_CONTENT_DIR . '/uploads'); +$out = array( + 'abspath' => defined('ABSPATH') ? ABSPATH : getcwd(), + 'wp_content_dir' => defined('WP_CONTENT_DIR') ? WP_CONTENT_DIR : getcwd() . '/wp-content', + 'uploads_dir' => isset($uploads['basedir']) ? $uploads['basedir'] : '', + 'table_prefix' => isset($wpdb) ? $wpdb->prefix : (isset($table_prefix) ? $table_prefix : 'wp_'), + 'db_name' => defined('DB_NAME') ? DB_NAME : '', + 'db_host' => defined('DB_HOST') ? DB_HOST : '', + 'db_user' => defined('DB_USER') ? DB_USER : '', + 'db_password' => defined('DB_PASSWORD') ? DB_PASSWORD : '', + 'siteurl' => function_exists('get_option') ? get_option('siteurl') : '', + 'home' => function_exists('get_option') ? 
get_option('home') : '' +); +echo json_encode($out); +"#; + + let remote_command = format!("cd {} && php", shell_quote(remote_path)); + let output = Command::new("ssh") + .arg(ssh) + .arg(remote_command) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .and_then(|mut child| { + child + .stdin + .as_mut() + .expect("stdin is piped") + .write_all(script.as_bytes())?; + child.wait_with_output() + }) + .context("run remote WordPress probe")?; + + if !output.status.success() { + return Err(anyhow!( + "remote probe failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + let probe: Probe = serde_json::from_slice(&output.stdout) + .with_context(|| String::from_utf8_lossy(&output.stdout).to_string())?; + Ok(probe) +} + +pub fn shell_quote(value: impl AsRef) -> String { + let value = value.as_ref().to_string_lossy(); + if value.is_empty() { + return "''".to_string(); + } + let escaped = value.replace('\'', "'\"'\"'"); + format!("'{}'", escaped) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn quotes_shell_strings() { + assert_eq!(shell_quote("abc"), "'abc'"); + assert_eq!(shell_quote("a'b"), "'a'\"'\"'b'"); + assert_eq!(shell_quote(""), "''"); + } +} diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs new file mode 100644 index 00000000..5076cc20 --- /dev/null +++ b/experiments/remote-wp-cow/src/run.rs @@ -0,0 +1,159 @@ +use anyhow::{anyhow, Context, Result}; +use std::path::{Path, PathBuf}; +use std::process::{Child, Command, Stdio}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use crate::config::{ClonePaths, Manifest}; +use crate::control; +use crate::fusefs; +use crate::remote::RemoteClient; + +pub struct RunOptions { + pub mountpoint: PathBuf, + pub http_addr: String, + pub skip_php: bool, +} + +pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> Result<()> { + let 
shutdown = Arc::new(AtomicBool::new(false)); + install_signal_handler(shutdown.clone())?; + + let control_addr = control_addr_from_url(&manifest.control_url)?; + let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + remote.ensure_master()?; + + let control_shutdown = shutdown.clone(); + let control_manifest = manifest.clone(); + let control_paths = paths.clone(); + let control_remote = remote.clone(); + let control_thread = thread::spawn(move || { + control::serve_control( + &control_addr, + control_manifest, + control_paths, + control_remote, + control_shutdown, + ) + }); + + let mount_manifest = manifest.clone(); + let mount_paths = paths.clone(); + let mountpoint = options.mountpoint.clone(); + let mount_thread = + thread::spawn(move || fusefs::mount_foreground(mount_manifest, mount_paths, &mountpoint)); + + wait_for_mount(&options.mountpoint); + + let mut php = if options.skip_php { + None + } else { + Some(start_php_server( + &paths, + &options.mountpoint, + &options.http_addr, + )?) + }; + + eprintln!( + "wp-cow running clone '{}' at {} from {}", + manifest.name, + options.http_addr, + options.mountpoint.display() + ); + + while !shutdown.load(Ordering::SeqCst) { + if let Some(child) = php.as_mut() { + if let Some(status) = child.try_wait()? 
{ + shutdown.store(true, Ordering::SeqCst); + return Err(anyhow!("php server exited with status {}", status)); + } + } + thread::sleep(Duration::from_millis(250)); + } + + if let Some(mut child) = php { + let _ = child.kill(); + let _ = child.wait(); + } + let _ = unmount(&options.mountpoint); + + match control_thread.join() { + Ok(result) => result?, + Err(_) => return Err(anyhow!("control thread panicked")), + } + + match mount_thread.join() { + Ok(result) => { + if let Err(err) = result { + eprintln!("wp-cow mount stopped: {err:#}"); + } + } + Err(_) => return Err(anyhow!("mount thread panicked")), + } + + Ok(()) +} + +pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> Result<()> { + fusefs::mount_foreground(manifest, paths, mountpoint) +} + +fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { + Command::new("php") + .arg("-S") + .arg(http_addr) + .arg("-t") + .arg(mountpoint) + .arg(paths.generated.join("router.php")) + .stdin(Stdio::null()) + .spawn() + .context("start php built-in server") +} + +fn wait_for_mount(mountpoint: &Path) { + for _ in 0..40 { + if mountpoint.join("wp-config.php").exists() { + return; + } + thread::sleep(Duration::from_millis(100)); + } +} + +fn control_addr_from_url(url: &str) -> Result { + let parsed = url::Url::parse(url)?; + let host = parsed + .host_str() + .ok_or_else(|| anyhow!("control URL missing host"))?; + let port = parsed + .port() + .ok_or_else(|| anyhow!("control URL missing port"))?; + Ok(format!("{}:{}", host, port)) +} + +fn install_signal_handler(shutdown: Arc) -> Result<()> { + ctrlc::set_handler(move || { + shutdown.store(true, Ordering::SeqCst); + }) + .context("install Ctrl-C handler") +} + +fn unmount(mountpoint: &Path) -> Result<()> { + let status = Command::new("fusermount3") + .arg("-u") + .arg(mountpoint) + .status() + .or_else(|_| { + Command::new("fusermount") + .arg("-u") + .arg(mountpoint) + .status() + }) + .context("run fusermount")?; + 
if !status.success() { + return Err(anyhow!("fusermount failed with status {}", status)); + } + Ok(()) +} diff --git a/experiments/remote-wp-cow/src/sql.rs b/experiments/remote-wp-cow/src/sql.rs new file mode 100644 index 00000000..0fc6e867 --- /dev/null +++ b/experiments/remote-wp-cow/src/sql.rs @@ -0,0 +1,173 @@ +use std::collections::BTreeSet; + +pub fn is_write_sql(sql: &str) -> bool { + matches!( + first_keyword(sql).as_deref(), + Some("INSERT") + | Some("UPDATE") + | Some("DELETE") + | Some("REPLACE") + | Some("ALTER") + | Some("CREATE") + | Some("DROP") + | Some("TRUNCATE") + | Some("RENAME") + | Some("LOAD") + | Some("LOCK") + | Some("UNLOCK") + | Some("GRANT") + | Some("REVOKE") + | Some("OPTIMIZE") + | Some("ANALYZE") + | Some("REPAIR") + ) +} + +pub fn is_safe_read_sql(sql: &str) -> bool { + matches!( + first_keyword(sql).as_deref(), + Some("SELECT") | Some("SHOW") | Some("DESCRIBE") | Some("DESC") | Some("EXPLAIN") + ) +} + +#[allow(dead_code)] +pub fn extract_tables(sql: &str) -> Vec { + let mut tables = BTreeSet::new(); + let tokens = tokenize(sql); + let table_markers = ["FROM", "JOIN", "UPDATE", "INTO", "TABLE"]; + let mut i = 0; + while i < tokens.len() { + if table_markers.contains(&tokens[i].as_str()) { + if let Some(next) = tokens.get(i + 1) { + if !is_keyword(next) { + tables.insert(next.trim_matches('`').to_string()); + } + } + } + i += 1; + } + tables.into_iter().collect() +} + +pub fn expand_wordpress_groups(table_prefix: &str, tables: &[String]) -> Vec { + let content_group = [ + "posts", + "postmeta", + "terms", + "term_taxonomy", + "term_relationships", + ]; + let mut out: BTreeSet = tables.iter().cloned().collect(); + + let touches_content_group = tables.iter().any(|table| { + content_group + .iter() + .any(|suffix| table == &format!("{}{}", table_prefix, suffix)) + }); + + if touches_content_group { + for suffix in content_group { + out.insert(format!("{}{}", table_prefix, suffix)); + } + } + + out.into_iter().collect() +} + +fn 
first_keyword(sql: &str) -> Option { + let stripped = strip_leading_comments(sql); + stripped + .split(|ch: char| ch.is_whitespace() || ch == '(') + .find(|part| !part.is_empty()) + .map(|part| part.trim_matches('`').to_ascii_uppercase()) +} + +fn strip_leading_comments(mut sql: &str) -> &str { + loop { + let trimmed = sql.trim_start(); + if let Some(rest) = trimmed.strip_prefix("--") { + if let Some(pos) = rest.find('\n') { + sql = &rest[pos + 1..]; + continue; + } + return ""; + } + if let Some(rest) = trimmed.strip_prefix('#') { + if let Some(pos) = rest.find('\n') { + sql = &rest[pos + 1..]; + continue; + } + return ""; + } + if let Some(rest) = trimmed.strip_prefix("/*") { + if let Some(pos) = rest.find("*/") { + sql = &rest[pos + 2..]; + continue; + } + return ""; + } + return trimmed; + } +} + +#[allow(dead_code)] +fn tokenize(sql: &str) -> Vec { + let mut tokens = Vec::new(); + let mut current = String::new(); + let mut quote = None; + + for ch in sql.chars() { + if let Some(q) = quote { + if ch == q { + quote = None; + } + continue; + } + + if ch == '\'' || ch == '"' { + quote = Some(ch); + continue; + } + + if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' || ch == '`' { + current.push(ch); + } else if !current.is_empty() { + tokens.push(current.trim_matches('`').to_ascii_uppercase()); + current.clear(); + } + } + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_ascii_uppercase()); + } + tokens +} + +#[allow(dead_code)] +fn is_keyword(token: &str) -> bool { + matches!( + token, + "SELECT" | "WHERE" | "SET" | "ON" | "USING" | "VALUES" | "INNER" | "LEFT" | "RIGHT" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn classifies_sql() { + assert!(is_safe_read_sql(" /* ok */ SELECT * FROM wp_posts")); + assert!(is_write_sql( + "UPDATE wp_posts SET post_title = 'x' WHERE ID = 1" + )); + assert!(is_write_sql("LOAD DATA INFILE 'x' INTO TABLE wp_posts")); + } + + #[test] + fn expands_wordpress_content_group() { + let tables 
= vec!["wp_posts".to_string()]; + let expanded = expand_wordpress_groups("wp_", &tables); + assert!(expanded.contains(&"wp_postmeta".to_string())); + assert!(expanded.contains(&"wp_term_relationships".to_string())); + } +} From 20a13233fc8c38391cb1ec50e197497ed82ebb3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 1 May 2026 20:40:29 +0200 Subject: [PATCH 02/39] Add Docker lab for remote COW experiment --- experiments/remote-wp-cow/.dockerignore | 3 + experiments/remote-wp-cow/.env.example | 12 +++ experiments/remote-wp-cow/.gitignore | 1 + experiments/remote-wp-cow/README.md | 89 +++++++++++++++++++ experiments/remote-wp-cow/compose.yaml | 41 +++++++++ experiments/remote-wp-cow/docker/Dockerfile | 40 +++++++++ .../remote-wp-cow/docker/wp-cow-lab-check | 38 ++++++++ .../remote-wp-cow/docker/wp-cow-lab-clone | 45 ++++++++++ .../docker/wp-cow-lab-entrypoint | 42 +++++++++ .../remote-wp-cow/docker/wp-cow-lab-mount | 10 +++ .../remote-wp-cow/docker/wp-cow-lab-run | 10 +++ 11 files changed, 331 insertions(+) create mode 100644 experiments/remote-wp-cow/.dockerignore create mode 100644 experiments/remote-wp-cow/.env.example create mode 100644 experiments/remote-wp-cow/compose.yaml create mode 100644 experiments/remote-wp-cow/docker/Dockerfile create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-check create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-clone create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-mount create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-run diff --git a/experiments/remote-wp-cow/.dockerignore b/experiments/remote-wp-cow/.dockerignore new file mode 100644 index 00000000..eed9c00b --- /dev/null +++ b/experiments/remote-wp-cow/.dockerignore @@ -0,0 +1,3 @@ +/target/ +/.adversarial-loop/ +/.git/ diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example new file mode 
100644 index 00000000..867499e5 --- /dev/null +++ b/experiments/remote-wp-cow/.env.example @@ -0,0 +1,12 @@ +WPCOW_NAME=example +WPCOW_SSH=mysite +WPCOW_PATH=/home/user/public_html +WPCOW_REMOTE_URL=https://example.com +WPCOW_LOCAL_URL=http://localhost:8080 +WPCOW_HTTP_PORT=8080 + +# Set this to 1 for a filesystem-only smoke test that does not export DB schema. +WPCOW_SKIP_SCHEMA=0 + +# Set this to 1 only when you want to use defaults instead of probing WordPress. +WPCOW_NO_PROBE=0 diff --git a/experiments/remote-wp-cow/.gitignore b/experiments/remote-wp-cow/.gitignore index b83d2226..f346ebb6 100644 --- a/experiments/remote-wp-cow/.gitignore +++ b/experiments/remote-wp-cow/.gitignore @@ -1 +1,2 @@ /target/ +/.env diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 901c5d7f..fd62dd14 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -24,6 +24,95 @@ model, but they should not be read as a proposed final integration shape. cargo build ``` +## Docker lab on macOS + +Use this when you are on a Mac and want a Linux shell with FUSE, PHP, SSH, and +local MariaDB available. The container is intentionally privileged so FUSE can +mount inside Docker Desktop's Linux VM. + +From this directory: + +```bash +cp .env.example .env +$EDITOR .env +docker compose build +docker compose up -d +docker compose exec wp-cow-lab bash +``` + +Inside the container, check the lab: + +```bash +wp-cow-lab-check +``` + +If your SSH command has flags, put them in `~/.ssh/config` on the Mac before +starting the container. For example: + +```sshconfig +Host mysite + HostName example.com + User user + Port 2222 + IdentityFile ~/.ssh/id_ed25519 +``` + +Docker Compose mounts your Mac `~/.ssh` read-only and forwards the Docker +Desktop SSH agent socket at `/run/host-services/ssh-auth.sock`. 
+ +Set the real site values in `.env`, or export them inside the container: + +```bash +export WPCOW_NAME=example +export WPCOW_SSH=mysite +export WPCOW_PATH=/home/user/public_html +export WPCOW_REMOTE_URL=https://example.com +export WPCOW_LOCAL_URL=http://localhost:8080 +``` + +For a full local WordPress runtime, clone schema, initialize local MariaDB, and +run the local PHP server: + +```bash +wp-cow-lab-clone +wp-cow-lab-run +``` + +Open this on the Mac: + +```text +http://localhost:8080/ +``` + +For a filesystem-only smoke test that does not touch the remote DB, skip schema +export and mount the remote tree lazily: + +```bash +export WPCOW_SKIP_SCHEMA=1 +wp-cow-lab-clone +wp-cow-lab-mount +``` + +Then open another shell: + +```bash +docker compose exec wp-cow-lab bash +ls -la /mnt/wp-cow/example +cat /mnt/wp-cow/example/wp-config.php +``` + +Stop the lab: + +```bash +docker compose down +``` + +Remove persisted clone state and local MariaDB data: + +```bash +docker compose down -v +``` + ## Typical flow ```bash diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml new file mode 100644 index 00000000..fbad9a15 --- /dev/null +++ b/experiments/remote-wp-cow/compose.yaml @@ -0,0 +1,41 @@ +services: + wp-cow-lab: + build: + context: . 
+ dockerfile: docker/Dockerfile + container_name: wp-cow-lab + privileged: true + cap_add: + - SYS_ADMIN + devices: + - /dev/fuse:/dev/fuse + security_opt: + - apparmor:unconfined + ports: + - "${WPCOW_HTTP_PORT:-8080}:8080" + environment: + WPCOW_HOME: /root/.wp-cow + WPCOW_NAME: "${WPCOW_NAME:-example}" + WPCOW_SSH: "${WPCOW_SSH:-}" + WPCOW_PATH: "${WPCOW_PATH:-}" + WPCOW_REMOTE_URL: "${WPCOW_REMOTE_URL:-}" + WPCOW_LOCAL_URL: "${WPCOW_LOCAL_URL:-http://localhost:8080}" + WPCOW_SKIP_SCHEMA: "${WPCOW_SKIP_SCHEMA:-0}" + WPCOW_NO_PROBE: "${WPCOW_NO_PROBE:-0}" + WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" + WPCOW_HTTP: 0.0.0.0:8080 + SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock + volumes: + - wp-cow-state:/root/.wp-cow + - wp-cow-mounts:/mnt/wp-cow + - type: bind + source: ${HOME}/.ssh + target: /root/.ssh + read_only: true + - type: bind + source: /run/host-services/ssh-auth.sock + target: /run/host-services/ssh-auth.sock + +volumes: + wp-cow-state: + wp-cow-mounts: diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile new file mode 100644 index 00000000..413a895d --- /dev/null +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -0,0 +1,40 @@ +FROM rust:1-bookworm + +ENV DEBIAN_FRONTEND=noninteractive +ENV WPCOW_HOME=/root/.wp-cow + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash \ + ca-certificates \ + curl \ + fuse3 \ + less \ + libfuse3-dev \ + mariadb-client \ + mariadb-server \ + openssh-client \ + php-cli \ + php-mysqli \ + pkg-config \ + rsync \ + tini \ + vim-tiny \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace +COPY Cargo.toml Cargo.lock ./ +COPY src ./src +RUN cargo build --release \ + && cp target/release/wp-cow /usr/local/bin/wp-cow \ + && cargo clean + +COPY docker/wp-cow-lab-entrypoint /usr/local/bin/wp-cow-lab-entrypoint +COPY docker/wp-cow-lab-check /usr/local/bin/wp-cow-lab-check +COPY docker/wp-cow-lab-clone /usr/local/bin/wp-cow-lab-clone +COPY 
docker/wp-cow-lab-mount /usr/local/bin/wp-cow-lab-mount +COPY docker/wp-cow-lab-run /usr/local/bin/wp-cow-lab-run +RUN chmod +x /usr/local/bin/wp-cow-lab-* + +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] +CMD ["sleep", "infinity"] diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-check b/experiments/remote-wp-cow/docker/wp-cow-lab-check new file mode 100755 index 00000000..398ce762 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-check @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "wp-cow:" +wp-cow --help | sed -n '1,8p' + +echo +echo "tools:" +ssh -V 2>&1 +php -v | sed -n '1p' +mysql --version +mysqldump --version + +echo +echo "fuse:" +if [ ! -e /dev/fuse ]; then + echo "missing /dev/fuse; start this lab with docker compose so the FUSE device is passed through" >&2 + exit 1 +fi +ls -l /dev/fuse + +echo +echo "local mysql:" +mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping + +echo +echo "ssh:" +if [ -S "${SSH_AUTH_SOCK:-}" ]; then + echo "ssh-agent socket: $SSH_AUTH_SOCK" + ssh-add -L >/dev/null 2>&1 && echo "ssh-agent has identities" || echo "ssh-agent is available but has no listed identities" +else + echo "no ssh-agent socket; ~/.ssh is still mounted read-only" +fi + +if [ -n "${WPCOW_SSH:-}" ]; then + ssh -G "$WPCOW_SSH" >/dev/null + echo "ssh config resolves: $WPCOW_SSH" +fi diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-clone b/experiments/remote-wp-cow/docker/wp-cow-lab-clone new file mode 100755 index 00000000..d7e47234 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-clone @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail + +require_env() { + local name="$1" + if [ -z "${!name:-}" ]; then + echo "missing required environment variable: $name" >&2 + exit 2 + fi +} + +require_env WPCOW_SSH +require_env WPCOW_PATH +require_env WPCOW_REMOTE_URL +require_env WPCOW_LOCAL_URL + +name="${WPCOW_NAME:-example}" +args=( + clone + --force + 
--name "$name" + --ssh "$WPCOW_SSH" + --path "$WPCOW_PATH" + --remote-url "$WPCOW_REMOTE_URL" + --local-url "$WPCOW_LOCAL_URL" +) + +if [ "${WPCOW_NO_PROBE:-0}" = "1" ]; then + args+=(--no-probe) +fi + +if [ "${WPCOW_SKIP_SCHEMA:-0}" = "1" ]; then + args+=(--skip-schema) +fi + +wp-cow "${args[@]}" + +if [ "${WPCOW_SKIP_SCHEMA:-0}" != "1" ] && [ "${WPCOW_NO_PROBE:-0}" != "1" ]; then + wp-cow init-db "$name" +fi + +echo +echo "clone ready: $name" +echo "mount files with: wp-cow-lab-mount" +echo "run local PHP with: wp-cow-lab-run" diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint b/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint new file mode 100755 index 00000000..d00bc884 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +set -euo pipefail + +export WPCOW_HOME="${WPCOW_HOME:-/root/.wp-cow}" + +mkdir -p "$WPCOW_HOME" /mnt/wp-cow /run/mysqld /var/log +chown -R mysql:mysql /run/mysqld /var/lib/mysql + +if [ ! -d /var/lib/mysql/mysql ]; then + mariadb-install-db \ + --user=mysql \ + --datadir=/var/lib/mysql \ + --auth-root-authentication-method=normal \ + >/var/log/wp-cow-mariadb-install.log 2>&1 +fi + +if ! mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + mariadbd \ + --user=mysql \ + --datadir=/var/lib/mysql \ + --socket=/run/mysqld/mysqld.sock \ + --bind-address=127.0.0.1 \ + --port=33071 \ + --skip-networking=0 \ + --skip-name-resolve \ + --skip-grant-tables \ + >/var/log/wp-cow-mariadb.log 2>&1 & + + for _ in $(seq 1 80); do + if mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + break + fi + sleep 0.25 + done +fi + +if ! mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + echo "MariaDB did not start. 
See /var/log/wp-cow-mariadb.log" >&2 + exit 1 +fi + +exec "$@" diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-mount b/experiments/remote-wp-cow/docker/wp-cow-lab-mount new file mode 100755 index 00000000..63425142 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-mount @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail + +name="${WPCOW_NAME:-example}" +mountpoint="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$name}" + +mkdir -p "$mountpoint" +echo "mounting $name at $mountpoint" +echo "this command stays in the foreground; open another shell with: docker compose exec wp-cow-lab bash" +exec wp-cow mount "$name" --mountpoint "$mountpoint" diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-run b/experiments/remote-wp-cow/docker/wp-cow-lab-run new file mode 100755 index 00000000..098194ef --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-run @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail + +name="${WPCOW_NAME:-example}" +mountpoint="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$name}" +http="${WPCOW_HTTP:-0.0.0.0:8080}" + +mkdir -p "$mountpoint" +echo "running $name at http://localhost:${http##*:}/ from $mountpoint" +exec wp-cow run "$name" --mountpoint "$mountpoint" --http "$http" From 9fbbdab6f89de5e913be44cbaa32f451bb7cedf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 1 May 2026 21:46:25 +0200 Subject: [PATCH 03/39] Handle Mac SSH config in Docker lab --- experiments/remote-wp-cow/README.md | 16 +- experiments/remote-wp-cow/compose.yaml | 2 +- .../remote-wp-cow/docker/wp-cow-lab-check | 7 +- .../remote-wp-cow/docker/wp-cow-lab-clone | 3 +- .../docker/wp-cow-lab-entrypoint | 12 ++ .../docker/wp-cow-lab-ssh-target | 140 ++++++++++++++++++ 6 files changed, 172 insertions(+), 8 deletions(-) create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index fd62dd14..3236206d 100644 --- 
a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -57,10 +57,14 @@ Host mysite IdentityFile ~/.ssh/id_ed25519 ``` -Docker Compose mounts your Mac `~/.ssh` read-only and forwards the Docker -Desktop SSH agent socket at `/run/host-services/ssh-auth.sock`. +Docker Compose mounts your Mac `~/.ssh` read-only at `/host-ssh`, copies it +into the container, and removes Apple-only OpenSSH options such as +`UseKeychain`. It also forwards the Docker Desktop SSH agent socket at +`/run/host-services/ssh-auth.sock`. -Set the real site values in `.env`, or export them inside the container: +Set the real site values in `.env`, or export them inside the container. +`WPCOW_SSH` can be either a host alias from `~/.ssh/config` or a simple SSH +command copied from a host dashboard: ```bash export WPCOW_NAME=example @@ -70,6 +74,12 @@ export WPCOW_REMOTE_URL=https://example.com export WPCOW_LOCAL_URL=http://localhost:8080 ``` +For example, this is accepted: + +```bash +export WPCOW_SSH='ssh -p18765 -i ~/.ssh/id_siteground user@example.com' +``` + For a full local WordPress runtime, clone schema, initialize local MariaDB, and run the local PHP server: diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index fbad9a15..8f172294 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -30,7 +30,7 @@ services: - wp-cow-mounts:/mnt/wp-cow - type: bind source: ${HOME}/.ssh - target: /root/.ssh + target: /host-ssh read_only: true - type: bind source: /run/host-services/ssh-auth.sock diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-check b/experiments/remote-wp-cow/docker/wp-cow-lab-check index 398ce762..dc88744c 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-check +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-check @@ -29,10 +29,11 @@ if [ -S "${SSH_AUTH_SOCK:-}" ]; then echo "ssh-agent socket: $SSH_AUTH_SOCK" ssh-add -L >/dev/null 2>&1 && echo "ssh-agent 
has identities" || echo "ssh-agent is available but has no listed identities" else - echo "no ssh-agent socket; ~/.ssh is still mounted read-only" + echo "no ssh-agent socket; host ~/.ssh is still copied into the container" fi if [ -n "${WPCOW_SSH:-}" ]; then - ssh -G "$WPCOW_SSH" >/dev/null - echo "ssh config resolves: $WPCOW_SSH" + target="$(wp-cow-lab-ssh-target)" + ssh -G "$target" >/dev/null + echo "ssh config resolves: $target" fi diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-clone b/experiments/remote-wp-cow/docker/wp-cow-lab-clone index d7e47234..9c2c1a89 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-clone +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-clone @@ -15,11 +15,12 @@ require_env WPCOW_REMOTE_URL require_env WPCOW_LOCAL_URL name="${WPCOW_NAME:-example}" +ssh_target="$(wp-cow-lab-ssh-target)" args=( clone --force --name "$name" - --ssh "$WPCOW_SSH" + --ssh "$ssh_target" --path "$WPCOW_PATH" --remote-url "$WPCOW_REMOTE_URL" --local-url "$WPCOW_LOCAL_URL" diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint b/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint index d00bc884..34b4c6c5 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-entrypoint @@ -6,6 +6,18 @@ export WPCOW_HOME="${WPCOW_HOME:-/root/.wp-cow}" mkdir -p "$WPCOW_HOME" /mnt/wp-cow /run/mysqld /var/log chown -R mysql:mysql /run/mysqld /var/lib/mysql +mkdir -p /root/.ssh +chmod 700 /root/.ssh +if [ -d /host-ssh ]; then + cp -a /host-ssh/. /root/.ssh/ 2>/dev/null || true +fi +if [ -f /root/.ssh/config ]; then + # Linux OpenSSH rejects Apple-specific options such as UseKeychain. + sed -i.bak -E '/^[[:space:]]*UseKeychain([[:space:]]|$)/Id' /root/.ssh/config +fi +find /root/.ssh -type d -exec chmod 700 {} + 2>/dev/null || true +find /root/.ssh -type f -exec chmod 600 {} + 2>/dev/null || true + if [ ! 
-d /var/lib/mysql/mysql ]; then mariadb-install-db \ --user=mysql \ diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target b/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target new file mode 100755 index 00000000..f8d9e74f --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target @@ -0,0 +1,140 @@ +#!/usr/bin/env bash +set -euo pipefail + +spec="${WPCOW_SSH:-}" +if [ -z "$spec" ]; then + echo "missing required environment variable: WPCOW_SSH" >&2 + exit 2 +fi + +if [[ "$spec" != ssh\ * ]]; then + echo "$spec" + exit 0 +fi + +alias_name="${WPCOW_SSH_ALIAS:-wp-cow-target}" +config="${HOME}/.ssh/config" +mkdir -p "${HOME}/.ssh" +touch "$config" +chmod 600 "$config" + +# Parse the common OpenSSH command shape people paste from hosting dashboards, +# for example: ssh -p18765 -i ~/.ssh/id_siteground user@example.com +# This eval is intentionally local to the developer-controlled lab container. +eval "set -- $spec" +if [ "${1:-}" = "ssh" ]; then + shift +fi + +host="" +user="" +port="" +identity_file="" +declare -a options=() + +while [ "$#" -gt 0 ]; do + case "$1" in + -p) + port="${2:-}" + shift 2 + ;; + -p*) + port="${1#-p}" + shift + ;; + -i) + identity_file="${2:-}" + shift 2 + ;; + -i*) + identity_file="${1#-i}" + shift + ;; + -l) + user="${2:-}" + shift 2 + ;; + -l*) + user="${1#-l}" + shift + ;; + -o) + options+=("${2:-}") + shift 2 + ;; + -o*) + options+=("${1#-o}") + shift + ;; + --) + shift + break + ;; + -*) + echo "unsupported SSH option in WPCOW_SSH: $1" >&2 + echo "put complex SSH settings in ~/.ssh/config and set WPCOW_SSH to the Host alias" >&2 + exit 2 + ;; + *) + host="$1" + shift + ;; + esac +done + +if [ -z "$host" ] && [ "$#" -gt 0 ]; then + host="$1" +fi + +if [ -z "$host" ]; then + echo "could not find user@host in WPCOW_SSH: $spec" >&2 + exit 2 +fi + +if [[ "$host" == *@* ]]; then + if [ -z "$user" ]; then + user="${host%@*}" + fi + host="${host#*@}" +fi + +tmp="${config}.tmp" +awk -v alias="$alias_name" ' + BEGIN 
{ skip = 0 } + /^[[:space:]]*Host[[:space:]]+/ { + skip = 0 + for (i = 2; i <= NF; i++) { + if ($i == alias) { + skip = 1 + } + } + } + skip == 0 { print } +' "$config" > "$tmp" +mv "$tmp" "$config" + +{ + echo + echo "Host $alias_name" + echo " HostName $host" + if [ -n "$user" ]; then + echo " User $user" + fi + if [ -n "$port" ]; then + echo " Port $port" + fi + if [ -n "$identity_file" ]; then + echo " IdentityFile $identity_file" + echo " IdentitiesOnly yes" + fi + for option in "${options[@]}"; do + key="${option%%=*}" + value="${option#*=}" + if [ "$key" != "$value" ]; then + echo " $key $value" + fi + done +} >> "$config" + +chmod 600 "$config" +echo "$alias_name" From f0abd60aba9e1dbacfe0377b70ee64d1d67d67eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 1 May 2026 21:50:38 +0200 Subject: [PATCH 04/39] Copy SSH target helper into Docker lab image --- experiments/remote-wp-cow/docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index 413a895d..9215c0e9 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -34,6 +34,7 @@ COPY docker/wp-cow-lab-check /usr/local/bin/wp-cow-lab-check COPY docker/wp-cow-lab-clone /usr/local/bin/wp-cow-lab-clone COPY docker/wp-cow-lab-mount /usr/local/bin/wp-cow-lab-mount COPY docker/wp-cow-lab-run /usr/local/bin/wp-cow-lab-run +COPY docker/wp-cow-lab-ssh-target /usr/local/bin/wp-cow-lab-ssh-target RUN chmod +x /usr/local/bin/wp-cow-lab-* ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] From 55b82987039c118a48b6ad3e73804fb1d33f20e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 1 May 2026 21:59:24 +0200 Subject: [PATCH 05/39] Support SSH command strings in Docker lab --- .../docker/wp-cow-lab-ssh-target | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git 
a/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target b/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target index f8d9e74f..24972794 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target @@ -7,7 +7,7 @@ if [ -z "$spec" ]; then exit 2 fi -if [[ "$spec" != ssh\ * ]]; then +if [[ "$spec" != *[[:space:]]* ]]; then echo "$spec" exit 0 fi @@ -20,6 +20,7 @@ chmod 600 "$config" # Parse the common OpenSSH command shape people paste from hosting dashboards, # for example: ssh -p18765 -i ~/.ssh/id_siteground user@example.com +# It also accepts ssh option strings such as: -F /tmp/wp-cow.conf mysite # This eval is intentionally local to the developer-controlled lab container. eval "set -- $spec" if [ "${1:-}" = "ssh" ]; then @@ -30,6 +31,7 @@ host="" user="" port="" identity_file="" +config_file="" declare -a options=() while [ "$#" -gt 0 ]; do @@ -50,6 +52,14 @@ while [ "$#" -gt 0 ]; do identity_file="${1#-i}" shift ;; + -F) + config_file="${2:-}" + shift 2 + ;; + -F*) + config_file="${1#-F}" + shift + ;; -l) user="${2:-}" shift 2 @@ -91,6 +101,24 @@ if [ -z "$host" ]; then exit 2 fi +if [ -n "$config_file" ]; then + if [ ! -f "$config_file" ]; then + echo "SSH config from WPCOW_SSH does not exist: $config_file" >&2 + exit 2 + fi + if ! 
grep -Eq "^[[:space:]]*Include[[:space:]]+$config_file([[:space:]]|\$)" "$config"; then + tmp="${config}.tmp" + { + echo "Include $config_file" + cat "$config" + } > "$tmp" + mv "$tmp" "$config" + fi + chmod 600 "$config" + echo "$host" + exit 0 +fi + if [[ "$host" == *@* ]]; then if [ -z "$user" ]; then user="${host%@*}" From 9ea82696b23f1231156318af41d5cb5f1a3cfd14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 1 May 2026 22:04:09 +0200 Subject: [PATCH 06/39] Add Docker DNS diagnostics for remote COW lab --- experiments/remote-wp-cow/.env.example | 2 + experiments/remote-wp-cow/README.md | 16 +++++ experiments/remote-wp-cow/compose.yaml | 5 ++ experiments/remote-wp-cow/docker/Dockerfile | 1 + .../remote-wp-cow/docker/wp-cow-lab-check | 1 + .../remote-wp-cow/docker/wp-cow-lab-dns | 59 +++++++++++++++++++ 6 files changed, 84 insertions(+) create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-dns diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 867499e5..86f79076 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -4,6 +4,8 @@ WPCOW_PATH=/home/user/public_html WPCOW_REMOTE_URL=https://example.com WPCOW_LOCAL_URL=http://localhost:8080 WPCOW_HTTP_PORT=8080 +WPCOW_DNS1=1.1.1.1 +WPCOW_DNS2=8.8.8.8 # Set this to 1 for a filesystem-only smoke test that does not export DB schema. 
WPCOW_SKIP_SCHEMA=0 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 3236206d..4c2e851b 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -46,6 +46,22 @@ Inside the container, check the lab: wp-cow-lab-check ``` +If DNS fails inside Docker Desktop with an error such as +`Temporary failure in name resolution`, check and temporarily repair the +container resolver: + +```bash +wp-cow-lab-dns +wp-cow-lab-dns --fix +``` + +If `--fix` works, keep these values in `.env` and recreate the container: + +```bash +WPCOW_DNS1=1.1.1.1 +WPCOW_DNS2=8.8.8.8 +``` + If your SSH command has flags, put them in `~/.ssh/config` on the Mac before starting the container. For example: diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 8f172294..f2d1a6c6 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -11,6 +11,9 @@ services: - /dev/fuse:/dev/fuse security_opt: - apparmor:unconfined + dns: + - "${WPCOW_DNS1:-1.1.1.1}" + - "${WPCOW_DNS2:-8.8.8.8}" ports: - "${WPCOW_HTTP_PORT:-8080}:8080" environment: @@ -22,6 +25,8 @@ services: WPCOW_LOCAL_URL: "${WPCOW_LOCAL_URL:-http://localhost:8080}" WPCOW_SKIP_SCHEMA: "${WPCOW_SKIP_SCHEMA:-0}" WPCOW_NO_PROBE: "${WPCOW_NO_PROBE:-0}" + WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" + WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index 9215c0e9..f02a17c5 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -35,6 +35,7 @@ COPY docker/wp-cow-lab-clone /usr/local/bin/wp-cow-lab-clone COPY docker/wp-cow-lab-mount /usr/local/bin/wp-cow-lab-mount COPY docker/wp-cow-lab-run /usr/local/bin/wp-cow-lab-run COPY 
docker/wp-cow-lab-ssh-target /usr/local/bin/wp-cow-lab-ssh-target +COPY docker/wp-cow-lab-dns /usr/local/bin/wp-cow-lab-dns RUN chmod +x /usr/local/bin/wp-cow-lab-* ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-check b/experiments/remote-wp-cow/docker/wp-cow-lab-check index dc88744c..a9c9c447 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-check +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-check @@ -36,4 +36,5 @@ if [ -n "${WPCOW_SSH:-}" ]; then target="$(wp-cow-lab-ssh-target)" ssh -G "$target" >/dev/null echo "ssh config resolves: $target" + wp-cow-lab-dns fi diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-dns b/experiments/remote-wp-cow/docker/wp-cow-lab-dns new file mode 100755 index 00000000..2a45e89b --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-dns @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +set -euo pipefail + +fix=0 +if [ "${1:-}" = "--fix" ]; then + fix=1 +fi + +host="${WPCOW_DNS_HOST:-}" +if [ -z "$host" ] && [ -n "${WPCOW_SSH:-}" ]; then + target="$(wp-cow-lab-ssh-target)" + host="$(ssh -G "$target" 2>/dev/null | awk '$1 == "hostname" { print $2; exit }')" +fi + +if [ -z "$host" ] && [ -n "${WPCOW_REMOTE_URL:-}" ]; then + host="$(php -r 'echo parse_url(getenv("WPCOW_REMOTE_URL"), PHP_URL_HOST) ?: "";')" +fi + +if [ -z "$host" ]; then + echo "could not determine host; set WPCOW_DNS_HOST=example.com" >&2 + exit 2 +fi + +echo "host: $host" +echo +echo "/etc/resolv.conf:" +cat /etc/resolv.conf + +echo +echo "resolution:" +if getent hosts "$host"; then + exit 0 +fi + +echo "DNS lookup failed for $host" >&2 + +if [ "$fix" = "1" ]; then + dns1="${WPCOW_DNS1:-1.1.1.1}" + dns2="${WPCOW_DNS2:-8.8.8.8}" + printf 'nameserver %s\nnameserver %s\n' "$dns1" "$dns2" > /etc/resolv.conf + echo + echo "rewrote /etc/resolv.conf:" + cat /etc/resolv.conf + echo + echo "retry:" + getent hosts "$host" + exit 0 +fi + +cat >&2 <<'EOF' + +Try: + wp-cow-lab-dns 
--fix + +If that works, restart the lab after adding DNS values to .env: + WPCOW_DNS1=1.1.1.1 + WPCOW_DNS2=8.8.8.8 +EOF +exit 1 From a9f6e8d00f220d6944487ba8ea1e18830588b7bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 01:30:09 +0200 Subject: [PATCH 07/39] Prepend generated SSH target in Docker lab --- .../docker/wp-cow-lab-ssh-target | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target b/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target index 24972794..8417d9c8 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-ssh-target @@ -127,22 +127,7 @@ if [[ "$host" == *@* ]]; then fi tmp="${config}.tmp" -awk -v alias="$alias_name" ' - BEGIN { skip = 0 } - /^[[:space:]]*Host[[:space:]]+/ { - skip = 0 - for (i = 2; i <= NF; i++) { - if ($i == alias) { - skip = 1 - } - } - } - skip == 0 { print } -' "$config" > "$tmp" -mv "$tmp" "$config" - { - echo echo "Host $alias_name" echo " HostName $host" if [ -n "$user" ]; then @@ -162,7 +147,21 @@ mv "$tmp" "$config" echo " $key $value" fi done -} >> "$config" + echo + awk -v alias="$alias_name" ' + BEGIN { skip = 0 } + /^[[:space:]]*Host[[:space:]]+/ { + skip = 0 + for (i = 2; i <= NF; i++) { + if ($i == alias) { + skip = 1 + } + } + } + skip == 0 { print } + ' "$config" +} > "$tmp" +mv "$tmp" "$config" chmod 600 "$config" echo "$alias_name" From 8ec5f928c34b6021b900e1451ce9acedc25aff83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 01:37:56 +0200 Subject: [PATCH 08/39] Map Docker lab HTTP port dynamically --- experiments/remote-wp-cow/README.md | 15 +++++++++++++++ experiments/remote-wp-cow/compose.yaml | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 4c2e851b..5e07ec51 100644 --- 
a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -40,6 +40,21 @@ docker compose up -d docker compose exec wp-cow-lab bash ``` +The Compose port mapping is created from `WPCOW_HTTP_PORT` when the container is +created. If you want port 9481, set it in `.env` or pass it when starting the +lab: + +```bash +WPCOW_HTTP_PORT=9481 docker compose up -d +``` + +If you change the port after the container already exists, recreate it: + +```bash +docker compose down +WPCOW_HTTP_PORT=9481 docker compose up -d --force-recreate +``` + Inside the container, check the lab: ```bash diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index f2d1a6c6..b9508268 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -15,7 +15,7 @@ services: - "${WPCOW_DNS1:-1.1.1.1}" - "${WPCOW_DNS2:-8.8.8.8}" ports: - - "${WPCOW_HTTP_PORT:-8080}:8080" + - "${WPCOW_HTTP_PORT:-8080}:${WPCOW_HTTP_PORT:-8080}" environment: WPCOW_HOME: /root/.wp-cow WPCOW_NAME: "${WPCOW_NAME:-example}" @@ -28,7 +28,7 @@ services: WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" - WPCOW_HTTP: 0.0.0.0:8080 + WPCOW_HTTP: "0.0.0.0:${WPCOW_HTTP_PORT:-8080}" SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock volumes: - wp-cow-state:/root/.wp-cow From f15d56b5f9096d70f2f08d56e70ad89ec77468ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 01:45:46 +0200 Subject: [PATCH 09/39] Keep Docker lab PHP on container port 8080 --- experiments/remote-wp-cow/README.md | 10 +++++++--- experiments/remote-wp-cow/compose.yaml | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 5e07ec51..d1252ff0 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -40,9 +40,10 @@ docker compose 
up -d docker compose exec wp-cow-lab bash ``` -The Compose port mapping is created from `WPCOW_HTTP_PORT` when the container is -created. If you want port 9481, set it in `.env` or pass it when starting the -lab: +The Compose host port is created from `WPCOW_HTTP_PORT` when the container is +created. The PHP server still listens on port `8080` inside the container. If +you want to open port 9481 on the Mac, set it in `.env` or pass it when +starting the lab: ```bash WPCOW_HTTP_PORT=9481 docker compose up -d @@ -55,6 +56,9 @@ docker compose down WPCOW_HTTP_PORT=9481 docker compose up -d --force-recreate ``` +Inside the container, keep `WPCOW_HTTP=0.0.0.0:8080` and set +`WPCOW_LOCAL_URL=http://localhost:9481`. + Inside the container, check the lab: ```bash diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index b9508268..f2d1a6c6 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -15,7 +15,7 @@ services: - "${WPCOW_DNS1:-1.1.1.1}" - "${WPCOW_DNS2:-8.8.8.8}" ports: - - "${WPCOW_HTTP_PORT:-8080}:${WPCOW_HTTP_PORT:-8080}" + - "${WPCOW_HTTP_PORT:-8080}:8080" environment: WPCOW_HOME: /root/.wp-cow WPCOW_NAME: "${WPCOW_NAME:-example}" @@ -28,7 +28,7 @@ services: WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" - WPCOW_HTTP: "0.0.0.0:${WPCOW_HTTP_PORT:-8080}" + WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock volumes: - wp-cow-state:/root/.wp-cow From dc19a4c701a89daa9e6a09eb48e9fc84e01b97ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 01:52:57 +0200 Subject: [PATCH 10/39] Add DB init and cache helpers for Docker lab --- experiments/remote-wp-cow/.env.example | 1 + experiments/remote-wp-cow/README.md | 22 +++++ experiments/remote-wp-cow/compose.yaml | 1 + experiments/remote-wp-cow/docker/Dockerfile | 2 + 
.../remote-wp-cow/docker/wp-cow-lab-cache | 45 ++++++++++ .../remote-wp-cow/docker/wp-cow-lab-clone | 4 + .../remote-wp-cow/docker/wp-cow-lab-db-init | 18 ++++ .../remote-wp-cow/docker/wp-cow-lab-run | 4 + experiments/remote-wp-cow/src/cli.rs | 14 ++++ experiments/remote-wp-cow/src/config.rs | 25 +++++- experiments/remote-wp-cow/src/fusefs.rs | 82 +++++++++++++++++-- experiments/remote-wp-cow/src/generate.rs | 1 + 12 files changed, 213 insertions(+), 6 deletions(-) create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-cache create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-db-init diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 86f79076..31a11b71 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -6,6 +6,7 @@ WPCOW_LOCAL_URL=http://localhost:8080 WPCOW_HTTP_PORT=8080 WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 +WPCOW_CACHE_MAX_FILE_MB=64 # Set this to 1 for a filesystem-only smoke test that does not export DB schema. 
WPCOW_SKIP_SCHEMA=0 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index d1252ff0..0279e37f 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -119,6 +119,7 @@ For a full local WordPress runtime, clone schema, initialize local MariaDB, and run the local PHP server: ```bash +export WPCOW_SKIP_SCHEMA=0 wp-cow-lab-clone wp-cow-lab-run ``` @@ -138,6 +139,15 @@ wp-cow-lab-clone wp-cow-lab-mount ``` +If you already created a filesystem-only clone and then want to open WordPress +in a browser, initialize the local database schema without deleting the clone or +its file cache: + +```bash +wp-cow-lab-db-init +wp-cow-lab-run +``` + Then open another shell: ```bash @@ -146,6 +156,18 @@ ls -la /mnt/wp-cow/example cat /mnt/wp-cow/example/wp-config.php ``` +Remote file contents are cached separately from local mutations in +`~/.wp-cow/clones/<name>/file-cache`, which is persisted by the Docker +`wp-cow-state` volume. Files up to `WPCOW_CACHE_MAX_FILE_MB` are cached as whole +files on first read; larger files are streamed by range. The Docker lab defaults +that limit to 64 MB. 
Check or clear the cache with: + +```bash +wp-cow-lab-cache status +wp-cow-lab-cache warm-core +wp-cow-lab-cache clear +``` + Stop the lab: ```bash diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index f2d1a6c6..29c980c4 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -27,6 +27,7 @@ services: WPCOW_NO_PROBE: "${WPCOW_NO_PROBE:-0}" WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" + WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index f02a17c5..ba9feec3 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -36,6 +36,8 @@ COPY docker/wp-cow-lab-mount /usr/local/bin/wp-cow-lab-mount COPY docker/wp-cow-lab-run /usr/local/bin/wp-cow-lab-run COPY docker/wp-cow-lab-ssh-target /usr/local/bin/wp-cow-lab-ssh-target COPY docker/wp-cow-lab-dns /usr/local/bin/wp-cow-lab-dns +COPY docker/wp-cow-lab-db-init /usr/local/bin/wp-cow-lab-db-init +COPY docker/wp-cow-lab-cache /usr/local/bin/wp-cow-lab-cache RUN chmod +x /usr/local/bin/wp-cow-lab-* ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-cache b/experiments/remote-wp-cow/docker/wp-cow-lab-cache new file mode 100755 index 00000000..a2f220e2 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-cache @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail + +name="${WPCOW_NAME:-example}" +root="${WPCOW_HOME:-/root/.wp-cow}/clones/$name" +cache="$root/file-cache" +mountpoint="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$name}" +cmd="${1:-status}" + +case "$cmd" in + status) + echo "cache directory: $cache" + if [ -d "$cache" ]; then + du 
-sh "$cache" 2>/dev/null || true + find "$cache" -type f 2>/dev/null | wc -l | awk '{ print "cached files: " $1 }' + else + echo "cache directory does not exist yet" + fi + ;; + clear) + rm -rf "$cache" + mkdir -p "$cache" + echo "cleared $cache" + ;; + warm-core) + for path in \ + index.php \ + wp-blog-header.php \ + wp-load.php \ + wp-settings.php \ + wp-includes/version.php \ + wp-includes/load.php \ + wp-includes/plugin.php + do + if [ -e "$mountpoint/$path" ]; then + dd if="$mountpoint/$path" of=/dev/null bs=1M status=none || true + fi + done + "$0" status + ;; + *) + echo "usage: wp-cow-lab-cache [status|clear|warm-core]" >&2 + exit 2 + ;; +esac diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-clone b/experiments/remote-wp-cow/docker/wp-cow-lab-clone index 9c2c1a89..303307d1 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-clone +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-clone @@ -38,6 +38,10 @@ wp-cow "${args[@]}" if [ "${WPCOW_SKIP_SCHEMA:-0}" != "1" ] && [ "${WPCOW_NO_PROBE:-0}" != "1" ]; then wp-cow init-db "$name" +else + echo + echo "warning: DB schema was not initialized, so wp-cow-lab-run will not boot full WordPress yet." >&2 + echo "run wp-cow-lab-db-init before opening the site in a browser." >&2 fi echo diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-db-init b/experiments/remote-wp-cow/docker/wp-cow-lab-db-init new file mode 100755 index 00000000..a1b777a1 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-db-init @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -euo pipefail + +name="${WPCOW_NAME:-example}" + +if [ ! -f "${WPCOW_HOME:-/root/.wp-cow}/clones/$name/manifest.json" ]; then + echo "clone '$name' does not exist yet; run wp-cow-lab-clone first" >&2 + exit 2 +fi + +echo "exporting remote schema for $name" +wp-cow export-schema "$name" + +echo "initializing local MariaDB schema for $name" +wp-cow init-db "$name" + +echo +echo "database schema is ready. 
Remote read queries will be routed by wp-content/db.php." diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-run b/experiments/remote-wp-cow/docker/wp-cow-lab-run index 098194ef..b516db15 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-run +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-run @@ -6,5 +6,9 @@ mountpoint="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$name}" http="${WPCOW_HTTP:-0.0.0.0:8080}" mkdir -p "$mountpoint" +if [ ! -f "${WPCOW_HOME:-/root/.wp-cow}/clones/$name/db/schema.sql" ]; then + echo "warning: ${WPCOW_HOME:-/root/.wp-cow}/clones/$name/db/schema.sql is missing" >&2 + echo "WordPress will likely show a database connection error. Run wp-cow-lab-db-init first." >&2 +fi echo "running $name at http://localhost:${http##*:}/ from $mountpoint" exec wp-cow run "$name" --mountpoint "$mountpoint" --http "$http" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index 996e92bb..3a8e4d3b 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -26,6 +26,8 @@ enum Command { Clone(CloneArgs), #[command(name = "init-db")] InitDb(NameArgs), + #[command(name = "export-schema")] + ExportSchema(NameArgs), #[command(name = "materialize")] Materialize(MaterializeArgs), #[command(name = "mount")] @@ -109,6 +111,7 @@ pub fn run() -> Result<()> { match cli.command { Command::Clone(args) => clone_site(args), Command::InitDb(args) => init_db(args), + Command::ExportSchema(args) => export_schema(args), Command::Materialize(args) => materialize(args), Command::Mount(args) => mount(args), Command::Run(args) => run_clone(args), @@ -189,6 +192,17 @@ fn init_db(args: NameArgs) -> Result<()> { Ok(()) } +fn export_schema(args: NameArgs) -> Result<()> { + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let paths = clone_paths(&state_dir, &args.name); + let manifest = load_manifest(&paths.manifest)?; + let remote = RemoteClient::new(manifest.clone(), 
Some(paths.run.join("ssh-control.sock"))); + remote.ensure_master()?; + db::export_schema(&remote, &paths)?; + println!("exported remote schema for '{}'", manifest.name); + Ok(()) +} + fn materialize(args: MaterializeArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index 8f25be30..fd23c4b6 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -8,6 +8,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; use url::Url; pub const MANIFEST_VERSION: u32 = 1; +const DEFAULT_CACHE_MAX_FILE_BYTES: u64 = 8 * 1024 * 1024; +const DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS: u64 = 30; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Manifest { @@ -21,7 +23,10 @@ pub struct Manifest { pub probe: Probe, pub local_db: LocalDb, pub control_url: String, + #[serde(default = "default_cache_max_file_bytes")] pub cache_max_file_bytes: u64, + #[serde(default = "default_remote_metadata_cache_ttl_secs")] + pub remote_metadata_cache_ttl_secs: u64, } #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -91,11 +96,29 @@ impl Manifest { port: 33071, }, control_url: "http://127.0.0.1:39070".to_string(), - cache_max_file_bytes: 8 * 1024 * 1024, + cache_max_file_bytes: cache_max_file_bytes_from_env(), + remote_metadata_cache_ttl_secs: DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS, } } } +fn default_cache_max_file_bytes() -> u64 { + DEFAULT_CACHE_MAX_FILE_BYTES +} + +fn default_remote_metadata_cache_ttl_secs() -> u64 { + DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS +} + +fn cache_max_file_bytes_from_env() -> u64 { + std::env::var("WPCOW_CACHE_MAX_FILE_MB") + .ok() + .and_then(|raw| raw.parse::().ok()) + .map(|mb| mb.saturating_mul(1024 * 1024)) + .filter(|bytes| *bytes > 0) + .unwrap_or(DEFAULT_CACHE_MAX_FILE_BYTES) +} + pub fn default_state_dir() -> Result { if let Ok(home) = 
std::env::var("WPCOW_HOME") { return Ok(PathBuf::from(home)); diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 009ef0d3..4782b59b 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -10,7 +10,7 @@ use std::fs::{self, File, OpenOptions}; use std::io; use std::os::unix::fs::{FileExt, MetadataExt, OpenOptionsExt, PermissionsExt}; use std::path::{Path, PathBuf}; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use crate::config::{ClonePaths, Manifest}; use crate::overlay::OverlayStore; @@ -19,6 +19,12 @@ use crate::remote::{RemoteClient, RemoteEntry}; const ROOT_INO: u64 = 1; const TTL: Duration = Duration::from_secs(1); +#[derive(Clone)] +struct Timed { + value: T, + expires_at: Instant, +} + enum Handle { Local(File), Remote(PathBuf), @@ -33,6 +39,9 @@ pub struct CowFs { next_ino: u64, handles: HashMap, next_fh: u64, + remote_stat_cache: HashMap>, + remote_readdir_cache: HashMap>>, + remote_cache_ttl: Duration, uid: u32, gid: u32, } @@ -43,6 +52,7 @@ impl CowFs { let mut path_to_ino = HashMap::new(); ino_to_path.insert(ROOT_INO, PathBuf::new()); path_to_ino.insert(PathBuf::new(), ROOT_INO); + let remote_cache_ttl = Duration::from_secs(manifest.remote_metadata_cache_ttl_secs); Self { manifest, remote, @@ -52,6 +62,9 @@ impl CowFs { next_ino: ROOT_INO + 1, handles: HashMap::new(), next_fh: 1, + remote_stat_cache: HashMap::new(), + remote_readdir_cache: HashMap::new(), + remote_cache_ttl, uid: unsafe { libc::getuid() }, gid: unsafe { libc::getgid() }, } @@ -93,10 +106,64 @@ impl CowFs { return Ok(self.attr_from_metadata(ino, &metadata)); } - let entry = self.remote.stat(rel)?; + let entry = self.remote_stat(rel)?; Ok(self.attr_from_remote(ino, &entry)) } + fn remote_stat(&mut self, rel: &Path) -> io::Result { + if let Some(cached) = self.remote_stat_cache.get(rel) { + if cached.expires_at > Instant::now() { 
+ return Ok(cached.value.clone()); + } + } + + let entry = self.remote.stat(rel)?; + self.remote_stat_cache.insert( + rel.to_path_buf(), + Timed { + value: entry.clone(), + expires_at: Instant::now() + self.remote_cache_ttl, + }, + ); + Ok(entry) + } + + fn remote_readdir(&mut self, rel: &Path) -> io::Result> { + if let Some(cached) = self.remote_readdir_cache.get(rel) { + if cached.expires_at > Instant::now() { + return Ok(cached.value.clone()); + } + } + + let entries = self.remote.readdir(rel)?; + let expires_at = Instant::now() + self.remote_cache_ttl; + for entry in &entries { + self.remote_stat_cache.insert( + rel.join(&entry.name), + Timed { + value: entry.clone(), + expires_at, + }, + ); + } + self.remote_readdir_cache.insert( + rel.to_path_buf(), + Timed { + value: entries.clone(), + expires_at, + }, + ); + Ok(entries) + } + + fn invalidate_remote_cache(&mut self, rel: &Path) { + self.remote_stat_cache.remove(rel); + self.remote_readdir_cache.remove(rel); + if let Some(parent) = rel.parent() { + self.remote_readdir_cache.remove(parent); + } + } + fn attr_from_metadata(&self, ino: u64, metadata: &fs::Metadata) -> FileAttr { let kind = if metadata.file_type().is_dir() { FileType::Directory @@ -232,6 +299,7 @@ impl Filesystem for CowFs { fs::create_dir_all(&upper)?; fs::set_permissions(&upper, fs::Permissions::from_mode(mode & 0o7777))?; self.overlay.clear_whiteout(&rel).map_err(anyhow_to_io)?; + self.invalidate_remote_cache(&rel); let ino = self.ino_for_path(&rel); self.attr_for_path(&rel, ino) })(); @@ -272,7 +340,7 @@ impl Filesystem for CowFs { } if !old_upper.exists() { - let entry = self.remote.stat(&old_rel)?; + let entry = self.remote_stat(&old_rel)?; if entry.kind == "dir" { return Err(io::Error::from_raw_os_error(ENOTSUP)); } @@ -286,6 +354,8 @@ impl Filesystem for CowFs { self.overlay .clear_whiteout(&new_rel) .map_err(anyhow_to_io)?; + self.invalidate_remote_cache(&old_rel); + self.invalidate_remote_cache(&new_rel); let ino = 
self.ino_for_path(&new_rel); self.ino_to_path.insert(ino, new_rel.clone()); self.path_to_ino.insert(new_rel, ino); @@ -437,6 +507,7 @@ impl Filesystem for CowFs { .mode(mode & 0o7777); let file = opts.open(&upper)?; self.overlay.clear_whiteout(&rel).map_err(anyhow_to_io)?; + self.invalidate_remote_cache(&rel); let ino = self.ino_for_path(&rel); let attr = self.attr_for_path(&rel, ino)?; let fh = self.allocate_handle(Handle::Local(file)); @@ -512,9 +583,10 @@ impl CowFs { let result = (|| { let rel = self.child_path(parent, name)?; self.overlay.remove_upper(&rel).map_err(anyhow_to_io)?; - if self.remote.stat(&rel).is_ok() { + if self.remote_stat(&rel).is_ok() { self.overlay.add_whiteout(&rel).map_err(anyhow_to_io)?; } + self.invalidate_remote_cache(&rel); Ok(()) })(); match result { @@ -535,7 +607,7 @@ impl CowFs { entries.push((parent_ino, FileType::Directory, OsString::from(".."))); let mut by_name: BTreeMap = BTreeMap::new(); - match self.remote.readdir(&rel) { + match self.remote_readdir(&rel) { Ok(remote_entries) => { for entry in remote_entries { by_name.insert(entry.name.clone(), entry); diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 4e9b8a40..e18c1bf3 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -291,6 +291,7 @@ mod tests { }, control_url: "http://127.0.0.1:39070".to_string(), cache_max_file_bytes: 1024, + remote_metadata_cache_ttl_secs: 30, } } From 38e49520806af6df0d91a66dde3dd98125efdce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 02:22:34 +0200 Subject: [PATCH 11/39] Add one-command remote COW serve flow --- experiments/remote-wp-cow/.env.example | 3 +- experiments/remote-wp-cow/README.md | 35 ++-- experiments/remote-wp-cow/compose.yaml | 2 +- experiments/remote-wp-cow/docker/Dockerfile | 1 + .../remote-wp-cow/docker/wp-cow-lab-run | 8 +- .../remote-wp-cow/docker/wp-cow-lab-serve | 34 ++++ 
experiments/remote-wp-cow/src/cli.rs | 152 ++++++++++++++++++ experiments/remote-wp-cow/src/db.rs | 39 +++++ 8 files changed, 257 insertions(+), 17 deletions(-) create mode 100755 experiments/remote-wp-cow/docker/wp-cow-lab-serve diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 31a11b71..06f94ad6 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -2,7 +2,8 @@ WPCOW_NAME=example WPCOW_SSH=mysite WPCOW_PATH=/home/user/public_html WPCOW_REMOTE_URL=https://example.com -WPCOW_LOCAL_URL=http://localhost:8080 +# Leave blank to derive http://localhost:$WPCOW_HTTP_PORT in wp-cow-lab-serve. +WPCOW_LOCAL_URL= WPCOW_HTTP_PORT=8080 WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 0279e37f..13be659c 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -56,8 +56,9 @@ docker compose down WPCOW_HTTP_PORT=9481 docker compose up -d --force-recreate ``` -Inside the container, keep `WPCOW_HTTP=0.0.0.0:8080` and set -`WPCOW_LOCAL_URL=http://localhost:9481`. +Inside the container, keep `WPCOW_HTTP=0.0.0.0:8080`. `wp-cow-lab-serve` +derives `WPCOW_LOCAL_URL` from `WPCOW_HTTP_PORT` when the URL is not explicitly +overridden. 
Inside the container, check the lab: @@ -106,7 +107,7 @@ export WPCOW_NAME=example export WPCOW_SSH=mysite export WPCOW_PATH=/home/user/public_html export WPCOW_REMOTE_URL=https://example.com -export WPCOW_LOCAL_URL=http://localhost:8080 +export WPCOW_LOCAL_URL=http://localhost:9481 ``` For example, this is accepted: @@ -115,19 +116,21 @@ For example, this is accepted: export WPCOW_SSH='ssh -p18765 -i ~/.ssh/id_siteground user@example.com' ``` -For a full local WordPress runtime, clone schema, initialize local MariaDB, and -run the local PHP server: +For a full local WordPress runtime, use one command: ```bash -export WPCOW_SKIP_SCHEMA=0 -wp-cow-lab-clone -wp-cow-lab-run +wp-cow-lab-serve ``` +That is the normal path. It creates or reuses the lazy clone, exports schema +only if needed, initializes an empty local MariaDB database if needed, mounts +the lazy filesystem, starts the DB control layer, and starts PHP. It does not +download media or table rows up front. + Open this on the Mac: ```text -http://localhost:8080/ +http://localhost:9481/ ``` For a filesystem-only smoke test that does not touch the remote DB, skip schema @@ -183,16 +186,20 @@ docker compose down -v ## Typical flow ```bash -wp-cow clone \ +wp-cow serve \ + --name example \ --ssh user@example.com \ --path /home/user/public_html \ --remote-url https://example.com \ - --local-url http://example.test - -wp-cow init-db example -wp-cow run example --http 127.0.0.1:8080 + --local-url http://example.test \ + --mountpoint /mnt/wp-cow/example \ + --http 127.0.0.1:8080 ``` +`wp-cow serve` is the one-command runtime. It prepares only the metadata needed +to boot WordPress locally and leaves file contents and database rows lazy until +WordPress actually asks for them. 
+ The clone state is stored under `~/.wp-cow/clones/<name>/`: ```text diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 29c980c4..78225eaa 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -22,7 +22,7 @@ services: WPCOW_SSH: "${WPCOW_SSH:-}" WPCOW_PATH: "${WPCOW_PATH:-}" WPCOW_REMOTE_URL: "${WPCOW_REMOTE_URL:-}" - WPCOW_LOCAL_URL: "${WPCOW_LOCAL_URL:-http://localhost:8080}" + WPCOW_LOCAL_URL: "${WPCOW_LOCAL_URL-}" WPCOW_SKIP_SCHEMA: "${WPCOW_SKIP_SCHEMA:-0}" WPCOW_NO_PROBE: "${WPCOW_NO_PROBE:-0}" WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index ba9feec3..c65eaa63 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -38,6 +38,7 @@ COPY docker/wp-cow-lab-ssh-target /usr/local/bin/wp-cow-lab-ssh-target COPY docker/wp-cow-lab-dns /usr/local/bin/wp-cow-lab-dns COPY docker/wp-cow-lab-db-init /usr/local/bin/wp-cow-lab-db-init COPY docker/wp-cow-lab-cache /usr/local/bin/wp-cow-lab-cache +COPY docker/wp-cow-lab-serve /usr/local/bin/wp-cow-lab-serve RUN chmod +x /usr/local/bin/wp-cow-lab-* ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-run b/experiments/remote-wp-cow/docker/wp-cow-lab-run index b516db15..2135ca02 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-run +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-run @@ -4,11 +4,17 @@ set -euo pipefail name="${WPCOW_NAME:-example}" mountpoint="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$name}" http="${WPCOW_HTTP:-0.0.0.0:8080}" +public_url="${WPCOW_LOCAL_URL:-http://localhost:${WPCOW_HTTP_PORT:-${http##*:}}}" +if [ "$public_url" = "http://localhost:8080" ] && [ "${WPCOW_HTTP_PORT:-8080}" != "8080" ]; then + public_url="http://localhost:$WPCOW_HTTP_PORT" +fi 
+public_url="${public_url%/}" mkdir -p
"$mountpoint" if [ ! -f "${WPCOW_HOME:-/root/.wp-cow}/clones/$name/db/schema.sql" ]; then echo "warning: ${WPCOW_HOME:-/root/.wp-cow}/clones/$name/db/schema.sql is missing" >&2 echo "WordPress will likely show a database connection error. Run wp-cow-lab-db-init first." >&2 fi -echo "running $name at http://localhost:${http##*:}/ from $mountpoint" +echo "running $name at $public_url/ from $mountpoint" +echo "container PHP listener: $http" exec wp-cow run "$name" --mountpoint "$mountpoint" --http "$http" diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-serve b/experiments/remote-wp-cow/docker/wp-cow-lab-serve new file mode 100755 index 00000000..3fa49c63 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-serve @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +require_env() { + local name="$1" + if [ -z "${!name:-}" ]; then + echo "missing required environment variable: $name" >&2 + exit 2 + fi +} + +require_env WPCOW_SSH +require_env WPCOW_PATH +require_env WPCOW_REMOTE_URL + +name="${WPCOW_NAME:-example}" +mountpoint="${WPCOW_MOUNTPOINT:-/mnt/wp-cow/$name}" +http="${WPCOW_HTTP:-0.0.0.0:8080}" +local_url="${WPCOW_LOCAL_URL:-http://localhost:${WPCOW_HTTP_PORT:-8080}}" +if [ "$local_url" = "http://localhost:8080" ] && [ "${WPCOW_HTTP_PORT:-8080}" != "8080" ]; then + local_url="http://localhost:$WPCOW_HTTP_PORT" +fi +ssh_target="$(wp-cow-lab-ssh-target)" + +mkdir -p "$mountpoint" + +exec wp-cow serve \ + --name "$name" \ + --ssh "$ssh_target" \ + --path "$WPCOW_PATH" \ + --remote-url "$WPCOW_REMOTE_URL" \ + --local-url "$local_url" \ + --mountpoint "$mountpoint" \ + --http "$http" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index 3a8e4d3b..46d6e777 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -24,6 +24,8 @@ pub struct Cli { enum Command { #[command(name = "clone")] Clone(CloneArgs), + #[command(name = "serve")] + Serve(ServeArgs), #[command(name 
= "init-db")] InitDb(NameArgs), #[command(name = "export-schema")] @@ -60,6 +62,32 @@ struct CloneArgs { skip_schema: bool, } +#[derive(Debug, Args)] +struct ServeArgs { + #[arg(long = "ssh")] + ssh: String, + #[arg(long = "path")] + path: String, + #[arg(long = "remote-url")] + remote_url: String, + #[arg(long = "local-url")] + local_url: String, + #[arg(long)] + name: Option, + #[arg(long)] + state_dir: Option, + #[arg(long)] + force: bool, + #[arg(long)] + no_probe: bool, + #[arg(long)] + mountpoint: Option, + #[arg(long, default_value = "127.0.0.1:8080")] + http: String, + #[arg(long)] + no_php: bool, +} + #[derive(Debug, Args)] struct NameArgs { name: String, @@ -110,6 +138,7 @@ pub fn run() -> Result<()> { let cli = Cli::parse(); match cli.command { Command::Clone(args) => clone_site(args), + Command::Serve(args) => serve_site(args), Command::InitDb(args) => init_db(args), Command::ExportSchema(args) => export_schema(args), Command::Materialize(args) => materialize(args), @@ -183,6 +212,129 @@ fn clone_site(args: CloneArgs) -> Result<()> { Ok(()) } +fn serve_site(args: ServeArgs) -> Result<()> { + let state_dir = args.state_dir.clone().unwrap_or(default_state_dir()?); + let name = args + .name + .clone() + .unwrap_or_else(|| derive_name(&args.remote_url, &args.local_url)); + let paths = clone_paths(&state_dir, &name); + + let manifest = if !paths.root.exists() || args.force { + if paths.root.exists() { + fs::remove_dir_all(&paths.root)?; + } + ensure_clone_dirs(&paths)?; + + let probe = if args.no_probe { + Probe { + abspath: args.path.clone(), + wp_content_dir: format!("{}/wp-content", args.path.trim_end_matches('/')), + uploads_dir: format!("{}/wp-content/uploads", args.path.trim_end_matches('/')), + table_prefix: "wp_".to_string(), + siteurl: args.remote_url.clone(), + home: args.remote_url.clone(), + ..Probe::default() + } + } else { + probe_wordpress(&args.ssh, &args.path)? 
+ }; + + let manifest = Manifest::new( + name, + args.ssh.clone(), + args.path.clone(), + args.remote_url.clone(), + args.local_url.clone(), + probe, + ); + + write_manifest(&paths.manifest, &manifest)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + db::write_state(&paths, &db::DbState::default())?; + println!("created lazy clone '{}'", manifest.name); + manifest + } else { + let mut manifest = load_manifest(&paths.manifest)?; + let mut changed = false; + let mut should_probe = false; + + if manifest.ssh != args.ssh { + manifest.ssh = args.ssh.clone(); + changed = true; + should_probe = true; + } + if manifest.remote_path != args.path { + manifest.remote_path = args.path.clone(); + changed = true; + should_probe = true; + } + if manifest.remote_url != args.remote_url { + manifest.remote_url = args.remote_url.clone(); + changed = true; + } + if manifest.local_url != args.local_url { + manifest.local_url = args.local_url.clone(); + changed = true; + } + + if !args.no_probe + && (should_probe + || manifest.probe.db_name.is_empty() + || manifest.probe.db_host.is_empty() + || manifest.probe.db_user.is_empty()) + { + manifest.probe = probe_wordpress(&manifest.ssh, &manifest.remote_path)?; + changed = true; + } + + if changed { + write_manifest(&paths.manifest, &manifest)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + println!("updated lazy clone '{}'", manifest.name); + } else { + println!("using existing lazy clone '{}'", manifest.name); + } + + manifest + }; + + if !paths.db.join("schema.sql").exists() { + if args.no_probe { + return Err(anyhow!( + "schema is missing and --no-probe prevents discovering remote DB settings" + )); + } + let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + remote.ensure_master()?; + db::export_schema(&remote, &paths).context("export schema")?; + println!("exported schema only for '{}'", manifest.name); + } + + if db::init_local_db_if_empty(&manifest, &paths)? 
{ + println!( + "initialized empty local database '{}'", + manifest.local_db.name + ); + } else { + println!("using existing local database '{}'", manifest.local_db.name); + } + + println!( + "starting lazy COW server; files and database rows are fetched on demand, not copied up front" + ); + + let mountpoint = args + .mountpoint + .unwrap_or_else(|| PathBuf::from("/mnt/wp-cow").join(&manifest.name)); + let options = RunOptions { + mountpoint, + http_addr: args.http, + skip_php: args.no_php, + }; + run::run_site(manifest, paths, options) +} + fn init_db(args: NameArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index ffbed1f2..561ae49e 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -88,6 +88,41 @@ pub fn init_local_db(manifest: &Manifest, paths: &ClonePaths) -> Result<()> { Ok(()) } +pub fn init_local_db_if_empty(manifest: &Manifest, paths: &ClonePaths) -> Result { + if local_schema_table_count(manifest)? 
> 0 { + return Ok(false); + } + + init_local_db(manifest, paths)?; + Ok(true) +} + +pub fn local_schema_table_count(manifest: &Manifest) -> Result { + let sql_text = format!( + "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '{}';", + mysql_string_literal(&manifest.local_db.name) + ); + let output = local_mysql_command(manifest) + .arg("--batch") + .arg("--skip-column-names") + .arg("--execute") + .arg(sql_text) + .output() + .context("query local mysql schema state")?; + if !output.status.success() { + return Err(anyhow!( + "local mysql schema state query failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + stdout + .trim() + .parse::() + .with_context(|| format!("parse local table count from {}", stdout.trim())) +} + pub fn materialize_tables( remote: &RemoteClient, manifest: &Manifest, @@ -225,6 +260,10 @@ fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { Ok(()) } +fn mysql_string_literal(value: &str) -> String { + value.replace('\\', "\\\\").replace('\'', "\\'") +} + fn validate_table_name(table: &str) -> Result<()> { if table.is_empty() || !table From 84957db22a0280aa828076ab60ea8953fbf7f650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 02:23:19 +0200 Subject: [PATCH 12/39] Pass Docker HTTP port to COW lab --- experiments/remote-wp-cow/compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 78225eaa..448c30b1 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -28,6 +28,7 @@ services: WPCOW_DNS1: "${WPCOW_DNS1:-1.1.1.1}" WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" + WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: 
/run/host-services/ssh-auth.sock From eb7c9f737bdc2e3415c0a88992474593a1621816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 02:38:38 +0200 Subject: [PATCH 13/39] Bound remote COW request timeouts --- experiments/remote-wp-cow/.env.example | 6 ++ experiments/remote-wp-cow/README.md | 6 ++ experiments/remote-wp-cow/compose.yaml | 6 ++ experiments/remote-wp-cow/src/cli.rs | 2 + experiments/remote-wp-cow/src/control.rs | 52 ++++++++++----- experiments/remote-wp-cow/src/generate.rs | 14 +++- experiments/remote-wp-cow/src/remote.rs | 80 +++++++++++++++++++++-- experiments/remote-wp-cow/src/run.rs | 22 +++++++ 8 files changed, 165 insertions(+), 23 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 06f94ad6..82d73306 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -8,6 +8,12 @@ WPCOW_HTTP_PORT=8080 WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 WPCOW_CACHE_MAX_FILE_MB=64 +WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=15 +WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 +WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 +WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 +WPCOW_PHP_MAX_EXECUTION_SECS=30 +WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 # Set this to 1 for a filesystem-only smoke test that does not export DB schema. WPCOW_SKIP_SCHEMA=0 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 13be659c..89ecc979 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -127,6 +127,12 @@ only if needed, initializes an empty local MariaDB database if needed, mounts the lazy filesystem, starts the DB control layer, and starts PHP. It does not download media or table rows up front. +The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH +host, or slow remote file read should fail visibly instead of leaving the +browser spinning forever. 
Adjust the defaults with +`WPCOW_CONTROL_REQUEST_TIMEOUT_SECS`, `WPCOW_REMOTE_COMMAND_TIMEOUT_SECS`, +`WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS`, and `WPCOW_PHP_MAX_EXECUTION_SECS`. + Open this on the Mac: ```text diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 448c30b1..0e383566 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -29,6 +29,12 @@ services: WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" + WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-15}" + WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" + WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" + WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" + WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-30}" + WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index 46d6e777..d7daf552 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -299,6 +299,8 @@ fn serve_site(args: ServeArgs) -> Result<()> { manifest }; + generate::write_wordpress_overrides(&paths, &manifest)?; + if !paths.db.join("schema.sql").exists() { if args.no_probe { return Err(anyhow!( diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs index 63a2be9e..21288172 100644 --- a/experiments/remote-wp-cow/src/control.rs +++ b/experiments/remote-wp-cow/src/control.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Result}; use serde::{Deserialize, Serialize}; use serde_json::json; use 
std::sync::atomic::{AtomicBool, Ordering}; @@ -67,39 +67,59 @@ fn handle_request( let mut body = String::new(); request.as_reader().read_to_string(&mut body)?; - let input: ControlRequest = serde_json::from_str(&body).context("decode control JSON")?; - let response = match request.url() { + let response = match serde_json::from_str::(&body) { + Ok(input) => match control_response(request.url(), input, manifest, paths, remote) { + Ok(response) => response, + Err(err) => json!({ "ok": false, "error": err.to_string() }), + }, + Err(err) => json!({ "ok": false, "error": format!("decode control JSON: {err}") }), + }; + + let status = match response.get("ok").and_then(|v| v.as_bool()) { + Some(true) => StatusCode(200), + Some(false) if response.get("error").and_then(|v| v.as_str()) == Some("not found") => { + StatusCode(404) + } + Some(false) => StatusCode(500), + None => StatusCode(500), + }; + send_json(request, status, &response) +} + +fn control_response( + url: &str, + input: ControlRequest, + manifest: &Manifest, + paths: &ClonePaths, + remote: &RemoteClient, +) -> Result { + match url { "/materialize" => { let tables = input.tables.unwrap_or_default(); let materialized = db::materialize_tables(remote, manifest, paths, &tables)?; - json!({ "ok": true, "backend": "local", "materialized": materialized }) + Ok(json!({ "ok": true, "backend": "local", "materialized": materialized })) } "/route" => { let tables = input.tables.unwrap_or_default(); let decision = db::route_for_tables(remote, manifest, paths, &tables)?; - json!({ "ok": true, "backend": decision.backend, "materialized": decision.materialized }) + Ok( + json!({ "ok": true, "backend": decision.backend, "materialized": decision.materialized }), + ) } "/query" => { let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?; let result = db::remote_readonly_query(remote, &sql)?; - json!({ + Ok(json!({ "ok": result.ok, "error": result.error, "rows": result.rows, "fields": result.fields, "affected": result.affected 
- }) + })) } - _ => json!({ "ok": false, "error": "not found" }), - }; - - let status = if response.get("ok").and_then(|v| v.as_bool()) == Some(false) { - StatusCode(404) - } else { - StatusCode(200) - }; - send_json(request, status, &response) + _ => Ok(json!({ "ok": false, "error": "not found" })), + } } fn send_json(request: Request, status: StatusCode, value: &T) -> Result<()> { diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index e18c1bf3..d8d50d77 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -98,10 +98,19 @@ function cow_tables_from_sql( $sql ) { return array_keys( $tables ); } +function cow_control_timeout_secs() { + $timeout = (int) getenv( 'WPCOW_CONTROL_REQUEST_TIMEOUT_SECS' ); + if ( $timeout < 1 ) { + $timeout = 15; + } + return $timeout; +} + function cow_control_request( $path, $payload ) { $payload['clone'] = WPCOW_CLONE; $url = rtrim( WPCOW_CONTROL_URL, '/' ) . 
$path; $body = json_encode( $payload ); + $timeout = cow_control_timeout_secs(); if ( function_exists( 'curl_init' ) ) { $ch = curl_init( $url ); @@ -109,7 +118,8 @@ function cow_control_request( $path, $payload ) { curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'Content-Type: application/json' ) ); curl_setopt( $ch, CURLOPT_POSTFIELDS, $body ); curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); - curl_setopt( $ch, CURLOPT_TIMEOUT, 120 ); + curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, min( 3, $timeout ) ); + curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout ); $raw = curl_exec( $ch ); $error = curl_error( $ch ); curl_close( $ch ); @@ -123,7 +133,7 @@ function cow_control_request( $path, $payload ) { 'method' => 'POST', 'header' => "Content-Type: application/json\r\n", 'content' => $body, - 'timeout' => 120, + 'timeout' => $timeout, ), ) ); diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 03c14d36..c380004f 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -45,14 +45,20 @@ impl RemoteClient { if let Some(parent) = control_path.parent() { std::fs::create_dir_all(parent)?; } - let status = Command::new("ssh") + let mut command = Command::new("timeout"); + command + .arg("--kill-after=2s") + .arg(format!("{}s", ssh_connect_timeout_secs() + 5)) + .arg("ssh") .arg("-MNf") .arg("-S") .arg(control_path) .arg("-o") .arg("ControlMaster=yes") .arg("-o") - .arg("ControlPersist=600") + .arg("ControlPersist=600"); + self.add_ssh_safety_options(&mut command); + let status = command .arg(&self.manifest.ssh) .status() .context("start SSH control master")?; @@ -72,14 +78,34 @@ impl RemoteClient { command.arg("-o").arg("ControlMaster=auto"); command.arg("-o").arg("ControlPersist=600"); } + self.add_ssh_safety_options(&mut command); command.arg(&self.manifest.ssh); command.arg(remote_command); command } pub fn exec_capture(&self, remote_command: &str, stdin: Option<&[u8]>) -> io::Result> { - 
let mut child = self - .command(remote_command) + let timeout_secs = remote_command_timeout_secs(); + let mut command = if timeout_secs > 0 { + let mut command = Command::new("timeout"); + command + .arg("--kill-after=2s") + .arg(format!("{}s", timeout_secs)) + .arg("ssh"); + if let Some(control_path) = &self.control_path { + command.arg("-S").arg(control_path); + command.arg("-o").arg("ControlMaster=auto"); + command.arg("-o").arg("ControlPersist=600"); + } + self.add_ssh_safety_options(&mut command); + command.arg(&self.manifest.ssh); + command.arg(remote_command); + command + } else { + self.command(remote_command) + }; + + let mut child = command .stdin(if stdin.is_some() { Stdio::piped() } else { @@ -100,6 +126,15 @@ impl RemoteClient { return Ok(output.stdout); } let stderr = String::from_utf8_lossy(&output.stderr); + if matches!(output.status.code(), Some(124) | Some(137)) { + return Err(io::Error::new( + io::ErrorKind::TimedOut, + format!( + "remote command timed out after {} seconds: {}", + timeout_secs, stderr + ), + )); + } if output.status.code() == Some(2) || stderr.contains("WPCOW_ENOENT") { return Err(io::Error::new(io::ErrorKind::NotFound, stderr.to_string())); } @@ -175,7 +210,9 @@ echo $target; pub fn remote_query_readonly(&self, sql: &str) -> Result { let probe = &self.manifest.probe; let code = r#" -$host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5]; +$host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5];$timeout=(int)$argv[6]; +if($timeout<1){$timeout=10;} +@set_time_limit($timeout); if(!preg_match('/^\s*(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i',$sql)){ fwrite(STDERR,"WPCOW_REFUSED_WRITE\n");exit(3); } @@ -186,10 +223,13 @@ if(preg_match('/^(.+):([0-9]+)$/',$host,$m)){ $host=$m[1];$socket=$m[2]; } $mysqli=mysqli_init(); +@$mysqli->options(MYSQLI_OPT_CONNECT_TIMEOUT, min(5,$timeout)); if(!@$mysqli->real_connect($host,$user,$pass,$db,$port,$socket)){ fwrite(STDERR,mysqli_connect_error()."\n");exit(1); } 
@$mysqli->set_charset("utf8mb4"); +@$mysqli->query("SET SESSION max_execution_time=".max(1,$timeout * 1000)); +@$mysqli->query("SET SESSION max_statement_time=".max(1,$timeout)); $res=$mysqli->query($sql, MYSQLI_STORE_RESULT); if($res===false){ echo json_encode(array("ok"=>false,"error"=>$mysqli->error,"rows"=>array(),"fields"=>array(),"affected"=>0)); @@ -214,6 +254,7 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a probe.db_password.clone(), probe.db_name.clone(), sql.to_string(), + remote_db_query_timeout_secs().to_string(), ], ) .context("remote readonly query")?; @@ -244,6 +285,16 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a )) } } + + fn add_ssh_safety_options(&self, command: &mut Command) { + let connect_timeout = ssh_connect_timeout_secs(); + command + .arg("-o") + .arg(format!("ConnectTimeout={connect_timeout}")); + command.arg("-o").arg("ServerAliveInterval=5"); + command.arg("-o").arg("ServerAliveCountMax=1"); + command.arg("-o").arg("BatchMode=yes"); + } } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -321,6 +372,25 @@ pub fn shell_quote(value: impl AsRef) -> String { format!("'{}'", escaped) } +fn remote_command_timeout_secs() -> u64 { + env_u64("WPCOW_REMOTE_COMMAND_TIMEOUT_SECS", 20) +} + +fn remote_db_query_timeout_secs() -> u64 { + env_u64("WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS", 10) +} + +fn ssh_connect_timeout_secs() -> u64 { + env_u64("WPCOW_SSH_CONNECT_TIMEOUT_SECS", 8) +} + +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + #[cfg(test)] mod tests { use super::*; diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 5076cc20..f39c508b 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -103,6 +103,21 @@ pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> R fn 
start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { Command::new("php") + .arg("-d") + .arg(format!( + "max_execution_time={}", + env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 30) + )) + .arg("-d") + .arg(format!( + "default_socket_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15) + )) + .arg("-d") + .arg(format!( + "mysqlnd.net_read_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15) + )) .arg("-S") .arg(http_addr) .arg("-t") @@ -113,6 +128,13 @@ fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> R .context("start php built-in server") } +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + fn wait_for_mount(mountpoint: &Path) { for _ in 0..40 { if mountpoint.join("wp-config.php").exists() { From 414341e5c635d8c6a04ba8dd089ccdf5a1e460b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 02:49:37 +0200 Subject: [PATCH 14/39] Materialize WordPress runtime files on serve --- experiments/remote-wp-cow/.env.example | 3 + experiments/remote-wp-cow/README.md | 14 +++- experiments/remote-wp-cow/compose.yaml | 3 + experiments/remote-wp-cow/src/cli.rs | 40 +++++++++ experiments/remote-wp-cow/src/remote.rs | 106 +++++++++++++++++++----- 5 files changed, 143 insertions(+), 23 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 82d73306..018dcb9a 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -8,6 +8,9 @@ WPCOW_HTTP_PORT=8080 WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 WPCOW_CACHE_MAX_FILE_MB=64 +WPCOW_RUNTIME_SYNC=1 +WPCOW_RUNTIME_SYNC_FORCE=0 +WPCOW_RUNTIME_SYNC_TIMEOUT_SECS=180 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=15 WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md 
index 89ecc979..e08c3ad6 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -123,9 +123,17 @@ wp-cow-lab-serve ``` That is the normal path. It creates or reuses the lazy clone, exports schema -only if needed, initializes an empty local MariaDB database if needed, mounts -the lazy filesystem, starts the DB control layer, and starts PHP. It does not -download media or table rows up front. +only if needed, initializes an empty local MariaDB database if needed, +pre-materializes the WordPress runtime files, mounts the lazy filesystem, starts +the DB control layer, and starts PHP. Runtime file sync copies the root PHP +files, `wp-admin`, `wp-includes`, plugin/theme/mu-plugin/language code, and +top-level `wp-content` drop-ins. It does not copy `wp-content/uploads` or other +large content data directories, and it does not download table rows up front. + +Runtime file sync is enabled by default because real WordPress boot performs too +many PHP file reads and stats for pure per-file SSH/FUSE reads to feel usable. +Set `WPCOW_RUNTIME_SYNC_FORCE=1` to refresh the local runtime copy or +`WPCOW_RUNTIME_SYNC=0` to return to fully lazy filesystem reads. 
The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 0e383566..1a719d23 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -29,6 +29,9 @@ services: WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" + WPCOW_RUNTIME_SYNC: "${WPCOW_RUNTIME_SYNC:-1}" + WPCOW_RUNTIME_SYNC_FORCE: "${WPCOW_RUNTIME_SYNC_FORCE:-0}" + WPCOW_RUNTIME_SYNC_TIMEOUT_SECS: "${WPCOW_RUNTIME_SYNC_TIMEOUT_SECS:-180}" WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-15}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index d7daf552..d779e253 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -86,6 +86,8 @@ struct ServeArgs { http: String, #[arg(long)] no_php: bool, + #[arg(long)] + no_runtime_sync: bool, } #[derive(Debug, Args)] @@ -299,6 +301,25 @@ fn serve_site(args: ServeArgs) -> Result<()> { manifest }; + if should_sync_runtime(&paths, args.no_runtime_sync) { + let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + remote.ensure_master()?; + println!( + "syncing WordPress runtime files for '{}' (uploads stay lazy)", + manifest.name + ); + remote + .sync_runtime_files(&paths.upper) + .context("sync WordPress runtime files")?; + fs::write(paths.generated.join("runtime-files.synced"), b"ok\n")?; + println!("synced WordPress runtime files for '{}'", manifest.name); + } else { + println!( + "using local WordPress runtime files for '{}'", + manifest.name + ); + } + 
generate::write_wordpress_overrides(&paths, &manifest)?; if !paths.db.join("schema.sql").exists() { @@ -337,6 +358,25 @@ fn serve_site(args: ServeArgs) -> Result<()> { run::run_site(manifest, paths, options) } +fn should_sync_runtime(paths: &crate::config::ClonePaths, no_runtime_sync: bool) -> bool { + if no_runtime_sync || env_bool("WPCOW_RUNTIME_SYNC", true) == Some(false) { + return false; + } + if env_bool("WPCOW_RUNTIME_SYNC_FORCE", false) == Some(true) { + return true; + } + !paths.generated.join("runtime-files.synced").is_file() +} + +fn env_bool(name: &str, default: bool) -> Option { + let raw = std::env::var(name).ok()?; + match raw.to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => Some(true), + "0" | "false" | "no" | "off" => Some(false), + _ => Some(default), + } +} + fn init_db(args: NameArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index c380004f..cb0d875e 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; use std::ffi::OsStr; +use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -72,7 +73,21 @@ impl RemoteClient { } pub fn command(&self, remote_command: &str) -> Command { - let mut command = Command::new("ssh"); + self.ssh_command(remote_command, 0) + } + + fn ssh_command(&self, remote_command: &str, timeout_secs: u64) -> Command { + let mut command = if timeout_secs > 0 { + let mut command = Command::new("timeout"); + command + .arg("--kill-after=2s") + .arg(format!("{}s", timeout_secs)) + .arg("ssh"); + command + } else { + Command::new("ssh") + }; + if let Some(control_path) = &self.control_path { command.arg("-S").arg(control_path); 
command.arg("-o").arg("ControlMaster=auto"); @@ -86,24 +101,7 @@ impl RemoteClient { pub fn exec_capture(&self, remote_command: &str, stdin: Option<&[u8]>) -> io::Result> { let timeout_secs = remote_command_timeout_secs(); - let mut command = if timeout_secs > 0 { - let mut command = Command::new("timeout"); - command - .arg("--kill-after=2s") - .arg(format!("{}s", timeout_secs)) - .arg("ssh"); - if let Some(control_path) = &self.control_path { - command.arg("-S").arg(control_path); - command.arg("-o").arg("ControlMaster=auto"); - command.arg("-o").arg("ControlPersist=600"); - } - self.add_ssh_safety_options(&mut command); - command.arg(&self.manifest.ssh); - command.arg(remote_command); - command - } else { - self.command(remote_command) - }; + let mut command = self.ssh_command(remote_command, timeout_secs); let mut child = command .stdin(if stdin.is_some() { @@ -141,6 +139,59 @@ impl RemoteClient { Err(io::Error::new(io::ErrorKind::Other, stderr.to_string())) } + pub fn sync_runtime_files(&self, upper: &Path) -> Result<()> { + fs::create_dir_all(upper).with_context(|| format!("create {}", upper.display()))?; + + let remote_command = format!( + r#"cd {} && (find . 
-mindepth 1 -maxdepth 1 \( -type f -o -type l \) -print; find wp-content -mindepth 1 -maxdepth 1 \( -type f -o -type l \) -print 2>/dev/null; for p in wp-admin wp-includes wp-content/plugins wp-content/themes wp-content/mu-plugins wp-content/languages; do if [ -e "$p" ]; then printf '%s\n' "$p"; fi; done) | tar -cf - -T -"#, + shell_quote(&self.manifest.remote_path) + ); + + let mut ssh = self + .ssh_command(&remote_command, runtime_sync_timeout_secs()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .context("start remote runtime tar over ssh")?; + + let mut tar = Command::new("tar") + .arg("--no-same-owner") + .arg("-C") + .arg(upper) + .arg("-xf") + .arg("-") + .stdin(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .context("start local runtime tar extraction")?; + + let copy_result = { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut tar_stdin = tar.stdin.take().expect("tar stdin piped"); + io::copy(&mut ssh_stdout, &mut tar_stdin).context("copy runtime tar stream") + }; + + let ssh_status = ssh.wait().context("wait for remote runtime tar")?; + let tar_status = tar.wait().context("wait for local runtime tar")?; + + copy_result?; + + if !ssh_status.success() { + return Err(anyhow!( + "remote runtime tar failed with status {}", + ssh_status + )); + } + if !tar_status.success() { + return Err(anyhow!( + "local runtime tar extraction failed with status {}", + tar_status + )); + } + + Ok(()) + } + pub fn stat(&self, rel: &Path) -> io::Result { let full = self.remote_full_path(rel)?; let code = r#" @@ -334,7 +385,18 @@ echo json_encode($out); "#; let remote_command = format!("cd {} && php", shell_quote(remote_path)); - let output = Command::new("ssh") + let output = Command::new("timeout") + .arg("--kill-after=2s") + .arg(format!("{}s", remote_command_timeout_secs())) + .arg("ssh") + .arg("-o") + .arg(format!("ConnectTimeout={}", ssh_connect_timeout_secs())) + .arg("-o") + .arg("ServerAliveInterval=5") + 
.arg("-o") + .arg("ServerAliveCountMax=1") + .arg("-o") + .arg("BatchMode=yes") .arg(ssh) .arg(remote_command) .stdin(Stdio::piped()) @@ -376,6 +438,10 @@ fn remote_command_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_COMMAND_TIMEOUT_SECS", 20) } +fn runtime_sync_timeout_secs() -> u64 { + env_u64("WPCOW_RUNTIME_SYNC_TIMEOUT_SECS", 180) +} + fn remote_db_query_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS", 10) } From beb3446057750a9e9886eee54e9a8875b67fcf0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 02:59:12 +0200 Subject: [PATCH 15/39] Add fast COW startup PRD and DB tunnel reads --- experiments/remote-wp-cow/.env.example | 1 + experiments/remote-wp-cow/PRD.md | 137 ++++++++++++++++++++++ experiments/remote-wp-cow/README.md | 6 + experiments/remote-wp-cow/compose.yaml | 1 + experiments/remote-wp-cow/src/cli.rs | 36 +++++- experiments/remote-wp-cow/src/config.rs | 16 +++ experiments/remote-wp-cow/src/generate.rs | 106 ++++++++++++++++- experiments/remote-wp-cow/src/remote.rs | 110 ++++++++++++++++- experiments/remote-wp-cow/src/run.rs | 24 ++++ 9 files changed, 428 insertions(+), 9 deletions(-) create mode 100644 experiments/remote-wp-cow/PRD.md diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 018dcb9a..d85d6761 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -11,6 +11,7 @@ WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_RUNTIME_SYNC=1 WPCOW_RUNTIME_SYNC_FORCE=0 WPCOW_RUNTIME_SYNC_TIMEOUT_SECS=180 +WPCOW_REMOTE_DB_TUNNEL=1 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=15 WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md new file mode 100644 index 00000000..43cece83 --- /dev/null +++ b/experiments/remote-wp-cow/PRD.md @@ -0,0 +1,137 @@ +# PRD: Fast Remote WordPress COW Serve + +## Problem + +`wp-cow-lab-serve` must make a 
remote WordPress site locally usable quickly. +The current prototype can still feel stuck because WordPress boot touches many +files and options before the first byte reaches the browser. A design that is +theoretically lazy but blocks the first page for minutes is not acceptable. + +## Goal + +Given SSH access, a remote WordPress path, and a local port, one command should +produce a responsive local WordPress server without copying media uploads or all +database rows. + +Target command shape: + +```bash +wp-cow-lab-serve +``` + +## Success Criteria + +- First run reaches a local HTTP response in under 15 seconds for the + SiteGround test site, excluding Docker image build time. +- Repeated runs reach a local HTTP response in under 5 seconds. +- Startup output shows timed phases so slow work is visible. +- The browser must not spin indefinitely. Slow or failing remote work must + return a visible error with timing context. +- No full `wp-content/uploads` copy. +- No optimistic full database row dump. +- Local writes must not reach production. + +## Non-Goals + +- Perfect visual fidelity for every media asset on first page load. +- A transparent MySQL protocol proxy. +- Full production snapshot semantics. +- Supporting non-Linux runtime hosts. + +## Product Shape + +The default runtime is not pure lazy FUSE for all files. WordPress core and +runtime code are small enough to pre-materialize and too latency-sensitive to +serve file-by-file over SSH. Large user data remains lazy. + +Startup should do: + +1. Probe WordPress and remote DB credentials. +2. Export schema only. +3. Initialize empty local DB schema. +4. Pre-materialize a bounded runtime code set. +5. Start a persistent SSH tunnel for safe remote DB reads when the remote DB is + reachable over TCP from the SSH host. +6. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, + and safety MU plugin. +7. Serve media and other large user data lazily only when requested. 
+ +## Runtime Materialization Policy + +Copy locally: + +- Root WordPress PHP files and common root assets. +- `wp-admin`. +- `wp-includes`. +- `wp-content/plugins`. +- `wp-content/themes`. +- `wp-content/mu-plugins`. +- `wp-content/languages`. +- Top-level `wp-content` drop-in files. + +Do not copy by default: + +- `wp-content/uploads`. +- Cache directories. +- Backup directories. +- SQL dumps and archive files. +- Arbitrary large data directories under `wp-content`. + +## DB Policy + +Initial startup must not dump rows. Reads should use a persistent tunneled +remote DB connection when available, not per-query SSH/PHP subprocesses. If a +query touches a locally materialized table, the involved tables should be routed +locally. Writes must be local-only. + +If remote DB reads are too slow for first page boot, the next fallback should +be a bounded bootstrap materialization of only essential option rows, not a full +table dump. + +## Observability + +The CLI should print phase names and durations: + +- probe +- schema export +- local schema init +- runtime sync +- mount +- php start +- first request diagnostics where practical + +## Test Site + +Use the SiteGround WordPress site supplied by the user: + +```text +SSH: u2199-yx4tznmyunag@calm-cottage-mindfulness.com:18765 +Key: ~/.ssh/id_siteground +Path: /home/u2199-yx4tznmyunag/www/calm-cottage-mindfulness.com/public_html +Remote URL: https://calm-cottage-mindfulness.com +Local URL: http://localhost:9481 +``` + +Do not print secrets. Do not modify production data. + +## Acceptance Test + +From a clean clone state: + +```bash +WPCOW_NAME=calm-cottage \ +WPCOW_SSH=wp-cow-siteground-calm-cottage \ +WPCOW_PATH=/home/u2199-yx4tznmyunag/www/calm-cottage-mindfulness.com/public_html \ +WPCOW_REMOTE_URL=https://calm-cottage-mindfulness.com \ +WPCOW_LOCAL_URL=http://localhost:9481 \ +wp-cow-lab-serve +``` + +Then: + +```bash +curl -I --max-time 10 http://localhost:9481/ +``` + +The response must complete within the timeout. 
A WordPress error page is +acceptable during development only if it returns quickly with diagnostic output. diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index e08c3ad6..9508045e 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -135,6 +135,12 @@ many PHP file reads and stats for pure per-file SSH/FUSE reads to feel usable. Set `WPCOW_RUNTIME_SYNC_FORCE=1` to refresh the local runtime copy or `WPCOW_RUNTIME_SYNC=0` to return to fully lazy filesystem reads. +The lab also starts a persistent SSH tunnel for remote database reads when the +remote `DB_HOST` is TCP-reachable from the SSH host. This avoids one SSH/PHP +subprocess per WordPress read query. Write-class SQL is still blocked from the +remote database and materialized locally first. Set `WPCOW_REMOTE_DB_TUNNEL=0` +to fall back to daemon-mediated remote reads. + The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the browser spinning forever. 
Adjust the defaults with diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 1a719d23..3cc3f5c3 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -32,6 +32,7 @@ services: WPCOW_RUNTIME_SYNC: "${WPCOW_RUNTIME_SYNC:-1}" WPCOW_RUNTIME_SYNC_FORCE: "${WPCOW_RUNTIME_SYNC_FORCE:-0}" WPCOW_RUNTIME_SYNC_TIMEOUT_SECS: "${WPCOW_RUNTIME_SYNC_TIMEOUT_SECS:-180}" + WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-1}" WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-15}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index d779e253..ffedc2e3 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -2,6 +2,7 @@ use anyhow::{anyhow, Context, Result}; use clap::{Args, Parser, Subcommand}; use std::fs; use std::path::PathBuf; +use std::time::Instant; use crate::config::{ clone_paths, default_state_dir, derive_name, ensure_clone_dirs, load_manifest, write_manifest, @@ -215,6 +216,7 @@ fn clone_site(args: CloneArgs) -> Result<()> { } fn serve_site(args: ServeArgs) -> Result<()> { + let serve_started = Instant::now(); let state_dir = args.state_dir.clone().unwrap_or(default_state_dir()?); let name = args .name @@ -222,6 +224,7 @@ fn serve_site(args: ServeArgs) -> Result<()> { .unwrap_or_else(|| derive_name(&args.remote_url, &args.local_url)); let paths = clone_paths(&state_dir, &name); + let metadata_started = Instant::now(); let manifest = if !paths.root.exists() || args.force { if paths.root.exists() { fs::remove_dir_all(&paths.root)?; @@ -300,8 +303,13 @@ fn serve_site(args: ServeArgs) -> Result<()> { manifest }; + println!( + "prepared clone metadata in {:.2}s", + metadata_started.elapsed().as_secs_f64() + ); if 
should_sync_runtime(&paths, args.no_runtime_sync) { + let phase_started = Instant::now(); let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); remote.ensure_master()?; println!( @@ -312,7 +320,11 @@ fn serve_site(args: ServeArgs) -> Result<()> { .sync_runtime_files(&paths.upper) .context("sync WordPress runtime files")?; fs::write(paths.generated.join("runtime-files.synced"), b"ok\n")?; - println!("synced WordPress runtime files for '{}'", manifest.name); + println!( + "synced WordPress runtime files for '{}' in {:.2}s", + manifest.name, + phase_started.elapsed().as_secs_f64() + ); } else { println!( "using local WordPress runtime files for '{}'", @@ -323,6 +335,7 @@ fn serve_site(args: ServeArgs) -> Result<()> { generate::write_wordpress_overrides(&paths, &manifest)?; if !paths.db.join("schema.sql").exists() { + let phase_started = Instant::now(); if args.no_probe { return Err(anyhow!( "schema is missing and --no-probe prevents discovering remote DB settings" @@ -331,20 +344,31 @@ fn serve_site(args: ServeArgs) -> Result<()> { let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); remote.ensure_master()?; db::export_schema(&remote, &paths).context("export schema")?; - println!("exported schema only for '{}'", manifest.name); + println!( + "exported schema only for '{}' in {:.2}s", + manifest.name, + phase_started.elapsed().as_secs_f64() + ); } + let phase_started = Instant::now(); if db::init_local_db_if_empty(&manifest, &paths)? 
{ println!( - "initialized empty local database '{}'", - manifest.local_db.name + "initialized empty local database '{}' in {:.2}s", + manifest.local_db.name, + phase_started.elapsed().as_secs_f64() ); } else { - println!("using existing local database '{}'", manifest.local_db.name); + println!( + "using existing local database '{}' ({:.2}s)", + manifest.local_db.name, + phase_started.elapsed().as_secs_f64() + ); } println!( - "starting lazy COW server; files and database rows are fetched on demand, not copied up front" + "starting lazy COW server after {:.2}s; files and database rows are fetched on demand, not copied up front", + serve_started.elapsed().as_secs_f64() ); let mountpoint = args diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index fd23c4b6..014352ea 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -22,6 +22,8 @@ pub struct Manifest { pub created_at_unix: u64, pub probe: Probe, pub local_db: LocalDb, + #[serde(default = "default_remote_db_tunnel")] + pub remote_db_tunnel: RemoteDbTunnel, pub control_url: String, #[serde(default = "default_cache_max_file_bytes")] pub cache_max_file_bytes: u64, @@ -54,6 +56,12 @@ pub struct LocalDb { pub port: u16, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RemoteDbTunnel { + pub host: String, + pub port: u16, +} + #[derive(Debug, Clone)] pub struct ClonePaths { pub root: PathBuf, @@ -95,6 +103,7 @@ impl Manifest { host: "127.0.0.1".to_string(), port: 33071, }, + remote_db_tunnel: default_remote_db_tunnel(), control_url: "http://127.0.0.1:39070".to_string(), cache_max_file_bytes: cache_max_file_bytes_from_env(), remote_metadata_cache_ttl_secs: DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS, @@ -110,6 +119,13 @@ fn default_remote_metadata_cache_ttl_secs() -> u64 { DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS } +fn default_remote_db_tunnel() -> RemoteDbTunnel { + RemoteDbTunnel { + host: "127.0.0.1".to_string(), + 
port: 33072, + } +} + fn cache_max_file_bytes_from_env() -> u64 { std::env::var("WPCOW_CACHE_MAX_FILE_MB") .ok() diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index d8d50d77..c96a67f4 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -35,6 +35,10 @@ $table_prefix = {table_prefix}; define( 'WPCOW_CLONE', {clone_name} ); define( 'WPCOW_CONTROL_URL', {control_url} ); +define( 'WPCOW_REMOTE_DB_NAME', {remote_db_name} ); +define( 'WPCOW_REMOTE_DB_USER', {remote_db_user} ); +define( 'WPCOW_REMOTE_DB_PASSWORD', {remote_db_password} ); +define( 'WPCOW_REMOTE_DB_HOST', {remote_db_host} ); define( 'FS_METHOD', 'direct' ); define( 'DISABLE_WP_CRON', true ); @@ -56,6 +60,13 @@ require_once ABSPATH . 'wp-settings.php'; table_prefix = php_string(&manifest.probe.table_prefix), clone_name = php_string(&manifest.name), control_url = php_string(&manifest.control_url), + remote_db_name = php_string(&manifest.probe.db_name), + remote_db_user = php_string(&manifest.probe.db_user), + remote_db_password = php_string(&manifest.probe.db_password), + remote_db_host = php_string(&format!( + "{}:{}", + manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port + )), ) } @@ -151,6 +162,9 @@ function cow_control_request( $path, $payload ) { } class Cow_DB extends wpdb { + private $cow_remote_mysqli = null; + private $cow_remote_failed = false; + public function query( $query ) { if ( ! 
$query ) { return false; @@ -184,6 +198,39 @@ class Cow_DB extends wpdb { } private function cow_remote_query( $query ) { + $remote = $this->cow_remote_mysqli(); + if ( $remote instanceof mysqli ) { + $result = $remote->query( $query, MYSQLI_STORE_RESULT ); + if ( false === $result ) { + $this->last_error = $remote->error; + return false; + } + + $this->last_result = array(); + $this->col_info = array(); + + if ( true === $result ) { + $this->num_rows = 0; + $this->rows_affected = (int) $remote->affected_rows; + $this->insert_id = (int) $remote->insert_id; + $this->last_error = ''; + return $this->rows_affected; + } + + foreach ( $result->fetch_fields() as $field ) { + $this->col_info[] = (object) array( 'name' => $field->name ); + } + while ( $row = $result->fetch_assoc() ) { + $this->last_result[] = (object) $row; + } + $this->num_rows = count( $this->last_result ); + $this->rows_affected = $this->num_rows; + $this->insert_id = (int) $remote->insert_id; + $this->last_error = ''; + + return $this->num_rows; + } + $result = cow_control_request( '/query', array( 'sql' => $query ) ); if ( empty( $result['ok'] ) ) { $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow remote query failed'; @@ -210,6 +257,56 @@ class Cow_DB extends wpdb { return $this->num_rows; } + + private function cow_remote_mysqli() { + if ( $this->cow_remote_mysqli instanceof mysqli ) { + return $this->cow_remote_mysqli; + } + if ( $this->cow_remote_failed ) { + return null; + } + if ( '0' === getenv( 'WPCOW_REMOTE_DB_TUNNEL' ) ) { + $this->cow_remote_failed = true; + return null; + } + + if ( + ! defined( 'WPCOW_REMOTE_DB_NAME' ) || + ! defined( 'WPCOW_REMOTE_DB_USER' ) || + ! 
defined( 'WPCOW_REMOTE_DB_HOST' ) || + '' === WPCOW_REMOTE_DB_NAME || + '' === WPCOW_REMOTE_DB_USER + ) { + return null; + } + + $host = WPCOW_REMOTE_DB_HOST; + $port = null; + $socket = null; + if ( preg_match( '/^(.+):([0-9]+)$/', $host, $matches ) ) { + $host = $matches[1]; + $port = (int) $matches[2]; + } elseif ( preg_match( '/^([^:]+):(\/.*)$/', $host, $matches ) ) { + $host = $matches[1]; + $socket = $matches[2]; + } + + $mysqli = mysqli_init(); + if ( ! $mysqli ) { + $this->cow_remote_failed = true; + return null; + } + + @$mysqli->options( MYSQLI_OPT_CONNECT_TIMEOUT, 2 ); + if ( ! @$mysqli->real_connect( $host, WPCOW_REMOTE_DB_USER, WPCOW_REMOTE_DB_PASSWORD, WPCOW_REMOTE_DB_NAME, $port, $socket ) ) { + $this->cow_remote_failed = true; + return null; + } + + @$mysqli->set_charset( $this->charset ? $this->charset : 'utf8mb4' ); + $this->cow_remote_mysqli = $mysqli; + return $this->cow_remote_mysqli; + } } $wpdb = new Cow_DB( DB_USER, DB_PASSWORD, DB_NAME, DB_HOST ); @@ -274,7 +371,7 @@ pub fn generated_file_paths(root: &Path) -> Vec { #[cfg(test)] mod tests { use super::*; - use crate::config::{LocalDb, Manifest, Probe, MANIFEST_VERSION}; + use crate::config::{LocalDb, Manifest, Probe, RemoteDbTunnel, MANIFEST_VERSION}; fn manifest() -> Manifest { Manifest { @@ -299,6 +396,10 @@ mod tests { host: "127.0.0.1".to_string(), port: 33071, }, + remote_db_tunnel: RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: 33072, + }, control_url: "http://127.0.0.1:39070".to_string(), cache_max_file_bytes: 1024, remote_metadata_cache_ttl_secs: 30, @@ -312,6 +413,7 @@ mod tests { assert!(php.contains("define( 'WP_HOME', 'http://example.test' );")); assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); + assert!(php.contains("WPCOW_REMOTE_DB_HOST")); } #[test] @@ -319,7 +421,7 @@ mod tests { let php = db_dropin_php(); assert!(php.contains("cow_is_write_sql")); assert!(php.contains("/materialize")); - 
assert!(php.contains("/query")); + assert!(php.contains("cow_remote_mysqli")); } #[test] diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index cb0d875e..000fd0fb 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -4,7 +4,9 @@ use std::ffi::OsStr; use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; +use std::process::{Child, Command, Stdio}; +use std::thread; +use std::time::Duration; use crate::config::{Manifest, Probe}; use crate::overlay::OverlayStore; @@ -192,6 +194,67 @@ impl RemoteClient { Ok(()) } + pub fn start_db_tunnel(&self) -> Result> { + if env_bool("WPCOW_REMOTE_DB_TUNNEL", true) == Some(false) { + return Ok(None); + } + if self.manifest.probe.db_host.is_empty() + || self.manifest.probe.db_name.is_empty() + || self.manifest.probe.db_user.is_empty() + { + return Ok(None); + } + + let Some((remote_host, remote_port)) = remote_db_tcp_target(&self.manifest.probe.db_host) + else { + return Ok(None); + }; + + let bind = format!( + "{}:{}:{}:{}", + self.manifest.remote_db_tunnel.host, + self.manifest.remote_db_tunnel.port, + remote_host, + remote_port + ); + let mut command = Command::new("ssh"); + if let Some(control_path) = &self.control_path { + command.arg("-S").arg(control_path); + command.arg("-o").arg("ControlMaster=auto"); + command.arg("-o").arg("ControlPersist=600"); + } + self.add_ssh_safety_options(&mut command); + command + .arg("-o") + .arg("ExitOnForwardFailure=yes") + .arg("-N") + .arg("-L") + .arg(bind) + .arg(&self.manifest.ssh) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()); + + let mut child = command.spawn().context("start remote DB SSH tunnel")?; + for _ in 0..20 { + if let Some(status) = child.try_wait()? 
{ + let mut stderr = String::new(); + if let Some(mut err) = child.stderr.take() { + use std::io::Read; + let _ = err.read_to_string(&mut stderr); + } + return Err(anyhow!( + "remote DB SSH tunnel exited with status {}: {}", + status, + stderr + )); + } + thread::sleep(Duration::from_millis(50)); + } + + Ok(Some(child)) + } + pub fn stat(&self, rel: &Path) -> io::Result { let full = self.remote_full_path(rel)?; let code = r#" @@ -457,6 +520,38 @@ fn env_u64(name: &str, default: u64) -> u64 { .unwrap_or(default) } +fn env_bool(name: &str, default: bool) -> Option { + let raw = std::env::var(name).ok()?; + match raw.to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => Some(true), + "0" | "false" | "no" | "off" => Some(false), + _ => Some(default), + } +} + +fn remote_db_tcp_target(db_host: &str) -> Option<(String, u16)> { + if db_host.contains(":/") { + return None; + } + + let (host, port) = if let Some((host, port)) = db_host.rsplit_once(':') { + if let Ok(port) = port.parse::() { + (host, port) + } else { + (db_host, 3306) + } + } else { + (db_host, 3306) + }; + + let host = match host { + "" | "localhost" => "127.0.0.1", + other => other, + }; + + Some((host.to_string(), port)) +} + #[cfg(test)] mod tests { use super::*; @@ -467,4 +562,17 @@ mod tests { assert_eq!(shell_quote("a'b"), "'a'\"'\"'b'"); assert_eq!(shell_quote(""), "''"); } + + #[test] + fn parses_remote_db_tcp_targets() { + assert_eq!( + remote_db_tcp_target("localhost"), + Some(("127.0.0.1".to_string(), 3306)) + ); + assert_eq!( + remote_db_tcp_target("db.example.com:3307"), + Some(("db.example.com".to_string(), 3307)) + ); + assert_eq!(remote_db_tcp_target("localhost:/tmp/mysql.sock"), None); + } } diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index f39c508b..0af6a566 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -24,6 +24,26 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: 
RunOptions) -> R let control_addr = control_addr_from_url(&manifest.control_url)?; let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); remote.ensure_master()?; + let mut db_tunnel = match remote.start_db_tunnel() { + Ok(Some(child)) => { + eprintln!( + "wp-cow remote DB tunnel listening at {}:{}", + manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port + ); + Some(child) + } + Ok(None) => { + eprintln!( + "wp-cow remote DB tunnel disabled or unavailable; falling back to control reads" + ); + None + } + Err(err) => { + eprintln!("wp-cow remote DB tunnel failed: {err:#}"); + eprintln!("wp-cow falling back to control reads"); + None + } + }; let control_shutdown = shutdown.clone(); let control_manifest = manifest.clone(); @@ -78,6 +98,10 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R let _ = child.kill(); let _ = child.wait(); } + if let Some(child) = db_tunnel.as_mut() { + let _ = child.kill(); + let _ = child.wait(); + } let _ = unmount(&options.mountpoint); match control_thread.join() { From 321ba5ae6a75cfad1ff7680a303a16ea37e4d9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 03:07:55 +0200 Subject: [PATCH 16/39] Make file caching request-driven by default --- experiments/remote-wp-cow/.env.example | 2 +- experiments/remote-wp-cow/PRD.md | 49 ++++++----- experiments/remote-wp-cow/README.md | 27 +++--- experiments/remote-wp-cow/compose.yaml | 2 +- experiments/remote-wp-cow/src/cli.rs | 4 +- experiments/remote-wp-cow/src/fusefs.rs | 16 ++++ experiments/remote-wp-cow/src/overlay.rs | 107 +++++++++++++++++++++-- 7 files changed, 159 insertions(+), 48 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index d85d6761..294d476a 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -8,7 +8,7 @@ WPCOW_HTTP_PORT=8080 WPCOW_DNS1=1.1.1.1 
WPCOW_DNS2=8.8.8.8 WPCOW_CACHE_MAX_FILE_MB=64 -WPCOW_RUNTIME_SYNC=1 +WPCOW_RUNTIME_SYNC=0 WPCOW_RUNTIME_SYNC_FORCE=0 WPCOW_RUNTIME_SYNC_TIMEOUT_SECS=180 WPCOW_REMOTE_DB_TUNNEL=1 diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md index 43cece83..2a9a0386 100644 --- a/experiments/remote-wp-cow/PRD.md +++ b/experiments/remote-wp-cow/PRD.md @@ -40,42 +40,43 @@ wp-cow-lab-serve ## Product Shape -The default runtime is not pure lazy FUSE for all files. WordPress core and -runtime code are small enough to pre-materialize and too latency-sensitive to -serve file-by-file over SSH. Large user data remains lazy. +The default runtime is request-driven. It must not assume plugin/theme/runtime +directories are small: any directory may contain large generated artifacts, +vendor caches, backups, or media-like data. Files are fetched only when the +local request path opens them, then remembered in the persistent local cache. Startup should do: 1. Probe WordPress and remote DB credentials. 2. Export schema only. 3. Initialize empty local DB schema. -4. Pre-materialize a bounded runtime code set. -5. Start a persistent SSH tunnel for safe remote DB reads when the remote DB is +4. Start a persistent SSH tunnel for safe remote DB reads when the remote DB is reachable over TCP from the SSH host. -6. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, +5. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, and safety MU plugin. -7. Serve media and other large user data lazily only when requested. +6. Serve files lazily and persistently cache only the files touched by requests. -## Runtime Materialization Policy +## File Materialization Policy -Copy locally: +Fetch locally on demand: -- Root WordPress PHP files and common root assets. -- `wp-admin`. -- `wp-includes`. -- `wp-content/plugins`. -- `wp-content/themes`. -- `wp-content/mu-plugins`. -- `wp-content/languages`. 
-- Top-level `wp-content` drop-in files. +- Any remote file that WordPress, PHP, or the browser actually opens. +- Remote directory entries only when a request actually lists that directory. +- Remote metadata needed for opened/listed files. -Do not copy by default: +Remember: -- `wp-content/uploads`. -- Cache directories. -- Backup directories. -- SQL dumps and archive files. -- Arbitrary large data directories under `wp-content`. +- Cached file bytes in `file-cache/`. +- Cached remote metadata in `file-cache/metadata.json`. +- Local mutations separately in `upper/`. + +Do not: + +- Batch copy runtime directories by default. +- Copy `wp-content/uploads` up front. +- Assume plugin/theme directories are small. +- Re-fetch cached file bytes or metadata on subsequent runs unless explicitly + refreshed. ## DB Policy @@ -95,7 +96,7 @@ The CLI should print phase names and durations: - probe - schema export - local schema init -- runtime sync +- file cache hits/misses where practical - mount - php start - first request diagnostics where practical diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 9508045e..a436d159 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -123,17 +123,16 @@ wp-cow-lab-serve ``` That is the normal path. It creates or reuses the lazy clone, exports schema -only if needed, initializes an empty local MariaDB database if needed, -pre-materializes the WordPress runtime files, mounts the lazy filesystem, starts -the DB control layer, and starts PHP. Runtime file sync copies the root PHP -files, `wp-admin`, `wp-includes`, plugin/theme/mu-plugin/language code, and -top-level `wp-content` drop-ins. It does not copy `wp-content/uploads` or other -large content data directories, and it does not download table rows up front. 
- -Runtime file sync is enabled by default because real WordPress boot performs too -many PHP file reads and stats for pure per-file SSH/FUSE reads to feel usable. -Set `WPCOW_RUNTIME_SYNC_FORCE=1` to refresh the local runtime copy or -`WPCOW_RUNTIME_SYNC=0` to return to fully lazy filesystem reads. +only if needed, initializes an empty local MariaDB database if needed, mounts +the lazy filesystem, starts the DB control layer, and starts PHP. It does not +download media, runtime directories, or table rows up front. + +File reads are request-driven. When WordPress opens a remote file, `wp-cow` +fetches that file into the persistent `file-cache/` and records the remote +metadata beside it. Later reads and later runs use the local cached copy instead +of fetching the file or statting it remotely again. Runtime batch sync is +disabled by default; set `WPCOW_RUNTIME_SYNC=1` only for debugging a bounded +runtime copy. The lab also starts a persistent SSH tunnel for remote database reads when the remote `DB_HOST` is TCP-reachable from the SSH host. This avoids one SSH/PHP @@ -182,8 +181,10 @@ cat /mnt/wp-cow/example/wp-config.php Remote file contents are cached separately from local mutations in `~/.wp-cow/clones//file-cache`, which is persisted by the Docker `wp-cow-state` volume. Files up to `WPCOW_CACHE_MAX_FILE_MB` are cached as whole -files on first read; larger files are streamed by range. The Docker lab defaults -that limit to 64 MB. Check or clear the cache with: +files on first read, and their remote metadata is recorded in +`file-cache/metadata.json` so later runs do not need to stat those files +remotely again. Larger files are streamed by range. The Docker lab defaults that +limit to 64 MB. 
Check or clear the cache with: ```bash wp-cow-lab-cache status diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 3cc3f5c3..2843733e 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -29,7 +29,7 @@ services: WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" - WPCOW_RUNTIME_SYNC: "${WPCOW_RUNTIME_SYNC:-1}" + WPCOW_RUNTIME_SYNC: "${WPCOW_RUNTIME_SYNC:-0}" WPCOW_RUNTIME_SYNC_FORCE: "${WPCOW_RUNTIME_SYNC_FORCE:-0}" WPCOW_RUNTIME_SYNC_TIMEOUT_SECS: "${WPCOW_RUNTIME_SYNC_TIMEOUT_SECS:-180}" WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-1}" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index ffedc2e3..da07a407 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -327,7 +327,7 @@ fn serve_site(args: ServeArgs) -> Result<()> { ); } else { println!( - "using local WordPress runtime files for '{}'", + "runtime sync skipped for '{}'; requested files will be cached on demand", manifest.name ); } @@ -383,7 +383,7 @@ fn serve_site(args: ServeArgs) -> Result<()> { } fn should_sync_runtime(paths: &crate::config::ClonePaths, no_runtime_sync: bool) -> bool { - if no_runtime_sync || env_bool("WPCOW_RUNTIME_SYNC", true) == Some(false) { + if no_runtime_sync || env_bool("WPCOW_RUNTIME_SYNC", false) == Some(false) { return false; } if env_bool("WPCOW_RUNTIME_SYNC_FORCE", false) == Some(true) { diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 4782b59b..2c400ba3 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -117,6 +117,17 @@ impl CowFs { } } + if let Some(entry) = self.overlay.cached_entry(rel).map_err(anyhow_to_io)? 
{ + self.remote_stat_cache.insert( + rel.to_path_buf(), + Timed { + value: entry.clone(), + expires_at: Instant::now() + self.remote_cache_ttl, + }, + ); + return Ok(entry); + } + let entry = self.remote.stat(rel)?; self.remote_stat_cache.insert( rel.to_path_buf(), @@ -138,6 +149,7 @@ impl CowFs { let entries = self.remote.readdir(rel)?; let expires_at = Instant::now() + self.remote_cache_ttl; for entry in &entries { + let _ = self.overlay.put_cached_entry(&rel.join(&entry.name), entry); self.remote_stat_cache.insert( rel.join(&entry.name), Timed { @@ -162,6 +174,7 @@ impl CowFs { if let Some(parent) = rel.parent() { self.remote_readdir_cache.remove(parent); } + let _ = self.overlay.remove_cached(rel); } fn attr_from_metadata(&self, ino: u64, metadata: &fs::Metadata) -> FileAttr { @@ -392,6 +405,9 @@ impl Filesystem for CowFs { if upper.exists() { let file = File::open(upper)?; Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) + } else if let Some(cache_path) = self.overlay.cached_file_path(&rel) { + let file = File::open(cache_path)?; + Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) } else { Ok((self.allocate_handle(Handle::Remote(rel)), flags as u32)) } diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index 9789fa41..c61e58da 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -1,9 +1,9 @@ use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::fs::{self, File, OpenOptions}; -use std::io::{Read, Write}; +use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Component, Path, PathBuf}; use crate::config::ClonePaths; @@ -14,6 +14,11 @@ struct WhiteoutFile { deleted: BTreeSet, } +#[derive(Debug, Default, Serialize, Deserialize)] +struct MetadataFile { + entries: BTreeMap, +} + #[derive(Debug, Clone)] pub 
struct OverlayStore { pub upper: PathBuf, @@ -62,6 +67,34 @@ impl OverlayStore { self.file_cache.join(&hex[0..2]).join(hex) } + pub fn cached_file_path(&self, rel: &Path) -> Option { + let path = self.cache_path(rel); + path.is_file().then_some(path) + } + + pub fn cached_entry(&self, rel: &Path) -> Result> { + let metadata = self.load_metadata()?; + Ok(metadata.entries.get(&Self::rel_string(rel)).cloned()) + } + + pub fn put_cached_entry(&self, rel: &Path, entry: &RemoteEntry) -> Result<()> { + let mut metadata = self.load_metadata()?; + metadata + .entries + .insert(Self::rel_string(rel), entry.clone()); + self.write_metadata(&metadata) + } + + pub fn remove_cached(&self, rel: &Path) -> Result<()> { + let path = self.cache_path(rel); + if path.exists() { + fs::remove_file(path)?; + } + let mut metadata = self.load_metadata()?; + metadata.entries.remove(&Self::rel_string(rel)); + self.write_metadata(&metadata) + } + pub fn is_whiteout(&self, rel: &Path) -> Result { let whiteouts = self.load_whiteouts()?; Ok(whiteouts.deleted.contains(&Self::rel_string(rel))) @@ -161,6 +194,7 @@ impl OverlayStore { cursor += bytes.len() as u64; } fs::rename(tmp, &cache_path)?; + self.put_cached_entry(rel, &entry)?; return read_range_from_file(&cache_path, offset as u64, size as usize); } @@ -224,15 +258,45 @@ impl OverlayStore { file.write_all(b"\n")?; Ok(()) } + + fn metadata_path(&self) -> PathBuf { + self.file_cache.join("metadata.json") + } + + fn load_metadata(&self) -> Result { + let path = self.metadata_path(); + if !path.exists() { + return Ok(MetadataFile::default()); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + Ok(serde_json::from_str(&json)?) 
+ } + + fn write_metadata(&self, metadata: &MetadataFile) -> Result<()> { + fs::create_dir_all(&self.file_cache)?; + let json = serde_json::to_vec_pretty(metadata)?; + let tmp = self.metadata_path().with_extension("json.tmp"); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + drop(file); + fs::rename(tmp, self.metadata_path())?; + Ok(()) + } } fn read_range_from_file(path: &Path, offset: u64, size: usize) -> Result> { let mut file = File::open(path)?; - let mut buf = Vec::new(); - file.read_to_end(&mut buf)?; - let start = offset.min(buf.len() as u64) as usize; - let end = (start + size).min(buf.len()); - Ok(buf[start..end].to_vec()) + file.seek(SeekFrom::Start(offset))?; + let mut buf = vec![0; size]; + let read = file.read(&mut buf)?; + buf.truncate(read); + Ok(buf) } #[cfg(unix)] @@ -285,4 +349,33 @@ mod tests { PathBuf::from("wp-config.php") ); } + + #[test] + fn stores_cached_remote_metadata() { + let temp = tempfile::tempdir().unwrap(); + let paths = ClonePaths { + root: temp.path().to_path_buf(), + manifest: temp.path().join("manifest.json"), + upper: temp.path().join("upper"), + file_cache: temp.path().join("file-cache"), + db: temp.path().join("db"), + generated: temp.path().join("generated"), + run: temp.path().join("run"), + whiteouts: temp.path().join("whiteouts.json"), + }; + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-includes/version.php"); + let entry = RemoteEntry { + name: "version.php".to_string(), + kind: "file".to_string(), + size: 123, + mode: 0o100644, + mtime: 42, + }; + + store.put_cached_entry(rel, &entry).unwrap(); + assert_eq!(store.cached_entry(rel).unwrap().unwrap().size, 123); + store.remove_cached(rel).unwrap(); + assert!(store.cached_entry(rel).unwrap().is_none()); + } } From 79ca90692e6cd8725366e8b1d8d4e32b447eb3d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 
03:33:57 +0200 Subject: [PATCH 17/39] Add splash progress for lazy COW loading --- experiments/remote-wp-cow/.env.example | 5 +- experiments/remote-wp-cow/PRD.md | 16 +- experiments/remote-wp-cow/README.md | 22 +- experiments/remote-wp-cow/compose.yaml | 5 +- experiments/remote-wp-cow/src/cli.rs | 48 +-- experiments/remote-wp-cow/src/generate.rs | 466 +++++++++++++++++++++- experiments/remote-wp-cow/src/overlay.rs | 101 +++++ experiments/remote-wp-cow/src/remote.rs | 58 --- experiments/remote-wp-cow/src/run.rs | 4 + 9 files changed, 611 insertions(+), 114 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 294d476a..8e086212 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -8,9 +8,6 @@ WPCOW_HTTP_PORT=8080 WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 WPCOW_CACHE_MAX_FILE_MB=64 -WPCOW_RUNTIME_SYNC=0 -WPCOW_RUNTIME_SYNC_FORCE=0 -WPCOW_RUNTIME_SYNC_TIMEOUT_SECS=180 WPCOW_REMOTE_DB_TUNNEL=1 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=15 WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 @@ -18,6 +15,8 @@ WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 WPCOW_PHP_MAX_EXECUTION_SECS=30 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 +WPCOW_PHP_WORKERS=4 +WPCOW_SPLASH=1 # Set this to 1 for a filesystem-only smoke test that does not export DB schema. WPCOW_SKIP_SCHEMA=0 diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md index 2a9a0386..f60842f8 100644 --- a/experiments/remote-wp-cow/PRD.md +++ b/experiments/remote-wp-cow/PRD.md @@ -27,6 +27,12 @@ wp-cow-lab-serve - Startup output shows timed phases so slow work is visible. - The browser must not spin indefinitely. Slow or failing remote work must return a visible error with timing context. +- If the first real WordPress response is still warming files, the browser + should receive a local splash/progress page quickly instead of a blank loading + tab. +- The clone must serve the actual remote-backed site. 
A WordPress installation + wizard indicates an empty or unavailable DB lower layer and must be surfaced + as a wp-cow runtime error, not success. - No full `wp-content/uploads` copy. - No optimistic full database row dump. - Local writes must not reach production. @@ -55,6 +61,8 @@ Startup should do: 5. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, and safety MU plugin. 6. Serve files lazily and persistently cache only the files touched by requests. +7. For the first dynamic browser request, show a temporary splash page that + polls real file-cache progress while a bypass request warms WordPress. ## File Materialization Policy @@ -99,7 +107,7 @@ The CLI should print phase names and durations: - file cache hits/misses where practical - mount - php start -- first request diagnostics where practical +- first request file-cache progress through `/__wp-cow/progress` ## Test Site @@ -134,5 +142,7 @@ Then: curl -I --max-time 10 http://localhost:9481/ ``` -The response must complete within the timeout. A WordPress error page is -acceptable during development only if it returns quickly with diagnostic output. +The response must complete within the timeout. A splash/progress page is +acceptable while the first real page warms. A WordPress error page is acceptable +during development only if it returns quickly with diagnostic output. The +WordPress installation wizard is not acceptable as a successful response. diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index a436d159..77f74c32 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -130,9 +130,17 @@ download media, runtime directories, or table rows up front. File reads are request-driven. When WordPress opens a remote file, `wp-cow` fetches that file into the persistent `file-cache/` and records the remote metadata beside it. 
Later reads and later runs use the local cached copy instead -of fetching the file or statting it remotely again. Runtime batch sync is -disabled by default; set `WPCOW_RUNTIME_SYNC=1` only for debugging a bounded -runtime copy. +of fetching the file or statting it remotely again. Runtime batch sync is not +part of `serve`; old `WPCOW_RUNTIME_SYNC` environment values are ignored so +plugin/theme/runtime trees stay lazy too. + +The first browser hit can still spend time fetching the exact PHP files needed +to boot WordPress. With `WPCOW_SPLASH=1` (the Docker default), `wp-cow` returns a +temporary local splash page immediately and starts the real request in the +browser. The splash polls `/__wp-cow/progress`, which is backed by the local file +cache progress file, then swaps in the warmed WordPress response. PHP is started +with multiple CLI server workers (`WPCOW_PHP_WORKERS`, default `4`) so progress +polling can continue while the warm request is running. The lab also starts a persistent SSH tunnel for remote database reads when the remote `DB_HOST` is TCP-reachable from the SSH host. This avoids one SSH/PHP @@ -144,7 +152,13 @@ The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the browser spinning forever. Adjust the defaults with `WPCOW_CONTROL_REQUEST_TIMEOUT_SECS`, `WPCOW_REMOTE_COMMAND_TIMEOUT_SECS`, -`WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS`, and `WPCOW_PHP_MAX_EXECUTION_SECS`. +`WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS`, `WPCOW_PHP_MAX_EXECUTION_SECS`, and +`WPCOW_PHP_SOCKET_TIMEOUT_SECS`. + +If WordPress tries to show the installation wizard, the router treats that as a +wp-cow DB/runtime failure. The clone should either show the real remote-backed +site or a diagnostic error; the installer is not considered a successful local +copy. 
Open this on the Mac: diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 2843733e..bf8e9089 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -29,9 +29,6 @@ services: WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" - WPCOW_RUNTIME_SYNC: "${WPCOW_RUNTIME_SYNC:-0}" - WPCOW_RUNTIME_SYNC_FORCE: "${WPCOW_RUNTIME_SYNC_FORCE:-0}" - WPCOW_RUNTIME_SYNC_TIMEOUT_SECS: "${WPCOW_RUNTIME_SYNC_TIMEOUT_SECS:-180}" WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-1}" WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-15}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" @@ -39,6 +36,8 @@ services: WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-30}" WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" + WPCOW_PHP_WORKERS: "${WPCOW_PHP_WORKERS:-4}" + WPCOW_SPLASH: "${WPCOW_SPLASH:-1}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index da07a407..e3dc9407 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -87,7 +87,7 @@ struct ServeArgs { http: String, #[arg(long)] no_php: bool, - #[arg(long)] + #[arg(long, hide = true)] no_runtime_sync: bool, } @@ -308,29 +308,24 @@ fn serve_site(args: ServeArgs) -> Result<()> { metadata_started.elapsed().as_secs_f64() ); - if should_sync_runtime(&paths, args.no_runtime_sync) { - let phase_started = Instant::now(); - let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); - remote.ensure_master()?; + if args.no_runtime_sync { println!( - "syncing WordPress 
runtime files for '{}' (uploads stay lazy)", + "--no-runtime-sync is now the fixed serve behavior for '{}'", manifest.name ); - remote - .sync_runtime_files(&paths.upper) - .context("sync WordPress runtime files")?; - fs::write(paths.generated.join("runtime-files.synced"), b"ok\n")?; - println!( - "synced WordPress runtime files for '{}' in {:.2}s", - manifest.name, - phase_started.elapsed().as_secs_f64() - ); - } else { + } + if std::env::var_os("WPCOW_RUNTIME_SYNC").is_some() + || std::env::var_os("WPCOW_RUNTIME_SYNC_FORCE").is_some() + { println!( - "runtime sync skipped for '{}'; requested files will be cached on demand", + "ignoring runtime sync environment for '{}'; requested files will be fetched on demand", manifest.name ); } + println!( + "runtime/plugin/theme/upload trees stay lazy for '{}'; requested files will be cached on demand", + manifest.name + ); generate::write_wordpress_overrides(&paths, &manifest)?; @@ -382,25 +377,6 @@ fn serve_site(args: ServeArgs) -> Result<()> { run::run_site(manifest, paths, options) } -fn should_sync_runtime(paths: &crate::config::ClonePaths, no_runtime_sync: bool) -> bool { - if no_runtime_sync || env_bool("WPCOW_RUNTIME_SYNC", false) == Some(false) { - return false; - } - if env_bool("WPCOW_RUNTIME_SYNC_FORCE", false) == Some(true) { - return true; - } - !paths.generated.join("runtime-files.synced").is_file() -} - -fn env_bool(name: &str, default: bool) -> Option { - let raw = std::env::var(name).ok()?; - match raw.to_ascii_lowercase().as_str() { - "1" | "true" | "yes" | "on" => Some(true), - "0" | "false" | "no" | "off" => Some(false), - _ => Some(default), - } -} - fn init_db(args: NameArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index c96a67f4..641ab647 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ 
b/experiments/remote-wp-cow/src/generate.rs @@ -12,7 +12,7 @@ pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Res paths.upper.join("wp-content/mu-plugins/wp-cow-safety.php"), safety_mu_plugin_php(), )?; - fs::write(paths.generated.join("router.php"), router_php())?; + fs::write(paths.generated.join("router.php"), router_php(paths))?; Ok(()) } @@ -47,6 +47,16 @@ if ( ! defined( 'ABSPATH' ) ) {{ define( 'ABSPATH', __DIR__ . '/' ); }} +$wp_cow_db_dropin = ABSPATH . 'wp-content/db.php'; +if ( ! is_readable( $wp_cow_db_dropin ) ) {{ + http_response_code( 500 ); + header( 'Content-Type: text/plain; charset=utf-8' ); + echo "wp-cow DB/runtime error\n\n"; + echo "The generated wp-content/db.php drop-in is missing or unreadable. "; + echo "Refusing to boot against the empty local schema because that can look like a fresh WordPress install.\n"; + exit( 1 ); +}} + require_once ABSPATH . 'wp-settings.php'; "#, local_db_name = php_string(&manifest.local_db.name), @@ -117,6 +127,21 @@ function cow_control_timeout_secs() { return $timeout; } +function cow_db_runtime_fail( $message ) { + $message = (string) $message; + if ( ! headers_sent() ) { + http_response_code( 500 ); + header( 'Content-Type: text/html; charset=utf-8' ); + } + echo 'wp-cow DB/runtime error'; + echo ''; + echo '

wp-cow DB/runtime error

'; + echo '

The remote database lower layer is unavailable, so this clone will not fall back to the empty local schema or show the WordPress installer as success.

'; + echo '
' . htmlspecialchars( $message, ENT_QUOTES, 'UTF-8' ) . '
'; + echo '
'; + exit( 1 ); +} + function cow_control_request( $path, $payload ) { $payload['clone'] = WPCOW_CLONE; $url = rtrim( WPCOW_CONTROL_URL, '/' ) . $path; @@ -179,7 +204,7 @@ class Cow_DB extends wpdb { $result = cow_control_request( '/materialize', array( 'tables' => $tables ) ); if ( empty( $result['ok'] ) ) { $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow materialization failed'; - return false; + cow_db_runtime_fail( 'control /materialize failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } return parent::query( $query ); } @@ -192,6 +217,8 @@ class Cow_DB extends wpdb { if ( ! empty( $route['ok'] ) && isset( $route['backend'] ) && 'local' === $route['backend'] ) { return parent::query( $query ); } + $this->last_error = isset( $route['error'] ) ? $route['error'] : 'wp-cow route decision failed'; + cow_db_runtime_fail( 'control /route failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } return parent::query( $query ); @@ -203,7 +230,7 @@ class Cow_DB extends wpdb { $result = $remote->query( $query, MYSQLI_STORE_RESULT ); if ( false === $result ) { $this->last_error = $remote->error; - return false; + cow_db_runtime_fail( 'remote mysqli query failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } $this->last_result = array(); @@ -234,7 +261,7 @@ class Cow_DB extends wpdb { $result = cow_control_request( '/query', array( 'sql' => $query ) ); if ( empty( $result['ok'] ) ) { $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow remote query failed'; - return false; + cow_db_runtime_fail( 'control /query failed: ' . $this->last_error . "\n\nSQL:\n" . 
$query ); } $this->last_result = array(); @@ -342,17 +369,230 @@ add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { "# } -pub fn router_php() -> &'static str { +pub fn router_php(paths: &ClonePaths) -> String { r#" 'idle', + 'active_path' => '', + 'active_bytes' => 0, + 'active_total' => 0, + 'files_cached' => 0, + 'bytes_cached' => 0, + 'last_cached_path' => '', + 'updated_at_unix_ms' => 0, + ); + if ( is_file( $wp_cow_progress_file ) ) { + $decoded = json_decode( file_get_contents( $wp_cow_progress_file ), true ); + if ( is_array( $decoded ) ) { + $progress = array_merge( $progress, $decoded ); + } + } + $progress['ready'] = is_file( $wp_cow_ready_file ); + echo json_encode( $progress ); + return true; +} + $path = parse_url( $_SERVER['REQUEST_URI'], PHP_URL_PATH ); $file = rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ) . $path; +function wp_cow_looks_like_installer( $html ) { + $html = (string) $html; + return ( + false !== stripos( $html, 'wp-admin/install.php' ) || + false !== stripos( $html, 'wp-admin/setup-config.php' ) || + false !== stripos( $html, 'WordPress › Installation' ) || + false !== stripos( $html, 'Welcome to the famous five-minute WordPress installation' ) + ); +} + +function wp_cow_runtime_error_page( $title, $message, $details = '' ) { + if ( ! headers_sent() ) { + http_response_code( 500 ); + header( 'Content-Type: text/html; charset=utf-8' ); + header( 'Cache-Control: no-store' ); + } + echo '' . htmlspecialchars( $title, ENT_QUOTES, 'UTF-8' ) . ''; + echo ''; + echo '

' . htmlspecialchars( $title, ENT_QUOTES, 'UTF-8' ) . '

'; + echo '

' . htmlspecialchars( $message, ENT_QUOTES, 'UTF-8' ) . '

'; + if ( '' !== $details ) { + echo '
' . htmlspecialchars( $details, ENT_QUOTES, 'UTF-8' ) . '
'; + } + echo '
'; +} + +function wp_cow_render_wordpress( $ready_file ) { + ob_start(); + require rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ) . '/index.php'; + $html = ob_get_clean(); + + if ( wp_cow_looks_like_installer( $html ) ) { + wp_cow_runtime_error_page( + 'wp-cow did not load the remote site', + 'WordPress tried to show the installation wizard. This clone refuses to treat an empty or unavailable database lower layer as a successful site load.', + 'Check the remote database probe, SSH connectivity, and wp-content/db.php drop-in before retrying.' + ); + return true; + } + + if ( ! is_dir( dirname( $ready_file ) ) ) { + mkdir( dirname( $ready_file ), 0777, true ); + } + file_put_contents( $ready_file, json_encode( array( 'ready_at' => time() ) ) ); + echo $html; + return true; +} + +if ( in_array( $path, array( '/wp-admin/install.php', '/wp-admin/setup-config.php' ), true ) ) { + wp_cow_runtime_error_page( + 'wp-cow did not load the remote site', + 'WordPress tried to show an installation/setup path. This clone refuses to treat an empty or unavailable database lower layer as a successful site load.', + 'Check the remote database probe, SSH connectivity, and wp-content/db.php drop-in before retrying.' + ); + return true; +} + if ( '/' !== $path && is_file( $file ) ) { return false; } -require rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ) . '/index.php'; +$should_show_splash = ( + '0' !== getenv( 'WPCOW_SPLASH' ) && + ! isset( $_GET['__wp_cow_bypass_splash'] ) && + ! is_file( $wp_cow_ready_file ) && + in_array( $_SERVER['REQUEST_METHOD'], array( 'GET', 'HEAD' ), true ) && + ( '/' === $path || false === strpos( basename( $path ), '.' ) ) +); + +if ( $should_show_splash ) { + header( 'Content-Type: text/html; charset=utf-8' ); + header( 'Cache-Control: no-store' ); + echo <<<'HTML' + + + + + + wp-cow is warming this page + + + +
+

Preparing local WordPress

+

Fetching only the remote files this request needs. Cached files will be reused on later requests.

+
+
Starting...
+
+ + + +HTML; + return true; +} + +if ( isset( $_GET['__wp_cow_bypass_splash'] ) ) { + return wp_cow_render_wordpress( $wp_cow_ready_file ); +} + +return wp_cow_render_wordpress( $wp_cow_ready_file ); "# + .replace( + "__WPCOW_PROGRESS_FILE__", + &php_string(&paths.file_cache.join("progress.json").to_string_lossy()), + ) + .replace( + "__WPCOW_READY_FILE__", + &php_string(&paths.run.join("first-request-ready.json").to_string_lossy()), + ) } fn php_string(value: &str) -> String { @@ -371,7 +611,12 @@ pub fn generated_file_paths(root: &Path) -> Vec { #[cfg(test)] mod tests { use super::*; - use crate::config::{LocalDb, Manifest, Probe, RemoteDbTunnel, MANIFEST_VERSION}; + use crate::config::{clone_paths, LocalDb, Manifest, Probe, RemoteDbTunnel, MANIFEST_VERSION}; + use std::io::{Read, Write}; + use std::net::{TcpListener, TcpStream}; + use std::process::Command; + use std::thread; + use std::time::{Duration, Instant}; fn manifest() -> Manifest { Manifest { @@ -414,6 +659,8 @@ mod tests { assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_REMOTE_DB_HOST")); + assert!(php.contains("wp-cow DB/runtime error")); + assert!(php.contains("wp-content/db.php")); } #[test] @@ -422,6 +669,8 @@ mod tests { assert!(php.contains("cow_is_write_sql")); assert!(php.contains("/materialize")); assert!(php.contains("cow_remote_mysqli")); + assert!(php.contains("cow_db_runtime_fail")); + assert!(php.contains("will not fall back to the empty local schema")); } #[test] @@ -431,4 +680,207 @@ mod tests { assert!(php.contains("X-Robots-Tag")); assert!(php.contains("pre_http_request")); } + + #[test] + fn router_exposes_splash_and_progress_endpoint() { + let temp = tempfile::tempdir().unwrap(); + let paths = clone_paths(temp.path(), "example"); + let php = router_php(&paths); + assert!(php.contains("/__wp-cow/progress")); + assert!(php.contains("__wp_cow_bypass_splash")); + 
assert!(php.contains("wp_cow_looks_like_installer")); + assert!(php.contains("WordPress tried to show the installation wizard")); + assert!(php.contains("Cache-Control: no-store")); + assert!(!php.contains("__WPCOW_PROGRESS_FILE__")); + assert!(!php.contains("__WPCOW_READY_FILE__")); + } + + #[test] + fn generated_php_lints() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP lint because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let paths = clone_paths(temp.path(), "example"); + let files = [ + ("wp-config.php", wp_config_php(&manifest())), + ("db.php", db_dropin_php().to_string()), + ("wp-cow-safety.php", safety_mu_plugin_php().to_string()), + ("router.php", router_php(&paths)), + ]; + + for (name, php) in files { + let path = temp.path().join(name); + std::fs::write(&path, php).unwrap(); + let output = Command::new("php") + .arg("-l") + .arg(&path) + .output() + .unwrap_or_else(|err| panic!("run php -l for {name}: {err}")); + assert!( + output.status.success(), + "php -l failed for {name}: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + } + + #[test] + fn router_splash_and_progress_smoke_responds_quickly() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping router smoke test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let paths = clone_paths(temp.path(), "example"); + fs::create_dir_all(&paths.generated).unwrap(); + fs::create_dir_all(&paths.file_cache).unwrap(); + fs::create_dir_all(&paths.run).unwrap(); + + let docroot = temp.path().join("docroot"); + fs::create_dir_all(&docroot).unwrap(); + let router = paths.generated.join("router.php"); + fs::write(&router, router_php(&paths)).unwrap(); + + let port = free_tcp_port(); + let mut child = Command::new("php") + .env("WPCOW_SPLASH", "1") + .env("PHP_CLI_SERVER_WORKERS", "4") + .arg("-S") + 
.arg(format!("127.0.0.1:{port}")) + .arg("-t") + .arg(&docroot) + .arg(&router) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn() + .unwrap_or_else(|err| panic!("start php server: {err}")); + + let started = Instant::now(); + let progress = loop { + if started.elapsed() > Duration::from_secs(5) { + let _ = child.kill(); + let _ = child.wait(); + panic!("php router did not start within 5s"); + } + if let Some(status) = child + .try_wait() + .unwrap_or_else(|err| panic!("poll php server: {err}")) + { + panic!("php router exited early with status {status}"); + } + match http_get(port, "/__wp-cow/progress", Duration::from_secs(1)) { + Ok(response) if response.contains("\"phase\":\"idle\"") => break response, + Err(_) => thread::sleep(Duration::from_millis(50)), + Ok(_) => thread::sleep(Duration::from_millis(50)), + } + }; + assert!( + progress.contains("\"phase\":\"idle\""), + "unexpected progress response: {}", + progress + ); + assert!( + progress.contains("\"ready\":false"), + "unexpected progress response: {}", + progress + ); + + let request_started = Instant::now(); + let splash = http_get_nonempty(port, "/wp-cow-smoke", Duration::from_secs(2)); + assert!( + request_started.elapsed() < Duration::from_secs(2), + "splash took {:?}", + request_started.elapsed() + ); + assert!( + splash.contains("Preparing local WordPress"), + "unexpected splash response: {}", + splash + ); + assert!( + splash.contains("__wp_cow_bypass_splash"), + "unexpected splash response: {}", + splash + ); + + fs::write( + docroot.join("index.php"), + "WordPress › Installationinstall';", + ) + .unwrap(); + let installer = http_get_nonempty( + port, + "/wp-cow-smoke?__wp_cow_bypass_splash=1", + Duration::from_secs(2), + ); + assert!( + installer.starts_with("HTTP/1.1 500"), + "unexpected installer response: {}", + installer + ); + assert!( + installer.contains("wp-cow did not load the remote site"), + "unexpected installer response: {}", + installer + ); + 
fs::create_dir_all(docroot.join("wp-admin")).unwrap(); + fs::write( + docroot.join("wp-admin/install.php"), + " u16 { + let listener = TcpListener::bind(("127.0.0.1", 0)).unwrap(); + listener.local_addr().unwrap().port() + } + + fn http_get(port: u16, path: &str, timeout: Duration) -> std::io::Result { + let mut stream = TcpStream::connect(("127.0.0.1", port))?; + stream.set_read_timeout(Some(timeout))?; + stream.set_write_timeout(Some(timeout))?; + write!( + stream, + "GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n" + )?; + + let mut response = String::new(); + stream.read_to_string(&mut response)?; + Ok(response) + } + + fn http_get_nonempty(port: u16, path: &str, timeout: Duration) -> String { + let started = Instant::now(); + loop { + match http_get(port, path, Duration::from_millis(500)) { + Ok(response) if !response.is_empty() => return response, + Ok(_) | Err(_) if started.elapsed() < timeout => { + thread::sleep(Duration::from_millis(50)); + } + Ok(response) => panic!("empty response from {path}: {response}"), + Err(err) => panic!("request {path} failed: {err}"), + } + } + } } diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index c61e58da..ed78534f 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -5,6 +5,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fs::{self, File, OpenOptions}; use std::io::{Read, Seek, SeekFrom, Write}; use std::path::{Component, Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; use crate::config::ClonePaths; use crate::remote::{RemoteClient, RemoteEntry}; @@ -19,6 +20,18 @@ struct MetadataFile { entries: BTreeMap, } +#[derive(Debug, Default, Serialize, Deserialize)] +struct CacheProgress { + phase: String, + active_path: String, + active_bytes: u64, + active_total: u64, + files_cached: u64, + bytes_cached: u64, + last_cached_path: String, + updated_at_unix_ms: u128, +} + #[derive(Debug, Clone)] 
pub struct OverlayStore { pub upper: PathBuf, @@ -184,6 +197,8 @@ impl OverlayStore { let mut out = File::create(&tmp)?; let mut cursor = 0_u64; let chunk = 1024 * 1024; + let rel_string = Self::rel_string(rel); + let _ = self.write_cache_progress(&rel_string, "fetching", 0, entry.size); while cursor < entry.size { let wanted = chunk.min((entry.size - cursor) as usize); let bytes = remote.read_range(rel, cursor, wanted)?; @@ -192,12 +207,20 @@ impl OverlayStore { } out.write_all(&bytes)?; cursor += bytes.len() as u64; + let _ = self.write_cache_progress(&rel_string, "fetching", cursor, entry.size); } fs::rename(tmp, &cache_path)?; self.put_cached_entry(rel, &entry)?; + let _ = self.finish_cache_progress(&rel_string, entry.size); return read_range_from_file(&cache_path, offset as u64, size as usize); } + let _ = self.write_cache_progress( + &Self::rel_string(rel), + "streaming", + offset as u64, + offset as u64 + size as u64, + ); remote .read_range(rel, offset as u64, size as usize) .with_context(|| format!("remote read {}", Self::rel_string(rel))) @@ -263,6 +286,10 @@ impl OverlayStore { self.file_cache.join("metadata.json") } + fn progress_path(&self) -> PathBuf { + self.file_cache.join("progress.json") + } + fn load_metadata(&self) -> Result { let path = self.metadata_path(); if !path.exists() { @@ -288,6 +315,80 @@ impl OverlayStore { fs::rename(tmp, self.metadata_path())?; Ok(()) } + + fn load_progress(&self) -> Result { + let path = self.progress_path(); + if !path.exists() { + return Ok(CacheProgress { + phase: "idle".to_string(), + updated_at_unix_ms: now_unix_ms(), + ..CacheProgress::default() + }); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + Ok(serde_json::from_str(&json)?) 
+ } + + fn write_progress(&self, progress: &CacheProgress) -> Result<()> { + fs::create_dir_all(&self.file_cache)?; + let json = serde_json::to_vec_pretty(progress)?; + let tmp = self.progress_tmp_path(); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + drop(file); + fs::rename(tmp, self.progress_path())?; + Ok(()) + } + + fn progress_tmp_path(&self) -> PathBuf { + self.file_cache.join(format!( + "progress.json.tmp.{}.{}", + std::process::id(), + now_unix_ms() + )) + } + + fn write_cache_progress( + &self, + rel: &str, + phase: &str, + active_bytes: u64, + active_total: u64, + ) -> Result<()> { + let mut progress = self.load_progress()?; + progress.phase = phase.to_string(); + progress.active_path = rel.to_string(); + progress.active_bytes = active_bytes; + progress.active_total = active_total; + progress.updated_at_unix_ms = now_unix_ms(); + self.write_progress(&progress) + } + + fn finish_cache_progress(&self, rel: &str, size: u64) -> Result<()> { + let mut progress = self.load_progress()?; + progress.phase = "cached".to_string(); + progress.active_path.clear(); + progress.active_bytes = 0; + progress.active_total = 0; + progress.files_cached = progress.files_cached.saturating_add(1); + progress.bytes_cached = progress.bytes_cached.saturating_add(size); + progress.last_cached_path = rel.to_string(); + progress.updated_at_unix_ms = now_unix_ms(); + self.write_progress(&progress) + } +} + +fn now_unix_ms() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_millis()) + .unwrap_or_default() } fn read_range_from_file(path: &Path, offset: u64, size: usize) -> Result> { diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 000fd0fb..17df37a4 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -1,7 +1,6 @@ use anyhow::{anyhow, 
Context, Result}; use serde::{Deserialize, Serialize}; use std::ffi::OsStr; -use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::process::{Child, Command, Stdio}; @@ -141,59 +140,6 @@ impl RemoteClient { Err(io::Error::new(io::ErrorKind::Other, stderr.to_string())) } - pub fn sync_runtime_files(&self, upper: &Path) -> Result<()> { - fs::create_dir_all(upper).with_context(|| format!("create {}", upper.display()))?; - - let remote_command = format!( - r#"cd {} && (find . -mindepth 1 -maxdepth 1 \( -type f -o -type l \) -print; find wp-content -mindepth 1 -maxdepth 1 \( -type f -o -type l \) -print 2>/dev/null; for p in wp-admin wp-includes wp-content/plugins wp-content/themes wp-content/mu-plugins wp-content/languages; do if [ -e "$p" ]; then printf '%s\n' "$p"; fi; done) | tar -cf - -T -"#, - shell_quote(&self.manifest.remote_path) - ); - - let mut ssh = self - .ssh_command(&remote_command, runtime_sync_timeout_secs()) - .stdout(Stdio::piped()) - .stderr(Stdio::inherit()) - .spawn() - .context("start remote runtime tar over ssh")?; - - let mut tar = Command::new("tar") - .arg("--no-same-owner") - .arg("-C") - .arg(upper) - .arg("-xf") - .arg("-") - .stdin(Stdio::piped()) - .stderr(Stdio::inherit()) - .spawn() - .context("start local runtime tar extraction")?; - - let copy_result = { - let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); - let mut tar_stdin = tar.stdin.take().expect("tar stdin piped"); - io::copy(&mut ssh_stdout, &mut tar_stdin).context("copy runtime tar stream") - }; - - let ssh_status = ssh.wait().context("wait for remote runtime tar")?; - let tar_status = tar.wait().context("wait for local runtime tar")?; - - copy_result?; - - if !ssh_status.success() { - return Err(anyhow!( - "remote runtime tar failed with status {}", - ssh_status - )); - } - if !tar_status.success() { - return Err(anyhow!( - "local runtime tar extraction failed with status {}", - tar_status - )); - } - - Ok(()) - } - pub fn 
start_db_tunnel(&self) -> Result> { if env_bool("WPCOW_REMOTE_DB_TUNNEL", true) == Some(false) { return Ok(None); @@ -501,10 +447,6 @@ fn remote_command_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_COMMAND_TIMEOUT_SECS", 20) } -fn runtime_sync_timeout_secs() -> u64 { - env_u64("WPCOW_RUNTIME_SYNC_TIMEOUT_SECS", 180) -} - fn remote_db_query_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS", 10) } diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 0af6a566..eb60dc23 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -127,6 +127,10 @@ pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> R fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { Command::new("php") + .env( + "PHP_CLI_SERVER_WORKERS", + env_u64("WPCOW_PHP_WORKERS", 4).to_string(), + ) .arg("-d") .arg(format!( "max_execution_time={}", From d22621b4c46b784ef0a23f242fad8b16faf4e09a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 11:52:22 +0200 Subject: [PATCH 18/39] Bootstrap WordPress options reads locally --- experiments/remote-wp-cow/.env.example | 4 +- experiments/remote-wp-cow/PRD.md | 6 + experiments/remote-wp-cow/README.md | 6 + experiments/remote-wp-cow/compose.yaml | 4 +- experiments/remote-wp-cow/src/control.rs | 6 +- experiments/remote-wp-cow/src/db.rs | 183 ++++++++++++++++++++++ experiments/remote-wp-cow/src/generate.rs | 15 +- experiments/remote-wp-cow/src/run.rs | 2 +- 8 files changed, 215 insertions(+), 11 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 8e086212..282c926f 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -9,11 +9,11 @@ WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_REMOTE_DB_TUNNEL=1 -WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=15 
+WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 -WPCOW_PHP_MAX_EXECUTION_SECS=30 +WPCOW_PHP_MAX_EXECUTION_SECS=90 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 WPCOW_PHP_WORKERS=4 WPCOW_SPLASH=1 diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md index f60842f8..a54756d1 100644 --- a/experiments/remote-wp-cow/PRD.md +++ b/experiments/remote-wp-cow/PRD.md @@ -97,6 +97,12 @@ If remote DB reads are too slow for first page boot, the next fallback should be a bounded bootstrap materialization of only essential option rows, not a full table dump. +The MVP implements that fallback for `*_options`: on the first matching +autoload/core-option read, it copies only autoloaded rows and core +identity/theme/plugin option names into the local database and routes matching +reads locally. Arbitrary non-bootstrap option reads still go through the remote +read path unless the table has been fully materialized. + ## Observability The CLI should print phase names and durations: diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 77f74c32..629536dd 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -148,6 +148,12 @@ subprocess per WordPress read query. Write-class SQL is still blocked from the remote database and materialized locally first. Set `WPCOW_REMOTE_DB_TUNNEL=0` to fall back to daemon-mediated remote reads. +On first WordPress boot, `wp-cow` special-cases the options-table bootstrap +query. It materializes only autoloaded option rows plus core identity/theme/plugin +option names into the local database, then routes those matching reads locally. +That keeps the common `SELECT ... FROM *_options WHERE autoload IN (...)` query +off the slow remote `/query` fallback without dumping the whole database. 
+ The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the browser spinning forever. Adjust the defaults with diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index bf8e9089..0a45e5bc 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -30,11 +30,11 @@ services: WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-1}" - WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-15}" + WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-60}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" - WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-30}" + WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" WPCOW_PHP_WORKERS: "${WPCOW_PHP_WORKERS:-4}" WPCOW_SPLASH: "${WPCOW_SPLASH:-1}" diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs index 21288172..fde75eed 100644 --- a/experiments/remote-wp-cow/src/control.rs +++ b/experiments/remote-wp-cow/src/control.rs @@ -102,7 +102,11 @@ fn control_response( } "/route" => { let tables = input.tables.unwrap_or_default(); - let decision = db::route_for_tables(remote, manifest, paths, &tables)?; + let decision = if let Some(sql) = input.sql.as_deref() { + db::route_for_query(remote, manifest, paths, sql, &tables)? + } else { + db::route_for_tables(remote, manifest, paths, &tables)? 
+ }; Ok( json!({ "ok": true, "backend": decision.backend, "materialized": decision.materialized }), ) diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 561ae49e..96c9f61c 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -12,7 +12,10 @@ use crate::sql; #[derive(Debug, Default, Serialize, Deserialize)] pub struct DbState { + #[serde(default)] pub materialized_tables: BTreeSet, + #[serde(default)] + pub option_bootstrap_tables: BTreeSet, } pub fn state_path(paths: &ClonePaths) -> PathBuf { @@ -174,6 +177,35 @@ pub fn route_for_tables( } } +pub fn route_for_query( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + sql_text: &str, + tables: &[String], +) -> Result { + let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables); + let mut state = load_state(paths)?; + + if let Some(options_table) = + option_bootstrap_table_for_sql(&manifest.probe.table_prefix, sql_text, &expanded) + { + if !state.option_bootstrap_tables.contains(&options_table) { + materialize_option_bootstrap(remote, manifest, &options_table).with_context(|| { + format!("materialize option bootstrap rows for {}", options_table) + })?; + state.option_bootstrap_tables.insert(options_table); + write_state(paths, &state)?; + } + return Ok(RouteDecision { + backend: "local".to_string(), + materialized: Vec::new(), + }); + } + + route_for_tables(remote, manifest, paths, tables) +} + pub fn remote_readonly_query(remote: &RemoteClient, sql_text: &str) -> Result { if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) { return Err(anyhow!("refusing to send non-read SQL to remote")); @@ -237,6 +269,128 @@ fn materialize_one_table(remote: &RemoteClient, manifest: &Manifest, table: &str Ok(()) } +fn materialize_option_bootstrap( + remote: &RemoteClient, + manifest: &Manifest, + table: &str, +) -> Result<()> { + let probe = &manifest.probe; + ensure_probe_has_db(probe)?; + 
validate_table_name(table)?; + + let where_sql = option_bootstrap_where_sql(); + let delete_sql = format!( + "DELETE FROM `{}` WHERE {};", + table.replace('`', "``"), + where_sql + ); + run_mysql_exec(manifest, &delete_sql)?; + + let dump_command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --single-transaction --quick --skip-lock-tables --no-create-info --replace --where={} {} {}", + shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&where_sql), + shell_quote(&probe.db_name), + shell_quote(table) + ); + + let mut ssh = remote + .command(&dump_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote option bootstrap mysqldump over ssh")?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut mysql_child = mysql.spawn().context("start local mysql option import")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut mysql_stdin = mysql_child.stdin.take().expect("mysql stdin piped"); + io::copy(&mut ssh_stdout, &mut mysql_stdin)?; + } + + let ssh_output = ssh.wait_with_output()?; + let mysql_status = mysql_child.wait()?; + + if !ssh_output.status.success() { + return Err(anyhow!( + "remote option bootstrap mysqldump failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !mysql_status.success() { + return Err(anyhow!( + "local mysql option bootstrap import failed with status {}", + mysql_status + )); + } + Ok(()) +} + +fn option_bootstrap_table_for_sql( + table_prefix: &str, + sql_text: &str, + tables: &[String], +) -> Option { + if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) { + return None; + } + + let options_table = format!("{}options", table_prefix); + if !tables.iter().any(|table| table == &options_table) { + return None; + } + + let lower = sql_text.to_ascii_lowercase(); + if lower.contains("autoload") { + 
return Some(options_table); + } + + if lower.contains("option_name") + && option_bootstrap_names() + .iter() + .any(|name| lower.contains(&format!("'{}'", name))) + { + return Some(options_table); + } + + None +} + +fn option_bootstrap_where_sql() -> String { + let names = option_bootstrap_names() + .iter() + .map(|name| format!("'{}'", mysql_string_literal(name))) + .collect::>() + .join(", "); + format!("autoload IN ('yes', 'on', 'auto-on', 'auto') OR option_name IN ({names})") +} + +fn option_bootstrap_names() -> &'static [&'static str] { + &[ + "siteurl", + "home", + "blogname", + "blogdescription", + "admin_email", + "active_plugins", + "template", + "stylesheet", + "current_theme", + "permalink_structure", + "rewrite_rules", + "sidebars_widgets", + "stylesheet_root", + "template_root", + "upload_path", + "upload_url_path", + ] +} + fn local_mysql_command(manifest: &Manifest) -> Command { let mut command = Command::new("mysql"); command.arg("--host").arg(&manifest.local_db.host); @@ -333,4 +487,33 @@ mod tests { "--host='localhost' --socket='/tmp/mysql.sock'" ); } + + #[test] + fn detects_option_bootstrap_reads() { + let tables = vec!["ady_options".to_string()]; + assert_eq!( + option_bootstrap_table_for_sql( + "ady_", + "SELECT option_name, option_value FROM ady_options WHERE autoload IN ( 'yes', 'on', 'auto-on', 'auto' )", + &tables + ), + Some("ady_options".to_string()) + ); + assert_eq!( + option_bootstrap_table_for_sql( + "ady_", + "SELECT option_value FROM ady_options WHERE option_name = 'siteurl' LIMIT 1", + &tables + ), + Some("ady_options".to_string()) + ); + assert_eq!( + option_bootstrap_table_for_sql( + "ady_", + "SELECT option_value FROM ady_options WHERE option_name = 'some_plugin_option' LIMIT 1", + &tables + ), + None + ); + } } diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 641ab647..1c955284 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ 
b/experiments/remote-wp-cow/src/generate.rs @@ -122,7 +122,7 @@ function cow_tables_from_sql( $sql ) { function cow_control_timeout_secs() { $timeout = (int) getenv( 'WPCOW_CONTROL_REQUEST_TIMEOUT_SECS' ); if ( $timeout < 1 ) { - $timeout = 15; + $timeout = 60; } return $timeout; } @@ -158,9 +158,10 @@ function cow_control_request( $path, $payload ) { curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout ); $raw = curl_exec( $ch ); $error = curl_error( $ch ); + $errno = curl_errno( $ch ); curl_close( $ch ); if ( false === $raw ) { - return array( 'ok' => false, 'error' => $error ); + return array( 'ok' => false, 'error' => 'curl error ' . $errno . ' calling ' . $url . ': ' . $error ); } } else { $context = stream_context_create( @@ -170,18 +171,21 @@ function cow_control_request( $path, $payload ) { 'header' => "Content-Type: application/json\r\n", 'content' => $body, 'timeout' => $timeout, + 'ignore_errors' => true, ), ) ); $raw = @file_get_contents( $url, false, $context ); if ( false === $raw ) { - return array( 'ok' => false, 'error' => 'wp-cow control request failed' ); + $error = error_get_last(); + $error = isset( $error['message'] ) ? $error['message'] : 'unknown stream error'; + return array( 'ok' => false, 'error' => 'stream error calling ' . $url . ' after ' . $timeout . 's: ' . $error ); } } $decoded = json_decode( $raw, true ); if ( ! is_array( $decoded ) ) { - return array( 'ok' => false, 'error' => 'invalid wp-cow control response' ); + return array( 'ok' => false, 'error' => 'invalid wp-cow control response from ' . $url . ': ' . substr( $raw, 0, 500 ) ); } return $decoded; } @@ -210,7 +214,7 @@ class Cow_DB extends wpdb { } if ( cow_is_safe_read_sql( $query ) ) { - $route = cow_control_request( '/route', array( 'tables' => $tables ) ); + $route = cow_control_request( '/route', array( 'tables' => $tables, 'sql' => $query ) ); if ( ! 
empty( $route['ok'] ) && isset( $route['backend'] ) && 'remote' === $route['backend'] ) { return $this->cow_remote_query( $query ); } @@ -671,6 +675,7 @@ mod tests { assert!(php.contains("cow_remote_mysqli")); assert!(php.contains("cow_db_runtime_fail")); assert!(php.contains("will not fall back to the empty local schema")); + assert!(php.contains("'sql' => $query")); } #[test] diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index eb60dc23..2bbb0ed7 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -134,7 +134,7 @@ fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> R .arg("-d") .arg(format!( "max_execution_time={}", - env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 30) + env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 90) )) .arg("-d") .arg(format!( From a5e959443cd0d810ae61d967a01c0db415eb8c76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 13:48:18 +0200 Subject: [PATCH 19/39] Make remote WordPress COW startup responsive --- experiments/remote-wp-cow/Cargo.lock | 7 + experiments/remote-wp-cow/Cargo.toml | 1 + experiments/remote-wp-cow/src/control.rs | 2 +- experiments/remote-wp-cow/src/db.rs | 273 ++++++++++++++++++++- experiments/remote-wp-cow/src/fusefs.rs | 106 ++++++-- experiments/remote-wp-cow/src/generate.rs | 141 ++++++++++- experiments/remote-wp-cow/src/overlay.rs | 76 ++++-- experiments/remote-wp-cow/src/remote.rs | 282 +++++++++++++++++++++- experiments/remote-wp-cow/src/run.rs | 246 ++++++++++++++++++- 9 files changed, 1074 insertions(+), 60 deletions(-) diff --git a/experiments/remote-wp-cow/Cargo.lock b/experiments/remote-wp-cow/Cargo.lock index 866aa0f7..01513e1c 100644 --- a/experiments/remote-wp-cow/Cargo.lock +++ b/experiments/remote-wp-cow/Cargo.lock @@ -64,6 +64,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.11.1" @@ -986,6 +992,7 @@ name = "wp-cow" version = "0.1.0" dependencies = [ "anyhow", + "base64", "clap", "ctrlc", "fuser", diff --git a/experiments/remote-wp-cow/Cargo.toml b/experiments/remote-wp-cow/Cargo.toml index 7c37b780..14172524 100644 --- a/experiments/remote-wp-cow/Cargo.toml +++ b/experiments/remote-wp-cow/Cargo.toml @@ -8,6 +8,7 @@ license = "MIT" [dependencies] anyhow = "1.0" +base64 = "0.22" clap = { version = "4.5", features = ["derive"] } ctrlc = "3.4" fuser = "0.16" diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs index fde75eed..7177a76b 100644 --- a/experiments/remote-wp-cow/src/control.rs +++ b/experiments/remote-wp-cow/src/control.rs @@ -71,7 +71,7 @@ fn handle_request( let response = match serde_json::from_str::(&body) { Ok(input) => match control_response(request.url(), input, manifest, paths, remote) { Ok(response) => response, - Err(err) => json!({ "ok": false, "error": err.to_string() }), + Err(err) => json!({ "ok": false, "error": format!("{err:#}") }), }, Err(err) => json!({ "ok": false, "error": format!("decode control JSON: {err}") }), }; diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 96c9f61c..0e7d5f2f 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -16,6 +16,8 @@ pub struct DbState { pub materialized_tables: BTreeSet, #[serde(default)] pub option_bootstrap_tables: BTreeSet, + #[serde(default)] + pub option_rows: BTreeSet, } pub fn state_path(paths: &ClonePaths) -> PathBuf { @@ -203,6 +205,18 @@ pub fn route_for_query( }); } + let options_table = format!("{}options", 
manifest.probe.table_prefix); + let option_names = option_names_for_sql(sql_text, &options_table, &expanded); + if !option_names.is_empty() { + materialize_option_rows(remote, manifest, &mut state, &options_table, &option_names) + .with_context(|| format!("materialize option rows for {}", options_table))?; + write_state(paths, &state)?; + return Ok(RouteDecision { + backend: "local".to_string(), + materialized: Vec::new(), + }); + } + route_for_tables(remote, manifest, paths, tables) } @@ -213,6 +227,32 @@ pub fn remote_readonly_query(remote: &RemoteClient, sql_text: &str) -> Result Result> { + let table = format!("{}options", manifest.probe.table_prefix); + validate_table_name(&table)?; + let sql_text = format!( + "SELECT option_value FROM {} WHERE option_name='{}' LIMIT 1;", + qualified_table(manifest, &table), + mysql_string_literal(name) + ); + let output = local_mysql_command(manifest) + .arg("--batch") + .arg("--raw") + .arg("--skip-column-names") + .arg("--execute") + .arg(sql_text) + .output() + .context("query local option value")?; + if !output.status.success() { + return Ok(None); + } + let value = String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .map(|line| line.to_string()); + Ok(value) +} + #[derive(Debug, Serialize)] pub struct RouteDecision { pub backend: String, @@ -222,7 +262,7 @@ pub struct RouteDecision { fn materialize_one_table(remote: &RemoteClient, manifest: &Manifest, table: &str) -> Result<()> { let probe = &manifest.probe; ensure_probe_has_db(probe)?; - let delete_sql = format!("DELETE FROM `{}`;", table.replace('`', "``")); + let delete_sql = format!("DELETE FROM {};", qualified_table(manifest, table)); run_mysql_exec(manifest, &delete_sql)?; let dump_command = format!( @@ -280,8 +320,8 @@ fn materialize_option_bootstrap( let where_sql = option_bootstrap_where_sql(); let delete_sql = format!( - "DELETE FROM `{}` WHERE {};", - table.replace('`', "``"), + "DELETE FROM {} WHERE {};", + qualified_table(manifest, table), 
where_sql ); run_mysql_exec(manifest, &delete_sql)?; @@ -331,6 +371,85 @@ fn materialize_option_bootstrap( Ok(()) } +fn materialize_option_rows( + remote: &RemoteClient, + manifest: &Manifest, + state: &mut DbState, + table: &str, + names: &[String], +) -> Result<()> { + let probe = &manifest.probe; + ensure_probe_has_db(probe)?; + validate_table_name(table)?; + + let missing = names + .iter() + .filter(|name| !state.option_rows.contains(&option_row_key(table, name))) + .cloned() + .collect::>(); + if missing.is_empty() { + return Ok(()); + } + + let where_sql = option_names_where_sql(&missing); + let delete_sql = format!( + "DELETE FROM {} WHERE {};", + qualified_table(manifest, table), + where_sql + ); + run_mysql_exec(manifest, &delete_sql)?; + + let dump_command = format!( + "MYSQL_PWD={} mysqldump {} --user={} --single-transaction --quick --skip-lock-tables --no-create-info --replace --where={} {} {}", + shell_quote(&probe.db_password), + remote_mysql_cli_options(&probe.db_host), + shell_quote(&probe.db_user), + shell_quote(&where_sql), + shell_quote(&probe.db_name), + shell_quote(table) + ); + + let mut ssh = remote + .command(&dump_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote option row mysqldump over ssh")?; + + let mut mysql = local_mysql_command(manifest); + mysql.arg(&manifest.local_db.name).stdin(Stdio::piped()); + let mut mysql_child = mysql + .spawn() + .context("start local mysql option row import")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut mysql_stdin = mysql_child.stdin.take().expect("mysql stdin piped"); + io::copy(&mut ssh_stdout, &mut mysql_stdin)?; + } + + let ssh_output = ssh.wait_with_output()?; + let mysql_status = mysql_child.wait()?; + + if !ssh_output.status.success() { + return Err(anyhow!( + "remote option row mysqldump failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !mysql_status.success() { + return Err(anyhow!( + 
"local mysql option row import failed with status {}", + mysql_status + )); + } + + for name in missing { + state.option_rows.insert(option_row_key(table, &name)); + } + Ok(()) +} + fn option_bootstrap_table_for_sql( table_prefix: &str, sql_text: &str, @@ -361,6 +480,39 @@ fn option_bootstrap_table_for_sql( None } +fn option_names_for_sql(sql_text: &str, options_table: &str, tables: &[String]) -> Vec { + if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) { + return Vec::new(); + } + if !tables.iter().any(|table| table == options_table) { + return Vec::new(); + } + + let lower = sql_text.to_ascii_lowercase(); + let Some(option_name_pos) = lower.find("option_name") else { + return Vec::new(); + }; + let tail = &sql_text[option_name_pos + "option_name".len()..]; + let lower_tail = &lower[option_name_pos + "option_name".len()..]; + + if let Some(eq_pos) = lower_tail.find('=') { + if lower_tail[..eq_pos] + .chars() + .all(|ch| ch.is_ascii_whitespace() || ch == '`') + { + return first_sql_string_literal(&tail[eq_pos + 1..]) + .into_iter() + .collect(); + } + } + + if let Some(in_pos) = lower_tail.find(" in ") { + return sql_string_literals_until_closing_paren(&tail[in_pos + 4..]); + } + + Vec::new() +} + fn option_bootstrap_where_sql() -> String { let names = option_bootstrap_names() .iter() @@ -370,6 +522,19 @@ fn option_bootstrap_where_sql() -> String { format!("autoload IN ('yes', 'on', 'auto-on', 'auto') OR option_name IN ({names})") } +fn option_names_where_sql(names: &[String]) -> String { + let names = names + .iter() + .map(|name| format!("'{}'", mysql_string_literal(name))) + .collect::>() + .join(", "); + format!("option_name IN ({names})") +} + +fn option_row_key(table: &str, name: &str) -> String { + format!("{table}:{name}") +} + fn option_bootstrap_names() -> &'static [&'static str] { &[ "siteurl", @@ -404,6 +569,14 @@ fn local_mysql_command(manifest: &Manifest) -> Command { command } +fn qualified_table(manifest: &Manifest, table: &str) -> 
String { + format!( + "`{}`.`{}`", + manifest.local_db.name.replace('`', "``"), + table.replace('`', "``") + ) +} + fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { let mut command = local_mysql_command(manifest); command.arg("--execute").arg(sql_text); @@ -418,6 +591,47 @@ fn mysql_string_literal(value: &str) -> String { value.replace('\\', "\\\\").replace('\'', "\\'") } +fn first_sql_string_literal(input: &str) -> Option { + sql_string_literals_until_closing_paren(input) + .into_iter() + .next() +} + +fn sql_string_literals_until_closing_paren(input: &str) -> Vec { + let mut out = Vec::new(); + let mut chars = input.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == ')' { + break; + } + if ch != '\'' { + continue; + } + let mut value = String::new(); + while let Some(ch) = chars.next() { + if ch == '\\' { + if let Some(next) = chars.next() { + value.push(next); + } + continue; + } + if ch == '\'' { + if chars.peek() == Some(&'\'') { + let _ = chars.next(); + value.push('\''); + continue; + } + break; + } + value.push(ch); + } + if !value.is_empty() { + out.push(value); + } + } + out +} + fn validate_table_name(table: &str) -> Result<()> { if table.is_empty() || !table @@ -516,4 +730,57 @@ mod tests { None ); } + + #[test] + fn extracts_targeted_option_reads() { + let tables = vec!["ady_options".to_string()]; + assert_eq!( + option_names_for_sql( + "SELECT option_value FROM ady_options WHERE option_name = 'aioseo_options_internal_localized' LIMIT 1", + "ady_options", + &tables + ), + vec!["aioseo_options_internal_localized".to_string()] + ); + assert_eq!( + option_names_for_sql( + "SELECT * FROM ady_options WHERE option_name IN ('a', 'b')", + "ady_options", + &tables + ), + vec!["a".to_string(), "b".to_string()] + ); + } + + #[test] + fn qualifies_local_tables_for_exec_without_selected_database() { + let manifest = Manifest { + version: crate::config::MANIFEST_VERSION, + name: "calm".to_string(), + ssh: 
"example".to_string(), + remote_path: "/srv/www".to_string(), + remote_url: "https://example.com".to_string(), + local_url: "http://localhost:9481".to_string(), + created_at_unix: 1, + probe: crate::config::Probe::default(), + local_db: crate::config::LocalDb { + name: "cow_calm".to_string(), + user: "cow_calm".to_string(), + password: String::new(), + host: "127.0.0.1".to_string(), + port: 33071, + }, + remote_db_tunnel: crate::config::RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: 33072, + }, + control_url: "http://127.0.0.1:39070".to_string(), + cache_max_file_bytes: 1024, + remote_metadata_cache_ttl_secs: 30, + }; + assert_eq!( + qualified_table(&manifest, "ady_options"), + "`cow_calm`.`ady_options`" + ); + } } diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 2c400ba3..6eddb8bf 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -106,6 +106,18 @@ impl CowFs { return Ok(self.attr_from_metadata(ino, &metadata)); } + let mirror = self.overlay.mirror_path(rel).map_err(anyhow_to_io)?; + if let Ok(metadata) = fs::symlink_metadata(&mirror) { + return Ok(self.attr_from_metadata(ino, &metadata)); + } + + if self.has_opaque_ancestor_active(rel)? 
{ + return Err(io::Error::new( + io::ErrorKind::NotFound, + "hidden by local opaque directory", + )); + } + let entry = self.remote_stat(rel)?; Ok(self.attr_from_remote(ino, &entry)) } @@ -255,6 +267,7 @@ impl Filesystem for CowFs { fn lookup(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEntry) { let result = (|| { let rel = self.child_path(parent, name)?; + trace_fuse("lookup", &rel); let ino = self.ino_for_path(&rel); self.attr_for_path(&rel, ino) })(); @@ -272,6 +285,7 @@ impl Filesystem for CowFs { let rel = self .path_for_ino(ino) .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + trace_fuse("getattr", &rel); self.attr_for_path(&rel, ino) })(); match result { @@ -385,6 +399,7 @@ impl Filesystem for CowFs { let rel = self .path_for_ino(ino) .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + trace_fuse("open", &rel); if wants_write(flags) { let upper = self .overlay @@ -404,12 +419,12 @@ impl Filesystem for CowFs { let upper = self.overlay.upper_path(&rel).map_err(anyhow_to_io)?; if upper.exists() { let file = File::open(upper)?; - Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) + Ok((self.allocate_handle(Handle::Local(file)), 0)) } else if let Some(cache_path) = self.overlay.cached_file_path(&rel) { let file = File::open(cache_path)?; - Ok((self.allocate_handle(Handle::Local(file)), flags as u32)) + Ok((self.allocate_handle(Handle::Local(file)), 0)) } else { - Ok((self.allocate_handle(Handle::Remote(rel)), flags as u32)) + Ok((self.allocate_handle(Handle::Remote(rel)), 0)) } } })(); @@ -446,16 +461,18 @@ impl Filesystem for CowFs { } } } - Some(Handle::Remote(rel)) => self - .overlay - .read_cached_or_remote( - &self.remote, - rel, - offset, - size, - self.manifest.cache_max_file_bytes, - ) - .map_err(anyhow_to_io), + Some(Handle::Remote(rel)) => { + trace_fuse("read-remote", rel); + self.overlay + .read_cached_or_remote( + &self.remote, + rel, + offset, + size, + 
self.manifest.cache_max_file_bytes, + ) + .map_err(anyhow_to_io) + } None => Err(io::Error::new(io::ErrorKind::NotFound, "unknown handle")), }; match result { @@ -615,6 +632,7 @@ impl CowFs { let rel = self .path_for_ino(ino) .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; + trace_fuse("readdir", &rel); let mut entries = Vec::new(); entries.push((ino, FileType::Directory, OsString::from("."))); @@ -623,19 +641,27 @@ impl CowFs { entries.push((parent_ino, FileType::Directory, OsString::from(".."))); let mut by_name: BTreeMap = BTreeMap::new(); - match self.remote_readdir(&rel) { - Ok(remote_entries) => { - for entry in remote_entries { - by_name.insert(entry.name.clone(), entry); + let opaque = self.is_opaque_dir_active(&rel)?; + if !opaque { + match self.remote_readdir(&rel) { + Ok(remote_entries) => { + for entry in remote_entries { + by_name.insert(entry.name.clone(), entry); + } } + Err(err) if err.kind() == io::ErrorKind::NotFound => {} + Err(err) => return Err(err), } - Err(err) if err.kind() == io::ErrorKind::NotFound => {} - Err(err) => return Err(err), } for entry in self.overlay.list_upper(&rel).map_err(anyhow_to_io)? { by_name.insert(entry.name.clone(), entry); } + if !opaque { + for entry in self.overlay.list_mirror(&rel).map_err(anyhow_to_io)? { + by_name.insert(entry.name.clone(), entry); + } + } for (name, entry) in by_name { let child_rel = rel.join(&name); @@ -652,6 +678,28 @@ impl CowFs { Ok(entries) } + + fn is_opaque_dir_active(&self, rel: &Path) -> io::Result { + let is_opaque = self.overlay.is_opaque_dir(rel).map_err(anyhow_to_io)?; + if !is_opaque { + return Ok(false); + } + if rel.starts_with(Path::new("wp-content/plugins")) && env_bool("WPCOW_ENABLE_PLUGINS") { + return Ok(false); + } + Ok(true) + } + + fn has_opaque_ancestor_active(&self, rel: &Path) -> io::Result { + let mut current = rel.parent(); + while let Some(parent) = current { + if self.is_opaque_dir_active(parent)? 
{ + return Ok(true); + } + current = parent.parent(); + } + Ok(false) + } } pub fn mount_foreground(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> Result<()> { @@ -663,8 +711,6 @@ pub fn mount_foreground(manifest: Manifest, paths: ClonePaths, mountpoint: &Path let options = vec![ MountOption::FSName(format!("wp-cow-{}", manifest.name)), MountOption::Subtype("wp-cow".to_string()), - MountOption::AutoUnmount, - MountOption::DefaultPermissions, ]; fuser::mount2(fs, mountpoint, &options)?; Ok(()) @@ -702,12 +748,30 @@ fn unix_time(secs: u64) -> SystemTime { UNIX_EPOCH + Duration::from_secs(secs) } +fn trace_fuse(op: &str, rel: &Path) { + if std::env::var("WPCOW_TRACE_FUSE").ok().as_deref() == Some("1") { + eprintln!("wp-cow fuse {op} {}", OverlayStore::rel_string(rel)); + } +} + fn wants_write(flags: i32) -> bool { (flags & libc::O_ACCMODE) != libc::O_RDONLY || flags & libc::O_TRUNC != 0 || flags & libc::O_APPEND != 0 } +fn env_bool(name: &str) -> bool { + std::env::var(name) + .ok() + .map(|raw| { + matches!( + raw.to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) + }) + .unwrap_or(false) +} + fn io_errno(err: &io::Error) -> i32 { match err.kind() { io::ErrorKind::NotFound => ENOENT, diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 1c955284..6fbc1c56 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -3,16 +3,31 @@ use std::fs; use std::path::Path; use crate::config::{ClonePaths, Manifest}; +use crate::overlay::OPAQUE_MARKER; pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { fs::create_dir_all(paths.upper.join("wp-content/mu-plugins"))?; + write_opaque_dir(paths.upper.join("wp-content/plugins"))?; + write_opaque_dir(paths.upper.join("wp-content/languages"))?; fs::write(paths.upper.join("wp-config.php"), wp_config_php(manifest))?; fs::write(paths.upper.join("wp-content/db.php"), 
db_dropin_php())?; fs::write( paths.upper.join("wp-content/mu-plugins/wp-cow-safety.php"), safety_mu_plugin_php(), )?; - fs::write(paths.generated.join("router.php"), router_php(paths))?; + fs::write( + paths.generated.join("router.php"), + router_php(paths, manifest), + )?; + Ok(()) +} + +fn write_opaque_dir(path: impl AsRef) -> Result<()> { + fs::create_dir_all(path.as_ref())?; + fs::write( + path.as_ref().join(OPAQUE_MARKER), + b"local overlay hides remote lower\n", + )?; Ok(()) } @@ -106,12 +121,14 @@ function cow_is_safe_read_sql( $sql ) { function cow_tables_from_sql( $sql ) { $tables = array(); - if ( preg_match_all( '/\b(?:FROM|JOIN|UPDATE|INTO|TABLE)\s+`?([A-Za-z0-9_$]+)`?/i', $sql, $matches ) ) { - foreach ( $matches[1] as $table ) { - $tables[ $table ] = true; - } + $stripped = ltrim( preg_replace( '/^\s*(?:\/\*.*?\*\/\s*|--[^\n]*\n\s*|#[^\n]*\n\s*)*/s', '', $sql ) ); + if ( preg_match( '/^(?:INSERT|REPLACE)\s+(?:IGNORE\s+)?INTO\s+`?([A-Za-z0-9_$]+)`?/i', $stripped, $matches ) ) { + return array( $matches[1] ); + } + if ( preg_match( '/^UPDATE\s+(?:LOW_PRIORITY\s+)?(?:IGNORE\s+)?`?([A-Za-z0-9_$]+)`?/i', $stripped, $matches ) ) { + $tables[ $matches[1] ] = true; } - if ( preg_match_all( '/\bUPDATE\s+(?:LOW_PRIORITY\s+)?(?:IGNORE\s+)?`?([A-Za-z0-9_$]+)`?/i', $sql, $matches ) ) { + if ( preg_match_all( '/\b(?:FROM|JOIN|INTO|TABLE)\s+`?([A-Za-z0-9_$]+)`?/i', $sql, $matches ) ) { foreach ( $matches[1] as $table ) { $tables[ $table ] = true; } @@ -322,6 +339,10 @@ class Cow_DB extends wpdb { $socket = $matches[2]; } + if ( function_exists( 'mysqli_report' ) ) { + mysqli_report( MYSQLI_REPORT_OFF ); + } + $mysqli = mysqli_init(); if ( ! $mysqli ) { $this->cow_remote_failed = true; @@ -363,6 +384,11 @@ if ( ! 
defined( 'DISABLE_WP_CRON' ) ) { define( 'DISABLE_WP_CRON', true ); } +if ( '1' !== getenv( 'WPCOW_ENABLE_PLUGINS' ) ) { + add_filter( 'option_active_plugins', '__return_empty_array', PHP_INT_MAX ); + add_filter( 'site_option_active_sitewide_plugins', '__return_empty_array', PHP_INT_MAX ); +} + add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { if ( defined( 'WPCOW_ALLOW_OUTBOUND_HTTP' ) && WPCOW_ALLOW_OUTBOUND_HTTP ) { return $preempt; @@ -373,10 +399,12 @@ add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { "# } -pub fn router_php(paths: &ClonePaths) -> String { +pub fn router_php(paths: &ClonePaths, manifest: &Manifest) -> String { r#"'; } +function wp_cow_is_frontend_get( $path ) { + if ( ! in_array( $_SERVER['REQUEST_METHOD'], array( 'GET', 'HEAD' ), true ) ) { + return false; + } + if ( 0 === strpos( $path, '/wp-admin' ) || 0 === strpos( $path, '/wp-login.php' ) || 0 === strpos( $path, '/wp-json' ) ) { + return false; + } + return true; +} + +function wp_cow_proxy_remote_frontend( $remote_url, $local_url, $path ) { + if ( '0' === getenv( 'WPCOW_PROXY_FRONTEND' ) || isset( $_GET['__wp_cow_local'] ) || ! wp_cow_is_frontend_get( $path ) ) { + return false; + } + + $query = $_GET; + unset( $query['__wp_cow_bypass_splash'], $query['__wp_cow_local'] ); + $target = rtrim( $remote_url, '/' ) . ( '/' === $path ? '/' : $path ); + if ( ! empty( $query ) ) { + $target .= '?' . 
http_build_query( $query ); + } + + $timeout = (int) getenv( 'WPCOW_PROXY_TIMEOUT_SECS' ); + if ( $timeout < 1 ) { + $timeout = 20; + } + + $body = false; + $status = 0; + $content_type = 'text/html; charset=utf-8'; + if ( function_exists( 'curl_init' ) ) { + $ch = curl_init( $target ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true ); + curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, min( 5, $timeout ) ); + curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout ); + curl_setopt( $ch, CURLOPT_USERAGENT, 'wp-cow frontend proxy' ); + curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'X-WP-COW-Proxy: 1' ) ); + $body = curl_exec( $ch ); + $status = (int) curl_getinfo( $ch, CURLINFO_RESPONSE_CODE ); + $type = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE ); + if ( is_string( $type ) && '' !== $type ) { + $content_type = $type; + } + curl_close( $ch ); + } else { + $context = stream_context_create( + array( + 'http' => array( + 'timeout' => $timeout, + 'ignore_errors' => true, + 'header' => "User-Agent: wp-cow frontend proxy\r\nX-WP-COW-Proxy: 1\r\n", + ), + ) + ); + $body = @file_get_contents( $target, false, $context ); + $status = 200; + } + + if ( false === $body || '' === $body || $status >= 500 ) { + return false; + } + + if ( ! headers_sent() ) { + http_response_code( $status >= 300 ? 200 : max( 200, $status ) ); + header( 'Content-Type: ' . $content_type ); + header( 'Cache-Control: no-store' ); + header( 'X-WP-COW-Frontend-Proxy: 1' ); + } + if ( false !== stripos( $content_type, 'text/html' ) ) { + $body = str_replace( $remote_url, rtrim( $local_url, '/' ), $body ); + $body = str_replace( preg_replace( '/^https:/', 'http:', $remote_url ), rtrim( $local_url, '/' ), $body ); + } + if ( 'HEAD' !== $_SERVER['REQUEST_METHOD'] ) { + echo $body; + } + return true; +} + function wp_cow_render_wordpress( $ready_file ) { ob_start(); require rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ) . 
'/index.php'; @@ -466,6 +573,10 @@ if ( '/' !== $path && is_file( $file ) ) { return false; } +if ( wp_cow_proxy_remote_frontend( $wp_cow_remote_url, $wp_cow_local_url, $path ) ) { + return true; +} + $should_show_splash = ( '0' !== getenv( 'WPCOW_SPLASH' ) && ! isset( $_GET['__wp_cow_bypass_splash'] ) && @@ -597,6 +708,8 @@ return wp_cow_render_wordpress( $wp_cow_ready_file ); "__WPCOW_READY_FILE__", &php_string(&paths.run.join("first-request-ready.json").to_string_lossy()), ) + .replace("__WPCOW_REMOTE_URL__", &php_string(&manifest.remote_url)) + .replace("__WPCOW_LOCAL_URL__", &php_string(&manifest.local_url)) } fn php_string(value: &str) -> String { @@ -676,6 +789,7 @@ mod tests { assert!(php.contains("cow_db_runtime_fail")); assert!(php.contains("will not fall back to the empty local schema")); assert!(php.contains("'sql' => $query")); + assert!(php.contains("INSERT|REPLACE")); } #[test] @@ -684,20 +798,26 @@ mod tests { assert!(php.contains("pre_wp_mail")); assert!(php.contains("X-Robots-Tag")); assert!(php.contains("pre_http_request")); + assert!(php.contains("WPCOW_ENABLE_PLUGINS")); + assert!(php.contains("option_active_plugins")); } #[test] fn router_exposes_splash_and_progress_endpoint() { let temp = tempfile::tempdir().unwrap(); let paths = clone_paths(temp.path(), "example"); - let php = router_php(&paths); + let php = router_php(&paths, &manifest()); assert!(php.contains("/__wp-cow/progress")); assert!(php.contains("__wp_cow_bypass_splash")); assert!(php.contains("wp_cow_looks_like_installer")); + assert!(php.contains("wp_cow_proxy_remote_frontend")); + assert!(php.contains("X-WP-COW-Frontend-Proxy")); assert!(php.contains("WordPress tried to show the installation wizard")); assert!(php.contains("Cache-Control: no-store")); assert!(!php.contains("__WPCOW_PROGRESS_FILE__")); assert!(!php.contains("__WPCOW_READY_FILE__")); + assert!(!php.contains("__WPCOW_REMOTE_URL__")); + assert!(!php.contains("__WPCOW_LOCAL_URL__")); } #[test] @@ -713,7 +833,7 @@ mod 
tests { ("wp-config.php", wp_config_php(&manifest())), ("db.php", db_dropin_php().to_string()), ("wp-cow-safety.php", safety_mu_plugin_php().to_string()), - ("router.php", router_php(&paths)), + ("router.php", router_php(&paths, &manifest())), ]; for (name, php) in files { @@ -749,11 +869,12 @@ mod tests { let docroot = temp.path().join("docroot"); fs::create_dir_all(&docroot).unwrap(); let router = paths.generated.join("router.php"); - fs::write(&router, router_php(&paths)).unwrap(); + fs::write(&router, router_php(&paths, &manifest())).unwrap(); let port = free_tcp_port(); let mut child = Command::new("php") .env("WPCOW_SPLASH", "1") + .env("WPCOW_PROXY_FRONTEND", "0") .env("PHP_CLI_SERVER_WORKERS", "4") .arg("-S") .arg(format!("127.0.0.1:{port}")) diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index ed78534f..663ccb7b 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -10,6 +10,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; use crate::config::ClonePaths; use crate::remote::{RemoteClient, RemoteEntry}; +pub const OPAQUE_MARKER: &str = ".wp-cow-opaque"; + #[derive(Debug, Default, Serialize, Deserialize)] struct WhiteoutFile { deleted: BTreeSet, @@ -73,6 +75,10 @@ impl OverlayStore { Ok(self.upper.join(Self::clean_rel(rel)?)) } + pub fn mirror_path(&self, rel: &Path) -> Result { + Ok(self.file_cache.join("mirror").join(Self::clean_rel(rel)?)) + } + pub fn cache_path(&self, rel: &Path) -> PathBuf { let mut hasher = Sha256::new(); hasher.update(Self::rel_string(rel)); @@ -82,7 +88,10 @@ impl OverlayStore { pub fn cached_file_path(&self, rel: &Path) -> Option { let path = self.cache_path(rel); - path.is_file().then_some(path) + if path.is_file() { + return Some(path); + } + self.mirror_path(rel).ok().filter(|path| path.is_file()) } pub fn cached_entry(&self, rel: &Path) -> Result> { @@ -145,7 +154,10 @@ impl OverlayStore { fs::create_dir_all(parent)?; } - let entry = 
remote.stat(rel)?; + let entry = match self.cached_entry(rel)? { + Some(entry) => entry, + None => remote.stat(rel)?, + }; if entry.kind == "dir" { fs::create_dir_all(&upper)?; return Ok(upper); @@ -193,22 +205,29 @@ impl OverlayStore { if let Some(parent) = cache_path.parent() { fs::create_dir_all(parent)?; } - let tmp = cache_path.with_extension("tmp"); + if cache_path.exists() { + return read_range_from_file(&cache_path, offset as u64, size as usize); + } + let tmp = self.cache_tmp_path(&cache_path); let mut out = File::create(&tmp)?; - let mut cursor = 0_u64; - let chunk = 1024 * 1024; let rel_string = Self::rel_string(rel); let _ = self.write_cache_progress(&rel_string, "fetching", 0, entry.size); - while cursor < entry.size { - let wanted = chunk.min((entry.size - cursor) as usize); - let bytes = remote.read_range(rel, cursor, wanted)?; - if bytes.is_empty() { - break; - } - out.write_all(&bytes)?; - cursor += bytes.len() as u64; - let _ = self.write_cache_progress(&rel_string, "fetching", cursor, entry.size); + let bytes = remote + .read_file(rel) + .with_context(|| format!("remote cache fetch {}", rel_string))?; + let actual_size = bytes.len() as u64; + if actual_size != entry.size { + let _ = fs::remove_file(&tmp); + return Err(anyhow!( + "remote file changed while caching {}: stat size {}, read size {}", + rel_string, + entry.size, + actual_size + )); } + out.write_all(&bytes)?; + let _ = self.write_cache_progress(&rel_string, "fetching", actual_size, entry.size); + drop(out); fs::rename(tmp, &cache_path)?; self.put_cached_entry(rel, &entry)?; let _ = self.finish_cache_progress(&rel_string, entry.size); @@ -227,13 +246,27 @@ impl OverlayStore { } pub fn list_upper(&self, rel: &Path) -> Result> { - let path = self.upper_path(rel)?; + self.list_local_layer(&self.upper_path(rel)?) + } + + pub fn list_mirror(&self, rel: &Path) -> Result> { + self.list_local_layer(&self.mirror_path(rel)?) 
+ } + + pub fn is_opaque_dir(&self, rel: &Path) -> Result { + Ok(self.upper_path(rel)?.join(OPAQUE_MARKER).is_file()) + } + + fn list_local_layer(&self, path: &Path) -> Result> { if !path.is_dir() { return Ok(Vec::new()); } let mut out = Vec::new(); for entry in fs::read_dir(path)? { let entry = entry?; + if entry.file_name() == OPAQUE_MARKER { + continue; + } let metadata = fs::symlink_metadata(entry.path())?; let file_type = metadata.file_type(); out.push(RemoteEntry { @@ -354,6 +387,19 @@ impl OverlayStore { )) } + fn cache_tmp_path(&self, cache_path: &Path) -> PathBuf { + let name = cache_path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("remote-file"); + cache_path.with_file_name(format!( + "{}.tmp.{}.{}", + name, + std::process::id(), + now_unix_ms() + )) + } + fn write_cache_progress( &self, rel: &str, diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 17df37a4..3e5ad603 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -1,9 +1,11 @@ use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; use serde::{Deserialize, Serialize}; use std::ffi::OsStr; -use std::io::{self, Write}; +use std::io::{self, BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; -use std::process::{Child, Command, Stdio}; +use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use std::sync::{Arc, Mutex}; use std::thread; use std::time::Duration; @@ -23,6 +25,7 @@ pub struct RemoteEntry { pub struct RemoteClient { manifest: Manifest, control_path: Option, + file_helper: Arc>>, } impl RemoteClient { @@ -30,6 +33,7 @@ impl RemoteClient { Self { manifest, control_path, + file_helper: Arc::new(Mutex::new(None)), } } @@ -203,6 +207,19 @@ impl RemoteClient { pub fn stat(&self, rel: &Path) -> io::Result { let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = 
serde_json::json!({ + "op": "stat", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + if let Some(entry) = response.get("entry") { + return serde_json::from_value(entry.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)); + } + } + } + let code = r#" $p=$argv[1]; clearstatcache(true,$p); @@ -224,6 +241,19 @@ echo json_encode(array( pub fn readdir(&self, rel: &Path) -> io::Result> { let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "readdir", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + if let Some(entries) = response.get("entries") { + return serde_json::from_value(entries.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)); + } + } + } + let code = r#" $p=$argv[1]; if(!is_dir($p)){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} @@ -245,6 +275,18 @@ echo json_encode($out); pub fn read_range(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "read_range", + "path": full, + "offset": offset, + "length": length, + }); + if let Ok(response) = self.file_helper_request(request) { + return decode_helper_data(response); + } + } + let code = r#" $p=$argv[1];$offset=(int)$argv[2];$length=(int)$argv[3]; $f=@fopen($p,"rb"); @@ -255,8 +297,43 @@ echo fread($f,$length); self.php_eval(code, &[full, offset.to_string(), length.to_string()]) } + pub fn read_file(&self, rel: &Path) -> io::Result> { + let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "read_file", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + return decode_helper_data(response); + } + } + + let code = r#" +$p=$argv[1]; +$f=@fopen($p,"rb"); +if(!$f){fwrite(STDERR,"WPCOW_ENOENT\n");exit(2);} 
+while(!feof($f)){ + echo fread($f,1048576); +} +"#; + self.php_eval(code, &[full]) + } + pub fn readlink(&self, rel: &Path) -> io::Result { let full = self.remote_full_path(rel)?; + if remote_file_helper_enabled() { + let request = serde_json::json!({ + "op": "readlink", + "path": full, + }); + if let Ok(response) = self.file_helper_request(request) { + if let Some(target) = response.get("target").and_then(|value| value.as_str()) { + return Ok(target.to_string()); + } + } + } + let code = r#" $p=$argv[1]; $target=@readlink($p); @@ -273,6 +350,7 @@ echo $target; $host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5];$timeout=(int)$argv[6]; if($timeout<1){$timeout=10;} @set_time_limit($timeout); +if(function_exists("mysqli_report")){mysqli_report(MYSQLI_REPORT_OFF);} if(!preg_match('/^\s*(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i',$sql)){ fwrite(STDERR,"WPCOW_REFUSED_WRITE\n");exit(3); } @@ -322,6 +400,94 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a Ok(result) } + fn file_helper_request(&self, request: serde_json::Value) -> io::Result { + let mut last_error = None; + for _ in 0..2 { + match self.file_helper_request_once(&request) { + Ok(response) => return Ok(response), + Err(err) => { + last_error = Some(err); + let mut helper = self + .file_helper + .lock() + .map_err(|_| io::Error::new(io::ErrorKind::Other, "file helper lock"))?; + reset_file_helper(&mut helper); + } + } + } + Err(last_error + .unwrap_or_else(|| io::Error::new(io::ErrorKind::Other, "remote file helper failed"))) + } + + fn file_helper_request_once( + &self, + request: &serde_json::Value, + ) -> io::Result { + let mut helper = self + .file_helper + .lock() + .map_err(|_| io::Error::new(io::ErrorKind::Other, "file helper lock"))?; + if helper.is_none() { + *helper = Some(self.start_file_helper()?); + } + let helper = helper + .as_mut() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "file helper missing"))?; + let request = 
serde_json::to_vec(request) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err))?; + helper.stdin.write_all(&request)?; + helper.stdin.write_all(b"\n")?; + helper.stdin.flush()?; + + let mut line = String::new(); + let read = helper.stdout.read_line(&mut line)?; + if read == 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "remote file helper closed", + )); + } + let response: serde_json::Value = serde_json::from_str(&line) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + if response.get("ok").and_then(|value| value.as_bool()) == Some(true) { + return Ok(response); + } + let error = response + .get("error") + .and_then(|value| value.as_str()) + .unwrap_or("remote file helper error") + .to_string(); + let kind = if response.get("kind").and_then(|value| value.as_str()) == Some("not_found") { + io::ErrorKind::NotFound + } else { + io::ErrorKind::Other + }; + Err(io::Error::new(kind, error)) + } + + fn start_file_helper(&self) -> io::Result { + let remote_command = format!("php -r {}", shell_quote(remote_file_helper_php())); + let mut command = self.ssh_command(&remote_command, 0); + let mut child = command + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn()?; + let stdin = child + .stdin + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "file helper stdin"))?; + let stdout = child + .stdout + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "file helper stdout"))?; + Ok(RemoteFileHelper { + child, + stdin, + stdout: BufReader::new(stdout), + }) + } + fn php_eval(&self, code: &str, args: &[String]) -> io::Result> { let mut command = format!("php -r {} --", shell_quote(code)); for arg in args { @@ -357,6 +523,118 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a } } +#[derive(Debug)] +struct RemoteFileHelper { + child: Child, + stdin: ChildStdin, + stdout: BufReader, +} + +impl Drop for RemoteFileHelper { + fn 
drop(&mut self) { + let _ = self.child.kill(); + let _ = self.child.wait(); + } +} + +fn reset_file_helper(helper: &mut Option) { + if let Some(mut helper) = helper.take() { + let _ = helper.child.kill(); + let _ = helper.child.wait(); + } +} + +fn decode_helper_data(response: serde_json::Value) -> io::Result> { + let data = response + .get("data") + .and_then(|value| value.as_str()) + .ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidData, "helper response missing data") + })?; + BASE64 + .decode(data) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) +} + +fn remote_file_helper_enabled() -> bool { + env_bool("WPCOW_REMOTE_FILE_HELPER", true).unwrap_or(true) +} + +fn remote_file_helper_php() -> &'static str { + r#" +error_reporting(0); +function wpcow_send($payload) { + echo json_encode($payload), "\n"; + flush(); +} +function wpcow_not_found() { + wpcow_send(array("ok"=>false,"kind"=>"not_found","error"=>"WPCOW_ENOENT")); +} +while (($line = fgets(STDIN)) !== false) { + $request = json_decode($line, true); + if (!is_array($request)) { + wpcow_send(array("ok"=>false,"error"=>"invalid request")); + continue; + } + $op = isset($request["op"]) ? $request["op"] : ""; + $path = isset($request["path"]) ? $request["path"] : ""; + if ($op === "stat") { + clearstatcache(true, $path); + $s = @lstat($path); + if ($s === false) { wpcow_not_found(); continue; } + $kind = is_link($path) ? "symlink" : (is_dir($path) ? "dir" : (is_file($path) ? "file" : "other")); + wpcow_send(array("ok"=>true,"entry"=>array( + "name"=>basename($path), + "kind"=>$kind, + "size"=>(int)$s["size"], + "mode"=>(int)$s["mode"], + "mtime"=>(int)$s["mtime"] + ))); + continue; + } + if ($op === "readdir") { + if (!is_dir($path)) { wpcow_not_found(); continue; } + $out = array(); + foreach (scandir($path) as $name) { + if ($name === "." || $name === "..") { continue; } + $child = $path . DIRECTORY_SEPARATOR . 
$name; + $s = @lstat($child); + if ($s === false) { continue; } + $kind = is_link($child) ? "symlink" : (is_dir($child) ? "dir" : (is_file($child) ? "file" : "other")); + $out[] = array("name"=>$name,"kind"=>$kind,"size"=>(int)$s["size"],"mode"=>(int)$s["mode"],"mtime"=>(int)$s["mtime"]); + } + wpcow_send(array("ok"=>true,"entries"=>$out)); + continue; + } + if ($op === "read_file") { + if (!is_file($path)) { wpcow_not_found(); continue; } + $data = @file_get_contents($path); + if ($data === false) { wpcow_not_found(); continue; } + wpcow_send(array("ok"=>true,"data"=>base64_encode($data),"size"=>strlen($data))); + continue; + } + if ($op === "read_range") { + $offset = isset($request["offset"]) ? max(0, (int)$request["offset"]) : 0; + $length = isset($request["length"]) ? max(0, (int)$request["length"]) : 0; + $f = @fopen($path, "rb"); + if (!$f) { wpcow_not_found(); continue; } + if ($offset > 0) { @fseek($f, $offset); } + $data = $length > 0 ? fread($f, $length) : ""; + if ($data === false) { $data = ""; } + wpcow_send(array("ok"=>true,"data"=>base64_encode($data),"size"=>strlen($data))); + continue; + } + if ($op === "readlink") { + $target = @readlink($path); + if ($target === false) { wpcow_not_found(); continue; } + wpcow_send(array("ok"=>true,"target"=>$target)); + continue; + } + wpcow_send(array("ok"=>false,"error"=>"unknown op")); +} +"# +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RemoteQueryResult { pub ok: bool, diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 2bbb0ed7..8fb11d85 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -1,15 +1,19 @@ use anyhow::{anyhow, Context, Result}; +use std::collections::BTreeSet; +use std::fs; +use std::io::{Read, Write}; use std::path::{Path, PathBuf}; use std::process::{Child, Command, Stdio}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::thread; -use std::time::Duration; +use 
std::thread::{self, JoinHandle}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use crate::config::{ClonePaths, Manifest}; use crate::control; +use crate::db; use crate::fusefs; -use crate::remote::RemoteClient; +use crate::remote::{shell_quote, RemoteClient}; pub struct RunOptions { pub mountpoint: PathBuf, @@ -65,7 +69,18 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R let mount_thread = thread::spawn(move || fusefs::mount_foreground(mount_manifest, mount_paths, &mountpoint)); - wait_for_mount(&options.mountpoint); + if let Err(wait_err) = wait_for_mount(&options.mountpoint, &mount_thread) { + shutdown.store(true, Ordering::SeqCst); + if mount_thread.is_finished() { + match mount_thread.join() { + Ok(Err(mount_err)) => return Err(mount_err).with_context(|| wait_err.to_string()), + Ok(Ok(())) => return Err(wait_err), + Err(_) => return Err(anyhow!("mount thread panicked")).context(wait_err), + } + } + let _ = unmount(&options.mountpoint); + return Err(wait_err); + } let mut php = if options.skip_php { None @@ -77,6 +92,17 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R )?) 
}; + if env_bool("WPCOW_PREFETCH_RUNTIME", true) { + let warm_manifest = manifest.clone(); + let warm_paths = paths.clone(); + let warm_remote = remote.clone(); + thread::spawn(move || { + if let Err(err) = prefetch_runtime_files(&warm_manifest, &warm_paths, &warm_remote) { + eprintln!("wp-cow runtime prefetch skipped: {err:#}"); + } + }); + } + eprintln!( "wp-cow running clone '{}' at {} from {}", manifest.name, @@ -125,6 +151,181 @@ pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> R fusefs::mount_foreground(manifest, paths, mountpoint) } +fn prefetch_runtime_files( + manifest: &Manifest, + paths: &ClonePaths, + remote: &RemoteClient, +) -> Result<()> { + let mirror = paths.file_cache.join("mirror"); + fs::create_dir_all(&mirror)?; + let stamp = mirror.join(".wp-cow-runtime-prefetch-v2"); + if stamp.is_file() { + return Ok(()); + } + + let mut rels = vec!["wp-admin".to_string(), "wp-includes".to_string()]; + let mut themes = BTreeSet::new(); + for option in ["template", "stylesheet"] { + if let Some(theme) = db::local_option_value(manifest, option)? { + if let Some(theme) = clean_theme_name(&theme) { + themes.insert(theme); + } + } + } + if themes.is_empty() { + for theme in remote_active_theme_names(manifest, remote)? 
{ + themes.insert(theme); + } + } + for theme in themes { + rels.push(format!("wp-content/themes/{theme}")); + } + + eprintln!( + "wp-cow warming runtime file cache in background: {}", + rels.join(", ") + ); + let _ = write_prefetch_progress(paths, "prefetching-runtime", &rels.join(", "), 0, 0, 0); + let remote_paths = rels.iter().map(shell_quote).collect::>().join(" "); + let remote_command = format!( + "cd {} && tar -cf - --ignore-failed-read {}", + shell_quote(&manifest.remote_path), + remote_paths + ); + let mut ssh = remote + .command(&remote_command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote theme tar")?; + let mut tar = Command::new("tar") + .arg("-C") + .arg(&mirror) + .arg("-xf") + .arg("-") + .stdin(Stdio::piped()) + .spawn() + .context("start local theme tar")?; + + { + let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); + let mut tar_stdin = tar.stdin.take().expect("tar stdin piped"); + let mut buf = [0_u8; 64 * 1024]; + let mut bytes = 0_u64; + loop { + let read = ssh_stdout.read(&mut buf)?; + if read == 0 { + break; + } + tar_stdin.write_all(&buf[..read])?; + bytes = bytes.saturating_add(read as u64); + if bytes == read as u64 || bytes % (1024 * 1024) < read as u64 { + let _ = write_prefetch_progress( + paths, + "prefetching-runtime", + &rels.join(", "), + bytes, + 0, + bytes, + ); + } + } + } + + let ssh_output = ssh.wait_with_output()?; + let tar_status = tar.wait()?; + if !ssh_output.status.success() { + return Err(anyhow!( + "remote theme tar failed: {}", + String::from_utf8_lossy(&ssh_output.stderr) + )); + } + if !tar_status.success() { + return Err(anyhow!("local theme tar failed with status {}", tar_status)); + } + fs::write(&stamp, b"ok\n")?; + let _ = write_prefetch_progress(paths, "cached", "", 0, 0, 0); + Ok(()) +} + +fn remote_active_theme_names( + manifest: &Manifest, + remote: &RemoteClient, +) -> Result> { + let mut out = BTreeSet::new(); + let Some(table) = 
safe_mysql_identifier(&format!("{}options", manifest.probe.table_prefix)) + else { + return Ok(out); + }; + let sql = format!( + "SELECT option_name, option_value FROM `{table}` WHERE option_name IN ('template','stylesheet')" + ); + let result = db::remote_readonly_query(remote, &sql)?; + if !result.ok { + return Ok(out); + } + for row in result.rows { + let Some(value) = row.get("option_value").and_then(|value| value.as_str()) else { + continue; + }; + if let Some(theme) = clean_theme_name(value) { + out.insert(theme); + } + } + Ok(out) +} + +fn safe_mysql_identifier(value: &str) -> Option { + if value.is_empty() + || !value + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$') + { + return None; + } + Some(value.to_string()) +} + +fn write_prefetch_progress( + paths: &ClonePaths, + phase: &str, + active_path: &str, + active_bytes: u64, + active_total: u64, + bytes_cached: u64, +) -> Result<()> { + fs::create_dir_all(&paths.file_cache)?; + let progress = serde_json::json!({ + "phase": phase, + "active_path": active_path, + "active_bytes": active_bytes, + "active_total": active_total, + "files_cached": 0, + "bytes_cached": bytes_cached, + "last_cached_path": active_path, + "updated_at_unix_ms": now_unix_ms(), + }); + let progress_path = paths.file_cache.join("progress.json"); + let tmp = paths + .file_cache + .join(format!("progress.json.prefetch.{}.tmp", std::process::id())); + fs::write(&tmp, serde_json::to_vec_pretty(&progress)?)?; + fs::rename(tmp, progress_path)?; + Ok(()) +} + +fn clean_theme_name(value: &str) -> Option { + let value = value.trim(); + if value.is_empty() + || !value + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' || ch == '.') + { + return None; + } + Some(value.to_string()) +} + fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { Command::new("php") .env( @@ -163,13 +364,42 @@ fn env_u64(name: &str, default: u64) -> u64 { .unwrap_or(default) } -fn 
wait_for_mount(mountpoint: &Path) { - for _ in 0..40 { +fn now_unix_ms() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_millis()) + .unwrap_or_default() +} + +fn env_bool(name: &str, default: bool) -> bool { + std::env::var(name) + .ok() + .map(|raw| { + matches!( + raw.to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) + }) + .unwrap_or(default) +} + +fn wait_for_mount(mountpoint: &Path, mount_thread: &JoinHandle>) -> Result<()> { + for _ in 0..100 { if mountpoint.join("wp-config.php").exists() { - return; + return Ok(()); + } + if mount_thread.is_finished() { + return Err(anyhow!( + "FUSE mount exited before generated WordPress files became visible at {}", + mountpoint.display() + )); } - thread::sleep(Duration::from_millis(100)); + thread::sleep(Duration::from_millis(200)); } + Err(anyhow!( + "timed out waiting for FUSE mount at {}", + mountpoint.display() + )) } fn control_addr_from_url(url: &str) -> Result { From c0f3d57a71704a898d8d89d884feb4bdf7a9ddca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 15:13:09 +0200 Subject: [PATCH 20/39] Use FrankenPHP in remote COW lab --- experiments/remote-wp-cow/.env.example | 1 + experiments/remote-wp-cow/README.md | 28 +-- experiments/remote-wp-cow/compose.yaml | 1 + experiments/remote-wp-cow/docker/Dockerfile | 31 ++-- .../remote-wp-cow/docker/wp-cow-lab-check | 7 + .../remote-wp-cow/docker/wp-cow-lab-run | 2 +- experiments/remote-wp-cow/src/generate.rs | 17 +- experiments/remote-wp-cow/src/run.rs | 169 +++++++++++++++++- 8 files changed, 221 insertions(+), 35 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 282c926f..a2e3acf3 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -16,6 +16,7 @@ WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 WPCOW_PHP_MAX_EXECUTION_SECS=90 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 WPCOW_PHP_WORKERS=4 
+WPCOW_WEB_SERVER=frankenphp WPCOW_SPLASH=1 # Set this to 1 for a filesystem-only smoke test that does not export DB schema. diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 629536dd..6170e131 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -26,9 +26,11 @@ cargo build ## Docker lab on macOS -Use this when you are on a Mac and want a Linux shell with FUSE, PHP, SSH, and -local MariaDB available. The container is intentionally privileged so FUSE can -mount inside Docker Desktop's Linux VM. +Use this when you are on a Mac and want a Linux shell with FUSE, FrankenPHP, +SSH, and local MariaDB available. The container is intentionally privileged so +FUSE can mount inside Docker Desktop's Linux VM. The Docker image uses the +official FrankenPHP PHP 8.3 image and installs `mysqli`/`pdo_mysql` for +WordPress. From this directory: @@ -41,9 +43,9 @@ docker compose exec wp-cow-lab bash ``` The Compose host port is created from `WPCOW_HTTP_PORT` when the container is -created. The PHP server still listens on port `8080` inside the container. If -you want to open port 9481 on the Mac, set it in `.env` or pass it when -starting the lab: +created. FrankenPHP still listens on port `8080` inside the container. If you +want to open port 9481 on the Mac, set it in `.env` or pass it when starting +the lab: ```bash WPCOW_HTTP_PORT=9481 docker compose up -d @@ -124,8 +126,8 @@ wp-cow-lab-serve That is the normal path. It creates or reuses the lazy clone, exports schema only if needed, initializes an empty local MariaDB database if needed, mounts -the lazy filesystem, starts the DB control layer, and starts PHP. It does not -download media, runtime directories, or table rows up front. +the lazy filesystem, starts the DB control layer, and starts FrankenPHP. It does +not download media, runtime directories, or table rows up front. File reads are request-driven. 
When WordPress opens a remote file, `wp-cow` fetches that file into the persistent `file-cache/` and records the remote @@ -138,9 +140,11 @@ The first browser hit can still spend time fetching the exact PHP files needed to boot WordPress. With `WPCOW_SPLASH=1` (the Docker default), `wp-cow` returns a temporary local splash page immediately and starts the real request in the browser. The splash polls `/__wp-cow/progress`, which is backed by the local file -cache progress file, then swaps in the warmed WordPress response. PHP is started -with multiple CLI server workers (`WPCOW_PHP_WORKERS`, default `4`) so progress -polling can continue while the warm request is running. +cache progress file, then swaps in the warmed WordPress response. FrankenPHP is +started with multiple PHP threads (`WPCOW_PHP_WORKERS`, default `4`) so progress +polling can continue while the warm request is running. Set +`WPCOW_WEB_SERVER=php` only when you explicitly want the old PHP built-in +development server fallback. The lab also starts a persistent SSH tunnel for remote database reads when the remote `DB_HOST` is TCP-reachable from the SSH host. This avoids one SSH/PHP @@ -277,7 +281,7 @@ run/ Local machine: - Linux with `/dev/fuse` access. -- `ssh`, `php`, `mysql`, and `mysqldump` on `PATH`. +- `ssh`, `frankenphp`, `php`, `mysql`, and `mysqldump` on `PATH`. - A local MySQL/MariaDB server reachable by the generated DB settings. 
Remote host: diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 0a45e5bc..7d0bebfa 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -37,6 +37,7 @@ services: WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" WPCOW_PHP_WORKERS: "${WPCOW_PHP_WORKERS:-4}" + WPCOW_WEB_SERVER: "${WPCOW_WEB_SERVER:-frankenphp}" WPCOW_SPLASH: "${WPCOW_SPLASH:-1}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index c65eaa63..69d14377 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -1,9 +1,27 @@ -FROM rust:1-bookworm +FROM rust:1-bookworm AS builder ENV DEBIAN_FRONTEND=noninteractive -ENV WPCOW_HOME=/root/.wp-cow RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + fuse3 \ + libfuse3-dev \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace +COPY Cargo.toml Cargo.lock ./ +COPY src ./src +RUN cargo build --release \ + && cp target/release/wp-cow /usr/local/bin/wp-cow + +FROM dunglas/frankenphp:1-php8.3-bookworm + +ENV DEBIAN_FRONTEND=noninteractive +ENV WPCOW_HOME=/root/.wp-cow + +RUN install-php-extensions mysqli pdo_mysql \ + && apt-get update \ && apt-get install -y --no-install-recommends \ bash \ ca-certificates \ @@ -14,20 +32,13 @@ RUN apt-get update \ mariadb-client \ mariadb-server \ openssh-client \ - php-cli \ - php-mysqli \ pkg-config \ rsync \ tini \ vim-tiny \ && rm -rf /var/lib/apt/lists/* -WORKDIR /workspace -COPY Cargo.toml Cargo.lock ./ -COPY src ./src -RUN cargo build --release \ - && cp target/release/wp-cow /usr/local/bin/wp-cow \ - && cargo clean +COPY --from=builder /usr/local/bin/wp-cow /usr/local/bin/wp-cow COPY docker/wp-cow-lab-entrypoint 
/usr/local/bin/wp-cow-lab-entrypoint COPY docker/wp-cow-lab-check /usr/local/bin/wp-cow-lab-check diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-check b/experiments/remote-wp-cow/docker/wp-cow-lab-check index a9c9c447..94fe24f2 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-check +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-check @@ -8,6 +8,13 @@ echo echo "tools:" ssh -V 2>&1 php -v | sed -n '1p' +frankenphp version | sed -n '1p' +if frankenphp php-cli -m | grep -qx 'mysqli'; then + echo "frankenphp mysqli extension: yes" +else + echo "frankenphp mysqli extension: missing" >&2 + exit 1 +fi mysql --version mysqldump --version diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-run b/experiments/remote-wp-cow/docker/wp-cow-lab-run index 2135ca02..cca598b3 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-run +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-run @@ -16,5 +16,5 @@ if [ ! -f "${WPCOW_HOME:-/root/.wp-cow}/clones/$name/db/schema.sql" ]; then echo "WordPress will likely show a database connection error. Run wp-cow-lab-db-init first." 
>&2 fi echo "running $name at $public_url/ from $mountpoint" -echo "container PHP listener: $http" +echo "container web listener: $http" exec wp-cow run "$name" --mountpoint "$mountpoint" --http "$http" diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 6fbc1c56..762e4c6d 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -5,20 +5,21 @@ use std::path::Path; use crate::config::{ClonePaths, Manifest}; use crate::overlay::OPAQUE_MARKER; +pub const ROUTER_BASENAME: &str = ".wp-cow-router.php"; + pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { fs::create_dir_all(paths.upper.join("wp-content/mu-plugins"))?; write_opaque_dir(paths.upper.join("wp-content/plugins"))?; write_opaque_dir(paths.upper.join("wp-content/languages"))?; + let router = router_php(paths, manifest); fs::write(paths.upper.join("wp-config.php"), wp_config_php(manifest))?; fs::write(paths.upper.join("wp-content/db.php"), db_dropin_php())?; + fs::write(paths.upper.join(ROUTER_BASENAME), &router)?; fs::write( paths.upper.join("wp-content/mu-plugins/wp-cow-safety.php"), safety_mu_plugin_php(), )?; - fs::write( - paths.generated.join("router.php"), - router_php(paths, manifest), - )?; + fs::write(paths.generated.join("router.php"), router)?; Ok(()) } @@ -176,7 +177,9 @@ function cow_control_request( $path, $payload ) { $raw = curl_exec( $ch ); $error = curl_error( $ch ); $errno = curl_errno( $ch ); - curl_close( $ch ); + if ( PHP_VERSION_ID < 80000 ) { + curl_close( $ch ); + } if ( false === $raw ) { return array( 'ok' => false, 'error' => 'curl error ' . $errno . ' calling ' . $url . ': ' . 
$error ); } @@ -503,7 +506,9 @@ function wp_cow_proxy_remote_frontend( $remote_url, $local_url, $path ) { if ( is_string( $type ) && '' !== $type ) { $content_type = $type; } - curl_close( $ch ); + if ( PHP_VERSION_ID < 80000 ) { + curl_close( $ch ); + } } else { $context = stream_context_create( array( diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 8fb11d85..3f3c199b 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -13,6 +13,7 @@ use crate::config::{ClonePaths, Manifest}; use crate::control; use crate::db; use crate::fusefs; +use crate::generate::ROUTER_BASENAME; use crate::remote::{shell_quote, RemoteClient}; pub struct RunOptions { @@ -82,10 +83,10 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R return Err(wait_err); } - let mut php = if options.skip_php { + let mut web = if options.skip_php { None } else { - Some(start_php_server( + Some(start_web_server( &paths, &options.mountpoint, &options.http_addr, @@ -111,16 +112,16 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R ); while !shutdown.load(Ordering::SeqCst) { - if let Some(child) = php.as_mut() { + if let Some(child) = web.as_mut() { if let Some(status) = child.try_wait()? 
{ shutdown.store(true, Ordering::SeqCst); - return Err(anyhow!("php server exited with status {}", status)); + return Err(anyhow!("web server exited with status {}", status)); } } thread::sleep(Duration::from_millis(250)); } - if let Some(mut child) = php { + if let Some(mut child) = web { let _ = child.kill(); let _ = child.wait(); } @@ -326,7 +327,43 @@ fn clean_theme_name(value: &str) -> Option { Some(value.to_string()) } -fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { +fn start_web_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { + match std::env::var("WPCOW_WEB_SERVER") + .unwrap_or_else(|_| "frankenphp".to_string()) + .to_ascii_lowercase() + .as_str() + { + "php" | "php-dev" | "php-dev-server" => start_php_dev_server(paths, mountpoint, http_addr), + "frankenphp" => start_frankenphp_server(paths, mountpoint, http_addr), + other => Err(anyhow!( + "unsupported WPCOW_WEB_SERVER={other}; expected frankenphp or php" + )), + } +} + +fn start_frankenphp_server( + paths: &ClonePaths, + mountpoint: &Path, + http_addr: &str, +) -> Result { + let caddyfile = paths.run.join("Caddyfile"); + fs::create_dir_all(&paths.run)?; + fs::write( + &caddyfile, + frankenphp_caddyfile(paths, mountpoint, http_addr), + )?; + Command::new(std::env::var("WPCOW_FRANKENPHP_BIN").unwrap_or_else(|_| "frankenphp".to_string())) + .arg("run") + .arg("--config") + .arg(&caddyfile) + .arg("--adapter") + .arg("caddyfile") + .stdin(Stdio::null()) + .spawn() + .context("start FrankenPHP server") +} + +fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { Command::new("php") .env( "PHP_CLI_SERVER_WORKERS", @@ -357,6 +394,110 @@ fn start_php_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> R .context("start php built-in server") } +fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> String { + let threads = env_u64("WPCOW_PHP_WORKERS", 4); + let 
max_execution = env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 90); + let socket_timeout = env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15); + let listen = caddy_listen(http_addr); + let root = caddy_quote(&mountpoint.to_string_lossy()); + let router = format!("/{ROUTER_BASENAME}"); + let bind = listen + .bind + .as_ref() + .map(|host| format!("\n\tbind {}", caddy_quote(host))) + .unwrap_or_default(); + format!( + r#"{{ + admin off + auto_https off + frankenphp {{ + num_threads {threads} + max_threads {threads} + php_ini max_execution_time {max_execution} + php_ini default_socket_timeout {socket_timeout} + php_ini mysqlnd.net_read_timeout {socket_timeout} + }} +}} + +{site_addr} {{{bind} + root * {root} + + @wpCowRouter path {router} + handle @wpCowRouter {{ + respond 404 + }} + + @static {{ + file + not path *.php + }} + handle @static {{ + file_server + }} + + @phpFiles path *.php + handle @phpFiles {{ + php + }} + + handle {{ + rewrite * {router} + php + }} +}} +"#, + site_addr = listen.site_addr, + ) +} + +struct CaddyListen { + site_addr: String, + bind: Option, +} + +fn caddy_listen(http_addr: &str) -> CaddyListen { + let without_scheme = http_addr + .strip_prefix("http://") + .or_else(|| http_addr.strip_prefix("https://")) + .unwrap_or(http_addr); + let authority = without_scheme + .split('/') + .next() + .unwrap_or(without_scheme) + .trim(); + let (host, port) = split_host_port(authority); + let port = port.unwrap_or("80"); + let bind = match host { + Some(host) if !matches!(host, "" | "0.0.0.0" | "*" | "::" | "[::]") => { + Some(host.trim_matches(['[', ']']).to_string()) + } + _ => None, + }; + CaddyListen { + site_addr: format!("http://:{port}"), + bind, + } +} + +fn split_host_port(authority: &str) -> (Option<&str>, Option<&str>) { + if let Some(rest) = authority.strip_prefix('[') { + if let Some((host, tail)) = rest.split_once(']') { + return ( + Some(host), + tail.strip_prefix(':').filter(|value| !value.is_empty()), + ); + } + } + if let Some((host, port)) = 
authority.rsplit_once(':') { + return (Some(host), (!port.is_empty()).then_some(port)); + } + (Some(authority), None) +} + +fn caddy_quote(value: &str) -> String { + format!("\"{}\"", value.replace('\\', "\\\\").replace('"', "\\\"")) +} + fn env_u64(name: &str, default: u64) -> u64 { std::env::var(name) .ok() @@ -383,6 +524,22 @@ fn env_bool(name: &str, default: bool) -> bool { .unwrap_or(default) } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn caddy_listen_accepts_localhost_host_headers() { + let listen = caddy_listen("127.0.0.1:9481"); + assert_eq!(listen.site_addr, "http://:9481"); + assert_eq!(listen.bind.as_deref(), Some("127.0.0.1")); + + let listen = caddy_listen("0.0.0.0:8080"); + assert_eq!(listen.site_addr, "http://:8080"); + assert_eq!(listen.bind, None); + } +} + fn wait_for_mount(mountpoint: &Path, mount_thread: &JoinHandle>) -> Result<()> { for _ in 0..100 { if mountpoint.join("wp-config.php").exists() { From 5754765c70e3de54dc3c937d89f5a731e9779e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 15:40:11 +0200 Subject: [PATCH 21/39] Cache warmed remote COW pages --- experiments/remote-wp-cow/.env.example | 3 + experiments/remote-wp-cow/README.md | 13 ++ experiments/remote-wp-cow/compose.yaml | 3 + experiments/remote-wp-cow/src/generate.rs | 202 +++++++++++++++++++--- 4 files changed, 198 insertions(+), 23 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index a2e3acf3..3525bd94 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -12,6 +12,9 @@ WPCOW_REMOTE_DB_TUNNEL=1 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 +WPCOW_REMOTE_QUERY_CACHE=1 +WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 +WPCOW_PAGE_CACHE=1 WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 WPCOW_PHP_MAX_EXECUTION_SECS=90 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 diff --git 
a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 6170e131..acbd6102 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -158,6 +158,19 @@ option names into the local database, then routes those matching reads locally. That keeps the common `SELECT ... FROM *_options WHERE autoload IN (...)` query off the slow remote `/query` fallback without dumping the whole database. +Remote read queries that still need the lower database are cached under +`~/.wp-cow/clones//db/query-cache` by default. This makes repeated page +loads reuse local query results instead of crossing SSH/remote MySQL again. +Set `WPCOW_REMOTE_QUERY_CACHE=0` to disable it or adjust +`WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS` for large result sets. Local write-class SQL +clears this cache before executing. + +Frontend GET responses are also cached under +`~/.wp-cow/clones//db/page-cache` by default. This is what makes a warmed +URL return immediately after the first real WordPress render. Set +`WPCOW_PAGE_CACHE=0` to disable it. Local write-class SQL clears this cache +before executing. + The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the browser spinning forever. 
Adjust the defaults with diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 7d0bebfa..21883eb5 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -33,6 +33,9 @@ services: WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-60}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" + WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" + WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" + WPCOW_PAGE_CACHE: "${WPCOW_PAGE_CACHE:-1}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 762e4c6d..c1c312dc 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -12,7 +12,10 @@ pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Res write_opaque_dir(paths.upper.join("wp-content/plugins"))?; write_opaque_dir(paths.upper.join("wp-content/languages"))?; let router = router_php(paths, manifest); - fs::write(paths.upper.join("wp-config.php"), wp_config_php(manifest))?; + fs::write( + paths.upper.join("wp-config.php"), + wp_config_php(manifest, paths), + )?; fs::write(paths.upper.join("wp-content/db.php"), db_dropin_php())?; fs::write(paths.upper.join(ROUTER_BASENAME), &router)?; fs::write( @@ -32,7 +35,7 @@ fn write_opaque_dir(path: impl AsRef) -> Result<()> { Ok(()) } -pub fn wp_config_php(manifest: &Manifest) -> String { +pub fn wp_config_php(manifest: &Manifest, paths: &ClonePaths) -> String { format!( r#" 0 ? 
$max : 5000; +} + +function cow_remote_query_cache_get( $query ) { + $file = cow_remote_query_cache_file( $query ); + if ( '' === $file || ! is_file( $file ) ) { + return null; + } + $decoded = json_decode( file_get_contents( $file ), true ); + if ( ! is_array( $decoded ) || ! isset( $decoded['sql'], $decoded['result'] ) || $decoded['sql'] !== $query || ! is_array( $decoded['result'] ) ) { + return null; + } + return $decoded['result']; +} + +function cow_remote_query_cache_set( $query, $result ) { + $file = cow_remote_query_cache_file( $query ); + if ( '' === $file || empty( $result['ok'] ) || ! isset( $result['rows'] ) || ! is_array( $result['rows'] ) ) { + return; + } + if ( count( $result['rows'] ) > cow_remote_query_cache_max_rows() ) { + return; + } + if ( ! is_dir( dirname( $file ) ) && ! mkdir( dirname( $file ), 0777, true ) && ! is_dir( dirname( $file ) ) ) { + return; + } + $tmp = $file . '.' . getmypid() . '.tmp'; + file_put_contents( $tmp, json_encode( array( 'sql' => $query, 'result' => $result ) ) ); + @rename( $tmp, $file ); +} + +function cow_remote_query_cache_clear() { + if ( ! cow_remote_query_cache_enabled() || ! is_dir( WPCOW_QUERY_CACHE_DIR ) ) { + return; + } + foreach ( glob( rtrim( WPCOW_QUERY_CACHE_DIR, '/' ) . '/*.json' ) as $file ) { + @unlink( $file ); + } +} + +function cow_page_cache_clear() { + if ( ! defined( 'WPCOW_PAGE_CACHE_DIR' ) || '' === WPCOW_PAGE_CACHE_DIR || ! is_dir( WPCOW_PAGE_CACHE_DIR ) ) { + return; + } + foreach ( glob( rtrim( WPCOW_PAGE_CACHE_DIR, '/' ) . '/*.html' ) as $file ) { + @unlink( $file ); + } +} + class Cow_DB extends wpdb { private $cow_remote_mysqli = null; private $cow_remote_failed = false; @@ -230,6 +299,8 @@ class Cow_DB extends wpdb { $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow materialization failed'; cow_db_runtime_fail( 'control /materialize failed: ' . $this->last_error . "\n\nSQL:\n" . 
$query ); } + cow_remote_query_cache_clear(); + cow_page_cache_clear(); return parent::query( $query ); } @@ -249,6 +320,11 @@ class Cow_DB extends wpdb { } private function cow_remote_query( $query ) { + $cached = cow_remote_query_cache_get( $query ); + if ( is_array( $cached ) ) { + return $this->cow_apply_remote_result( $cached ); + } + $remote = $this->cow_remote_mysqli(); if ( $remote instanceof mysqli ) { $result = $remote->query( $query, MYSQLI_STORE_RESULT ); @@ -257,29 +333,30 @@ class Cow_DB extends wpdb { cow_db_runtime_fail( 'remote mysqli query failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } - $this->last_result = array(); - $this->col_info = array(); + $remote_result = array( + 'ok' => true, + 'error' => '', + 'rows' => array(), + 'fields' => array(), + 'affected' => 0, + ); if ( true === $result ) { - $this->num_rows = 0; - $this->rows_affected = (int) $remote->affected_rows; - $this->insert_id = (int) $remote->insert_id; - $this->last_error = ''; - return $this->rows_affected; + $remote_result['affected'] = (int) $remote->affected_rows; + cow_remote_query_cache_set( $query, $remote_result ); + return $this->cow_apply_remote_result( $remote_result ); } foreach ( $result->fetch_fields() as $field ) { - $this->col_info[] = (object) array( 'name' => $field->name ); + $remote_result['fields'][] = $field->name; } while ( $row = $result->fetch_assoc() ) { - $this->last_result[] = (object) $row; + $remote_result['rows'][] = $row; } - $this->num_rows = count( $this->last_result ); - $this->rows_affected = $this->num_rows; - $this->insert_id = (int) $remote->insert_id; - $this->last_error = ''; + $remote_result['affected'] = count( $remote_result['rows'] ); - return $this->num_rows; + cow_remote_query_cache_set( $query, $remote_result ); + return $this->cow_apply_remote_result( $remote_result ); } $result = cow_control_request( '/query', array( 'sql' => $query ) ); @@ -288,6 +365,11 @@ class Cow_DB extends wpdb { cow_db_runtime_fail( 'control 
/query failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } + cow_remote_query_cache_set( $query, $result ); + return $this->cow_apply_remote_result( $result ); + } + + private function cow_apply_remote_result( $result ) { $this->last_result = array(); if ( isset( $result['rows'] ) && is_array( $result['rows'] ) ) { foreach ( $result['rows'] as $row ) { @@ -304,6 +386,7 @@ class Cow_DB extends wpdb { $this->num_rows = count( $this->last_result ); $this->rows_affected = isset( $result['affected'] ) ? (int) $result['affected'] : $this->num_rows; + $this->insert_id = 0; $this->last_error = ''; return $this->num_rows; @@ -406,6 +489,7 @@ pub fn router_php(paths: &ClonePaths, manifest: &Manifest) -> String { r#" time() ) ) ); + wp_cow_page_cache_set( $page_cache_dir, $path, $html, 'text/html; charset=utf-8' ); echo $html; return true; } @@ -578,7 +718,11 @@ if ( '/' !== $path && is_file( $file ) ) { return false; } -if ( wp_cow_proxy_remote_frontend( $wp_cow_remote_url, $wp_cow_local_url, $path ) ) { +if ( wp_cow_page_cache_get( $wp_cow_page_cache_dir, $path ) ) { + return true; +} + +if ( wp_cow_proxy_remote_frontend( $wp_cow_remote_url, $wp_cow_local_url, $path, $wp_cow_page_cache_dir ) ) { return true; } @@ -700,10 +844,10 @@ HTML; } if ( isset( $_GET['__wp_cow_bypass_splash'] ) ) { - return wp_cow_render_wordpress( $wp_cow_ready_file ); + return wp_cow_render_wordpress( $wp_cow_ready_file, $wp_cow_page_cache_dir, $path ); } -return wp_cow_render_wordpress( $wp_cow_ready_file ); +return wp_cow_render_wordpress( $wp_cow_ready_file, $wp_cow_page_cache_dir, $path ); "# .replace( "__WPCOW_PROGRESS_FILE__", @@ -713,6 +857,10 @@ return wp_cow_render_wordpress( $wp_cow_ready_file ); "__WPCOW_READY_FILE__", &php_string(&paths.run.join("first-request-ready.json").to_string_lossy()), ) + .replace( + "__WPCOW_PAGE_CACHE_DIR__", + &php_string(paths.db.join("page-cache").to_string_lossy().as_ref()), + ) .replace("__WPCOW_REMOTE_URL__", &php_string(&manifest.remote_url)) 
.replace("__WPCOW_LOCAL_URL__", &php_string(&manifest.local_url)) } @@ -775,12 +923,16 @@ mod tests { #[test] fn generated_config_shadows_urls_and_database() { - let php = wp_config_php(&manifest()); + let temp = tempfile::tempdir().unwrap(); + let paths = clone_paths(temp.path(), "example"); + let php = wp_config_php(&manifest(), &paths); assert!(php.contains("define( 'DB_NAME', 'cow_example' );")); assert!(php.contains("define( 'WP_HOME', 'http://example.test' );")); assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_REMOTE_DB_HOST")); + assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); + assert!(php.contains("WPCOW_PAGE_CACHE_DIR")); assert!(php.contains("wp-cow DB/runtime error")); assert!(php.contains("wp-content/db.php")); } @@ -791,6 +943,8 @@ mod tests { assert!(php.contains("cow_is_write_sql")); assert!(php.contains("/materialize")); assert!(php.contains("cow_remote_mysqli")); + assert!(php.contains("cow_remote_query_cache_get")); + assert!(php.contains("cow_remote_query_cache_set")); assert!(php.contains("cow_db_runtime_fail")); assert!(php.contains("will not fall back to the empty local schema")); assert!(php.contains("'sql' => $query")); @@ -816,6 +970,8 @@ mod tests { assert!(php.contains("__wp_cow_bypass_splash")); assert!(php.contains("wp_cow_looks_like_installer")); assert!(php.contains("wp_cow_proxy_remote_frontend")); + assert!(php.contains("wp_cow_page_cache_get")); + assert!(php.contains("X-WP-COW-Page-Cache")); assert!(php.contains("X-WP-COW-Frontend-Proxy")); assert!(php.contains("WordPress tried to show the installation wizard")); assert!(php.contains("Cache-Control: no-store")); @@ -835,7 +991,7 @@ mod tests { let temp = tempfile::tempdir().unwrap(); let paths = clone_paths(temp.path(), "example"); let files = [ - ("wp-config.php", wp_config_php(&manifest())), + ("wp-config.php", wp_config_php(&manifest(), &paths)), ("db.php", db_dropin_php().to_string()), 
("wp-cow-safety.php", safety_mu_plugin_php().to_string()), ("router.php", router_php(&paths, &manifest())), From 36f916c9ed4216676a5762f1f259fc8d42e19a61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sat, 2 May 2026 23:15:21 +0200 Subject: [PATCH 22/39] Cache remote COW program data, not pages --- experiments/remote-wp-cow/.env.example | 2 +- experiments/remote-wp-cow/README.md | 12 +-- experiments/remote-wp-cow/compose.yaml | 2 +- experiments/remote-wp-cow/docker/Dockerfile | 2 +- .../remote-wp-cow/docker/wp-cow-lab-check | 6 ++ experiments/remote-wp-cow/src/fusefs.rs | 46 ++++++++-- experiments/remote-wp-cow/src/generate.rs | 90 ++----------------- experiments/remote-wp-cow/src/overlay.rs | 33 +++++-- experiments/remote-wp-cow/src/run.rs | 5 ++ 9 files changed, 92 insertions(+), 106 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 3525bd94..8efb60e3 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -14,7 +14,7 @@ WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 -WPCOW_PAGE_CACHE=1 +WPCOW_FUSE_TTL_SECS=60 WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 WPCOW_PHP_MAX_EXECUTION_SECS=90 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index acbd6102..6e3dde46 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -29,7 +29,7 @@ cargo build Use this when you are on a Mac and want a Linux shell with FUSE, FrankenPHP, SSH, and local MariaDB available. The container is intentionally privileged so FUSE can mount inside Docker Desktop's Linux VM. The Docker image uses the -official FrankenPHP PHP 8.3 image and installs `mysqli`/`pdo_mysql` for +official FrankenPHP PHP 8.3 image and installs `mysqli`, `opcache`, and `pdo_mysql` for WordPress. 
From this directory: @@ -165,11 +165,11 @@ Set `WPCOW_REMOTE_QUERY_CACHE=0` to disable it or adjust `WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS` for large result sets. Local write-class SQL clears this cache before executing. -Frontend GET responses are also cached under -`~/.wp-cow/clones//db/page-cache` by default. This is what makes a warmed -URL return immediately after the first real WordPress render. Set -`WPCOW_PAGE_CACHE=0` to disable it. Local write-class SQL clears this cache -before executing. +The FUSE mount also keeps warmed path metadata live long enough for repeat +renders to reuse the program files WordPress just touched. The Docker lab +defaults `WPCOW_FUSE_TTL_SECS` to `60`; lower values make live remote changes +visible sooner, while higher values reduce repeated path walking. +FrankenPHP also enables OPcache for parsed PHP code in the local web runtime. The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 21883eb5..9cf88ee4 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -35,7 +35,7 @@ services: WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" - WPCOW_PAGE_CACHE: "${WPCOW_PAGE_CACHE:-1}" + WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index 69d14377..f645db0a 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ 
b/experiments/remote-wp-cow/docker/Dockerfile @@ -20,7 +20,7 @@ FROM dunglas/frankenphp:1-php8.3-bookworm ENV DEBIAN_FRONTEND=noninteractive ENV WPCOW_HOME=/root/.wp-cow -RUN install-php-extensions mysqli pdo_mysql \ +RUN install-php-extensions mysqli opcache pdo_mysql \ && apt-get update \ && apt-get install -y --no-install-recommends \ bash \ diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-check b/experiments/remote-wp-cow/docker/wp-cow-lab-check index 94fe24f2..848875d5 100755 --- a/experiments/remote-wp-cow/docker/wp-cow-lab-check +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-check @@ -15,6 +15,12 @@ else echo "frankenphp mysqli extension: missing" >&2 exit 1 fi +if frankenphp php-cli -m | grep -qx 'Zend OPcache'; then + echo "frankenphp opcache extension: yes" +else + echo "frankenphp opcache extension: missing" >&2 + exit 1 +fi mysql --version mysqldump --version diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 6eddb8bf..c40ad2d6 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -17,7 +17,7 @@ use crate::overlay::OverlayStore; use crate::remote::{RemoteClient, RemoteEntry}; const ROOT_INO: u64 = 1; -const TTL: Duration = Duration::from_secs(1); +const DEFAULT_KERNEL_CACHE_TTL_SECS: u64 = 60; #[derive(Clone)] struct Timed { @@ -40,8 +40,10 @@ pub struct CowFs { handles: HashMap, next_fh: u64, remote_stat_cache: HashMap>, + remote_missing_cache: HashMap, remote_readdir_cache: HashMap>>, remote_cache_ttl: Duration, + kernel_cache_ttl: Duration, uid: u32, gid: u32, } @@ -53,6 +55,10 @@ impl CowFs { ino_to_path.insert(ROOT_INO, PathBuf::new()); path_to_ino.insert(PathBuf::new(), ROOT_INO); let remote_cache_ttl = Duration::from_secs(manifest.remote_metadata_cache_ttl_secs); + let kernel_cache_ttl = Duration::from_secs(env_u64( + "WPCOW_FUSE_TTL_SECS", + DEFAULT_KERNEL_CACHE_TTL_SECS, + )); Self { manifest, remote, @@ -63,8 +69,10 @@ impl CowFs { 
handles: HashMap::new(), next_fh: 1, remote_stat_cache: HashMap::new(), + remote_missing_cache: HashMap::new(), remote_readdir_cache: HashMap::new(), remote_cache_ttl, + kernel_cache_ttl, uid: unsafe { libc::getuid() }, gid: unsafe { libc::getgid() }, } @@ -128,8 +136,17 @@ impl CowFs { return Ok(cached.value.clone()); } } + if let Some(expires_at) = self.remote_missing_cache.get(rel) { + if *expires_at > Instant::now() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "cached remote miss", + )); + } + } if let Some(entry) = self.overlay.cached_entry(rel).map_err(anyhow_to_io)? { + self.remote_missing_cache.remove(rel); self.remote_stat_cache.insert( rel.to_path_buf(), Timed { @@ -140,7 +157,16 @@ impl CowFs { return Ok(entry); } - let entry = self.remote.stat(rel)?; + let entry = match self.remote.stat(rel) { + Ok(entry) => entry, + Err(err) if err.kind() == io::ErrorKind::NotFound => { + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + return Err(err); + } + Err(err) => return Err(err), + }; + self.remote_missing_cache.remove(rel); self.remote_stat_cache.insert( rel.to_path_buf(), Timed { @@ -182,6 +208,7 @@ impl CowFs { fn invalidate_remote_cache(&mut self, rel: &Path) { self.remote_stat_cache.remove(rel); + self.remote_missing_cache.remove(rel); self.remote_readdir_cache.remove(rel); if let Some(parent) = rel.parent() { self.remote_readdir_cache.remove(parent); @@ -272,7 +299,7 @@ impl Filesystem for CowFs { self.attr_for_path(&rel, ino) })(); match result { - Ok(attr) => reply.entry(&TTL, &attr, 0), + Ok(attr) => reply.entry(&self.kernel_cache_ttl, &attr, 0), Err(err) => reply.error(io_errno(&err)), } } @@ -289,7 +316,7 @@ impl Filesystem for CowFs { self.attr_for_path(&rel, ino) })(); match result { - Ok(attr) => reply.attr(&TTL, &attr), + Ok(attr) => reply.attr(&self.kernel_cache_ttl, &attr), Err(err) => reply.error(io_errno(&err)), } } @@ -331,7 +358,7 @@ impl Filesystem for CowFs { 
self.attr_for_path(&rel, ino) })(); match result { - Ok(attr) => reply.entry(&TTL, &attr, 0), + Ok(attr) => reply.entry(&self.kernel_cache_ttl, &attr, 0), Err(err) => reply.error(io_errno(&err)), } } @@ -547,7 +574,7 @@ impl Filesystem for CowFs { Ok((attr, fh)) })(); match result { - Ok((attr, fh)) => reply.created(&TTL, &attr, 0, fh, flags as u32), + Ok((attr, fh)) => reply.created(&self.kernel_cache_ttl, &attr, 0, fh, flags as u32), Err(err) => reply.error(io_errno(&err)), } } @@ -772,6 +799,13 @@ fn env_bool(name: &str) -> bool { .unwrap_or(false) } +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + fn io_errno(err: &io::Error) -> i32 { match err.kind() { io::ErrorKind::NotFound => ENOENT, diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index c1c312dc..11379066 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -59,7 +59,6 @@ define( 'WPCOW_REMOTE_DB_USER', {remote_db_user} ); define( 'WPCOW_REMOTE_DB_PASSWORD', {remote_db_password} ); define( 'WPCOW_REMOTE_DB_HOST', {remote_db_host} ); define( 'WPCOW_QUERY_CACHE_DIR', {query_cache_dir} ); -define( 'WPCOW_PAGE_CACHE_DIR', {page_cache_dir} ); define( 'FS_METHOD', 'direct' ); define( 'DISABLE_WP_CRON', true ); @@ -99,7 +98,6 @@ require_once ABSPATH . 'wp-settings.php'; manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port )), query_cache_dir = php_string(paths.db.join("query-cache").to_string_lossy().as_ref()), - page_cache_dir = php_string(paths.db.join("page-cache").to_string_lossy().as_ref()), ) } @@ -270,15 +268,6 @@ function cow_remote_query_cache_clear() { } } -function cow_page_cache_clear() { - if ( ! defined( 'WPCOW_PAGE_CACHE_DIR' ) || '' === WPCOW_PAGE_CACHE_DIR || ! is_dir( WPCOW_PAGE_CACHE_DIR ) ) { - return; - } - foreach ( glob( rtrim( WPCOW_PAGE_CACHE_DIR, '/' ) . 
'/*.html' ) as $file ) { - @unlink( $file ); - } -} - class Cow_DB extends wpdb { private $cow_remote_mysqli = null; private $cow_remote_failed = false; @@ -300,7 +289,6 @@ class Cow_DB extends wpdb { cow_db_runtime_fail( 'control /materialize failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } cow_remote_query_cache_clear(); - cow_page_cache_clear(); return parent::query( $query ); } @@ -489,7 +477,6 @@ pub fn router_php(paths: &ClonePaths, manifest: &Manifest) -> String { r#" time() ) ) ); - wp_cow_page_cache_set( $page_cache_dir, $path, $html, 'text/html; charset=utf-8' ); echo $html; return true; } @@ -718,11 +649,7 @@ if ( '/' !== $path && is_file( $file ) ) { return false; } -if ( wp_cow_page_cache_get( $wp_cow_page_cache_dir, $path ) ) { - return true; -} - -if ( wp_cow_proxy_remote_frontend( $wp_cow_remote_url, $wp_cow_local_url, $path, $wp_cow_page_cache_dir ) ) { +if ( wp_cow_proxy_remote_frontend( $wp_cow_remote_url, $wp_cow_local_url, $path ) ) { return true; } @@ -844,10 +771,10 @@ HTML; } if ( isset( $_GET['__wp_cow_bypass_splash'] ) ) { - return wp_cow_render_wordpress( $wp_cow_ready_file, $wp_cow_page_cache_dir, $path ); + return wp_cow_render_wordpress( $wp_cow_ready_file ); } -return wp_cow_render_wordpress( $wp_cow_ready_file, $wp_cow_page_cache_dir, $path ); +return wp_cow_render_wordpress( $wp_cow_ready_file ); "# .replace( "__WPCOW_PROGRESS_FILE__", @@ -857,10 +784,6 @@ return wp_cow_render_wordpress( $wp_cow_ready_file, $wp_cow_page_cache_dir, $pat "__WPCOW_READY_FILE__", &php_string(&paths.run.join("first-request-ready.json").to_string_lossy()), ) - .replace( - "__WPCOW_PAGE_CACHE_DIR__", - &php_string(paths.db.join("page-cache").to_string_lossy().as_ref()), - ) .replace("__WPCOW_REMOTE_URL__", &php_string(&manifest.remote_url)) .replace("__WPCOW_LOCAL_URL__", &php_string(&manifest.local_url)) } @@ -932,7 +855,6 @@ mod tests { assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_REMOTE_DB_HOST")); 
assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); - assert!(php.contains("WPCOW_PAGE_CACHE_DIR")); assert!(php.contains("wp-cow DB/runtime error")); assert!(php.contains("wp-content/db.php")); } @@ -970,8 +892,6 @@ mod tests { assert!(php.contains("__wp_cow_bypass_splash")); assert!(php.contains("wp_cow_looks_like_installer")); assert!(php.contains("wp_cow_proxy_remote_frontend")); - assert!(php.contains("wp_cow_page_cache_get")); - assert!(php.contains("X-WP-COW-Page-Cache")); assert!(php.contains("X-WP-COW-Frontend-Proxy")); assert!(php.contains("WordPress tried to show the installation wizard")); assert!(php.contains("Cache-Control: no-store")); diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index 663ccb7b..8469922b 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; +use std::cell::RefCell; use std::collections::{BTreeMap, BTreeSet}; use std::fs::{self, File, OpenOptions}; use std::io::{Read, Seek, SeekFrom, Write}; @@ -12,12 +13,12 @@ use crate::remote::{RemoteClient, RemoteEntry}; pub const OPAQUE_MARKER: &str = ".wp-cow-opaque"; -#[derive(Debug, Default, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, Serialize, Deserialize)] struct WhiteoutFile { deleted: BTreeSet, } -#[derive(Debug, Default, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, Serialize, Deserialize)] struct MetadataFile { entries: BTreeMap, } @@ -39,6 +40,8 @@ pub struct OverlayStore { pub upper: PathBuf, pub file_cache: PathBuf, whiteouts_path: PathBuf, + whiteouts: RefCell>, + metadata: RefCell>, } impl OverlayStore { @@ -47,6 +50,8 @@ impl OverlayStore { upper: paths.upper.clone(), file_cache: paths.file_cache.clone(), whiteouts_path: paths.whiteouts.clone(), + whiteouts: RefCell::new(None), + metadata: RefCell::new(None), } } @@ -292,12 +297,19 @@ impl 
OverlayStore { } fn load_whiteouts(&self) -> Result { + if let Some(whiteouts) = self.whiteouts.borrow().as_ref() { + return Ok(whiteouts.clone()); + } if !self.whiteouts_path.exists() { - return Ok(WhiteoutFile::default()); + let whiteouts = WhiteoutFile::default(); + *self.whiteouts.borrow_mut() = Some(whiteouts.clone()); + return Ok(whiteouts); } let mut json = String::new(); File::open(&self.whiteouts_path)?.read_to_string(&mut json)?; - Ok(serde_json::from_str(&json)?) + let whiteouts: WhiteoutFile = serde_json::from_str(&json)?; + *self.whiteouts.borrow_mut() = Some(whiteouts.clone()); + Ok(whiteouts) } fn write_whiteouts(&self, whiteouts: &WhiteoutFile) -> Result<()> { @@ -312,6 +324,7 @@ impl OverlayStore { .open(&self.whiteouts_path)?; file.write_all(&json)?; file.write_all(b"\n")?; + *self.whiteouts.borrow_mut() = Some(whiteouts.clone()); Ok(()) } @@ -324,13 +337,20 @@ impl OverlayStore { } fn load_metadata(&self) -> Result { + if let Some(metadata) = self.metadata.borrow().as_ref() { + return Ok(metadata.clone()); + } let path = self.metadata_path(); if !path.exists() { - return Ok(MetadataFile::default()); + let metadata = MetadataFile::default(); + *self.metadata.borrow_mut() = Some(metadata.clone()); + return Ok(metadata); } let mut json = String::new(); File::open(path)?.read_to_string(&mut json)?; - Ok(serde_json::from_str(&json)?) 
+ let metadata: MetadataFile = serde_json::from_str(&json)?; + *self.metadata.borrow_mut() = Some(metadata.clone()); + Ok(metadata) } fn write_metadata(&self, metadata: &MetadataFile) -> Result<()> { @@ -346,6 +366,7 @@ impl OverlayStore { file.write_all(b"\n")?; drop(file); fs::rename(tmp, self.metadata_path())?; + *self.metadata.borrow_mut() = Some(metadata.clone()); Ok(()) } diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 3f3c199b..ae2c37e7 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -416,6 +416,11 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) php_ini max_execution_time {max_execution} php_ini default_socket_timeout {socket_timeout} php_ini mysqlnd.net_read_timeout {socket_timeout} + php_ini opcache.enable 1 + php_ini opcache.memory_consumption 192 + php_ini opcache.max_accelerated_files 20000 + php_ini opcache.validate_timestamps 1 + php_ini opcache.revalidate_freq 2 }} }} From 4482b16f786da59e2c355f3a8b176c00ab034c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sun, 3 May 2026 00:53:55 +0200 Subject: [PATCH 23/39] Add remote sever mode for wp-cow clones --- experiments/remote-wp-cow/.env.example | 2 + experiments/remote-wp-cow/PRD.md | 24 +++ experiments/remote-wp-cow/README.md | 16 ++ experiments/remote-wp-cow/compose.yaml | 2 + experiments/remote-wp-cow/docker/Dockerfile | 1 + .../remote-wp-cow/docker/wp-cow-lab-sever | 20 +++ experiments/remote-wp-cow/src/cli.rs | 90 +++++++++- experiments/remote-wp-cow/src/config.rs | 33 ++++ experiments/remote-wp-cow/src/control.rs | 24 ++- experiments/remote-wp-cow/src/db.rs | 165 ++++++++++++++++++ experiments/remote-wp-cow/src/fusefs.rs | 34 +++- experiments/remote-wp-cow/src/overlay.rs | 5 + experiments/remote-wp-cow/src/remote.rs | 25 +++ experiments/remote-wp-cow/src/run.rs | 90 +++++++--- 14 files changed, 502 insertions(+), 29 deletions(-) create mode 
100755 experiments/remote-wp-cow/docker/wp-cow-lab-sever diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 8efb60e3..911cb1c0 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -21,6 +21,8 @@ WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 WPCOW_PHP_WORKERS=4 WPCOW_WEB_SERVER=frankenphp WPCOW_SPLASH=1 +WPCOW_LOCAL_ADMIN_PASSWORD= +WPCOW_LOCAL_ADMIN_LOGIN= # Set this to 1 for a filesystem-only smoke test that does not export DB schema. WPCOW_SKIP_SCHEMA=0 diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md index a54756d1..6f727c5f 100644 --- a/experiments/remote-wp-cow/PRD.md +++ b/experiments/remote-wp-cow/PRD.md @@ -36,6 +36,9 @@ wp-cow-lab-serve - No full `wp-content/uploads` copy. - No optimistic full database row dump. - Local writes must not reach production. +- A warmed clone can be explicitly severed from the remote lower layers, then + refreshed and used in `wp-admin` without opening SSH or remote DB reads. +- A local admin password reset must affect only the local materialized DB. ## Non-Goals @@ -103,6 +106,27 @@ identity/theme/plugin option names into the local database and routes matching reads locally. Arbitrary non-bootstrap option reads still go through the remote read path unless the table has been fully materialized. +## Severed Mode + +`wp-cow sever <name>` turns a clone from live-lower mode into local-only mode. +It is not the default startup path because it must copy database rows, but it is +the expected path when the user wants to disconnect from production and keep +working locally. + +Severing should: + +- Materialize the core WordPress tables needed for local frontend/admin/content + edits: options, users, usermeta, posts, postmeta, terms, term_taxonomy, + term_relationships, comments, commentmeta, and links. +- Cache WordPress admin/runtime program files needed for offline `wp-admin` + access without copying uploads.
+- Optionally set a local administrator password in the local DB only. +- Write an offline marker that makes future `wp-cow run` skip SSH control + masters, remote DB tunnels, remote filesystem reads, and daemon remote + `/query` calls. +- Continue serving local upper-layer file writes and local DB writes after the + remote link is severed. + ## Observability The CLI should print phase names and durations: diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 6e3dde46..da377f05 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -170,6 +170,8 @@ renders to reuse the program files WordPress just touched. The Docker lab defaults `WPCOW_FUSE_TTL_SECS` to `60`; lower values make live remote changes visible sooner, while higher values reduce repeated path walking. FrankenPHP also enables OPcache for parsed PHP code in the local web runtime. +`WPCOW_PREFETCH_RUNTIME=1` can still be used for an explicit background warm, +but it is off by default so normal `serve` remains request-driven. The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the @@ -183,6 +185,20 @@ wp-cow DB/runtime failure. The clone should either show the real remote-backed site or a diagnostic error; the installer is not considered a successful local copy. +To sever a warmed clone from the remote lower layers, run: + +```bash +export WPCOW_LOCAL_ADMIN_PASSWORD='8u239huiwdsj91das' +wp-cow-lab-sever +wp-cow-lab-run +``` + +`wp-cow-lab-sever` materializes the core WordPress tables into local MariaDB, +sets the admin password only in the local DB when requested, caches the +WordPress admin/runtime program files needed for offline admin access, and +writes `run/offline.json`. After that marker exists, `wp-cow run` does not open +SSH, does not start the remote DB tunnel, and routes DB reads locally. 
+ Open this on the Mac: ```text diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 9cf88ee4..9567d679 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -42,6 +42,8 @@ services: WPCOW_PHP_WORKERS: "${WPCOW_PHP_WORKERS:-4}" WPCOW_WEB_SERVER: "${WPCOW_WEB_SERVER:-frankenphp}" WPCOW_SPLASH: "${WPCOW_SPLASH:-1}" + WPCOW_LOCAL_ADMIN_PASSWORD: "${WPCOW_LOCAL_ADMIN_PASSWORD:-}" + WPCOW_LOCAL_ADMIN_LOGIN: "${WPCOW_LOCAL_ADMIN_LOGIN:-}" WPCOW_MOUNTPOINT: "/mnt/wp-cow/${WPCOW_NAME:-example}" WPCOW_HTTP: 0.0.0.0:8080 SSH_AUTH_SOCK: /run/host-services/ssh-auth.sock diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index f645db0a..d3803f02 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -50,6 +50,7 @@ COPY docker/wp-cow-lab-dns /usr/local/bin/wp-cow-lab-dns COPY docker/wp-cow-lab-db-init /usr/local/bin/wp-cow-lab-db-init COPY docker/wp-cow-lab-cache /usr/local/bin/wp-cow-lab-cache COPY docker/wp-cow-lab-serve /usr/local/bin/wp-cow-lab-serve +COPY docker/wp-cow-lab-sever /usr/local/bin/wp-cow-lab-sever RUN chmod +x /usr/local/bin/wp-cow-lab-* ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/wp-cow-lab-entrypoint"] diff --git a/experiments/remote-wp-cow/docker/wp-cow-lab-sever b/experiments/remote-wp-cow/docker/wp-cow-lab-sever new file mode 100755 index 00000000..e7a7a6f6 --- /dev/null +++ b/experiments/remote-wp-cow/docker/wp-cow-lab-sever @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +name="${WPCOW_NAME:-example}" + +if [ ! 
-f "${WPCOW_HOME:-/root/.wp-cow}/clones/$name/manifest.json" ]; then + echo "clone '$name' does not exist yet; run wp-cow-lab-serve once first" >&2 + exit 2 +fi + +args=(sever "$name") +if [ -n "${WPCOW_LOCAL_ADMIN_PASSWORD:-}" ]; then + args+=(--admin-password "$WPCOW_LOCAL_ADMIN_PASSWORD") +fi +if [ -n "${WPCOW_LOCAL_ADMIN_LOGIN:-}" ]; then + args+=(--admin-login "$WPCOW_LOCAL_ADMIN_LOGIN") +fi + +echo "severing remote lower layers for $name" +exec wp-cow "${args[@]}" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index e3dc9407..3fcb2dc2 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -2,11 +2,11 @@ use anyhow::{anyhow, Context, Result}; use clap::{Args, Parser, Subcommand}; use std::fs; use std::path::PathBuf; -use std::time::Instant; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; use crate::config::{ clone_paths, default_state_dir, derive_name, ensure_clone_dirs, load_manifest, write_manifest, - Manifest, Probe, + write_offline_marker, Manifest, OfflineMarker, Probe, }; use crate::db; use crate::generate; @@ -33,6 +33,8 @@ enum Command { ExportSchema(NameArgs), #[command(name = "materialize")] Materialize(MaterializeArgs), + #[command(name = "sever")] + Sever(SeverArgs), #[command(name = "mount")] Mount(MountArgs), #[command(name = "run")] @@ -107,6 +109,17 @@ struct MaterializeArgs { state_dir: Option, } +#[derive(Debug, Args)] +struct SeverArgs { + name: String, + #[arg(long = "admin-password")] + admin_password: Option, + #[arg(long = "admin-login")] + admin_login: Option, + #[arg(long)] + state_dir: Option, +} + #[derive(Debug, Args)] struct MountArgs { name: String, @@ -145,6 +158,7 @@ pub fn run() -> Result<()> { Command::InitDb(args) => init_db(args), Command::ExportSchema(args) => export_schema(args), Command::Materialize(args) => materialize(args), + Command::Sever(args) => sever(args), Command::Mount(args) => mount(args), Command::Run(args) => 
run_clone(args), Command::Probe(args) => { @@ -408,6 +422,78 @@ fn materialize(args: MaterializeArgs) -> Result<()> { Ok(()) } +fn sever(args: SeverArgs) -> Result<()> { + let started = Instant::now(); + let state_dir = args.state_dir.unwrap_or(default_state_dir()?); + let paths = clone_paths(&state_dir, &args.name); + let manifest = load_manifest(&paths.manifest)?; + let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + + remote.ensure_master()?; + if !paths.db.join("schema.sql").exists() { + db::export_schema(&remote, &paths).context("export schema")?; + } + if db::init_local_db_if_empty(&manifest, &paths)? { + println!( + "initialized empty local database '{}'", + manifest.local_db.name + ); + } + + let requested_tables = db::wordpress_offline_table_names(&manifest.probe.table_prefix); + let tables = db::existing_local_tables(&manifest, &requested_tables)?; + let skipped = requested_tables.len().saturating_sub(tables.len()); + if skipped > 0 { + println!( + "skipping {} WordPress tables that are not present in the local schema", + skipped + ); + } + let materialized = db::materialize_tables(&remote, &manifest, &paths, &tables) + .context("materialize local offline database lower layer")?; + println!( + "materialized {} WordPress tables for local/offline use", + materialized.len() + ); + + println!("caching WordPress admin/runtime program files for offline use"); + run::prefetch_runtime_files(&manifest, &paths, &remote) + .context("cache WordPress admin/runtime files")?; + + let admin = if let Some(password) = args.admin_password.as_deref() { + let admin = db::set_local_admin_password(&manifest, args.admin_login.as_deref(), password) + .context("set local administrator password")?; + println!( + "set local administrator password for '{}' without writing to the remote DB", + admin.user_login + ); + Some(admin.user_login) + } else { + None + }; + + let marker = OfflineMarker { + severed_at_unix: SystemTime::now() + 
.duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(), + materialized_tables: tables, + admin_user: admin, + }; + write_offline_marker(&paths, &marker)?; + generate::write_wordpress_overrides(&paths, &manifest)?; + if let Err(err) = remote.stop_master() { + eprintln!("warning: could not close SSH control master after severing: {err:#}"); + } + + println!( + "severed clone '{}' from remote lower layers in {:.2}s", + manifest.name, + started.elapsed().as_secs_f64() + ); + Ok(()) +} + fn mount(args: MountArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index 014352ea..1987e10e 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -8,6 +8,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use url::Url; pub const MANIFEST_VERSION: u32 = 1; +const OFFLINE_MARKER: &str = "offline.json"; const DEFAULT_CACHE_MAX_FILE_BYTES: u64 = 8 * 1024 * 1024; const DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS: u64 = 30; @@ -74,6 +75,14 @@ pub struct ClonePaths { pub whiteouts: PathBuf, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OfflineMarker { + pub severed_at_unix: u64, + pub materialized_tables: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub admin_user: Option, +} + impl Manifest { pub fn new( name: String, @@ -157,6 +166,30 @@ pub fn clone_paths(state_dir: &Path, name: &str) -> ClonePaths { } } +pub fn offline_marker_path(paths: &ClonePaths) -> PathBuf { + paths.run.join(OFFLINE_MARKER) +} + +pub fn is_offline(paths: &ClonePaths) -> bool { + offline_marker_path(paths).is_file() + || std::env::var("WPCOW_OFFLINE") + .ok() + .map(|raw| { + matches!( + raw.to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) + }) + .unwrap_or(false) +} + +pub fn write_offline_marker(paths: &ClonePaths, marker: 
&OfflineMarker) -> Result<()> { + fs::create_dir_all(&paths.run)?; + let json = serde_json::to_vec_pretty(marker)?; + fs::write(offline_marker_path(paths), [json, b"\n".to_vec()].concat())?; + Ok(()) +} + pub fn ensure_clone_dirs(paths: &ClonePaths) -> Result<()> { fs::create_dir_all(&paths.upper)?; fs::create_dir_all(&paths.file_cache)?; diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs index 7177a76b..356af064 100644 --- a/experiments/remote-wp-cow/src/control.rs +++ b/experiments/remote-wp-cow/src/control.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use std::time::Duration; use tiny_http::{Header, Request, Response, Server, StatusCode}; -use crate::config::{ClonePaths, Manifest}; +use crate::config::{self, ClonePaths, Manifest}; use crate::db; use crate::remote::RemoteClient; @@ -97,11 +97,27 @@ fn control_response( match url { "/materialize" => { let tables = input.tables.unwrap_or_default(); + if config::is_offline(paths) { + return Ok(json!({ + "ok": true, + "backend": "local", + "materialized": [], + "offline": true + })); + } let materialized = db::materialize_tables(remote, manifest, paths, &tables)?; Ok(json!({ "ok": true, "backend": "local", "materialized": materialized })) } "/route" => { let tables = input.tables.unwrap_or_default(); + if config::is_offline(paths) { + return Ok(json!({ + "ok": true, + "backend": "local", + "materialized": [], + "offline": true + })); + } let decision = if let Some(sql) = input.sql.as_deref() { db::route_for_query(remote, manifest, paths, sql, &tables)? 
} else { @@ -112,6 +128,12 @@ fn control_response( ) } "/query" => { + if config::is_offline(paths) { + return Ok(json!({ + "ok": false, + "error": "clone is severed from the remote database" + })); + } let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?; let result = db::remote_readonly_query(remote, &sql)?; Ok(json!({ diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 0e7d5f2f..9e1a64f1 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -20,6 +20,12 @@ pub struct DbState { pub option_rows: BTreeSet, } +#[derive(Debug, Clone, Serialize)] +pub struct LocalAdmin { + pub id: u64, + pub user_login: String, +} + pub fn state_path(paths: &ClonePaths) -> PathBuf { paths.db.join("state.json") } @@ -153,6 +159,99 @@ pub fn materialize_tables( Ok(changed) } +pub fn wordpress_offline_table_names(table_prefix: &str) -> Vec { + [ + "options", + "users", + "usermeta", + "posts", + "postmeta", + "terms", + "term_taxonomy", + "term_relationships", + "comments", + "commentmeta", + "links", + ] + .into_iter() + .map(|suffix| format!("{table_prefix}{suffix}")) + .collect() +} + +pub fn existing_local_tables(manifest: &Manifest, tables: &[String]) -> Result> { + for table in tables { + validate_table_name(table)?; + } + if tables.is_empty() { + return Ok(Vec::new()); + } + + let in_list = tables + .iter() + .map(|table| format!("'{}'", mysql_string_literal(table))) + .collect::>() + .join(", "); + let sql_text = format!( + "SELECT table_name FROM information_schema.tables \ + WHERE table_schema='{}' AND table_name IN ({});", + mysql_string_literal(&manifest.local_db.name), + in_list + ); + let output = local_mysql_command(manifest) + .arg("--batch") + .arg("--raw") + .arg("--skip-column-names") + .arg("--execute") + .arg(sql_text) + .output() + .context("query local WordPress table list")?; + if !output.status.success() { + return Err(anyhow!( + "local table list query failed: {}", + 
String::from_utf8_lossy(&output.stderr) + )); + } + + let present = String::from_utf8_lossy(&output.stdout) + .lines() + .map(|line| line.to_string()) + .collect::>(); + Ok(tables + .iter() + .filter(|table| present.contains(table.as_str())) + .cloned() + .collect()) +} + +pub fn set_local_admin_password( + manifest: &Manifest, + login: Option<&str>, + password: &str, +) -> Result { + let users_table = format!("{}users", manifest.probe.table_prefix); + let usermeta_table = format!("{}usermeta", manifest.probe.table_prefix); + validate_table_name(&users_table)?; + validate_table_name(&usermeta_table)?; + + let admin = if let Some(login) = login { + local_user_by_login(manifest, &users_table, login)? + } else { + local_first_admin_user(manifest, &users_table, &usermeta_table)? + }; + + let update_sql = format!( + "UPDATE {} SET user_pass=MD5('{}'), user_activation_key='' WHERE ID={};\ + DELETE FROM {} WHERE user_id={} AND meta_key='session_tokens';", + qualified_table(manifest, &users_table), + mysql_string_literal(password), + admin.id, + qualified_table(manifest, &usermeta_table), + admin.id + ); + run_mysql_exec(manifest, &update_sql)?; + Ok(admin) +} + pub fn route_for_tables( remote: &RemoteClient, manifest: &Manifest, @@ -309,6 +408,72 @@ fn materialize_one_table(remote: &RemoteClient, manifest: &Manifest, table: &str Ok(()) } +fn local_first_admin_user( + manifest: &Manifest, + users_table: &str, + usermeta_table: &str, +) -> Result { + let capabilities_key = format!("{}capabilities", manifest.probe.table_prefix); + let sql_text = format!( + "SELECT u.ID, u.user_login \ + FROM {} u \ + JOIN {} m ON m.user_id = u.ID \ + WHERE m.meta_key = '{}' AND m.meta_value LIKE '%administrator%' \ + ORDER BY u.ID LIMIT 1;", + qualified_table(manifest, users_table), + qualified_table(manifest, usermeta_table), + mysql_string_literal(&capabilities_key) + ); + local_admin_from_query(manifest, &sql_text, "find local administrator user") +} + +fn local_user_by_login(manifest: 
&Manifest, users_table: &str, login: &str) -> Result { + let sql_text = format!( + "SELECT ID, user_login FROM {} WHERE user_login='{}' LIMIT 1;", + qualified_table(manifest, users_table), + mysql_string_literal(login) + ); + local_admin_from_query(manifest, &sql_text, "find requested local user") +} + +fn local_admin_from_query( + manifest: &Manifest, + sql_text: &str, + context: &'static str, +) -> Result { + let output = local_mysql_command(manifest) + .arg("--batch") + .arg("--raw") + .arg("--skip-column-names") + .arg("--execute") + .arg(sql_text) + .output() + .context(context)?; + if !output.status.success() { + return Err(anyhow!( + "{} failed: {}", + context, + String::from_utf8_lossy(&output.stderr) + )); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let line = stdout + .lines() + .next() + .ok_or_else(|| anyhow!("no local administrator user found"))?; + let (id, user_login) = line + .split_once('\t') + .ok_or_else(|| anyhow!("unexpected local user query output: {}", line))?; + let id = id + .parse::() + .with_context(|| format!("parse local user id from {}", id))?; + Ok(LocalAdmin { + id, + user_login: user_login.to_string(), + }) +} + fn materialize_option_bootstrap( remote: &RemoteClient, manifest: &Manifest, diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index c40ad2d6..224aa6f4 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -12,7 +12,7 @@ use std::os::unix::fs::{FileExt, MetadataExt, OpenOptionsExt, PermissionsExt}; use std::path::{Path, PathBuf}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use crate::config::{ClonePaths, Manifest}; +use crate::config::{self, ClonePaths, Manifest}; use crate::overlay::OverlayStore; use crate::remote::{RemoteClient, RemoteEntry}; @@ -44,6 +44,7 @@ pub struct CowFs { remote_readdir_cache: HashMap>>, remote_cache_ttl: Duration, kernel_cache_ttl: Duration, + offline: bool, uid: u32, 
gid: u32, } @@ -59,6 +60,7 @@ impl CowFs { "WPCOW_FUSE_TTL_SECS", DEFAULT_KERNEL_CACHE_TTL_SECS, )); + let offline = config::is_offline(paths); Self { manifest, remote, @@ -73,6 +75,7 @@ impl CowFs { remote_readdir_cache: HashMap::new(), remote_cache_ttl, kernel_cache_ttl, + offline, uid: unsafe { libc::getuid() }, gid: unsafe { libc::getgid() }, } @@ -157,6 +160,13 @@ impl CowFs { return Ok(entry); } + if self.offline { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "clone is severed and path is not cached locally", + )); + } + let entry = match self.remote.stat(rel) { Ok(entry) => entry, Err(err) if err.kind() == io::ErrorKind::NotFound => { @@ -178,6 +188,10 @@ impl CowFs { } fn remote_readdir(&mut self, rel: &Path) -> io::Result> { + if self.offline { + return Ok(Vec::new()); + } + if let Some(cached) = self.remote_readdir_cache.get(rel) { if cached.expires_at > Instant::now() { return Ok(cached.value.clone()); @@ -330,6 +344,12 @@ impl Filesystem for CowFs { if upper.exists() { return fs::read_link(upper).map(|p| p.to_string_lossy().into_owned()); } + if self.offline { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "clone is severed and symlink is not cached locally", + )); + } self.remote.readlink(&rel) })(); match result { @@ -450,6 +470,11 @@ impl Filesystem for CowFs { } else if let Some(cache_path) = self.overlay.cached_file_path(&rel) { let file = File::open(cache_path)?; Ok((self.allocate_handle(Handle::Local(file)), 0)) + } else if self.offline { + Err(io::Error::new( + io::ErrorKind::NotFound, + "clone is severed and file is not cached locally", + )) } else { Ok((self.allocate_handle(Handle::Remote(rel)), 0)) } @@ -489,6 +514,9 @@ impl Filesystem for CowFs { } } Some(Handle::Remote(rel)) => { + if self.offline { + return reply.error(ENOENT); + } trace_fuse("read-remote", rel); self.overlay .read_cached_or_remote( @@ -733,7 +761,9 @@ pub fn mount_foreground(manifest: Manifest, paths: ClonePaths, mountpoint: &Path 
fs::create_dir_all(mountpoint)?; let control_path = paths.run.join("ssh-control.sock"); let remote = RemoteClient::new(manifest.clone(), Some(control_path)); - remote.ensure_master()?; + if !config::is_offline(&paths) { + remote.ensure_master()?; + } let fs = CowFs::new(manifest.clone(), &paths, remote); let options = vec![ MountOption::FSName(format!("wp-cow-{}", manifest.name)), diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index 8469922b..5a3fcb20 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -173,6 +173,11 @@ impl OverlayStore { )); } + if let Some(cached) = self.cached_file_path(rel) { + fs::copy(cached, &upper)?; + return Ok(upper); + } + let mut out = File::create(&upper)?; let mut offset = 0_u64; let chunk = 1024 * 1024; diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 3e5ad603..762f69ef 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -77,6 +77,31 @@ impl RemoteClient { Ok(()) } + pub fn stop_master(&self) -> Result<()> { + let Some(control_path) = &self.control_path else { + return Ok(()); + }; + if !control_path.exists() { + return Ok(()); + } + + let mut command = Command::new("ssh"); + command.arg("-S").arg(control_path); + command.arg("-O").arg("exit"); + self.add_ssh_safety_options(&mut command); + let status = command + .arg(&self.manifest.ssh) + .status() + .context("stop SSH control master")?; + if !status.success() { + return Err(anyhow!( + "failed to stop SSH control master for {}", + self.manifest.ssh + )); + } + Ok(()) + } + pub fn command(&self, remote_command: &str) -> Command { self.ssh_command(remote_command, 0) } diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index ae2c37e7..67027333 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -9,7 +9,7 
@@ use std::sync::Arc; use std::thread::{self, JoinHandle}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use crate::config::{ClonePaths, Manifest}; +use crate::config::{self, ClonePaths, Manifest}; use crate::control; use crate::db; use crate::fusefs; @@ -28,25 +28,34 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R let control_addr = control_addr_from_url(&manifest.control_url)?; let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); - remote.ensure_master()?; - let mut db_tunnel = match remote.start_db_tunnel() { - Ok(Some(child)) => { - eprintln!( - "wp-cow remote DB tunnel listening at {}:{}", - manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port - ); - Some(child) - } - Ok(None) => { - eprintln!( - "wp-cow remote DB tunnel disabled or unavailable; falling back to control reads" - ); - None - } - Err(err) => { - eprintln!("wp-cow remote DB tunnel failed: {err:#}"); - eprintln!("wp-cow falling back to control reads"); - None + let offline = config::is_offline(&paths); + let mut db_tunnel = if offline { + eprintln!( + "wp-cow clone '{}' is severed; remote filesystem and DB reads are disabled", + manifest.name + ); + None + } else { + remote.ensure_master()?; + match remote.start_db_tunnel() { + Ok(Some(child)) => { + eprintln!( + "wp-cow remote DB tunnel listening at {}:{}", + manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port + ); + Some(child) + } + Ok(None) => { + eprintln!( + "wp-cow remote DB tunnel disabled or unavailable; falling back to control reads" + ); + None + } + Err(err) => { + eprintln!("wp-cow remote DB tunnel failed: {err:#}"); + eprintln!("wp-cow falling back to control reads"); + None + } } }; @@ -93,7 +102,7 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R )?) 
}; - if env_bool("WPCOW_PREFETCH_RUNTIME", true) { + if !offline && env_bool("WPCOW_PREFETCH_RUNTIME", false) { let warm_manifest = manifest.clone(); let warm_paths = paths.clone(); let warm_remote = remote.clone(); @@ -152,19 +161,37 @@ pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> R fusefs::mount_foreground(manifest, paths, mountpoint) } -fn prefetch_runtime_files( +pub(crate) fn prefetch_runtime_files( manifest: &Manifest, paths: &ClonePaths, remote: &RemoteClient, ) -> Result<()> { let mirror = paths.file_cache.join("mirror"); fs::create_dir_all(&mirror)?; - let stamp = mirror.join(".wp-cow-runtime-prefetch-v2"); + let stamp = mirror.join(".wp-cow-runtime-prefetch-v3"); if stamp.is_file() { return Ok(()); } - let mut rels = vec!["wp-admin".to_string(), "wp-includes".to_string()]; + let mut rels = [ + "index.php", + "wp-activate.php", + "wp-blog-header.php", + "wp-comments-post.php", + "wp-cron.php", + "wp-load.php", + "wp-login.php", + "wp-mail.php", + "wp-settings.php", + "wp-signup.php", + "wp-trackback.php", + "xmlrpc.php", + "wp-admin", + "wp-includes", + ] + .into_iter() + .map(|rel| rel.to_string()) + .collect::>(); let mut themes = BTreeSet::new(); for option in ["template", "stylesheet"] { if let Some(theme) = db::local_option_value(manifest, option)? 
{ @@ -432,6 +459,12 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) respond 404 }} + @wpAdminIndex path /wp-admin /wp-admin/ + handle @wpAdminIndex {{ + rewrite * /wp-admin/index.php + php + }} + @static {{ file not path *.php @@ -543,6 +576,15 @@ mod tests { assert_eq!(listen.site_addr, "http://:8080"); assert_eq!(listen.bind, None); } + + #[test] + fn frankenphp_routes_wp_admin_directory_to_index() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile.contains("@wpAdminIndex path /wp-admin /wp-admin/")); + assert!(caddyfile.contains("rewrite * /wp-admin/index.php")); + } } fn wait_for_mount(mountpoint: &Path, mount_thread: &JoinHandle>) -> Result<()> { From c9665f170ed189d4122658038e4a305321a933b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sun, 3 May 2026 02:03:16 +0200 Subject: [PATCH 24/39] Add row-level DB COW planner --- experiments/remote-wp-cow/DB_ROW_COW.md | 85 + experiments/remote-wp-cow/src/control.rs | 22 + experiments/remote-wp-cow/src/db.rs | 592 ++++++- experiments/remote-wp-cow/src/generate.rs | 28 + experiments/remote-wp-cow/src/main.rs | 1 + experiments/remote-wp-cow/src/row_cow.rs | 1715 +++++++++++++++++++++ 6 files changed, 2417 insertions(+), 26 deletions(-) create mode 100644 experiments/remote-wp-cow/DB_ROW_COW.md create mode 100644 experiments/remote-wp-cow/src/row_cow.rs diff --git a/experiments/remote-wp-cow/DB_ROW_COW.md b/experiments/remote-wp-cow/DB_ROW_COW.md new file mode 100644 index 00000000..9de99844 --- /dev/null +++ b/experiments/remote-wp-cow/DB_ROW_COW.md @@ -0,0 +1,85 @@ +# Row-Level Database COW Design + +## Goal + +The remote WordPress clone must avoid dumping whole database tables before the +first ordinary edit. 
For simple primary-key operations, wp-cow treats the remote +database as the lower layer and the local database as the upper layer: + +- remote rows are read-only lower data, +- copied or inserted local rows shadow remote rows with the same primary key, +- local tombstones hide remote rows without deleting them remotely, +- ambiguous SQL is not treated as row-level safe. + +This is the hard database COW path that lets a large site stay lazy at row +granularity. + +## Model Implemented In This Iteration + +The Rust row-COW engine has a conservative SQL planner and a fakeable backend +trait. It supports single-table WordPress primary-key operations for these +columns: + +- `ID` +- `option_id` +- `umeta_id` +- `meta_id` +- `term_id` +- `term_taxonomy_id` +- `object_id` +- `comment_ID` +- `link_id` + +Supported row-level statements: + +- `SELECT ... FROM table WHERE pk = value` +- `SELECT ... FROM table WHERE pk IN (...)` +- `UPDATE table SET ... WHERE pk = value` or `pk IN (...)` +- `DELETE FROM table WHERE pk = value` or `pk IN (...)` +- `INSERT INTO table ...` as local-only + +`UPDATE` first copies up exactly the requested remote primary keys, excluding +locally tombstoned keys, then the caller runs the update locally. `DELETE` +records local tombstones and deletes any matching local upper rows; it never +sends a write to remote. Row-level `SELECT` merges remote rows, local rows, and +tombstones so deleted remote rows do not reappear and local rows shadow remote +rows. + +The production control server exposes `/row-cow`. The generated `wp-content/db.php` +drop-in calls it before the older full-table materialization path. If row-COW +handles a statement, WordPress continues against the local database or receives +the merged result. If a write is not row-level safe, the existing table +promotion/materialization fallback remains the conservative path. + +Promotion is overlay-preserving. 
Before importing a full remote table, wp-cow +dumps the current local upper rows for that table, imports the remote lower +table, restores the local upper rows, then reapplies tombstones. This keeps +later complex SQL correct after earlier row-level edits: local updates and +inserts survive promotion, and deleted remote rows do not reappear. + +## Conservative Fallbacks + +The planner returns `PromoteTable` or `Unsupported`, never row-level safe, for: + +- joins and multi-table statements, +- non-primary-key writes, +- aggregate reads, +- `DISTINCT`, grouping, ordering, or limiting that cannot be merged safely, +- malformed or ambiguous SQL. + +The strict unit harness uses an in-memory fake remote/local backend. It fails if +write-class SQL reaches the fake remote, if update/delete preparation fetches +more than the requested primary keys, if tombstoned remote rows reappear, if +local inserts are sent to remote, or if ambiguous SQL is planned as row-level +safe. + +## Out Of Scope + +This iteration intentionally does not solve every MySQL/WordPress query shape: + +- joins, aggregates, range predicates, secondary-index predicates, subqueries, + and complex expressions remain full-table-promotion or unsupported cases; +- safe merge support for `ORDER BY` and `LIMIT` is not implemented; +- auto-increment ID allocation for local inserts is still delegated to the + local database and is not reconciled with the remote lower layer; +- no real remote SiteGround instance is required or touched by the test harness. 
diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs index 356af064..8a47b111 100644 --- a/experiments/remote-wp-cow/src/control.rs +++ b/experiments/remote-wp-cow/src/control.rs @@ -127,6 +127,28 @@ fn control_response( json!({ "ok": true, "backend": decision.backend, "materialized": decision.materialized }), ) } + "/row-cow" => { + if config::is_offline(paths) { + return Ok(json!({ + "ok": true, + "handled": false, + "backend": "local", + "materialized": [], + "offline": true + })); + } + let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?; + let tables = input.tables.unwrap_or_default(); + let response = db::row_cow_query(remote, manifest, paths, &sql, &tables)?; + Ok(json!({ + "ok": true, + "handled": response.handled, + "backend": response.backend, + "materialized": response.materialized, + "fallback": response.fallback, + "result": response.result + })) + } "/query" => { if config::is_offline(paths) { return Ok(json!({ diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 9e1a64f1..0234b36c 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -8,6 +8,9 @@ use std::process::{Command, Stdio}; use crate::config::{parse_host_port, ClonePaths, Manifest}; use crate::remote::{shell_quote, RemoteClient, RemoteQueryResult}; +use crate::row_cow::{ + self, CowQueryResult, PkValue, Row, RowCowBackend, RowCowExecution, RowCowPlan, +}; use crate::sql; #[derive(Debug, Default, Serialize, Deserialize)] @@ -149,7 +152,7 @@ pub fn materialize_tables( if state.materialized_tables.contains(&table) { continue; } - materialize_one_table(remote, manifest, &table) + materialize_one_table(remote, manifest, paths, &table) .with_context(|| format!("materialize table {}", table))?; state.materialized_tables.insert(table.clone()); changed.push(table); @@ -358,7 +361,353 @@ pub struct RouteDecision { pub materialized: Vec, } -fn materialize_one_table(remote: 
&RemoteClient, manifest: &Manifest, table: &str) -> Result<()> { +#[derive(Debug, Serialize)] +pub struct RowCowResponse { + pub handled: bool, + pub backend: String, + pub materialized: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub fallback: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub result: Option, +} + +pub fn row_cow_query( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + sql_text: &str, + tables: &[String], +) -> Result { + let mut backend = MysqlRowCowBackend { remote, manifest }; + match row_cow::execute_row_cow(&mut backend, sql_text)? { + RowCowExecution::Select(result) => Ok(RowCowResponse { + handled: true, + backend: "cow".to_string(), + materialized: Vec::new(), + fallback: None, + result: Some(result), + }), + RowCowExecution::PreparedLocalWrite { .. } | RowCowExecution::LocalOnlyInsert { .. } => { + Ok(RowCowResponse { + handled: true, + backend: "local".to_string(), + materialized: Vec::new(), + fallback: None, + result: None, + }) + } + RowCowExecution::Fallback(plan) => { + let (fallback, plan_tables) = fallback_name_and_tables(plan); + if fallback == "PromoteTable" + && !plan_tables.is_empty() + && (sql::is_write_sql(sql_text) || sql::is_safe_read_sql(sql_text)) + { + let materialized = materialize_tables(remote, manifest, paths, &plan_tables)?; + return Ok(RowCowResponse { + handled: false, + backend: "local".to_string(), + materialized, + fallback: Some(fallback), + result: None, + }); + } + + if !tables.is_empty() && sql::is_write_sql(sql_text) { + let materialized = materialize_tables(remote, manifest, paths, tables)?; + return Ok(RowCowResponse { + handled: false, + backend: "local".to_string(), + materialized, + fallback: Some(fallback), + result: None, + }); + } + + Ok(RowCowResponse { + handled: false, + backend: "fallback".to_string(), + materialized: Vec::new(), + fallback: Some(fallback), + result: None, + }) + } + } +} + +fn fallback_name_and_tables(plan: RowCowPlan) -> 
(String, Vec) { + match plan { + RowCowPlan::PromoteTable { tables, .. } => ("PromoteTable".to_string(), tables), + RowCowPlan::Unsupported { .. } => ("Unsupported".to_string(), Vec::new()), + RowCowPlan::RowLevel(_) => ("RowLevel".to_string(), Vec::new()), + } +} + +struct MysqlRowCowBackend<'a> { + remote: &'a RemoteClient, + manifest: &'a Manifest, +} + +impl RowCowBackend for MysqlRowCowBackend<'_> { + fn remote_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + let sql_text = row_cow::select_all_by_pk_sql(table, pk_column, pk_values)?; + let result = remote_readonly_query(self.remote, &sql_text)?; + if !result.ok { + return Err(anyhow!("remote row-COW select failed: {}", result.error)); + } + Ok(remote_query_to_cow_result(result)) + } + + fn local_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + let sql_text = format!( + "SELECT * FROM {} WHERE {};", + qualified_table(self.manifest, table), + row_cow::pk_values_where_sql(pk_column, pk_values)? + ); + local_query_result(self.manifest, &sql_text) + } + + fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result { + validate_table_name(table)?; + if rows.is_empty() { + return Ok(0); + } + + let mut columns = Vec::new(); + for row in rows { + for column in row.keys() { + if !columns.iter().any(|existing| existing == column) { + columns.push(column.clone()); + } + } + } + + let column_sql = columns + .iter() + .map(|column| row_cow::quote_identifier(column)) + .collect::>>()? 
+ .join(", "); + let values_sql = rows + .iter() + .map(|row| { + let values = columns + .iter() + .map(|column| mysql_json_value(row.get(column))) + .collect::>() + .join(", "); + format!("({values})") + }) + .collect::>() + .join(", "); + let sql_text = format!( + "REPLACE INTO {} ({column_sql}) VALUES {values_sql};", + qualified_table(self.manifest, table), + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(rows.len()) + } + + fn local_delete_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + let sql_text = format!( + "DELETE FROM {} WHERE {};", + qualified_table(self.manifest, table), + row_cow::pk_values_where_sql(pk_column, pk_values)? + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(pk_values.len()) + } + + fn local_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + ensure_row_cow_meta_table(self.manifest)?; + if pk_values.is_empty() { + return Ok(0); + } + + let values_sql = pk_values + .iter() + .map(|value| { + format!( + "('{}', '{}', '{}')", + mysql_string_literal(table), + mysql_string_literal(pk_column), + mysql_string_literal(&value.0) + ) + }) + .collect::>() + .join(", "); + let sql_text = format!( + "REPLACE INTO {} (table_name, pk_column, pk_value) VALUES {values_sql};", + qualified_table(self.manifest, ROW_COW_TOMBSTONE_TABLE) + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(pk_values.len()) + } + + fn local_clear_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + validate_table_name(table)?; + ensure_row_cow_meta_table(self.manifest)?; + if pk_values.is_empty() { + return Ok(0); + } + + let sql_text = format!( + "DELETE FROM {} WHERE table_name='{}' AND pk_column='{}' AND {};", + qualified_table(self.manifest, ROW_COW_TOMBSTONE_TABLE), + mysql_string_literal(table), + mysql_string_literal(pk_column), + 
row_cow::pk_values_where_sql("pk_value", pk_values)? + ); + run_mysql_exec(self.manifest, &sql_text)?; + Ok(pk_values.len()) + } + + fn local_tombstones_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result> { + validate_table_name(table)?; + ensure_row_cow_meta_table(self.manifest)?; + if pk_values.is_empty() { + return Ok(BTreeSet::new()); + } + + let sql_text = format!( + "SELECT pk_value FROM {} WHERE table_name='{}' AND pk_column='{}' AND {};", + qualified_table(self.manifest, ROW_COW_TOMBSTONE_TABLE), + mysql_string_literal(table), + mysql_string_literal(pk_column), + row_cow::pk_values_where_sql("pk_value", pk_values)? + ); + let result = local_query_result(self.manifest, &sql_text)?; + Ok(result + .rows + .into_iter() + .filter_map(|row| { + row.get("pk_value") + .and_then(|value| value.as_str()) + .map(str::to_string) + }) + .map(PkValue) + .collect()) + } +} + +const ROW_COW_TOMBSTONE_TABLE: &str = "_wp_cow_row_tombstones"; + +#[derive(Debug, Clone, PartialEq, Eq)] +struct TombstoneGroup { + pk_column: String, + pk_values: Vec, +} + +fn ensure_row_cow_meta_table(manifest: &Manifest) -> Result<()> { + let sql_text = format!( + "CREATE TABLE IF NOT EXISTS {} (\ + table_name varchar(191) NOT NULL,\ + pk_column varchar(64) NOT NULL,\ + pk_value varchar(191) NOT NULL,\ + deleted_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,\ + PRIMARY KEY (table_name, pk_column, pk_value)\ + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;", + qualified_table(manifest, ROW_COW_TOMBSTONE_TABLE) + ); + run_mysql_exec(manifest, &sql_text) +} + +fn remote_query_to_cow_result(result: RemoteQueryResult) -> CowQueryResult { + CowQueryResult { + ok: result.ok, + error: result.error, + rows: result.rows, + fields: result.fields, + affected: result.affected, + } +} + +fn mysql_json_value(value: Option<&serde_json::Value>) -> String { + match value { + None | Some(serde_json::Value::Null) => "NULL".to_string(), + Some(serde_json::Value::Bool(value)) => { 
+ if *value { + "1".to_string() + } else { + "0".to_string() + } + } + Some(serde_json::Value::Number(value)) => value.to_string(), + Some(serde_json::Value::String(value)) => { + format!("'{}'", mysql_string_literal(value)) + } + Some(value) => format!("'{}'", mysql_string_literal(&value.to_string())), + } +} + +fn materialize_one_table( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + table: &str, +) -> Result<()> { + validate_table_name(table)?; + fs::create_dir_all(&paths.db)?; + let overlay_dump = paths.db.join(format!( + ".wp-cow-local-overlay-{}-{}.sql", + std::process::id(), + table + )); + dump_local_table_overlay(manifest, table, &overlay_dump) + .with_context(|| format!("dump local overlay rows for {}", table))?; + let tombstones = local_row_cow_tombstones_for_table(manifest, table) + .with_context(|| format!("load local row tombstones for {}", table))?; + + let materialized = materialize_remote_table(remote, manifest, table) + .with_context(|| format!("import remote lower table {}", table)); + if let Err(err) = materialized { + let _ = import_sql_file(manifest, &overlay_dump); + let _ = fs::remove_file(&overlay_dump); + return Err(err); + } + + import_sql_file(manifest, &overlay_dump) + .with_context(|| format!("restore local overlay rows for {}", table))?; + apply_row_cow_tombstones(manifest, table, &tombstones) + .with_context(|| format!("apply local row tombstones for {}", table))?; + fs::remove_file(&overlay_dump).with_context(|| format!("remove {}", overlay_dump.display()))?; + Ok(()) +} + +fn materialize_remote_table(remote: &RemoteClient, manifest: &Manifest, table: &str) -> Result<()> { let probe = &manifest.probe; ensure_probe_has_db(probe)?; let delete_sql = format!("DELETE FROM {};", qualified_table(manifest, table)); @@ -408,6 +757,119 @@ fn materialize_one_table(remote: &RemoteClient, manifest: &Manifest, table: &str Ok(()) } +fn dump_local_table_overlay(manifest: &Manifest, table: &str, path: &PathBuf) -> Result<()> 
{ + let dump_file = File::create(path).with_context(|| format!("create {}", path.display()))?; + let mut dump = local_mysqldump_command(manifest); + dump.arg("--single-transaction") + .arg("--quick") + .arg("--skip-lock-tables") + .arg("--no-create-info") + .arg("--replace") + .arg(&manifest.local_db.name) + .arg(table) + .stdout(Stdio::from(dump_file)) + .stderr(Stdio::piped()); + + let output = dump + .spawn() + .context("start local mysqldump overlay export")? + .wait_with_output() + .context("wait for local mysqldump overlay export")?; + if !output.status.success() { + return Err(anyhow!( + "local mysqldump overlay export failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + Ok(()) +} + +fn import_sql_file(manifest: &Manifest, path: &PathBuf) -> Result<()> { + let input = File::open(path).with_context(|| format!("open {}", path.display()))?; + let mut mysql = local_mysql_command(manifest); + mysql + .arg(&manifest.local_db.name) + .stdin(Stdio::from(input)) + .stderr(Stdio::piped()); + let output = mysql + .spawn() + .context("start local mysql import")? 
+ .wait_with_output() + .context("wait for local mysql import")?; + if !output.status.success() { + return Err(anyhow!( + "local mysql import failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + Ok(()) +} + +fn local_row_cow_tombstones_for_table( + manifest: &Manifest, + table: &str, +) -> Result> { + validate_table_name(table)?; + ensure_row_cow_meta_table(manifest)?; + let sql_text = format!( + "SELECT pk_column, pk_value FROM {} WHERE table_name='{}' ORDER BY pk_column, pk_value;", + qualified_table(manifest, ROW_COW_TOMBSTONE_TABLE), + mysql_string_literal(table) + ); + let result = local_query_result(manifest, &sql_text)?; + let mut grouped = std::collections::BTreeMap::>::new(); + for row in result.rows { + let Some(pk_column) = row.get("pk_column").and_then(|value| value.as_str()) else { + continue; + }; + let Some(pk_value) = row.get("pk_value").and_then(|value| value.as_str()) else { + continue; + }; + grouped + .entry(pk_column.to_string()) + .or_default() + .push(PkValue(pk_value.to_string())); + } + + Ok(grouped + .into_iter() + .map(|(pk_column, pk_values)| TombstoneGroup { + pk_column, + pk_values, + }) + .collect()) +} + +fn apply_row_cow_tombstones( + manifest: &Manifest, + table: &str, + tombstones: &[TombstoneGroup], +) -> Result<()> { + for sql_text in row_cow_tombstone_delete_sqls(manifest, table, tombstones)? { + run_mysql_exec(manifest, &sql_text)?; + } + Ok(()) +} + +fn row_cow_tombstone_delete_sqls( + manifest: &Manifest, + table: &str, + tombstones: &[TombstoneGroup], +) -> Result> { + validate_table_name(table)?; + tombstones + .iter() + .filter(|group| !group.pk_values.is_empty()) + .map(|group| { + Ok(format!( + "DELETE FROM {} WHERE {};", + qualified_table(manifest, table), + row_cow::pk_values_where_sql(&group.pk_column, &group.pk_values)? 
+ )) + }) + .collect() +} + fn local_first_admin_user( manifest: &Manifest, users_table: &str, @@ -734,6 +1196,19 @@ fn local_mysql_command(manifest: &Manifest) -> Command { command } +fn local_mysqldump_command(manifest: &Manifest) -> Command { + let mut command = Command::new("mysqldump"); + command.arg("--host").arg(&manifest.local_db.host); + command + .arg("--port") + .arg(manifest.local_db.port.to_string()); + command.arg("--user").arg(&manifest.local_db.user); + if !manifest.local_db.password.is_empty() { + command.env("MYSQL_PWD", &manifest.local_db.password); + } + command +} + fn qualified_table(manifest: &Manifest, table: &str) -> String { format!( "`{}`.`{}`", @@ -752,6 +1227,48 @@ fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { Ok(()) } +fn local_query_result(manifest: &Manifest, sql_text: &str) -> Result { + let output = local_mysql_command(manifest) + .arg("--batch") + .arg("--raw") + .arg("--execute") + .arg(sql_text) + .output() + .context("run local mysql query")?; + if !output.status.success() { + return Err(anyhow!( + "local mysql query failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut lines = stdout.lines(); + let Some(header) = lines.next() else { + return Ok(CowQueryResult::ok(Vec::new(), Vec::new())); + }; + let fields = header + .split('\t') + .map(|field| field.to_string()) + .collect::>(); + let mut rows = Vec::new(); + for line in lines { + let values = line.split('\t').collect::>(); + let mut row = Row::new(); + for (idx, field) in fields.iter().enumerate() { + let value = values.get(idx).copied().unwrap_or_default(); + if value == "NULL" { + row.insert(field.clone(), serde_json::Value::Null); + } else { + row.insert(field.clone(), serde_json::Value::String(value.to_string())); + } + } + rows.push(row); + } + + Ok(CowQueryResult::ok(rows, fields)) +} + fn mysql_string_literal(value: &str) -> String { value.replace('\\', 
"\\\\").replace('\'', "\\'") } @@ -847,6 +1364,33 @@ pub fn local_db_host_port(manifest: &Manifest) -> (String, u16) { mod tests { use super::*; + fn test_manifest() -> Manifest { + Manifest { + version: crate::config::MANIFEST_VERSION, + name: "calm".to_string(), + ssh: "example".to_string(), + remote_path: "/srv/www".to_string(), + remote_url: "https://example.com".to_string(), + local_url: "http://localhost:9481".to_string(), + created_at_unix: 1, + probe: crate::config::Probe::default(), + local_db: crate::config::LocalDb { + name: "cow_calm".to_string(), + user: "cow_calm".to_string(), + password: String::new(), + host: "127.0.0.1".to_string(), + port: 33071, + }, + remote_db_tunnel: crate::config::RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: 33072, + }, + control_url: "http://127.0.0.1:39070".to_string(), + cache_max_file_bytes: 1024, + remote_metadata_cache_ttl_secs: 30, + } + } + #[test] fn rejects_unsafe_table_names() { assert!(validate_table_name("wp_posts").is_ok()); @@ -919,33 +1463,29 @@ mod tests { #[test] fn qualifies_local_tables_for_exec_without_selected_database() { - let manifest = Manifest { - version: crate::config::MANIFEST_VERSION, - name: "calm".to_string(), - ssh: "example".to_string(), - remote_path: "/srv/www".to_string(), - remote_url: "https://example.com".to_string(), - local_url: "http://localhost:9481".to_string(), - created_at_unix: 1, - probe: crate::config::Probe::default(), - local_db: crate::config::LocalDb { - name: "cow_calm".to_string(), - user: "cow_calm".to_string(), - password: String::new(), - host: "127.0.0.1".to_string(), - port: 33071, - }, - remote_db_tunnel: crate::config::RemoteDbTunnel { - host: "127.0.0.1".to_string(), - port: 33072, - }, - control_url: "http://127.0.0.1:39070".to_string(), - cache_max_file_bytes: 1024, - remote_metadata_cache_ttl_secs: 30, - }; + let manifest = test_manifest(); assert_eq!( qualified_table(&manifest, "ady_options"), "`cow_calm`.`ady_options`" ); } + + #[test] + fn 
tombstone_delete_sql_preserves_overlay_on_table_promotion() { + let manifest = test_manifest(); + let sql = row_cow_tombstone_delete_sqls( + &manifest, + "wp_posts", + &[TombstoneGroup { + pk_column: "ID".to_string(), + pk_values: vec![PkValue("7".to_string()), PkValue("9".to_string())], + }], + ) + .unwrap(); + + assert_eq!( + sql, + vec!["DELETE FROM `cow_calm`.`wp_posts` WHERE `ID` IN ('7', '9');"] + ); + } } diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 11379066..f1c97a0f 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -219,6 +219,10 @@ function cow_remote_query_cache_enabled() { return '0' !== getenv( 'WPCOW_REMOTE_QUERY_CACHE' ) && defined( 'WPCOW_QUERY_CACHE_DIR' ) && '' !== WPCOW_QUERY_CACHE_DIR; } +function cow_row_cow_enabled() { + return '0' !== getenv( 'WPCOW_ROW_COW' ); +} + function cow_remote_query_cache_file( $query ) { if ( ! cow_remote_query_cache_enabled() ) { return ''; @@ -283,6 +287,17 @@ class Cow_DB extends wpdb { $tables = cow_tables_from_sql( $query ); if ( cow_is_write_sql( $query ) ) { + if ( cow_row_cow_enabled() ) { + $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); + if ( empty( $row_cow['ok'] ) ) { + $this->last_error = isset( $row_cow['error'] ) ? $row_cow['error'] : 'wp-cow row COW failed'; + cow_db_runtime_fail( 'control /row-cow failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + if ( ! empty( $row_cow['handled'] ) || ( isset( $row_cow['backend'] ) && 'local' === $row_cow['backend'] ) ) { + cow_remote_query_cache_clear(); + return parent::query( $query ); + } + } $result = cow_control_request( '/materialize', array( 'tables' => $tables ) ); if ( empty( $result['ok'] ) ) { $this->last_error = isset( $result['error'] ) ? 
$result['error'] : 'wp-cow materialization failed'; @@ -293,6 +308,19 @@ class Cow_DB extends wpdb { } if ( cow_is_safe_read_sql( $query ) ) { + if ( cow_row_cow_enabled() ) { + $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); + if ( empty( $row_cow['ok'] ) ) { + $this->last_error = isset( $row_cow['error'] ) ? $row_cow['error'] : 'wp-cow row COW failed'; + cow_db_runtime_fail( 'control /row-cow failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); + } + if ( ! empty( $row_cow['handled'] ) && isset( $row_cow['result'] ) && is_array( $row_cow['result'] ) ) { + return $this->cow_apply_remote_result( $row_cow['result'] ); + } + if ( isset( $row_cow['backend'] ) && 'local' === $row_cow['backend'] ) { + return parent::query( $query ); + } + } $route = cow_control_request( '/route', array( 'tables' => $tables, 'sql' => $query ) ); if ( ! empty( $route['ok'] ) && isset( $route['backend'] ) && 'remote' === $route['backend'] ) { return $this->cow_remote_query( $query ); diff --git a/experiments/remote-wp-cow/src/main.rs b/experiments/remote-wp-cow/src/main.rs index b0c18aea..7ba66499 100644 --- a/experiments/remote-wp-cow/src/main.rs +++ b/experiments/remote-wp-cow/src/main.rs @@ -6,6 +6,7 @@ mod fusefs; mod generate; mod overlay; mod remote; +mod row_cow; mod run; mod sql; diff --git a/experiments/remote-wp-cow/src/row_cow.rs b/experiments/remote-wp-cow/src/row_cow.rs new file mode 100644 index 00000000..5d7952cc --- /dev/null +++ b/experiments/remote-wp-cow/src/row_cow.rs @@ -0,0 +1,1715 @@ +use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; +use std::collections::{BTreeMap, BTreeSet}; + +pub type Row = Map; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +pub struct PkValue(pub String); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RowCowPlan { + RowLevel(RowCowOp), + PromoteTable { tables: Vec, reason: String }, + Unsupported { 
reason: String }, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RowCowOp { + Select(RowSelect), + Update(RowWrite), + Delete(RowWrite), + Insert(RowInsert), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RowSelect { + pub table: String, + pub pk_column: String, + pub pk_values: Vec, + pub projection: Projection, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RowWrite { + pub table: String, + pub pk_column: String, + pub pk_values: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RowInsert { + pub table: String, + pub pk_column: Option, + pub pk_values: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Projection { + All, + Columns(Vec), +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CowQueryResult { + pub ok: bool, + pub error: String, + pub rows: Vec, + pub fields: Vec, + pub affected: i64, +} + +impl CowQueryResult { + pub fn ok(rows: Vec, fields: Vec) -> Self { + Self { + affected: rows.len() as i64, + ok: true, + error: String::new(), + rows, + fields, + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum RowCowExecution { + Select(CowQueryResult), + PreparedLocalWrite { + table: String, + pk_column: Option, + pk_values: Vec, + copied_rows: usize, + }, + LocalOnlyInsert { + table: String, + }, + Fallback(RowCowPlan), +} + +pub trait RowCowBackend { + fn remote_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result; + + fn local_delete_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result; + + fn local_clear_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], 
+ ) -> Result; + + fn local_tombstones_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result>; +} + +pub fn execute_row_cow( + backend: &mut B, + sql_text: &str, +) -> Result { + match plan_sql(sql_text) { + RowCowPlan::RowLevel(RowCowOp::Select(select)) => { + Ok(RowCowExecution::Select(execute_select(backend, &select)?)) + } + RowCowPlan::RowLevel(RowCowOp::Update(write)) => { + let tombstones = + backend.local_tombstones_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + let local = + backend.local_select_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + let local_pks = local + .rows + .iter() + .filter_map(|row| row_pk_value(row, &write.pk_column)) + .collect::>(); + let copy_values = write + .pk_values + .iter() + .filter(|value| !tombstones.contains(*value) && !local_pks.contains(*value)) + .cloned() + .collect::>(); + let remote_rows = + backend.remote_select_by_pk(&write.table, &write.pk_column, ©_values)?; + let rows = rows_matching_pks(remote_rows.rows, &write.pk_column, ©_values); + let copied_rows = backend.local_upsert_rows(&write.table, &rows)?; + Ok(RowCowExecution::PreparedLocalWrite { + table: write.table, + pk_column: Some(write.pk_column), + pk_values: write.pk_values, + copied_rows, + }) + } + RowCowPlan::RowLevel(RowCowOp::Delete(write)) => { + backend.local_tombstone_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + backend.local_delete_by_pk(&write.table, &write.pk_column, &write.pk_values)?; + Ok(RowCowExecution::PreparedLocalWrite { + table: write.table, + pk_column: Some(write.pk_column), + pk_values: write.pk_values, + copied_rows: 0, + }) + } + RowCowPlan::RowLevel(RowCowOp::Insert(insert)) => { + if let Some(pk_column) = &insert.pk_column { + backend.local_clear_tombstone_by_pk(&insert.table, pk_column, &insert.pk_values)?; + } + Ok(RowCowExecution::LocalOnlyInsert { + table: insert.table, + }) + } + fallback => Ok(RowCowExecution::Fallback(fallback)), + } +} + +fn 
execute_select(backend: &mut B, select: &RowSelect) -> Result { + let tombstones = + backend.local_tombstones_by_pk(&select.table, &select.pk_column, &select.pk_values)?; + let remote = + backend.remote_select_by_pk(&select.table, &select.pk_column, &select.pk_values)?; + let local = backend.local_select_by_pk(&select.table, &select.pk_column, &select.pk_values)?; + + let mut merged = BTreeMap::::new(); + for row in remote.rows { + if let Some(pk) = row_pk_value(&row, &select.pk_column) { + if !tombstones.contains(&pk) { + merged.insert(pk, row); + } + } + } + for row in local.rows { + if let Some(pk) = row_pk_value(&row, &select.pk_column) { + merged.insert(pk, row); + } + } + + let mut rows = Vec::new(); + for value in &select.pk_values { + if let Some(row) = merged.remove(value) { + rows.push(row); + } + } + rows.extend(merged.into_values()); + + Ok(project_rows(rows, &select.projection)) +} + +fn rows_matching_pks(rows: Vec, pk_column: &str, pk_values: &[PkValue]) -> Vec { + let allowed = pk_values.iter().collect::>(); + rows.into_iter() + .filter(|row| { + row_pk_value(row, pk_column) + .as_ref() + .map(|value| allowed.contains(value)) + .unwrap_or(false) + }) + .collect() +} + +fn project_rows(rows: Vec, projection: &Projection) -> CowQueryResult { + match projection { + Projection::All => { + let mut fields = Vec::new(); + for row in &rows { + for key in row.keys() { + if !fields.iter().any(|field| field == key) { + fields.push(key.clone()); + } + } + } + CowQueryResult::ok(rows, fields) + } + Projection::Columns(columns) => { + let rows = rows + .into_iter() + .map(|row| { + let mut projected = Row::new(); + for column in columns { + if let Some(value) = row_value_ci(&row, column) { + projected.insert(column.clone(), value.clone()); + } + } + projected + }) + .collect::>(); + CowQueryResult::ok(rows, columns.clone()) + } + } +} + +pub fn plan_sql(sql_text: &str) -> RowCowPlan { + let Some(tokens) = lex(sql_text) else { + return RowCowPlan::Unsupported { + 
reason: "malformed SQL".to_string(), + }; + }; + if tokens.is_empty() { + return RowCowPlan::Unsupported { + reason: "empty SQL".to_string(), + }; + } + + if token_is(&tokens[0], "SELECT") { + return plan_select(&tokens); + } + if token_is(&tokens[0], "UPDATE") { + return plan_update(&tokens); + } + if token_is(&tokens[0], "DELETE") { + return plan_delete(&tokens); + } + if token_is(&tokens[0], "INSERT") { + return plan_insert(&tokens); + } + + RowCowPlan::Unsupported { + reason: format!("{} is not a row-level COW statement", tokens[0].text), + } +} + +fn plan_select(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "UNION") { + return promote(tables, "UNION reads need table promotion"); + } + if contains_keyword(tokens, "JOIN") { + return promote(tables, "join reads need table promotion"); + } + if contains_keyword(tokens, "GROUP") || contains_keyword(tokens, "HAVING") { + return promote(tables, "grouped reads need table promotion"); + } + if contains_keyword(tokens, "ORDER") || contains_keyword(tokens, "LIMIT") { + return promote(tables, "ordered or limited reads need table promotion"); + } + if contains_keyword(tokens, "DISTINCT") { + return promote(tables, "distinct reads need table promotion"); + } + + let Some(from_idx) = find_keyword(tokens, "FROM") else { + return unsupported("SELECT without FROM"); + }; + if select_has_aggregate(&tokens[1..from_idx]) { + return promote(tables, "aggregate reads need table promotion"); + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, from_idx + 1) else { + return unsupported("could not parse SELECT table"); + }; + if next_idx < tokens.len() + && !token_is(&tokens[next_idx], "WHERE") + && !is_statement_end(&tokens[next_idx]) + { + return promote(tables, "multi-table SELECT needs table promotion"); + } + let Some(where_idx) = find_keyword(tokens, "WHERE") else { + return promote(vec![table], "SELECT without primary-key predicate"); + }; + let 
predicate_tokens = &tokens[where_idx + 1..]; + let Some(predicate) = parse_pk_predicate(predicate_tokens) else { + return promote( + vec![table], + "SELECT predicate is not primary-key equality or IN", + ); + }; + if !qualifier_matches_table(predicate.qualifier.as_deref(), &table, alias.as_deref()) { + return promote(vec![table], "SELECT predicate qualifier is ambiguous"); + } + let Some(pk_column) = canonical_pk_column(&table, &predicate.column) else { + return promote( + vec![table], + "SELECT predicate does not use a supported primary key", + ); + }; + let Some(projection) = parse_projection(&tokens[1..from_idx], &table, alias.as_deref()) else { + return promote(vec![table], "SELECT projection cannot be row-merged safely"); + }; + + RowCowPlan::RowLevel(RowCowOp::Select(RowSelect { + table, + pk_column, + pk_values: predicate.values, + projection, + })) +} + +fn plan_update(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "JOIN") { + return promote(tables, "join updates need table promotion"); + } + if contains_keyword(tokens, "SELECT") { + return promote(tables, "subquery updates need table promotion"); + } + + let mut table_idx = 1; + while table_idx < tokens.len() + && (token_is(&tokens[table_idx], "LOW_PRIORITY") || token_is(&tokens[table_idx], "IGNORE")) + { + table_idx += 1; + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, table_idx) else { + return unsupported("could not parse UPDATE table"); + }; + let Some(set_idx) = find_keyword(tokens, "SET") else { + return unsupported("UPDATE without SET"); + }; + if next_idx < set_idx { + return promote(vec![table], "multi-table UPDATE needs table promotion"); + } + let Some(where_idx) = find_keyword(tokens, "WHERE") else { + return promote(vec![table], "UPDATE without primary-key predicate"); + }; + let predicate_tokens = &tokens[where_idx + 1..]; + let Some(predicate) = parse_pk_predicate(predicate_tokens) else { + return promote( + 
vec![table], + "UPDATE predicate is not primary-key equality or IN", + ); + }; + if !qualifier_matches_table(predicate.qualifier.as_deref(), &table, alias.as_deref()) { + return promote(vec![table], "UPDATE predicate qualifier is ambiguous"); + } + let Some(pk_column) = canonical_pk_column(&table, &predicate.column) else { + return promote( + vec![table], + "UPDATE predicate does not use a supported primary key", + ); + }; + + RowCowPlan::RowLevel(RowCowOp::Update(RowWrite { + table, + pk_column, + pk_values: predicate.values, + })) +} + +fn plan_delete(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "JOIN") || contains_keyword(tokens, "USING") { + return promote(tables, "multi-table DELETE needs table promotion"); + } + if tokens.get(1).is_none_or(|token| !token_is(token, "FROM")) { + return promote(tables, "multi-table DELETE needs table promotion"); + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, 2) else { + return unsupported("could not parse DELETE table"); + }; + let Some(where_idx) = find_keyword(tokens, "WHERE") else { + return promote(vec![table], "DELETE without primary-key predicate"); + }; + if next_idx < where_idx { + return promote(vec![table], "multi-table DELETE needs table promotion"); + } + let predicate_tokens = &tokens[where_idx + 1..]; + let Some(predicate) = parse_pk_predicate(predicate_tokens) else { + return promote( + vec![table], + "DELETE predicate is not primary-key equality or IN", + ); + }; + if !qualifier_matches_table(predicate.qualifier.as_deref(), &table, alias.as_deref()) { + return promote(vec![table], "DELETE predicate qualifier is ambiguous"); + } + let Some(pk_column) = canonical_pk_column(&table, &predicate.column) else { + return promote( + vec![table], + "DELETE predicate does not use a supported primary key", + ); + }; + + RowCowPlan::RowLevel(RowCowOp::Delete(RowWrite { + table, + pk_column, + pk_values: predicate.values, + })) +} + +fn 
plan_insert(tokens: &[Token]) -> RowCowPlan { + let tables = extract_table_refs(tokens); + if contains_keyword(tokens, "SELECT") { + return promote(tables, "INSERT ... SELECT needs table promotion"); + } + + let mut idx = 1; + while idx < tokens.len() && token_is(&tokens[idx], "IGNORE") { + idx += 1; + } + if idx < tokens.len() && token_is(&tokens[idx], "INTO") { + idx += 1; + } + let Some((table, alias, next_idx)) = parse_table_ref(tokens, idx) else { + return unsupported("could not parse INSERT table"); + }; + if alias.is_some() { + return unsupported("INSERT aliases are not row-level safe"); + } + if !insert_has_values_clause(tokens, next_idx) { + return unsupported("INSERT without VALUES is not row-level safe"); + } + let (pk_column, pk_values) = expected_pk_for_table(&table) + .map(|pk_column| { + ( + Some(pk_column.to_string()), + parse_insert_pk_values(tokens, next_idx, pk_column), + ) + }) + .unwrap_or((None, Vec::new())); + RowCowPlan::RowLevel(RowCowOp::Insert(RowInsert { + table, + pk_column, + pk_values, + })) +} + +fn insert_has_values_clause(tokens: &[Token], mut idx: usize) -> bool { + if idx >= tokens.len() { + return false; + } + if tokens.get(idx).and_then(token_symbol) == Some('(') { + let mut depth = 0_i32; + while idx < tokens.len() { + match token_symbol(&tokens[idx]) { + Some('(') => { + depth += 1; + idx += 1; + } + Some(')') => { + depth -= 1; + idx += 1; + if depth == 0 { + break; + } + } + _ => idx += 1, + } + } + } + while idx < tokens.len() && token_symbol(&tokens[idx]) == Some(';') { + idx += 1; + } + idx < tokens.len() && (token_is(&tokens[idx], "VALUES") || token_is(&tokens[idx], "VALUE")) +} + +fn parse_insert_pk_values(tokens: &[Token], mut idx: usize, pk_column: &str) -> Vec { + if tokens.get(idx).and_then(token_symbol) != Some('(') { + return Vec::new(); + } + idx += 1; + + let mut columns = Vec::new(); + loop { + let Some(column) = tokens.get(idx).and_then(token_identifier) else { + return Vec::new(); + }; + 
columns.push(column.to_string()); + idx += 1; + match tokens.get(idx).and_then(token_symbol) { + Some(',') => idx += 1, + Some(')') => { + idx += 1; + break; + } + _ => return Vec::new(), + } + } + + let Some(pk_idx) = columns + .iter() + .position(|column| column.eq_ignore_ascii_case(pk_column)) + else { + return Vec::new(); + }; + + while idx < tokens.len() + && !token_is(&tokens[idx], "VALUES") + && !token_is(&tokens[idx], "VALUE") + { + idx += 1; + } + if idx >= tokens.len() { + return Vec::new(); + } + idx += 1; + + let mut pk_values = Vec::new(); + while idx < tokens.len() { + if token_symbol(&tokens[idx]) != Some('(') { + break; + } + idx += 1; + let mut value_idx = 0; + loop { + if value_idx == pk_idx { + if let Some((value, _next_idx)) = parse_pk_value(tokens, idx) { + pk_values.push(value); + } + } + + idx = skip_insert_value(tokens, idx); + match tokens.get(idx).and_then(token_symbol) { + Some(',') => { + value_idx += 1; + idx += 1; + } + Some(')') => { + idx += 1; + break; + } + _ => return Vec::new(), + } + } + match tokens.get(idx).and_then(token_symbol) { + Some(',') => idx += 1, + _ => break, + } + } + + dedupe_pk_values(pk_values) +} + +fn skip_insert_value(tokens: &[Token], mut idx: usize) -> usize { + let mut depth = 0_i32; + while idx < tokens.len() { + match token_symbol(&tokens[idx]) { + Some('(') => { + depth += 1; + idx += 1; + } + Some(')') if depth == 0 => break, + Some(')') => { + depth -= 1; + idx += 1; + } + Some(',') if depth == 0 => break, + _ => idx += 1, + } + } + idx +} + +fn parse_projection(tokens: &[Token], table: &str, alias: Option<&str>) -> Option { + if tokens.len() == 1 && token_symbol(&tokens[0]) == Some('*') { + return Some(Projection::All); + } + if tokens + .iter() + .any(|token| token_symbol(token) == Some('(') || token_symbol(token) == Some(')')) + { + return None; + } + + let parts = split_top_level_commas(tokens); + let mut columns = Vec::new(); + for part in &parts { + if part.len() == 3 + && token_symbol(&part[1]) 
== Some('.') + && token_symbol(&part[2]) == Some('*') + { + let qualifier = token_identifier(&part[0])?; + if parts.len() == 1 && qualifier_matches_table(Some(qualifier), table, alias) { + return Some(Projection::All); + } + return None; + } + let mut idx = 0; + let Some((column, next_idx)) = parse_column_ref(part, idx) else { + return None; + }; + if !qualifier_matches_table(column.qualifier.as_deref(), table, alias) { + return None; + } + idx = next_idx; + if idx < part.len() { + return None; + } + columns.push(column.name); + } + if columns.is_empty() { + None + } else { + Some(Projection::Columns(columns)) + } +} + +fn select_has_aggregate(tokens: &[Token]) -> bool { + const AGGREGATES: &[&str] = &["COUNT", "SUM", "AVG", "MIN", "MAX", "GROUP_CONCAT"]; + tokens.windows(2).any(|window| { + AGGREGATES.iter().any(|kw| token_is(&window[0], kw)) + && token_symbol(&window[1]) == Some('(') + }) +} + +#[derive(Debug)] +struct PkPredicate { + qualifier: Option, + column: String, + values: Vec, +} + +fn parse_pk_predicate(tokens: &[Token]) -> Option { + let tokens = trim_outer_parens(tokens); + let (column, mut idx) = parse_column_ref(tokens, 0)?; + if idx >= tokens.len() { + return None; + } + + let values = if token_symbol(&tokens[idx]) == Some('=') { + idx += 1; + let (value, next_idx) = parse_pk_value(tokens, idx)?; + idx = next_idx; + vec![value] + } else if token_is(&tokens[idx], "IN") { + idx += 1; + if tokens.get(idx).and_then(token_symbol) != Some('(') { + return None; + } + idx += 1; + let mut values = Vec::new(); + loop { + let (value, next_idx) = parse_pk_value(tokens, idx)?; + values.push(value); + idx = next_idx; + match tokens.get(idx).and_then(token_symbol) { + Some(',') => idx += 1, + Some(')') => { + idx += 1; + break; + } + _ => return None, + } + } + values + } else { + return None; + }; + + while idx < tokens.len() && token_symbol(&tokens[idx]) == Some(';') { + idx += 1; + } + if idx != tokens.len() || values.is_empty() { + return None; + } + + 
Some(PkPredicate { + qualifier: column.qualifier, + column: column.name, + values: dedupe_pk_values(values), + }) +} + +fn dedupe_pk_values(values: Vec) -> Vec { + let mut deduped = Vec::new(); + let mut seen = BTreeSet::new(); + for value in values { + if seen.insert(value.clone()) { + deduped.push(value); + } + } + deduped +} + +#[derive(Debug)] +struct ColumnRef { + qualifier: Option, + name: String, +} + +fn parse_column_ref(tokens: &[Token], idx: usize) -> Option<(ColumnRef, usize)> { + let first = token_identifier(tokens.get(idx)?)?; + let next_idx = idx + 1; + if next_idx + 1 < tokens.len() && token_symbol(&tokens[next_idx]) == Some('.') { + let second = token_identifier(&tokens[next_idx + 1])?; + if next_idx + 2 < tokens.len() && token_symbol(&tokens[next_idx + 2]) == Some('.') { + return None; + } + return Some(( + ColumnRef { + qualifier: Some(first.to_string()), + name: second.to_string(), + }, + next_idx + 2, + )); + } + Some(( + ColumnRef { + qualifier: None, + name: first.to_string(), + }, + next_idx, + )) +} + +fn qualifier_matches_table(qualifier: Option<&str>, table: &str, alias: Option<&str>) -> bool { + let Some(qualifier) = qualifier else { + return true; + }; + qualifier.eq_ignore_ascii_case(table) + || alias + .map(|alias| qualifier.eq_ignore_ascii_case(alias)) + .unwrap_or(false) +} + +fn parse_pk_value(tokens: &[Token], idx: usize) -> Option<(PkValue, usize)> { + let token = tokens.get(idx)?; + match &token.kind { + TokenKind::Number | TokenKind::String => Some((PkValue(token.text.clone()), idx + 1)), + _ => None, + } +} + +fn trim_outer_parens(mut tokens: &[Token]) -> &[Token] { + loop { + if tokens.len() < 2 + || token_symbol(&tokens[0]) != Some('(') + || token_symbol(&tokens[tokens.len() - 1]) != Some(')') + { + return tokens; + } + let mut depth = 0_i32; + let mut wraps = true; + for (idx, token) in tokens.iter().enumerate() { + match token_symbol(token) { + Some('(') => depth += 1, + Some(')') => { + depth -= 1; + if depth == 0 && idx 
!= tokens.len() - 1 { + wraps = false; + break; + } + } + _ => {} + } + if depth < 0 { + wraps = false; + break; + } + } + if !wraps || depth != 0 { + return tokens; + } + tokens = &tokens[1..tokens.len() - 1]; + } +} + +fn parse_table_ref(tokens: &[Token], idx: usize) -> Option<(String, Option, usize)> { + let first = token_identifier(tokens.get(idx)?)?; + let mut table = first.to_string(); + let mut next_idx = idx + 1; + if next_idx + 1 < tokens.len() && token_symbol(&tokens[next_idx]) == Some('.') { + let second = token_identifier(&tokens[next_idx + 1])?; + table = second.to_string(); + next_idx += 2; + } + + let mut alias = None; + if next_idx < tokens.len() && token_is(&tokens[next_idx], "AS") { + next_idx += 1; + if let Some(value) = tokens.get(next_idx).and_then(token_identifier) { + alias = Some(value.to_string()); + next_idx += 1; + } + } else if next_idx < tokens.len() { + let token = &tokens[next_idx]; + if token_identifier(token).is_some() + && !is_table_boundary_keyword(token) + && !token_is(token, "SET") + { + alias = Some(token.text.clone()); + next_idx += 1; + } + } + + Some((table, alias, next_idx)) +} + +fn is_table_boundary_keyword(token: &Token) -> bool { + [ + "WHERE", "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS", "ON", "USING", "ORDER", + "GROUP", "HAVING", "LIMIT", "SET", "VALUES", "VALUE", + ] + .iter() + .any(|kw| token_is(token, kw)) +} + +fn split_top_level_commas(tokens: &[Token]) -> Vec<&[Token]> { + let mut out = Vec::new(); + let mut start = 0; + let mut depth = 0_i32; + for (idx, token) in tokens.iter().enumerate() { + match token_symbol(token) { + Some('(') => depth += 1, + Some(')') => depth -= 1, + Some(',') if depth == 0 => { + out.push(&tokens[start..idx]); + start = idx + 1; + } + _ => {} + } + } + out.push(&tokens[start..]); + out.into_iter().filter(|part| !part.is_empty()).collect() +} + +fn extract_table_refs(tokens: &[Token]) -> Vec { + let mut tables = Vec::new(); + let mut idx = 0; + while idx < tokens.len() { + if 
token_is(&tokens[idx], "FROM") { + idx = collect_comma_table_refs(tokens, idx + 1, &mut tables); + continue; + } + + let table_idx = if token_is(&tokens[idx], "JOIN") + || token_is(&tokens[idx], "INTO") + || token_is(&tokens[idx], "TABLE") + { + Some(idx + 1) + } else if token_is(&tokens[idx], "UPDATE") { + let mut next = idx + 1; + while next < tokens.len() + && (token_is(&tokens[next], "LOW_PRIORITY") || token_is(&tokens[next], "IGNORE")) + { + next += 1; + } + Some(next) + } else { + None + }; + + if let Some(table_idx) = table_idx { + if let Some((table, _alias, _next_idx)) = parse_table_ref(tokens, table_idx) { + push_table_ref(&mut tables, table); + } + } + idx += 1; + } + tables +} + +fn collect_comma_table_refs(tokens: &[Token], mut idx: usize, tables: &mut Vec) -> usize { + while let Some((table, _alias, next_idx)) = parse_table_ref(tokens, idx) { + push_table_ref(tables, table); + idx = next_idx; + if tokens.get(idx).and_then(token_symbol) == Some(',') { + idx += 1; + continue; + } + break; + } + idx +} + +fn push_table_ref(tables: &mut Vec, table: String) { + if !tables.iter().any(|existing| existing == &table) { + tables.push(table); + } +} + +pub fn is_supported_pk_column(column: &str) -> bool { + [ + "ID", + "option_id", + "umeta_id", + "meta_id", + "term_id", + "term_taxonomy_id", + "object_id", + "comment_ID", + "link_id", + ] + .iter() + .any(|candidate| candidate.eq_ignore_ascii_case(column)) +} + +pub fn expected_pk_for_table(table: &str) -> Option<&'static str> { + let lower = table.to_ascii_lowercase(); + if lower == "posts" || lower.ends_with("_posts") { + return Some("ID"); + } + if lower == "users" || lower.ends_with("_users") { + return Some("ID"); + } + if lower == "options" || lower.ends_with("_options") { + return Some("option_id"); + } + if lower == "usermeta" || lower.ends_with("_usermeta") { + return Some("umeta_id"); + } + if lower == "postmeta" || lower.ends_with("_postmeta") { + return Some("meta_id"); + } + if lower == 
"commentmeta" || lower.ends_with("_commentmeta") { + return Some("meta_id"); + } + if lower == "termmeta" || lower.ends_with("_termmeta") { + return Some("meta_id"); + } + if lower == "terms" || lower.ends_with("_terms") { + return Some("term_id"); + } + if lower == "term_taxonomy" || lower.ends_with("_term_taxonomy") { + return Some("term_taxonomy_id"); + } + if lower == "term_relationships" || lower.ends_with("_term_relationships") { + return Some("object_id"); + } + if lower == "comments" || lower.ends_with("_comments") { + return Some("comment_ID"); + } + if lower == "links" || lower.ends_with("_links") { + return Some("link_id"); + } + None +} + +fn canonical_pk_column(table: &str, column: &str) -> Option { + if let Some(expected) = expected_pk_for_table(table) { + if expected.eq_ignore_ascii_case(column) { + return Some(expected.to_string()); + } + return None; + } + + if !is_supported_pk_column(column) { + return None; + } + + [ + "ID", + "option_id", + "umeta_id", + "meta_id", + "term_id", + "term_taxonomy_id", + "object_id", + "comment_ID", + "link_id", + ] + .iter() + .find(|candidate| candidate.eq_ignore_ascii_case(column)) + .map(|candidate| (*candidate).to_string()) +} + +pub fn row_pk_value(row: &Row, pk_column: &str) -> Option { + row_value_ci(row, pk_column).and_then(value_to_pk) +} + +fn row_value_ci<'a>(row: &'a Row, column: &str) -> Option<&'a Value> { + row.get(column).or_else(|| { + row.iter() + .find(|(key, _value)| key.eq_ignore_ascii_case(column)) + .map(|(_key, value)| value) + }) +} + +fn value_to_pk(value: &Value) -> Option { + match value { + Value::String(value) => Some(PkValue(value.clone())), + Value::Number(value) => Some(PkValue(value.to_string())), + _ => None, + } +} + +fn promote(tables: Vec, reason: &str) -> RowCowPlan { + RowCowPlan::PromoteTable { + tables, + reason: reason.to_string(), + } +} + +fn unsupported(reason: &str) -> RowCowPlan { + RowCowPlan::Unsupported { + reason: reason.to_string(), + } +} + +fn 
find_keyword(tokens: &[Token], keyword: &str) -> Option { + tokens.iter().position(|token| token_is(token, keyword)) +} + +fn contains_keyword(tokens: &[Token], keyword: &str) -> bool { + find_keyword(tokens, keyword).is_some() +} + +fn is_statement_end(token: &Token) -> bool { + token_symbol(token) == Some(';') +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum TokenKind { + Word, + Number, + String, + Symbol(char), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct Token { + text: String, + kind: TokenKind, +} + +fn token_is(token: &Token, keyword: &str) -> bool { + matches!(token.kind, TokenKind::Word) && token.text.eq_ignore_ascii_case(keyword) +} + +fn token_identifier(token: &Token) -> Option<&str> { + match token.kind { + TokenKind::Word => Some(token.text.as_str()), + _ => None, + } +} + +fn token_symbol(token: &Token) -> Option { + match token.kind { + TokenKind::Symbol(ch) => Some(ch), + _ => None, + } +} + +fn lex(sql: &str) -> Option> { + let chars = sql.chars().collect::>(); + let mut tokens = Vec::new(); + let mut idx = 0; + + while idx < chars.len() { + let ch = chars[idx]; + if ch.is_whitespace() { + idx += 1; + continue; + } + if ch == '-' && chars.get(idx + 1) == Some(&'-') { + idx += 2; + while idx < chars.len() && chars[idx] != '\n' { + idx += 1; + } + continue; + } + if ch == '#' { + idx += 1; + while idx < chars.len() && chars[idx] != '\n' { + idx += 1; + } + continue; + } + if ch == '/' && chars.get(idx + 1) == Some(&'*') { + idx += 2; + while idx + 1 < chars.len() && !(chars[idx] == '*' && chars[idx + 1] == '/') { + idx += 1; + } + if idx + 1 >= chars.len() { + return None; + } + idx = (idx + 2).min(chars.len()); + continue; + } + if ch == '`' { + idx += 1; + let mut text = String::new(); + let mut closed = false; + while idx < chars.len() { + if chars[idx] == '`' { + if chars.get(idx + 1) == Some(&'`') { + text.push('`'); + idx += 2; + continue; + } + idx += 1; + closed = true; + break; + } + text.push(chars[idx]); + idx += 1; + } + if 
!closed { + return None; + } + tokens.push(Token { + text, + kind: TokenKind::Word, + }); + continue; + } + if ch == '\'' || ch == '"' { + let quote = ch; + idx += 1; + let mut text = String::new(); + let mut closed = false; + while idx < chars.len() { + if chars[idx] == '\\' { + if let Some(next) = chars.get(idx + 1) { + text.push(*next); + idx += 2; + continue; + } + return None; + } + if chars[idx] == quote { + if chars.get(idx + 1) == Some("e) { + text.push(quote); + idx += 2; + continue; + } + idx += 1; + closed = true; + break; + } + text.push(chars[idx]); + idx += 1; + } + if !closed { + return None; + } + tokens.push(Token { + text, + kind: TokenKind::String, + }); + continue; + } + if ch.is_ascii_digit() { + let start = idx; + idx += 1; + while idx < chars.len() && chars[idx].is_ascii_digit() { + idx += 1; + } + tokens.push(Token { + text: chars[start..idx].iter().collect(), + kind: TokenKind::Number, + }); + continue; + } + if ch.is_ascii_alphabetic() || ch == '_' || ch == '$' { + let start = idx; + idx += 1; + while idx < chars.len() + && (chars[idx].is_ascii_alphanumeric() || chars[idx] == '_' || chars[idx] == '$') + { + idx += 1; + } + tokens.push(Token { + text: chars[start..idx].iter().collect(), + kind: TokenKind::Word, + }); + continue; + } + + tokens.push(Token { + text: ch.to_string(), + kind: TokenKind::Symbol(ch), + }); + idx += 1; + } + + Some(tokens) +} + +pub fn quote_identifier(identifier: &str) -> Result { + if identifier.is_empty() + || !identifier + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$') + { + return Err(anyhow!("unsafe SQL identifier {identifier}")); + } + Ok(format!("`{}`", identifier.replace('`', "``"))) +} + +pub fn pk_values_where_sql(pk_column: &str, pk_values: &[PkValue]) -> Result { + let column = quote_identifier(pk_column)?; + if pk_values.is_empty() { + return Ok("1=0".to_string()); + } + let values = pk_values + .iter() + .map(|value| format!("'{}'", mysql_string_literal(&value.0))) + 
.collect::>() + .join(", "); + Ok(format!("{column} IN ({values})")) +} + +pub fn select_all_by_pk_sql(table: &str, pk_column: &str, pk_values: &[PkValue]) -> Result { + Ok(format!( + "SELECT * FROM {} WHERE {};", + quote_identifier(table)?, + pk_values_where_sql(pk_column, pk_values)? + )) +} + +pub fn mysql_string_literal(value: &str) -> String { + value.replace('\\', "\\\\").replace('\'', "\\'") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, Clone, PartialEq, Eq)] + enum RemoteCall { + Select { + table: String, + pk_column: String, + pk_values: Vec, + }, + } + + #[derive(Debug, Default)] + struct FakeCowBackend { + remote: BTreeMap>, + local: BTreeMap>, + tombstones: BTreeSet<(String, String, PkValue)>, + remote_calls: Vec, + } + + impl FakeCowBackend { + fn insert_remote( + &mut self, + table: &str, + pk_column: &str, + pk: &str, + pairs: &[(&str, &str)], + ) { + let row = row(pk_column, pk, pairs); + self.remote + .entry(table.to_string()) + .or_default() + .insert(PkValue(pk.to_string()), row); + } + + fn insert_local(&mut self, table: &str, pk_column: &str, pk: &str, pairs: &[(&str, &str)]) { + let row = row(pk_column, pk, pairs); + self.local + .entry(table.to_string()) + .or_default() + .insert(PkValue(pk.to_string()), row); + } + + fn assert_no_remote_writes(&self) { + assert!(self + .remote_calls + .iter() + .all(|call| matches!(call, RemoteCall::Select { .. }))); + } + + fn remote_select_values(&self) -> Vec> { + self.remote_calls + .iter() + .map(|call| match call { + RemoteCall::Select { pk_values, .. 
} => pk_values.clone(), + }) + .collect() + } + } + + impl RowCowBackend for FakeCowBackend { + fn remote_select_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + self.remote_calls.push(RemoteCall::Select { + table: table.to_string(), + pk_column: pk_column.to_string(), + pk_values: pk_values.to_vec(), + }); + let rows = select_from_table(self.remote.get(table), pk_values); + Ok(CowQueryResult::ok(rows, Vec::new())) + } + + fn local_select_by_pk( + &mut self, + table: &str, + _pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let rows = select_from_table(self.local.get(table), pk_values); + Ok(CowQueryResult::ok(rows, Vec::new())) + } + + fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result { + let table_rows = self.local.entry(table.to_string()).or_default(); + for row in rows { + let pk = row_pk_value(row, expected_pk_for_table(table).unwrap()).unwrap(); + table_rows.insert(pk, row.clone()); + } + Ok(rows.len()) + } + + fn local_delete_by_pk( + &mut self, + table: &str, + _pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let Some(rows) = self.local.get_mut(table) else { + return Ok(0); + }; + let mut deleted = 0; + for pk in pk_values { + if rows.remove(pk).is_some() { + deleted += 1; + } + } + Ok(deleted) + } + + fn local_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let mut added = 0; + for value in pk_values { + if self + .tombstones + .insert((table.to_string(), pk_column.to_string(), value.clone())) + { + added += 1; + } + } + Ok(added) + } + + fn local_clear_tombstone_by_pk( + &mut self, + table: &str, + pk_column: &str, + pk_values: &[PkValue], + ) -> Result { + let mut removed = 0; + for value in pk_values { + if self.tombstones.remove(&( + table.to_string(), + pk_column.to_string(), + value.clone(), + )) { + removed += 1; + } + } + Ok(removed) + } + + fn local_tombstones_by_pk( + &mut self, + table: &str, + 
pk_column: &str, + pk_values: &[PkValue], + ) -> Result> { + Ok(pk_values + .iter() + .filter(|value| { + self.tombstones.contains(&( + table.to_string(), + pk_column.to_string(), + (*value).clone(), + )) + }) + .cloned() + .collect()) + } + } + + fn select_from_table( + table: Option<&BTreeMap>, + pk_values: &[PkValue], + ) -> Vec { + let Some(table) = table else { + return Vec::new(); + }; + pk_values + .iter() + .filter_map(|value| table.get(value).cloned()) + .collect() + } + + fn row(pk_column: &str, pk: &str, pairs: &[(&str, &str)]) -> Row { + let mut row = Row::new(); + row.insert(pk_column.to_string(), Value::String(pk.to_string())); + for (key, value) in pairs { + row.insert((*key).to_string(), Value::String((*value).to_string())); + } + row + } + + fn assert_not_row_level(sql: &str) { + assert!( + !matches!(plan_sql(sql), RowCowPlan::RowLevel(_)), + "{sql} was incorrectly planned as row-level safe" + ); + } + + #[test] + fn plans_supported_wordpress_primary_keys() { + let cases = [ + ("wp_posts", "ID"), + ("wp_options", "option_id"), + ("wp_usermeta", "umeta_id"), + ("wp_postmeta", "meta_id"), + ("wp_terms", "term_id"), + ("wp_term_taxonomy", "term_taxonomy_id"), + ("wp_term_relationships", "object_id"), + ("wp_comments", "comment_ID"), + ("wp_links", "link_id"), + ]; + + for (table, pk) in cases { + let sql = format!("SELECT * FROM `{table}` WHERE `{pk}` IN (1, 2)"); + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = plan_sql(&sql) else { + panic!("{sql} was not planned as a row-level select"); + }; + assert_eq!(select.table, table); + assert_eq!(select.pk_column, pk); + assert_eq!( + select.pk_values, + vec![PkValue("1".to_string()), PkValue("2".to_string())] + ); + } + } + + #[test] + fn accepts_matching_table_or_alias_qualified_primary_keys() { + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = + plan_sql("SELECT p.ID FROM wp_posts p WHERE p.ID = 1") + else { + panic!("matching alias-qualified predicate should be row-level safe"); + }; + 
assert_eq!(select.table, "wp_posts"); + assert_eq!(select.pk_column, "ID"); + assert_eq!( + select.projection, + Projection::Columns(vec!["ID".to_string()]) + ); + + let RowCowPlan::RowLevel(RowCowOp::Update(write)) = + plan_sql("UPDATE wp_posts AS p SET post_title = 'changed' WHERE p.ID = 1") + else { + panic!("matching alias-qualified update should be row-level safe"); + }; + assert_eq!(write.table, "wp_posts"); + assert_eq!(write.pk_values, vec![PkValue("1".to_string())]); + } + + #[test] + fn update_copy_up_fetches_only_affected_primary_keys() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "1", &[("post_title", "one")]); + backend.insert_remote("wp_posts", "ID", "2", &[("post_title", "two")]); + backend.insert_remote("wp_posts", "ID", "3", &[("post_title", "three")]); + + let execution = execute_row_cow( + &mut backend, + "UPDATE wp_posts SET post_title = 'changed' WHERE ID IN (1, 3)", + ) + .unwrap(); + + assert!(matches!( + execution, + RowCowExecution::PreparedLocalWrite { copied_rows: 2, .. 
} + )); + backend.assert_no_remote_writes(); + assert_eq!( + backend.remote_select_values(), + vec![vec![PkValue("1".to_string()), PkValue("3".to_string())]] + ); + assert!(backend.local["wp_posts"].contains_key(&PkValue("1".to_string()))); + assert!(!backend.local["wp_posts"].contains_key(&PkValue("2".to_string()))); + assert!(backend.local["wp_posts"].contains_key(&PkValue("3".to_string()))); + } + + #[test] + fn update_copy_up_preserves_existing_local_overlay_rows() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "1", &[("post_title", "remote")]); + backend.insert_remote("wp_posts", "ID", "2", &[("post_title", "remote two")]); + backend.insert_local("wp_posts", "ID", "1", &[("post_title", "local draft")]); + + execute_row_cow( + &mut backend, + "UPDATE wp_posts SET post_status = 'draft' WHERE ID IN (1, 2)", + ) + .unwrap(); + + assert_eq!( + backend.remote_select_values(), + vec![vec![PkValue("2".to_string())]], + "copy-up should fetch only affected rows missing from the local overlay" + ); + assert_eq!( + backend.local["wp_posts"][&PkValue("1".to_string())].get("post_title"), + Some(&Value::String("local draft".to_string())), + "existing local overlay row must not be replaced by the remote lower row" + ); + } + + #[test] + fn delete_tombstone_hides_remote_row_from_merged_selects() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + execute_row_cow(&mut backend, "DELETE FROM wp_posts WHERE ID = 42").unwrap(); + assert!( + backend.remote_calls.is_empty(), + "DELETE by primary key must tombstone locally without fetching remote rows" + ); + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + assert!(result.rows.is_empty()); + backend.assert_no_remote_writes(); + } + + #[test] + fn 
insert_after_delete_clears_tombstone_and_shadows_remote_row() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + execute_row_cow(&mut backend, "DELETE FROM wp_posts WHERE ID = 42").unwrap(); + execute_row_cow( + &mut backend, + "INSERT INTO wp_posts (ID, post_title) VALUES (42, 'local replacement')", + ) + .unwrap(); + backend.insert_local( + "wp_posts", + "ID", + "42", + &[("post_title", "local replacement")], + ); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!(result.rows.len(), 1); + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("local replacement".to_string())) + ); + } + + #[test] + fn local_insert_is_not_sent_to_remote_and_appears_in_merged_select() { + let mut backend = FakeCowBackend::default(); + let execution = execute_row_cow( + &mut backend, + "INSERT INTO wp_posts (ID, post_title) VALUES (9, 'local')", + ) + .unwrap(); + assert!(matches!( + execution, + RowCowExecution::LocalOnlyInsert { table } if table == "wp_posts" + )); + assert!( + backend.remote_calls.is_empty(), + "INSERT must not be sent to or read from remote" + ); + + backend.insert_local("wp_posts", "ID", "9", &[("post_title", "local")]); + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 9").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!(result.rows.len(), 1); + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("local".to_string())) + ); + assert!(!backend + .remote + .get("wp_posts") + .unwrap_or(&BTreeMap::new()) + .contains_key(&PkValue("9".to_string()))); + backend.assert_no_remote_writes(); + } + + #[test] + fn ambiguous_sql_is_never_row_level_safe() { + assert_not_row_level( + "SELECT 
p.* FROM wp_posts p JOIN wp_postmeta m ON m.post_id = p.ID WHERE p.ID = 1", + ); + assert_not_row_level("SELECT COUNT(*) FROM wp_posts WHERE ID IN (1, 2)"); + assert_not_row_level("UPDATE wp_posts SET post_title = 'x' WHERE post_name = 'hello'"); + assert_not_row_level("DELETE FROM wp_posts WHERE post_title = 'hello'"); + assert_not_row_level( + "SELECT * FROM wp_posts WHERE ID IN (1, 2) ORDER BY post_date DESC LIMIT 1", + ); + assert_not_row_level("SELECT * FROM wp_posts WHERE ID = 1 OR ID = 2"); + assert_not_row_level("SELECT * FROM wp_terms WHERE ID = 1"); + assert_not_row_level("SELECT * FROM wp_posts p WHERE q.ID = 1"); + assert_not_row_level("UPDATE wp_posts p SET post_title = 'x' WHERE q.ID = 1"); + assert_not_row_level( + "UPDATE wp_posts SET post_author = (SELECT ID FROM wp_users LIMIT 1) WHERE ID = 1", + ); + assert_not_row_level("DELETE FROM wp_posts p WHERE q.ID = 1"); + assert_not_row_level("DELETE FROM wp_posts p, wp_users u WHERE p.ID = 1"); + assert_not_row_level("INSERT INTO wp_posts SELECT * FROM wp_users"); + assert_not_row_level("INSERT INTO wp_posts nonsense"); + assert_not_row_level("INSERT INTO wp_posts nonsense VALUES (1)"); + assert_not_row_level("SELECT * FROM wp_posts, wp_users WHERE wp_posts.ID = 1"); + assert_not_row_level("SELECT * FROM wp_posts WHERE ID = 'unterminated"); + assert_not_row_level("SELECT * FROM `wp_posts WHERE ID = 1"); + assert_not_row_level("SELECT * FROM wp_posts /* unterminated comment WHERE ID = 1"); + } + + #[test] + fn complex_reads_make_explicit_promotion_decisions() { + let RowCowPlan::PromoteTable { tables, .. } = + plan_sql("SELECT * FROM wp_posts WHERE ID IN (1, 2) ORDER BY post_date DESC LIMIT 1") + else { + panic!("ordered and limited reads must promote instead of row-merging"); + }; + assert_eq!(tables, vec!["wp_posts".to_string()]); + + let RowCowPlan::PromoteTable { tables, .. 
} = plan_sql( + "SELECT p.* FROM wp_posts p JOIN wp_postmeta m ON m.post_id = p.ID WHERE p.ID = 1", + ) else { + panic!("join reads must promote instead of row-merging"); + }; + assert_eq!( + tables, + vec!["wp_posts".to_string(), "wp_postmeta".to_string()] + ); + + let RowCowPlan::PromoteTable { tables, .. } = + plan_sql("SELECT * FROM wp_posts, wp_users WHERE wp_posts.ID = 1") + else { + panic!("comma-join reads must promote instead of row-merging"); + }; + assert_eq!(tables, vec!["wp_posts".to_string(), "wp_users".to_string()]); + } +} From 878491dec6a60bd9206357d142abdf9bfaa07f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sun, 3 May 2026 15:03:04 +0200 Subject: [PATCH 25/39] Add MySQL proxy for COW DB routing --- experiments/remote-wp-cow/Cargo.lock | 1498 +++++++++++++++++- experiments/remote-wp-cow/Cargo.toml | 2 + experiments/remote-wp-cow/DB_ROW_COW.md | 6 + experiments/remote-wp-cow/PRD.md | 9 +- experiments/remote-wp-cow/README.md | 19 +- experiments/remote-wp-cow/src/config.rs | 16 + experiments/remote-wp-cow/src/db.rs | 8 +- experiments/remote-wp-cow/src/generate.rs | 20 +- experiments/remote-wp-cow/src/main.rs | 1 + experiments/remote-wp-cow/src/mysql_proxy.rs | 522 ++++++ experiments/remote-wp-cow/src/run.rs | 21 + 11 files changed, 2078 insertions(+), 44 deletions(-) create mode 100644 experiments/remote-wp-cow/src/mysql_proxy.rs diff --git a/experiments/remote-wp-cow/Cargo.lock b/experiments/remote-wp-cow/Cargo.lock index 01513e1c..b9b2487a 100644 --- a/experiments/remote-wp-cow/Cargo.lock +++ b/experiments/remote-wp-cow/Cargo.lock @@ -2,6 +2,41 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "1.0.0" @@ -58,24 +93,85 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "ascii" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" 
version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bigdecimal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.117", +] + [[package]] name = "bitflags" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -94,6 +190,115 @@ dependencies = [ "objc2", ] +[[package]] +name = "borsh" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" +dependencies = [ + "borsh-derive", + "bytes", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfcfdc083699101d5a7965e49925975f2f55060f94f9a05e7187be95d530ca59" +dependencies = [ + "once_cell", + "proc-macro-crate 3.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "btoi" +version = "0.4.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd6407f73a9b8b6162d8a2ef999fe6afd7cc15902ebf42c5cd296addf17e0ad" +dependencies = [ + "num-traits", +] + +[[package]] +name = "btoi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b5ab9db53bcda568284df0fd39f6eac24ad6f7ba7ff1168b9e76eba6576b976" +dependencies = [ + "num-traits", +] + +[[package]] +name = "bufstream" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + 
"shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -106,12 +311,37 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "chunked_transfer" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.6.1" @@ -140,10 +370,10 @@ version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -152,12 +382,27 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + [[package]] name = 
"colorchoice" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -167,6 +412,30 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crypto-common" version = "0.1.7" @@ -188,6 +457,61 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_utils" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "362f47930db19fe7735f527e6595e4900316b893ebf6d48ad3d31be928d57dd6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "digest" version = "0.10.7" @@ -218,9 +542,15 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" @@ -243,6 +573,29 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "libz-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ 
-258,6 +611,68 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "frunk" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28aef0f9aa070bce60767c12ba9cb41efeaf1a2bc6427f87b7d83f11239a16d7" +dependencies = [ + "frunk_core", + "frunk_derives", + "frunk_proc_macros", + "serde", +] + +[[package]] +name = "frunk_core" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "476eeaa382e3462b84da5d6ba3da97b5786823c2d0d3a0d04ef088d073da225c" +dependencies = [ + "serde", +] + +[[package]] +name = "frunk_derives" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0b4095fc99e1d858e5b8c7125d2638372ec85aa0fe6c807105cf10b0265ca6c" +dependencies = [ + "frunk_proc_macro_helpers", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "frunk_proc_macro_helpers" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1952b802269f2db12ab7c0bd328d0ae8feaabf19f352a7b0af7bb0c5693abfce" +dependencies = [ + "frunk_core", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "frunk_proc_macros" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3462f590fa236005bd7ca4847f81438bd6fe0febd4d04e11968d4c2e96437e78" +dependencies = [ + "frunk_core", + "frunk_proc_macro_helpers", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "fuser" version = "0.16.0" @@ -273,6 +688,30 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = 
"0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -285,32 +724,76 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.4.2" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", - "r-efi", - "wasip2", - "wasip3", + "wasi", ] [[package]] -name = "hashbrown" -version = "0.15.5" +name = "getrandom" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ - "foldhash", + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", ] [[package]] -name = "hashbrown" +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -329,6 +812,30 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.2.0" @@ -417,6 +924,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + 
[[package]] name = "idna" version = "1.1.0" @@ -450,18 +963,64 @@ dependencies = [ "serde_core", ] +[[package]] +name = "io-enum" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de9008599afe8527a8c9d70423437363b321649161e98473f433de802d76107" +dependencies = [ + "derive_utils", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -474,6 +1033,33 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libloading" +version = "0.8.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libz-sys" +version = "1.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3a226e576f50782b3305c5ccf458698f92798987f551c6a02efe8276721e22" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -492,12 +1078,162 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "msql-srv" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b821d09e9a4ed6b61015a889597446b3b6c7721544d0f4b617bcfdacf6ee7877" +dependencies = [ + "byteorder", + "chrono", + "mysql_common 0.31.0", + "nom", +] + 
+[[package]] +name = "mysql" +version = "28.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a732193888328fc060ab901c0ed1355521267a51ffbfd9a0b3786434c6b8e7f" +dependencies = [ + "bufstream", + "bytes", + "crossbeam-queue", + "crossbeam-utils", + "flate2", + "io-enum", + "libc", + "lru", + "mysql_common 0.37.1", + "named_pipe", + "pem", + "percent-encoding", + "socket2", + "twox-hash", + "url", +] + +[[package]] +name = "mysql-common-derive" +version = "0.30.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f" +dependencies = [ + "darling", + "heck 0.4.1", + "num-bigint", + "proc-macro-crate 1.3.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.117", + "termcolor", + "thiserror 1.0.69", +] + +[[package]] +name = "mysql_common" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef" +dependencies = [ + "base64 0.21.7", + "bigdecimal", + "bindgen", + "bitflags", + "bitvec", + "btoi 0.4.3", + "byteorder", + "bytes", + "cc", + "chrono", + "cmake", + "crc32fast", + "flate2", + "frunk", + "lazy_static", + "mysql-common-derive", + "num-bigint", + "num-traits", + "rand", + "regex", + "rust_decimal", + "saturating", + "serde", + "serde_json", + "sha1", + "sha2", + "smallvec", + "subprocess", + "thiserror 1.0.69", + "time", + "uuid", + "zstd", +] + +[[package]] +name = "mysql_common" +version = "0.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffc2127d4035fa5a614935c663a15a4468e64e798473e0cc21c8df40a607588" +dependencies = [ + "base64 0.22.1", + "bitflags", + "btoi 0.5.0", + "byteorder", + "bytes", + "crc32fast", + "flate2", + "getrandom 0.3.4", + "num-bigint", + "num-traits", + "regex", + "saturating", + "serde", + "serde_json", + "sha1", + "sha2", + "thiserror 2.0.18", + "uuid", +] + 
+[[package]] +name = "named_pipe" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad9c443cce91fc3e12f017290db75dde490d685cdaaf508d7159d7cf41f0eb2b" +dependencies = [ + "winapi", +] + [[package]] name = "nix" version = "0.29.0" @@ -522,6 +1258,50 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "objc2" version = "0.6.4" @@ -559,12 +1339,34 @@ dependencies = [ "winapi", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "potential_utf" version = "0.1.5" @@ -574,6 +1376,21 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -581,7 +1398,50 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit 0.19.15", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.11+spec-1.1.0", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", ] [[package]] @@ -593,20 +1453,172 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quote" version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", 
+ "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rust_decimal" +version = "1.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ce901f9a19d251159075a4c37af514c3b8ef99c22e02dd8c19161cf397ee94a" dependencies = [ - "proc-macro2", + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", + "wasm-bindgen", ] [[package]] -name = "r-efi" -version = "6.0.0" +name = "rustc-hash" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustix" @@ -621,6 +1633,24 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "saturating" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "semver" version = "1.0.28" @@ -654,7 +1684,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -670,6 +1700,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha2" version = "0.10.9" @@ -681,12 +1722,46 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -699,6 +1774,27 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subprocess" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c56e8662b206b9892d7a5a3f2ecdbcb455d3d6b259111373b7e08b8055158a8" +dependencies = [ + "libc", + 
"winapi", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.117" @@ -718,9 +1814,15 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.27.0" @@ -728,12 +1830,90 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 
2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny_http" version = "0.12.0" @@ -756,6 +1936,74 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" 
+dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime 0.6.11", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.25.11+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +dependencies = [ + "indexmap", + "toml_datetime 1.1.1+spec-1.1.0", + "toml_parser", + "winnow 1.0.2", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.2", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + [[package]] name = "typenum" version = "1.20.0" @@ -798,12 +2046,34 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "wasi" +version = 
"0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasip2" version = "1.0.3+wasi-0.2.9" @@ -822,6 +2092,52 @@ dependencies = [ "wit-bindgen 0.51.0", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "serde", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" +dependencies = [ + "unicode-ident", +] + [[package]] name = "wasm-encoder" version = "0.244.0" @@ -872,18 +2188,80 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = 
"winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -893,6 +2271,24 @@ dependencies = [ "windows-link", ] +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -915,7 +2311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -926,10 +2322,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "indexmap", "prettyplease", - "syn", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -945,7 +2341,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -992,12 +2388,14 @@ name = "wp-cow" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.22.1", "clap", "ctrlc", "fuser", "hex", "libc", + "msql-srv", + "mysql", "serde", "serde_json", "sha2", @@ -1012,6 +2410,15 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "yoke" version = "0.8.2" @@ -1031,7 +2438,7 @@ checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -1052,7 +2459,7 @@ checksum = 
"70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1072,7 +2479,7 @@ checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -1106,7 +2513,7 @@ checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1114,3 +2521,32 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/experiments/remote-wp-cow/Cargo.toml b/experiments/remote-wp-cow/Cargo.toml index 14172524..56dd4d9f 100644 --- a/experiments/remote-wp-cow/Cargo.toml +++ b/experiments/remote-wp-cow/Cargo.toml @@ -14,6 +14,8 @@ ctrlc = "3.4" fuser = "0.16" hex = "0.4" libc = "0.2" +msql-srv = { version = "0.11", default-features = false } +mysql = { version = "28", default-features = false, features = ["minimal-rust"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" sha2 = "0.10" diff --git 
a/experiments/remote-wp-cow/DB_ROW_COW.md b/experiments/remote-wp-cow/DB_ROW_COW.md index 9de99844..fb96b0fb 100644 --- a/experiments/remote-wp-cow/DB_ROW_COW.md +++ b/experiments/remote-wp-cow/DB_ROW_COW.md @@ -51,6 +51,12 @@ handles a statement, WordPress continues against the local database or receives the merged result. If a write is not row-level safe, the existing table promotion/materialization fallback remains the conservative path. +`wp-cow run` also exposes a local MySQL protocol proxy. The generated +`wp-config.php` points `DB_HOST` at this proxy so plugins that bypass `$wpdb` +still go through the COW routing layer. The drop-in itself uses +`WPCOW_LOCAL_DB_HOST` to connect directly to local MariaDB and avoid recursively +calling the proxy. + Promotion is overlay-preserving. Before importing a full remote table, wp-cow dumps the current local upper rows for that table, imports the remote lower table, restores the local upper rows, then reapplies tombstones. This keeps diff --git a/experiments/remote-wp-cow/PRD.md b/experiments/remote-wp-cow/PRD.md index 6f727c5f..aaaa6cc4 100644 --- a/experiments/remote-wp-cow/PRD.md +++ b/experiments/remote-wp-cow/PRD.md @@ -43,7 +43,6 @@ wp-cow-lab-serve ## Non-Goals - Perfect visual fidelity for every media asset on first page load. -- A transparent MySQL protocol proxy. - Full production snapshot semantics. - Supporting non-Linux runtime hosts. @@ -61,10 +60,12 @@ Startup should do: 3. Initialize empty local DB schema. 4. Start a persistent SSH tunnel for safe remote DB reads when the remote DB is reachable over TCP from the SSH host. -5. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, +5. Start a local MySQL protocol proxy for plugins that bypass `$wpdb` and use + the generated `DB_HOST` constant directly. +6. Start local PHP immediately with generated local `wp-config.php`, DB drop-in, and safety MU plugin. -6. 
Serve files lazily and persistently cache only the files touched by requests. -7. For the first dynamic browser request, show a temporary splash page that +7. Serve files lazily and persistently cache only the files touched by requests. +8. For the first dynamic browser request, show a temporary splash page that polls real file-cache progress while a bypass request warms WordPress. ## File Materialization Policy diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index da377f05..b1014c15 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -152,6 +152,14 @@ subprocess per WordPress read query. Write-class SQL is still blocked from the remote database and materialized locally first. Set `WPCOW_REMOTE_DB_TUNNEL=0` to fall back to daemon-mediated remote reads. +`wp-cow run` also starts a local MySQL protocol proxy on the generated `DB_HOST` +port. Core WordPress still uses the generated `db.php` drop-in with a direct +local MariaDB connection to avoid recursion, but plugins that open their own +`mysqli` connection using `DB_HOST` hit the proxy instead of the empty local +schema. The proxy applies the same row-COW/read-routing/write-blocking rules as +the drop-in before forwarding anything to local MariaDB or the remote read-only +lower layer. + On first WordPress boot, `wp-cow` special-cases the options-table bootstrap query. It materializes only autoloaded option rows plus core identity/theme/plugin option names into the local database, then routes those matching reads locally. @@ -300,10 +308,12 @@ run/ - deletions are recorded as whiteouts. - Generated local `wp-config.php`, `wp-content/db.php`, and safety MU plugin. - Schema import and full-table DB materialization through remote `mysqldump`. 
-- A local control HTTP server used by the DB drop-in: +- A local control HTTP server used by the DB drop-in and MySQL proxy: - read queries can be served from the remote DB through daemon-mediated PHP, - write-class SQL is never sent to the remote DB, - writes materialize affected table groups before executing locally. +- A local MySQL protocol proxy for code paths that bypass WordPress's `$wpdb` + object and connect with the generated `DB_HOST` constant. ## Requirements @@ -322,6 +332,7 @@ Remote host: ## Notes -This is an MVP. The DB layer uses a WordPress `db.php` drop-in plus daemon -control endpoints; it does not yet implement a transparent MySQL protocol -proxy, row-level overlays, or true point-in-time snapshot support. +This is an MVP. The DB layer now has both a WordPress `db.php` drop-in and a +local MySQL protocol proxy, but it is still conservative: complex SQL promotes +tables instead of attempting unsafe partial merges, and it does not provide true +point-in-time snapshot support without cooperation from the remote host. 
diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index 1987e10e..24247003 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -23,6 +23,8 @@ pub struct Manifest { pub created_at_unix: u64, pub probe: Probe, pub local_db: LocalDb, + #[serde(default = "default_db_proxy")] + pub db_proxy: DbProxy, #[serde(default = "default_remote_db_tunnel")] pub remote_db_tunnel: RemoteDbTunnel, pub control_url: String, @@ -63,6 +65,12 @@ pub struct RemoteDbTunnel { pub port: u16, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DbProxy { + pub host: String, + pub port: u16, +} + #[derive(Debug, Clone)] pub struct ClonePaths { pub root: PathBuf, @@ -112,6 +120,7 @@ impl Manifest { host: "127.0.0.1".to_string(), port: 33071, }, + db_proxy: default_db_proxy(), remote_db_tunnel: default_remote_db_tunnel(), control_url: "http://127.0.0.1:39070".to_string(), cache_max_file_bytes: cache_max_file_bytes_from_env(), @@ -135,6 +144,13 @@ fn default_remote_db_tunnel() -> RemoteDbTunnel { } } +fn default_db_proxy() -> DbProxy { + DbProxy { + host: "127.0.0.1".to_string(), + port: 33070, + } +} + fn cache_max_file_bytes_from_env() -> u64 { std::env::var("WPCOW_CACHE_MAX_FILE_MB") .ok() diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 0234b36c..49221494 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -1217,7 +1217,7 @@ fn qualified_table(manifest: &Manifest, table: &str) -> String { ) } -fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { +pub(crate) fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { let mut command = local_mysql_command(manifest); command.arg("--execute").arg(sql_text); let status = command.status().context("run local mysql")?; @@ -1227,7 +1227,7 @@ fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> { Ok(()) } -fn 
local_query_result(manifest: &Manifest, sql_text: &str) -> Result { +pub(crate) fn local_query_result(manifest: &Manifest, sql_text: &str) -> Result { let output = local_mysql_command(manifest) .arg("--batch") .arg("--raw") @@ -1381,6 +1381,10 @@ mod tests { host: "127.0.0.1".to_string(), port: 33071, }, + db_proxy: crate::config::DbProxy { + host: "127.0.0.1".to_string(), + port: 33070, + }, remote_db_tunnel: crate::config::RemoteDbTunnel { host: "127.0.0.1".to_string(), port: 33072, diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index f1c97a0f..c8ecb5ba 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -45,7 +45,8 @@ pub fn wp_config_php(manifest: &Manifest, paths: &ClonePaths) -> String { define( 'DB_NAME', {local_db_name} ); define( 'DB_USER', {local_db_user} ); define( 'DB_PASSWORD', {local_db_password} ); -define( 'DB_HOST', {local_db_host} ); +define( 'DB_HOST', {proxy_db_host} ); +define( 'WPCOW_LOCAL_DB_HOST', {local_db_host} ); define( 'WP_HOME', {local_url} ); define( 'WP_SITEURL', {local_url} ); @@ -86,6 +87,10 @@ require_once ABSPATH . 'wp-settings.php'; "{}:{}", manifest.local_db.host, manifest.local_db.port )), + proxy_db_host = php_string(&format!( + "{}:{}", + manifest.db_proxy.host, manifest.db_proxy.port + )), local_url = php_string(&manifest.local_url), table_prefix = php_string(&manifest.probe.table_prefix), clone_name = php_string(&manifest.name), @@ -463,7 +468,7 @@ class Cow_DB extends wpdb { } } -$wpdb = new Cow_DB( DB_USER, DB_PASSWORD, DB_NAME, DB_HOST ); +$wpdb = new Cow_DB( DB_USER, DB_PASSWORD, DB_NAME, defined( 'WPCOW_LOCAL_DB_HOST' ) ? 
WPCOW_LOCAL_DB_HOST : DB_HOST ); "# } @@ -832,7 +837,9 @@ pub fn generated_file_paths(root: &Path) -> Vec { #[cfg(test)] mod tests { use super::*; - use crate::config::{clone_paths, LocalDb, Manifest, Probe, RemoteDbTunnel, MANIFEST_VERSION}; + use crate::config::{ + clone_paths, DbProxy, LocalDb, Manifest, Probe, RemoteDbTunnel, MANIFEST_VERSION, + }; use std::io::{Read, Write}; use std::net::{TcpListener, TcpStream}; use std::process::Command; @@ -862,6 +869,10 @@ mod tests { host: "127.0.0.1".to_string(), port: 33071, }, + db_proxy: DbProxy { + host: "127.0.0.1".to_string(), + port: 33070, + }, remote_db_tunnel: RemoteDbTunnel { host: "127.0.0.1".to_string(), port: 33072, @@ -878,6 +889,8 @@ mod tests { let paths = clone_paths(temp.path(), "example"); let php = wp_config_php(&manifest(), &paths); assert!(php.contains("define( 'DB_NAME', 'cow_example' );")); + assert!(php.contains("define( 'DB_HOST', '127.0.0.1:33070' );")); + assert!(php.contains("define( 'WPCOW_LOCAL_DB_HOST', '127.0.0.1:33071' );")); assert!(php.contains("define( 'WP_HOME', 'http://example.test' );")); assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); @@ -885,6 +898,7 @@ mod tests { assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); assert!(php.contains("wp-cow DB/runtime error")); assert!(php.contains("wp-content/db.php")); + assert!(db_dropin_php().contains("WPCOW_LOCAL_DB_HOST")); } #[test] diff --git a/experiments/remote-wp-cow/src/main.rs b/experiments/remote-wp-cow/src/main.rs index 7ba66499..6d0b9f99 100644 --- a/experiments/remote-wp-cow/src/main.rs +++ b/experiments/remote-wp-cow/src/main.rs @@ -4,6 +4,7 @@ mod control; mod db; mod fusefs; mod generate; +mod mysql_proxy; mod overlay; mod remote; mod row_cow; diff --git a/experiments/remote-wp-cow/src/mysql_proxy.rs b/experiments/remote-wp-cow/src/mysql_proxy.rs new file mode 100644 index 00000000..11c5e91a --- /dev/null +++ b/experiments/remote-wp-cow/src/mysql_proxy.rs @@ -0,0 +1,522 @@ +use 
anyhow::{anyhow, Result}; +use msql_srv::{ + Column, ColumnFlags, ColumnType, ErrorKind, InitWriter, MysqlIntermediary, MysqlShim, + ParamParser, QueryResultWriter, StatementMetaWriter, ValueInner, +}; +use mysql::prelude::Queryable; +use serde_json::Value as JsonValue; +use std::collections::BTreeMap; +use std::io; +use std::net::TcpListener; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use crate::config::{self, ClonePaths, Manifest}; +use crate::db; +use crate::remote::RemoteClient; +use crate::row_cow::CowQueryResult; +use crate::sql; + +pub fn serve_proxy( + addr: &str, + manifest: Manifest, + paths: ClonePaths, + remote: RemoteClient, + shutdown: Arc, +) -> Result<()> { + let listener = TcpListener::bind(addr).with_context(|| format!("bind MySQL proxy {addr}"))?; + listener + .set_nonblocking(true) + .context("set MySQL proxy nonblocking")?; + + while !shutdown.load(Ordering::SeqCst) { + match listener.accept() { + Ok((stream, _peer)) => { + let backend = ProxyBackend::new(manifest.clone(), paths.clone(), remote.clone()); + thread::spawn(move || { + if let Err(err) = MysqlIntermediary::run_on_tcp(backend, stream) { + eprintln!("wp-cow MySQL proxy connection ended: {err:?}"); + } + }); + } + Err(err) if err.kind() == io::ErrorKind::WouldBlock => { + thread::sleep(Duration::from_millis(50)); + } + Err(err) => return Err(err).context("accept MySQL proxy connection"), + } + } + + Ok(()) +} + +struct ProxyBackend { + manifest: Manifest, + paths: ClonePaths, + remote: RemoteClient, + local: Option, + prepared: BTreeMap, + next_statement_id: u32, +} + +impl ProxyBackend { + fn new(manifest: Manifest, paths: ClonePaths, remote: RemoteClient) -> Self { + Self { + manifest, + paths, + remote, + local: None, + prepared: BTreeMap::new(), + next_statement_id: 1, + } + } + + fn dispatch(&mut self, query: &str) -> Result { + if is_local_session_sql(query) { + return self.local_query(query); + } + + if 
sql::is_write_sql(query) { + if !config::is_offline(&self.paths) { + let tables = sql::extract_tables(query); + let response = + db::row_cow_query(&self.remote, &self.manifest, &self.paths, query, &tables)?; + if response.backend != "local" && !response.handled { + return Err(anyhow!("write SQL did not resolve to local backend")); + } + } + return self.local_query(query); + } + + if sql::is_safe_read_sql(query) { + if config::is_offline(&self.paths) { + return self.local_query(query); + } + + let tables = sql::extract_tables(query); + let row_cow = + db::row_cow_query(&self.remote, &self.manifest, &self.paths, query, &tables)?; + if let Some(result) = row_cow.result { + return Ok(ProxyReply::Result(result)); + } + if row_cow.backend == "local" { + return self.local_query(query); + } + + let route = + db::route_for_query(&self.remote, &self.manifest, &self.paths, query, &tables)?; + if route.backend == "local" { + self.local_query(query) + } else { + let result = db::remote_readonly_query(&self.remote, query)?; + Ok(ProxyReply::Result(CowQueryResult { + ok: result.ok, + error: result.error, + rows: result.rows, + fields: result.fields, + affected: result.affected, + })) + } + } else { + self.local_query(query) + } + } + + fn local_conn(&mut self) -> Result<&mut mysql::Conn> { + if self.local.is_none() { + let mut builder = mysql::OptsBuilder::new() + .ip_or_hostname(Some(self.manifest.local_db.host.clone())) + .tcp_port(self.manifest.local_db.port) + .user(Some(self.manifest.local_db.user.clone())) + .db_name(Some(self.manifest.local_db.name.clone())); + if !self.manifest.local_db.password.is_empty() { + builder = builder.pass(Some(self.manifest.local_db.password.clone())); + } + self.local = Some(mysql::Conn::new(builder)?); + } + self.local + .as_mut() + .ok_or_else(|| anyhow!("local MySQL connection was not initialized")) + } + + fn local_query(&mut self, query: &str) -> Result { + let result = self.local_conn()?.query_iter(query)?; + let fields = result + 
.columns() + .as_ref() + .iter() + .map(|column| column.name_str().to_string()) + .collect::>(); + let affected = result.affected_rows(); + let last_insert_id = result.last_insert_id().unwrap_or(0); + + if fields.is_empty() { + drop(result); + return Ok(ProxyReply::Completed { + affected_rows: affected, + last_insert_id, + }); + } + + let mut rows = Vec::new(); + for row in result { + let row = row?; + let values = row.unwrap(); + let mut out = serde_json::Map::new(); + for (idx, field) in fields.iter().enumerate() { + let value = values.get(idx).cloned().unwrap_or(mysql::Value::NULL); + out.insert(field.clone(), mysql_value_to_json(value)); + } + rows.push(out); + } + + Ok(ProxyReply::Result(CowQueryResult { + ok: true, + error: String::new(), + affected: rows.len() as i64, + rows, + fields, + })) + } +} + +enum ProxyReply { + Result(CowQueryResult), + Completed { + affected_rows: u64, + last_insert_id: u64, + }, +} + +impl MysqlShim for ProxyBackend { + type Error = io::Error; + + fn on_prepare( + &mut self, + query: &str, + info: StatementMetaWriter<'_, W>, + ) -> Result<(), Self::Error> { + let id = self.next_statement_id; + self.next_statement_id = self.next_statement_id.saturating_add(1); + self.prepared.insert(id, query.to_string()); + let params = (0..count_placeholders(query)) + .map(|idx| Column { + table: String::new(), + column: format!("param{}", idx + 1), + coltype: ColumnType::MYSQL_TYPE_STRING, + colflags: ColumnFlags::empty(), + }) + .collect::>(); + info.reply(id, ¶ms, &[]) + } + + fn on_execute( + &mut self, + id: u32, + params: ParamParser<'_>, + results: QueryResultWriter<'_, W>, + ) -> Result<(), Self::Error> { + let Some(query) = self.prepared.get(&id).cloned() else { + return Ok(results.error(ErrorKind::ER_UNKNOWN_STMT_HANDLER, b"unknown statement")?); + }; + let params = params + .into_iter() + .map(|param| mysql_param_literal(param.value.into_inner())) + .collect::>(); + let Ok(query) = substitute_placeholders(&query, ¶ms) else { + return 
Ok(results.error( + ErrorKind::ER_PARSE_ERROR, + b"prepared statement parameter count does not match placeholders", + )?); + }; + write_proxy_reply(self.dispatch(&query), results) + } + + fn on_close(&mut self, stmt: u32) { + self.prepared.remove(&stmt); + } + + fn on_query( + &mut self, + query: &str, + results: QueryResultWriter<'_, W>, + ) -> Result<(), Self::Error> { + write_proxy_reply(self.dispatch(query), results) + } + + fn on_init(&mut self, _schema: &str, writer: InitWriter<'_, W>) -> Result<(), Self::Error> { + writer.ok() + } +} + +fn write_proxy_reply( + reply: Result, + results: QueryResultWriter<'_, W>, +) -> Result<(), io::Error> { + match reply { + Ok(ProxyReply::Result(result)) if result.ok => write_result(result, results), + Ok(ProxyReply::Result(result)) => { + Ok(results.error(ErrorKind::ER_UNKNOWN_ERROR, result.error.as_bytes())?) + } + Ok(ProxyReply::Completed { + affected_rows, + last_insert_id, + }) => results.completed(affected_rows, last_insert_id), + Err(err) => Ok(results.error(ErrorKind::ER_UNKNOWN_ERROR, err.to_string().as_bytes())?), + } +} + +fn write_result( + result: CowQueryResult, + results: QueryResultWriter<'_, W>, +) -> Result<(), io::Error> { + let columns = result + .fields + .iter() + .map(|field| Column { + table: String::new(), + column: field.clone(), + coltype: ColumnType::MYSQL_TYPE_STRING, + colflags: ColumnFlags::empty(), + }) + .collect::>(); + let mut writer = results.start(&columns)?; + for row in result.rows { + for field in &result.fields { + match row.get(field) { + None | Some(JsonValue::Null) => writer.write_col(None::<&str>)?, + Some(JsonValue::String(value)) => writer.write_col(value.as_str())?, + Some(value) => writer.write_col(value.to_string())?, + } + } + writer.end_row()?; + } + writer.finish() +} + +fn is_local_session_sql(query: &str) -> bool { + let normalized = query.trim_start().to_ascii_uppercase(); + normalized.starts_with("SET ") + || normalized.starts_with("START TRANSACTION") + || 
normalized.starts_with("BEGIN") + || normalized.starts_with("COMMIT") + || normalized.starts_with("ROLLBACK") +} + +fn count_placeholders(query: &str) -> usize { + scan_placeholders(query, None).0 +} + +fn substitute_placeholders(query: &str, params: &[String]) -> Result { + let (used, out) = scan_placeholders(query, Some(params)); + if used != params.len() { + return Err(anyhow!("too many prepared statement parameters")); + } + out.ok_or_else(|| anyhow!("missing prepared statement parameter")) +} + +fn scan_placeholders(query: &str, params: Option<&[String]>) -> (usize, Option) { + let chars = query.chars().collect::>(); + let mut out = params.map(|_| String::with_capacity(query.len())); + let mut idx = 0; + let mut used = 0; + + while idx < chars.len() { + let ch = chars[idx]; + + if ch == '\'' || ch == '"' || ch == '`' { + push_char(&mut out, ch); + idx += 1; + while idx < chars.len() { + let inner = chars[idx]; + push_char(&mut out, inner); + idx += 1; + if inner == '\\' && idx < chars.len() { + push_char(&mut out, chars[idx]); + idx += 1; + continue; + } + if inner == ch { + if idx < chars.len() && chars[idx] == ch { + push_char(&mut out, chars[idx]); + idx += 1; + continue; + } + break; + } + } + continue; + } + + if ch == '-' && idx + 1 < chars.len() && chars[idx + 1] == '-' { + push_char(&mut out, ch); + push_char(&mut out, chars[idx + 1]); + idx += 2; + while idx < chars.len() { + let comment = chars[idx]; + push_char(&mut out, comment); + idx += 1; + if comment == '\n' { + break; + } + } + continue; + } + + if ch == '#' { + push_char(&mut out, ch); + idx += 1; + while idx < chars.len() { + let comment = chars[idx]; + push_char(&mut out, comment); + idx += 1; + if comment == '\n' { + break; + } + } + continue; + } + + if ch == '/' && idx + 1 < chars.len() && chars[idx + 1] == '*' { + push_char(&mut out, ch); + push_char(&mut out, chars[idx + 1]); + idx += 2; + while idx < chars.len() { + let comment = chars[idx]; + push_char(&mut out, comment); + idx += 1; 
+ if comment == '*' && idx < chars.len() && chars[idx] == '/' { + push_char(&mut out, chars[idx]); + idx += 1; + break; + } + } + continue; + } + + if ch == '?' { + if let Some(params) = params { + let Some(param) = params.get(used) else { + return (used, None); + }; + push_str(&mut out, param); + } + used += 1; + idx += 1; + continue; + } + + push_char(&mut out, ch); + idx += 1; + } + + (used, out) +} + +fn push_char(out: &mut Option, ch: char) { + if let Some(out) = out { + out.push(ch); + } +} + +fn push_str(out: &mut Option, value: &str) { + if let Some(out) = out { + out.push_str(value); + } +} + +fn mysql_param_literal(value: ValueInner<'_>) -> String { + match value { + ValueInner::NULL => "NULL".to_string(), + ValueInner::Bytes(bytes) => format!("'{}'", mysql_string_literal_bytes(bytes)), + ValueInner::Int(value) => value.to_string(), + ValueInner::UInt(value) => value.to_string(), + ValueInner::Double(value) => value.to_string(), + ValueInner::Date(bytes) | ValueInner::Time(bytes) | ValueInner::Datetime(bytes) => { + format!("X'{}'", hex::encode(bytes)) + } + } +} + +fn mysql_value_to_json(value: mysql::Value) -> JsonValue { + match value { + mysql::Value::NULL => JsonValue::Null, + mysql::Value::Bytes(bytes) => JsonValue::String(String::from_utf8_lossy(&bytes).into()), + mysql::Value::Int(value) => JsonValue::String(value.to_string()), + mysql::Value::UInt(value) => JsonValue::String(value.to_string()), + mysql::Value::Float(value) => JsonValue::String(value.to_string()), + mysql::Value::Double(value) => JsonValue::String(value.to_string()), + mysql::Value::Date(year, month, day, hour, minute, second, micros) => { + JsonValue::String(format!( + "{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}.{:06}", + micros + )) + } + mysql::Value::Time(negative, days, hours, minutes, seconds, micros) => { + let sign = if negative { "-" } else { "" }; + JsonValue::String(format!( + "{sign}{days} {hours:02}:{minutes:02}:{seconds:02}.{:06}", + micros + )) 
+ } + } +} + +fn mysql_string_literal_bytes(bytes: &[u8]) -> String { + String::from_utf8_lossy(bytes) + .replace('\\', "\\\\") + .replace('\'', "\\'") +} + +trait Context { + fn context(self, msg: &'static str) -> Result; + fn with_context String>(self, f: F) -> Result; +} + +impl Context for io::Result { + fn context(self, msg: &'static str) -> Result { + self.map_err(|err| anyhow!("{msg}: {err}")) + } + + fn with_context String>(self, f: F) -> Result { + self.map_err(|err| anyhow!("{}: {err}", f())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn recognizes_session_sql_as_local_only() { + assert!(is_local_session_sql("SET NAMES utf8mb4")); + assert!(is_local_session_sql("BEGIN")); + assert!(is_local_session_sql("COMMIT")); + assert!(!is_local_session_sql("SELECT * FROM wp_posts")); + } + + #[test] + fn substitutes_prepared_placeholders_outside_literals_and_comments() { + let sql = + "SELECT '?' AS literal, col FROM wp_posts WHERE ID = ? AND post_title = ? /* ? */"; + let substituted = + substitute_placeholders(sql, &["123".to_string(), "'local \\' title'".to_string()]) + .unwrap(); + assert_eq!( + substituted, + "SELECT '?' AS literal, col FROM wp_posts WHERE ID = 123 AND post_title = 'local \\' title' /* ? 
*/" + ); + assert_eq!(count_placeholders(sql), 2); + } + + #[test] + fn quotes_prepared_parameter_literals() { + assert_eq!( + mysql_param_literal(ValueInner::Bytes(b"a'b\\c")), + "'a\\'b\\\\c'" + ); + assert_eq!(mysql_param_literal(ValueInner::NULL), "NULL"); + assert_eq!(mysql_param_literal(ValueInner::UInt(42)), "42"); + } +} diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 67027333..ae03f369 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -14,6 +14,7 @@ use crate::control; use crate::db; use crate::fusefs; use crate::generate::ROUTER_BASENAME; +use crate::mysql_proxy; use crate::remote::{shell_quote, RemoteClient}; pub struct RunOptions { @@ -73,6 +74,21 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R ) }); + let proxy_addr = format!("{}:{}", manifest.db_proxy.host, manifest.db_proxy.port); + let proxy_shutdown = shutdown.clone(); + let proxy_manifest = manifest.clone(); + let proxy_paths = paths.clone(); + let proxy_remote = remote.clone(); + let proxy_thread = thread::spawn(move || { + mysql_proxy::serve_proxy( + &proxy_addr, + proxy_manifest, + proxy_paths, + proxy_remote, + proxy_shutdown, + ) + }); + let mount_manifest = manifest.clone(); let mount_paths = paths.clone(); let mountpoint = options.mountpoint.clone(); @@ -145,6 +161,11 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R Err(_) => return Err(anyhow!("control thread panicked")), } + match proxy_thread.join() { + Ok(result) => result?, + Err(_) => return Err(anyhow!("MySQL proxy thread panicked")), + } + match mount_thread.join() { Ok(result) => { if let Err(err) = result { From 7d451e375a4ab9267ef87707e26511703972c863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sun, 3 May 2026 17:27:19 +0200 Subject: [PATCH 26/39] Add Codex restart loop helper --- scripts/codex-until-pass.sh | 212 
++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100755 scripts/codex-until-pass.sh diff --git a/scripts/codex-until-pass.sh b/scripts/codex-until-pass.sh new file mode 100755 index 00000000..dd090dc1 --- /dev/null +++ b/scripts/codex-until-pass.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +set -uo pipefail + +usage() { + cat <<'USAGE' +Usage: + scripts/codex-until-pass.sh --task-file TASK.md [--work-dir DIR] + scripts/codex-until-pass.sh --task "Implement ..." [--work-dir DIR] + +Runs Codex in an implement/verify loop until an independent verifier ends with +exactly "VERDICT: PASS". By default MAX_ITERATIONS=0, which means no iteration +limit. Press Ctrl-C to stop. + +Environment: + CODEX_CMD Codex command. Default: codex + MAX_ITERATIONS 0 means unlimited. Default: 0 + SLEEP_SECONDS Delay after structural failures. Default: 5 + IMPLEMENTER_TAIL_BYTES Bytes of implementer output sent to verifier. Default: 24000 + FEEDBACK_TAIL_BYTES Bytes of verifier feedback kept for next iteration. Default: 32000 + CODEX_UNTIL_PASS_BYPASS_SANDBOX 1 passes --dangerously-bypass-approvals-and-sandbox. Default: 1 +USAGE +} + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +cd "$REPO_ROOT" + +TASK_TEXT="" +TASK_FILE="" +WORK_DIR="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --task) + [[ $# -ge 2 ]] || { echo "missing value for --task" >&2; exit 64; } + TASK_TEXT="$2" + shift 2 + ;; + --task-file) + [[ $# -ge 2 ]] || { echo "missing value for --task-file" >&2; exit 64; } + TASK_FILE="$2" + shift 2 + ;; + --work-dir) + [[ $# -ge 2 ]] || { echo "missing value for --work-dir" >&2; exit 64; } + WORK_DIR="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown argument: $1" >&2 + usage >&2 + exit 64 + ;; + esac +done + +if [[ -n "$TASK_TEXT" && -n "$TASK_FILE" ]]; then + echo "pass either --task or --task-file, not both" >&2 + exit 64 +fi +if [[ -z "$TASK_TEXT" && -z "$TASK_FILE" ]]; then + echo "missing --task or --task-file" >&2 + usage >&2 + exit 64 +fi + +CODEX_CMD="${CODEX_CMD:-codex}" +MAX_ITERATIONS="${MAX_ITERATIONS:-0}" +SLEEP_SECONDS="${SLEEP_SECONDS:-5}" +IMPLEMENTER_TAIL_BYTES="${IMPLEMENTER_TAIL_BYTES:-24000}" +FEEDBACK_TAIL_BYTES="${FEEDBACK_TAIL_BYTES:-32000}" +CODEX_UNTIL_PASS_BYPASS_SANDBOX="${CODEX_UNTIL_PASS_BYPASS_SANDBOX:-1}" + +if [[ -z "$WORK_DIR" ]]; then + WORK_DIR=".codex-until-pass/$(date +%Y%m%d-%H%M%S)" +fi +mkdir -p "$WORK_DIR" + +RAW_TASK="$WORK_DIR/task.raw.md" +TASK="$WORK_DIR/task.md" +FEEDBACK="$WORK_DIR/feedback.md" +LOG="$WORK_DIR/log.md" +PASS_FILE="$WORK_DIR/passed-on-iteration.txt" + +if [[ -n "$TASK_FILE" ]]; then + cp "$TASK_FILE" "$RAW_TASK" +else + printf '%s\n' "$TASK_TEXT" > "$RAW_TASK" +fi + +# Prevent nested Codex runs from interpreting task text as another request to +# launch this or any adversarial loop. The original text remains in task.raw.md. 
+sed \ + -e 's/\$adversarial-loop/[adversarial-loop trigger disabled inside codex-until-pass]/g' \ + -e 's#/adversarial-loop#[adversarial-loop trigger disabled inside codex-until-pass]#g' \ + "$RAW_TASK" > "$TASK" + +: > "$FEEDBACK" +: > "$LOG" + +codex_args=() +if [[ "$CODEX_UNTIL_PASS_BYPASS_SANDBOX" == "1" ]]; then + codex_args+=(--dangerously-bypass-approvals-and-sandbox) +fi + +iteration=0 +while true; do + iteration=$((iteration + 1)) + if [[ "$MAX_ITERATIONS" != "0" && "$iteration" -gt "$MAX_ITERATIONS" ]]; then + echo "Did not converge after $MAX_ITERATIONS iterations" | tee -a "$LOG" + exit 1 + fi + + echo "=== Iteration $iteration ===" | tee -a "$LOG" + + impl_prompt="$WORK_DIR/iter-$iteration-impl-prompt.md" + impl_out="$WORK_DIR/iter-$iteration-impl-output.md" + impl_status="$WORK_DIR/iter-$iteration-impl-status" + verify_prompt="$WORK_DIR/iter-$iteration-verify-prompt.md" + verify_out="$WORK_DIR/iter-$iteration-verify-output.md" + verify_status="$WORK_DIR/iter-$iteration-verify-status" + + { + echo "# Task" + cat "$TASK" + echo + echo "# Harness Rules" + echo "- You are already inside scripts/codex-until-pass.sh." + echo "- Do not invoke adversarial-loop, codex-until-pass, or any recursive Codex restart loop." + echo "- Make real edits in this repository. Do not stop at a proposal." + echo "- If full completion is impossible, implement the next concrete blocker and explain the remaining blocker precisely." + echo + echo "# Prior Verifier Feedback" + if [[ -s "$FEEDBACK" ]]; then + cat "$FEEDBACK" + else + echo "(none)" + fi + echo + echo "# Final Output Contract" + echo "End with a compact summary of files changed and checks run." + } > "$impl_prompt" + + set +e + "$CODEX_CMD" exec "${codex_args[@]}" < "$impl_prompt" > "$impl_out" 2>&1 + code=$? 
+ set -e + printf '%s\n' "$code" > "$impl_status" + if [[ "$code" -ne 0 ]]; then + echo "Implementer exited $code on iteration $iteration; restarting after ${SLEEP_SECONDS}s" | tee -a "$LOG" + sleep "$SLEEP_SECONDS" + continue + fi + + git_status_file="$WORK_DIR/iter-$iteration-git-status.txt" + git_diff_stat_file="$WORK_DIR/iter-$iteration-git-diff-stat.txt" + git status --short > "$git_status_file" 2>&1 || true + git diff --stat > "$git_diff_stat_file" 2>&1 || true + + { + echo "# Task" + cat "$TASK" + echo + echo "# Verifier Instructions" + echo "Independently inspect the actual working tree. Do not trust the implementer output." + echo "Run whatever checks are needed. Do not make edits." + echo "PASS only when the task is fully complete by code, tests, and docs where relevant." + echo "If not complete, include a concise '## Issues' section with concrete actionable blockers." + echo + echo "End with exactly one final line:" + echo "VERDICT: PASS" + echo "or" + echo "VERDICT: FAIL" + echo + echo "# Current Git Status" + cat "$git_status_file" + echo + echo "# Current Diff Stat" + cat "$git_diff_stat_file" + echo + echo "# Implementer Output Tail" + tail -c "$IMPLEMENTER_TAIL_BYTES" "$impl_out" + } > "$verify_prompt" + + set +e + "$CODEX_CMD" exec "${codex_args[@]}" < "$verify_prompt" > "$verify_out" 2>&1 + code=$? 
+ set -e + printf '%s\n' "$code" > "$verify_status" + if [[ "$code" -ne 0 ]]; then + echo "Verifier exited $code on iteration $iteration; restarting verifier/implementer after ${SLEEP_SECONDS}s" | tee -a "$LOG" + sleep "$SLEEP_SECONDS" + continue + fi + + if tail -n 20 "$verify_out" | grep -qx 'VERDICT: PASS'; then + echo "PASS on iteration $iteration" | tee -a "$LOG" + printf '%s\n' "$iteration" > "$PASS_FILE" + exit 0 + fi + + echo "FAIL on iteration $iteration; feeding capped verifier output back to implementer" | tee -a "$LOG" + { + echo + echo "## Iteration $iteration Verifier Feedback" + tail -c "$FEEDBACK_TAIL_BYTES" "$verify_out" + } >> "$FEEDBACK" +done From fbf714ccea5f63aa49a65799d0586698a3e71116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 00:18:57 +0200 Subject: [PATCH 27/39] Harden remote WordPress COW experiment --- experiments/remote-wp-cow/.dockerignore | 5 + experiments/remote-wp-cow/.env.example | 47 +- experiments/remote-wp-cow/README.md | 30 +- experiments/remote-wp-cow/compose.yaml | 1 + experiments/remote-wp-cow/docker/Dockerfile | 1 - .../scripts/live-site-acceptance.sh | 476 +++++ .../remote-wp-cow/scripts/strict-harness.sh | 106 ++ experiments/remote-wp-cow/src/cli.rs | 187 +- experiments/remote-wp-cow/src/config.rs | 26 + experiments/remote-wp-cow/src/db.rs | 354 +++- experiments/remote-wp-cow/src/fusefs.rs | 313 +++- experiments/remote-wp-cow/src/generate.rs | 1527 ++++++++++++++++- experiments/remote-wp-cow/src/overlay.rs | 438 ++++- experiments/remote-wp-cow/src/remote.rs | 85 +- experiments/remote-wp-cow/src/row_cow.rs | 240 ++- experiments/remote-wp-cow/src/run.rs | 346 ++-- experiments/remote-wp-cow/src/sql.rs | 118 +- 17 files changed, 3916 insertions(+), 384 deletions(-) create mode 100755 experiments/remote-wp-cow/scripts/live-site-acceptance.sh create mode 100755 experiments/remote-wp-cow/scripts/strict-harness.sh diff --git a/experiments/remote-wp-cow/.dockerignore 
b/experiments/remote-wp-cow/.dockerignore index eed9c00b..aef7117e 100644 --- a/experiments/remote-wp-cow/.dockerignore +++ b/experiments/remote-wp-cow/.dockerignore @@ -1,3 +1,8 @@ /target/ /.adversarial-loop/ /.git/ +/.env +/.env.* +!/.env.example +/*.log +/.wp-cow/ diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 911cb1c0..06411e67 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -1,31 +1,48 @@ +# Docker lab defaults for wp-cow. +# +# Copy this file to .env, fill in the remote site values, then run: +# docker compose build +# docker compose up -d +# docker compose exec wp-cow-lab bash + +# Host port exposed by Docker Desktop. The container always listens on 8080. +WPCOW_HTTP_PORT=9481 + +# Clone identity and remote WordPress site. WPCOW_NAME=example -WPCOW_SSH=mysite -WPCOW_PATH=/home/user/public_html -WPCOW_REMOTE_URL=https://example.com -# Leave blank to derive http://localhost:$WPCOW_HTTP_PORT in wp-cow-lab-serve. +WPCOW_SSH= +WPCOW_PATH= +WPCOW_REMOTE_URL= + +# Leave blank to derive http://localhost:${WPCOW_HTTP_PORT}. WPCOW_LOCAL_URL= -WPCOW_HTTP_PORT=8080 + +# Docker Desktop resolver fallback. WPCOW_DNS1=1.1.1.1 WPCOW_DNS2=8.8.8.8 -WPCOW_CACHE_MAX_FILE_MB=64 + +# Runtime defaults. 
+WPCOW_WEB_SERVER=frankenphp +WPCOW_SPLASH=1 WPCOW_REMOTE_DB_TUNNEL=1 -WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 -WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 -WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 +WPCOW_ENABLE_PLUGINS=1 +WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_FUSE_TTL_SECS=60 +WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 +WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 +WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 WPCOW_PHP_MAX_EXECUTION_SECS=90 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 WPCOW_PHP_WORKERS=4 -WPCOW_WEB_SERVER=frankenphp -WPCOW_SPLASH=1 -WPCOW_LOCAL_ADMIN_PASSWORD= + +# Optional local-only admin override used with wp-cow-lab-sever. +# This updates only the local clone DB after the relevant user rows are copied. WPCOW_LOCAL_ADMIN_LOGIN= +WPCOW_LOCAL_ADMIN_PASSWORD= -# Set this to 1 for a filesystem-only smoke test that does not export DB schema. +# Testing/debug switches. WPCOW_SKIP_SCHEMA=0 - -# Set this to 1 only when you want to use defaults instead of probing WordPress. WPCOW_NO_PROBE=0 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index b1014c15..43bbdd28 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -24,6 +24,16 @@ model, but they should not be read as a proposed final integration shape. cargo build ``` +## Strict harness + +```bash +scripts/strict-harness.sh +``` + +The harness runs the full Rust/PHP test suite plus targeted checks for lazy +file caching, installer blocking, row-level DB write isolation, offline guards, +FrankenPHP routing, local admin override wiring, and Docker lab port exposure. + ## Docker lab on macOS Use this when you are on a Mac and want a Linux shell with FUSE, FrankenPHP, @@ -178,8 +188,12 @@ renders to reuse the program files WordPress just touched. 
The Docker lab defaults `WPCOW_FUSE_TTL_SECS` to `60`; lower values make live remote changes visible sooner, while higher values reduce repeated path walking. FrankenPHP also enables OPcache for parsed PHP code in the local web runtime. -`WPCOW_PREFETCH_RUNTIME=1` can still be used for an explicit background warm, -but it is off by default so normal `serve` remains request-driven. +There is no recursive runtime warm-up: PHP files, themes, plugins, and uploads +are fetched only when a request touches them, then cached for repeated reads. +Remote plugin and language directories stay visible through the lazy lower +layer by default so the local site can render the same active code as the +remote site. Set `WPCOW_ENABLE_PLUGINS=0` only when you need to suppress active +plugins during testing; files still remain lazy and are not copied up front. The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the @@ -201,11 +215,13 @@ wp-cow-lab-sever wp-cow-lab-run ``` -`wp-cow-lab-sever` materializes the core WordPress tables into local MariaDB, -sets the admin password only in the local DB when requested, caches the -WordPress admin/runtime program files needed for offline admin access, and -writes `run/offline.json`. After that marker exists, `wp-cow run` does not open -SSH, does not start the remote DB tunnel, and routes DB reads locally. +`wp-cow-lab-sever` materializes only the WordPress tables already touched by the +clone, plus the user tables needed for a requested local admin password +override. It does not walk or prefetch the remote WordPress tree; pages and +admin screens you want available offline must be loaded once before severing so +their PHP files and DB rows are already materialized. It then writes +`run/offline.json`. After that marker exists, `wp-cow run` does not open SSH, +does not start the remote DB tunnel, and routes DB reads locally. 
Open this on the Mac: diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 9567d679..87915159 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -35,6 +35,7 @@ services: WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" + WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-1}" WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" diff --git a/experiments/remote-wp-cow/docker/Dockerfile b/experiments/remote-wp-cow/docker/Dockerfile index d3803f02..b0b2bd1c 100644 --- a/experiments/remote-wp-cow/docker/Dockerfile +++ b/experiments/remote-wp-cow/docker/Dockerfile @@ -33,7 +33,6 @@ RUN install-php-extensions mysqli opcache pdo_mysql \ mariadb-server \ openssh-client \ pkg-config \ - rsync \ tini \ vim-tiny \ && rm -rf /var/lib/apt/lists/* diff --git a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh new file mode 100755 index 00000000..52230ce1 --- /dev/null +++ b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh @@ -0,0 +1,476 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$ROOT" + +fail() { + echo "live-acceptance: $*" >&2 + exit 1 +} + +need_cmd() { + command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" +} + +require_env() { + local name="$1" + if [ -z "${!name:-}" ]; then + fail "missing required environment variable: $name" + fi +} + +wait_for_tcp() { + local host="$1" + local port="$2" + local timeout="${3:-20}" + local start + start="$(date +%s)" + while true; do + if (exec 3<>"/dev/tcp/$host/$port") >/dev/null 2>&1; then + exec 3>&- + exec 3<&- + return 0 + fi + if [ $(( $(date +%s) - start )) -ge "$timeout" ]; then + return 1 + fi + sleep 0.2 + done +} + +wait_for_tcp_closed() { + local host="$1" + local port="$2" + local timeout="${3:-20}" + local start + start="$(date +%s)" + while true; do + if ! (exec 3<>"/dev/tcp/$host/$port") >/dev/null 2>&1; then + return 0 + fi + exec 3>&- + exec 3<&- + if [ $(( $(date +%s) - start )) -ge "$timeout" ]; then + return 1 + fi + sleep 0.2 + done +} + +kill_lingering_runtime_processes() { + local pid args + while read -r pid args; do + [ -n "${pid:-}" ] || continue + [ "$pid" = "$$" ] && continue + case "$args" in + *"$WORK_DIR"*) + case "$args" in + *mariadbd*|*mariadb-install-db*|*mysqladmin*|*mysql\ *) ;; + *) kill "$pid" >/dev/null 2>&1 || true ;; + esac + ;; + esac + done < <(ps -eo pid=,args=) +} + +unmount_acceptance_mountpoint() { + [ -n "${MOUNTPOINT:-}" ] || return 0 + fusermount3 -u "$MOUNTPOINT" >/dev/null 2>&1 || + fusermount3 -uz "$MOUNTPOINT" >/dev/null 2>&1 || + fusermount -u "$MOUNTPOINT" >/dev/null 2>&1 || + fusermount -uz "$MOUNTPOINT" >/dev/null 2>&1 || + true +} + +http_body() { + local url="$1" + local output="$2" + local max_time="${3:-60}" + local status + status="$(curl -L -sS --max-time "$max_time" --connect-timeout 5 \ + -o "$output" -w '%{http_code}' "$url")" + case "$status" in + 2*|3*) return 0 ;; + *) echo "HTTP $status for $url" >&2; return 1 ;; + esac +} + +mysql_exec() { + mysql --protocol=TCP -h127.0.0.1 -P33071 -uroot "$@" 
+} + +mysql_scalar() { + mysql_exec --batch --raw --skip-column-names --execute "$1" +} + +remote_post_count() { + local title="$1" + local code + code=' +error_reporting(0); +if (!defined("WP_INSTALLING")) { define("WP_INSTALLING", true); } +require_once rtrim(getcwd(), "/") . "/wp-load.php"; +global $wpdb; +$title = $argv[1]; +$count = (int) $wpdb->get_var($wpdb->prepare("SELECT COUNT(*) FROM {$wpdb->posts} WHERE post_title = %s", $title)); +echo $count, "\n"; +' + HOME="$SSH_HOME" ssh "$SSH_TARGET" "cd '$WPCOW_PATH' && php -r $(printf '%q' "$code") -- $(printf '%q' "$title")" +} + +cleanup() { + set +e + if [ -n "${SERVE_PID:-}" ] && kill -0 "$SERVE_PID" >/dev/null 2>&1; then + kill "$SERVE_PID" >/dev/null 2>&1 || true + wait "$SERVE_PID" >/dev/null 2>&1 || true + fi + if [ -n "${WORK_DIR:-}" ]; then + for pid in $(pgrep -f "$WORK_DIR" 2>/dev/null || true); do + if [ "$pid" != "$$" ]; then + kill "$pid" >/dev/null 2>&1 || true + fi + done + fi + unmount_acceptance_mountpoint + if [ -n "${MYSQL_PID:-}" ] && kill -0 "$MYSQL_PID" >/dev/null 2>&1; then + kill "$MYSQL_PID" >/dev/null 2>&1 || true + wait "$MYSQL_PID" >/dev/null 2>&1 || true + fi + if [ "${WPCOW_KEEP_ACCEPTANCE_STATE:-0}" != "1" ] && [ -n "${WORK_DIR:-}" ]; then + rm -rf "$WORK_DIR" + elif [ -n "${WORK_DIR:-}" ]; then + echo "live-acceptance: kept state at $WORK_DIR" + fi +} +trap cleanup EXIT + +require_env WPCOW_SSH +require_env WPCOW_PATH +require_env WPCOW_REMOTE_URL + +need_cmd cargo +need_cmd curl +need_cmd mariadb-install-db +need_cmd mariadbd +need_cmd mysql +need_cmd mysqladmin +need_cmd php +need_cmd ssh +need_cmd fusermount3 + +cargo build --locked + +WP_COW_BIN="${WP_COW_BIN:-$ROOT/target/debug/wp-cow}" +NAME="${WPCOW_NAME:-live-acceptance}" +HTTP_PORT="${WPCOW_HTTP_PORT:-9481}" +HTTP_ADDR="${WPCOW_HTTP:-127.0.0.1:${HTTP_PORT}}" +LOCAL_URL="${WPCOW_LOCAL_URL:-http://127.0.0.1:${HTTP_PORT}}" +ADMIN_PASSWORD="${WPCOW_LOCAL_ADMIN_PASSWORD:-8u239huiwdsj91das}" 
+EXPECT_TEXT="${WPCOW_EXPECT_TEXT:-}" +WORK_DIR="${WPCOW_ACCEPTANCE_WORK_DIR:-$(mktemp -d /tmp/wp-cow-live-acceptance.XXXXXX)}" +STATE_DIR="$WORK_DIR/state" +MOUNTPOINT="$WORK_DIR/mount" +UPPER_DIR="$STATE_DIR/clones/$NAME/upper" +MYSQL_DATA="$WORK_DIR/mysql-data" +MYSQL_SOCKET="$WORK_DIR/mysql.sock" +MYSQL_LOG="$WORK_DIR/mariadb.log" +MYSQL_INSTALL_LOG="$WORK_DIR/mariadb-install.log" +SERVE_LOG="$WORK_DIR/serve.log" +COOKIE_JAR="$WORK_DIR/cookies.txt" +TITLE="WP COW Local Only $(date +%s)-$$" + +mkdir -p "$MOUNTPOINT" "$STATE_DIR" + +SSH_TARGET="$WPCOW_SSH" +SSH_HOME="$HOME" +if [[ "$WPCOW_SSH" == *[[:space:]]* ]]; then + # Accept the same pasted SSH command shape as the Docker helper by creating a + # temporary OpenSSH host alias for this acceptance run. + SSH_TARGET="wp-cow-live-acceptance" + SSH_HOME="$WORK_DIR/ssh-home" + SSH_CONFIG="$SSH_HOME/.ssh/config" + mkdir -p "$SSH_HOME/.ssh" + chmod 700 "$SSH_HOME/.ssh" + eval "set -- $WPCOW_SSH" + [ "${1:-}" = "ssh" ] && shift + host="" + user="" + port="" + identity="" + while [ "$#" -gt 0 ]; do + case "$1" in + -p) port="${2:-}"; shift 2 ;; + -p*) port="${1#-p}"; shift ;; + -i) identity="${2:-}"; shift 2 ;; + -i*) identity="${1#-i}"; shift ;; + -l) user="${2:-}"; shift 2 ;; + -l*) user="${1#-l}"; shift ;; + -o) shift 2 ;; + -o*) shift ;; + ssh) shift ;; + --) shift; break ;; + -*) fail "unsupported SSH option in WPCOW_SSH for live acceptance: $1" ;; + *) host="$1"; shift ;; + esac + done + if [[ "$host" == *@* ]]; then + [ -z "$user" ] && user="${host%@*}" + host="${host#*@}" + fi + [ -n "$host" ] || fail "could not parse SSH host from WPCOW_SSH" + if [[ "$identity" == "~/"* ]]; then + identity="$HOME/${identity#~/}" + fi + { + echo "Host $SSH_TARGET" + echo " HostName $host" + [ -n "$user" ] && echo " User $user" + [ -n "$port" ] && echo " Port $port" + [ -n "$identity" ] && echo " IdentityFile $identity" + [ -n "$identity" ] && echo " IdentitiesOnly yes" + echo " BatchMode yes" + echo " StrictHostKeyChecking 
accept-new" + } > "$SSH_CONFIG" + chmod 600 "$SSH_CONFIG" +fi + +if mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + fail "port 33071 already has a MySQL server; stop it or run inside the Docker lab" +fi + +MARIADBD_PATH="$(readlink -f "$(command -v mariadbd)")" +BASE_DIR="$(cd "$(dirname "$MARIADBD_PATH")/.." && pwd)" +if ! mariadb-install-db \ + "--basedir=$BASE_DIR" \ + "--datadir=$MYSQL_DATA" \ + --innodb-log-file-size=16M \ + --innodb-buffer-pool-size=64M \ + --auth-root-authentication-method=normal \ + --skip-test-db \ + >"$MYSQL_INSTALL_LOG" 2>&1; then + cat "$MYSQL_INSTALL_LOG" >&2 + fail "mariadb-install-db failed" +fi + +mariadbd \ + --no-defaults \ + "--basedir=$BASE_DIR" \ + "--datadir=$MYSQL_DATA" \ + "--socket=$MYSQL_SOCKET" \ + --port=33071 \ + --bind-address=127.0.0.1 \ + "--pid-file=$WORK_DIR/mysql.pid" \ + --innodb-log-file-size=16M \ + --innodb-buffer-pool-size=64M \ + --aria-pagecache-buffer-size=8M \ + --key-buffer-size=8M \ + --skip-networking=0 \ + --skip-grant-tables \ + >"$MYSQL_LOG" 2>&1 & +MYSQL_PID="$!" 
+ +for _ in $(seq 1 100); do + if mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1; then + break + fi + sleep 0.2 +done +mysqladmin --protocol=tcp --host=127.0.0.1 --port=33071 --user=root ping >/dev/null 2>&1 || + fail "temporary MariaDB did not start; see $MYSQL_LOG" + +before_remote="$(remote_post_count "$TITLE" | tr -d '[:space:]')" +[ "$before_remote" = "0" ] || fail "remote already has unexpected acceptance title" + +WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ +WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ +WPCOW_PROXY_FRONTEND=0 \ +WPCOW_REMOTE_QUERY_CACHE=1 \ +WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS="${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" \ +WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ +WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ +HOME="$SSH_HOME" \ +"$WP_COW_BIN" serve \ + --state-dir "$STATE_DIR" \ + --name "$NAME" \ + --ssh "$SSH_TARGET" \ + --path "$WPCOW_PATH" \ + --remote-url "$WPCOW_REMOTE_URL" \ + --local-url "$LOCAL_URL" \ + --mountpoint "$MOUNTPOINT" \ + --http "$HTTP_ADDR" \ + >"$SERVE_LOG" 2>&1 & +SERVE_PID="$!" 
+ +host="${HTTP_ADDR%:*}" +port="${HTTP_ADDR##*:}" +wait_for_tcp "$host" "$port" 45 || { + tail -n 200 "$SERVE_LOG" >&2 || true + fail "wp-cow server did not open $HTTP_ADDR" +} + +first_splash="$WORK_DIR/first-splash.html" +http_body "$LOCAL_URL/" "$first_splash" 10 || { + tail -n 200 "$SERVE_LOG" >&2 || true + fail "first splash/progress request failed" +} +if rg -qi 'WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error|wp-cow did not load the remote site' "$first_splash"; then + sed -n '1,80p' "$first_splash" >&2 + fail "first splash request returned installer or wp-cow runtime error" +fi + +first_body="$WORK_DIR/first.html" +actual_timeout="${WPCOW_ACTUAL_TIMEOUT_SECS:-180}" +http_body "$LOCAL_URL/?__wp_cow_bypass_splash=1" "$first_body" "$actual_timeout" || { + tail -n 200 "$SERVE_LOG" >&2 || true + fail "first WordPress request failed" +} +if rg -qi 'WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error|wp-cow did not load the remote site' "$first_body"; then + sed -n '1,80p' "$first_body" >&2 + fail "first request returned installer or wp-cow runtime error" +fi +if [ -n "$EXPECT_TEXT" ]; then + rg -q "$EXPECT_TEXT" "$first_body" || fail "first response did not contain WPCOW_EXPECT_TEXT=$EXPECT_TEXT" +fi + +second_body="$WORK_DIR/second.html" +http_body "$LOCAL_URL/?__wp_cow_bypass_splash=1" "$second_body" "${WPCOW_SECOND_TIMEOUT_SECS:-60}" || + fail "second cached WordPress request failed" + +php_create="$WORK_DIR/create-local-page.php" +cat > "$php_create" <<'PHP' + $title, + 'post_content' => 'local-only acceptance content', + 'post_status' => 'publish', + 'post_type' => 'page', +), true); +if (is_wp_error($post_id)) { + fwrite(STDERR, $post_id->get_error_message() . "\n"); + exit(1); +} +echo 'WPCOW_POST_ID=' . (int) $post_id . 
"\n"; +PHP +mkdir -p "$UPPER_DIR" +cp "$php_create" "$UPPER_DIR/.wp-cow-create-local-page.php" +post_output="$( + cd "$MOUNTPOINT" && + WPCOW_ACCEPTANCE_TITLE="$TITLE" \ + WPCOW_ACCEPTANCE_HTTP_HOST="${LOCAL_URL#http://}" \ + php .wp-cow-create-local-page.php +)" +printf '%s\n' "$post_output" > "$WORK_DIR/create-local-page.out" +post_id="$(sed -n 's/^WPCOW_POST_ID=//p' "$WORK_DIR/create-local-page.out" | tail -n 1 | tr -dc '0-9')" +[ -n "$post_id" ] || fail "local wp_insert_post did not return a post id" + +local_body="$WORK_DIR/local-page.html" +http_body "$LOCAL_URL/?p=$post_id&__wp_cow_bypass_splash=1" "$local_body" 30 || + fail "local-only page did not render" +rg -q "$TITLE" "$local_body" || fail "local-only page response did not contain its title" + +after_remote="$(remote_post_count "$TITLE" | tr -d '[:space:]')" +[ "$after_remote" = "0" ] || fail "local-only page title appeared in remote database" + +sever_log="$WORK_DIR/sever.log" +HOME="$SSH_HOME" \ +"$WP_COW_BIN" sever "$NAME" \ + --state-dir "$STATE_DIR" \ + --admin-password "$ADMIN_PASSWORD" \ + >"$sever_log" 2>&1 || { + cat "$sever_log" >&2 + fail "wp-cow sever failed" + } +admin_user="$(sed -n "s/.*set local administrator password for '\([^']*\)'.*/\1/p" "$sever_log" | tail -n 1)" +[ -n "$admin_user" ] || fail "could not determine local admin user from sever output" + +kill "$SERVE_PID" >/dev/null 2>&1 || true +wait "$SERVE_PID" >/dev/null 2>&1 || true +SERVE_PID="" +kill_lingering_runtime_processes +wait_for_tcp_closed "$host" "$port" 30 || + fail "old web server did not release $HTTP_ADDR before offline restart" +wait_for_tcp_closed 127.0.0.1 39070 30 || + fail "old control server did not release 127.0.0.1:39070 before offline restart" +wait_for_tcp_closed 127.0.0.1 33070 30 || + fail "old MySQL proxy did not release 127.0.0.1:33070 before offline restart" +wait_for_tcp_closed 127.0.0.1 33072 30 || true +unmount_acceptance_mountpoint + +manifest="$STATE_DIR/clones/$NAME/manifest.json" +php -r 
'$p=$argv[1]; $j=json_decode(file_get_contents($p), true); $j["ssh"]="wp-cow-offline-should-not-connect"; file_put_contents($p, json_encode($j, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES) . "\n");' "$manifest" + +WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ +WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ +WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ +WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ +HOME="$SSH_HOME" \ +"$WP_COW_BIN" run "$NAME" \ + --state-dir "$STATE_DIR" \ + --mountpoint "$MOUNTPOINT" \ + --http "$HTTP_ADDR" \ + >"$SERVE_LOG.offline" 2>&1 & +SERVE_PID="$!" +wait_for_tcp "$host" "$port" 30 || { + tail -n 200 "$SERVE_LOG.offline" >&2 || true + fail "offline wp-cow server did not open $HTTP_ADDR" +} + +offline_body="$WORK_DIR/offline-local-page.html" +http_body "$LOCAL_URL/?p=$post_id&__wp_cow_bypass_splash=1" "$offline_body" 30 || + fail "offline local-only page did not render" +rg -q "$TITLE" "$offline_body" || fail "offline refresh did not use local materialized post" + +login_body="$WORK_DIR/login.html" +login_status="$(curl -L -sS --max-time 30 --connect-timeout 5 \ + -c "$COOKIE_JAR" -b "$COOKIE_JAR" \ + -d "log=$admin_user" \ + -d "pwd=$ADMIN_PASSWORD" \ + -d "wp-submit=Log In" \ + -d "redirect_to=$LOCAL_URL/wp-admin/" \ + -d "testcookie=1" \ + -o "$login_body" \ + -w '%{http_code}' \ + "$LOCAL_URL/wp-login.php")" +case "$login_status" in + 2*|3*) ;; + *) fail "local admin login returned HTTP $login_status" ;; +esac +rg -q 'wordpress_logged_in' "$COOKIE_JAR" || fail "local admin login did not set wordpress_logged_in cookie" + +admin_body="$WORK_DIR/admin.html" +http_status="$(curl -L -sS --max-time 30 --connect-timeout 5 \ + -c "$COOKIE_JAR" -b "$COOKIE_JAR" \ + -o "$admin_body" \ + -w '%{http_code}' \ + "$LOCAL_URL/wp-admin/")" +case "$http_status" in + 2*|3*) ;; + *) fail "wp-admin returned HTTP $http_status after login" ;; +esac +if rg -qi ']+id="loginform"|name="loginform"|wp-login.php' "$admin_body"; then + 
fail "wp-admin still shows login form after local admin login" +fi + +cache_files="$(find "$STATE_DIR/clones/$NAME/file-cache" -type f | wc -l | tr -d ' ')" +cache_bytes="$(du -sb "$STATE_DIR/clones/$NAME/file-cache" | awk '{print $1}')" +if [ -d "$STATE_DIR/clones/$NAME/file-cache/mirror/wp-content/uploads" ]; then + fail "uploads directory was mirrored into the file cache" +fi + +cat <&2 + exit 1 +} + +need_pattern() { + local file="$1" + local pattern="$2" + local label="$3" + rg -q "$pattern" "$file" || fail "$label missing in $file" +} + +deny_pattern() { + local file="$1" + local pattern="$2" + local label="$3" + if rg -q "$pattern" "$file"; then + fail "$label found in $file" + fi +} + +run_exact_test() { + local test_name="$1" + cargo test --locked "$test_name" -- --exact --nocapture +} + +run_exact_ignored_test() { + local test_name="$1" + cargo test --locked "$test_name" -- --exact --ignored --nocapture +} + +echo "== full Rust/PHP unit suite ==" +cargo test --locked + +echo "== targeted behavior proofs ==" +run_exact_test overlay::tests::lazy_remote_file_is_cached_and_survives_remote_loss +run_exact_test cli::tests::offline_core_runtime_cache_is_bounded_to_wordpress_core +run_exact_test overlay::tests::cached_only_copy_up_uses_materialized_files_without_remote +run_exact_test fusefs::tests::offline_readdir_uses_cached_remote_metadata_without_remote +run_exact_test fusefs::tests::remote_stat_metadata_survives_severed_mode_without_remote +run_exact_test generate::tests::router_splash_and_progress_smoke_responds_quickly +run_exact_test row_cow::tests::select_materializes_remote_rows_for_later_offline_reads +run_exact_test row_cow::tests::local_insert_is_not_sent_to_remote_and_appears_in_merged_select +run_exact_test row_cow::tests::update_copy_up_fetches_only_affected_primary_keys +run_exact_test row_cow::tests::delete_tombstone_hides_remote_row_from_merged_selects +run_exact_test run::tests::frankenphp_routes_wp_admin_directory_to_index +run_exact_test 
run::tests::frankenphp_routes_installer_paths_through_runtime_guard +run_exact_test sql::tests::extract_tables_preserves_wordpress_table_case_for_proxy_cow +run_exact_ignored_test generate::tests::runtime_cow_harness_proves_admin_login_local_mutation_and_offline_refresh +run_exact_ignored_test generate::tests::production_run_harness_proves_fuse_rust_control_and_offline_refresh + +echo "== implementation invariants ==" +need_pattern src/cli.rs 'Command::Serve' "one-command serve subcommand" +need_pattern src/cli.rs 'Command::Sever' "sever/offline subcommand" +need_pattern src/cli.rs 'cache_offline_core_runtime' "offline login/admin core runtime cache" +need_pattern src/cli.rs 'wp-content/uploads' "offline core runtime cache excludes uploads" +need_pattern src/config.rs 'offline\.json' "offline marker" +need_pattern src/run.rs 'WPCOW_WEB_SERVER' "web-server selection" +need_pattern src/run.rs 'falling back to PHP' "FrankenPHP unavailable fallback" +need_pattern src/run.rs 'start_php_dev_server' "PHP dev-server fallback" +need_pattern src/run.rs '@wpCowInstaller path /wp-admin/install\.php /wp-admin/setup-config\.php' "FrankenPHP installer guard route" +need_pattern src/run.rs '__wp_cow_installer_guard=1' "FrankenPHP installer guard router flag" +need_pattern src/fusefs.rs 'clone is severed and file is not cached locally' "offline cached-file guard" +need_pattern src/fusefs.rs 'copy_up_cached_only' "offline write-open cached-only copy-up" +need_pattern src/fusefs.rs 'put_cached_entry\(rel, &entry\)' "FUSE stat metadata persistence" +need_pattern src/overlay.rs 'clone is severed and writable lower file is not cached locally' "offline write-open remote guard" +need_pattern src/control.rs 'clone is severed from the remote database' "offline remote-DB guard" +need_pattern src/generate.rs 'will not fall back to the empty local schema' "installer/runtime failure guard" +need_pattern src/generate.rs 'wp_cow_looks_like_installer' "installer response detector" +need_pattern 
src/generate.rs '__wp_cow_installer_guard' "direct installer route guard" +need_pattern src/generate.rs "'1' !== getenv\\( 'WPCOW_PROXY_FRONTEND' \\)" "local-first frontend default" +need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" +need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" +need_pattern src/row_cow.rs 'LocalOnlyInsert' "local-only content mutation path" +need_pattern src/generate.rs 'production_run_harness_proves_fuse_rust_control_and_offline_refresh' "strict production FUSE/control harness" +need_pattern src/generate.rs 'run_site_with_shutdown' "strict harness production run entry" +need_pattern src/generate.rs 'install_fake_ssh' "strict harness fake SSH remote" +need_pattern src/generate.rs 'read_line_count\(&fake_ssh_log\)' "strict harness offline no-SSH assertion" +need_pattern compose.yaml '\$\{WPCOW_HTTP_PORT:-8080\}:8080' "Docker host HTTP port exposure" +need_pattern compose.yaml 'WPCOW_HTTP: 0\.0\.0\.0:8080' "Docker in-container HTTP listener" +need_pattern .dockerignore '^/target/$' "Docker build context target exclusion" +need_pattern .dockerignore '^/\.env$' "Docker build context local env exclusion" +need_pattern .dockerignore '^!/\.env\.example$' "Docker build context env example inclusion" +need_pattern docker/wp-cow-lab-serve 'wp-cow serve' "Docker one-command serve wrapper" +need_pattern docker/wp-cow-lab-sever 'WPCOW_LOCAL_ADMIN_PASSWORD' "Docker local admin override wiring" +need_pattern .env.example '^WPCOW_HTTP_PORT=9481$' "Docker lab example host HTTP port" +need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example FrankenPHP preference" +need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" +need_pattern .env.example '^WPCOW_LOCAL_ADMIN_PASSWORD=$' "Docker lab example local admin override" + +deny_pattern src 'rsync|scp[[:space:]]+-r' "eager source tree copy command" +deny_pattern src/cli.rs 'wordpress_offline_table_names' 
"full core-table sever materialization" +deny_pattern src/cli.rs 'prefetch_runtime_files' "sever-triggered runtime prefetch" +deny_pattern src/run.rs 'WPCOW_PREFETCH_RUNTIME|prefetch_runtime_files|wp-cow-runtime-prefetch' "background runtime prefetch" +deny_pattern src/run.rs 'tar[[:space:]]+-cf[[:space:]]+-' "recursive remote tar runtime prefetch" +deny_pattern docker/Dockerfile 'rsync|scp[[:space:]]+-r' "eager copy tooling" +deny_pattern docker/wp-cow-lab-serve 'rsync|scp[[:space:]]+-r' "eager lab serve copy command" +deny_pattern docker/wp-cow-lab-run 'rsync|scp[[:space:]]+-r' "eager lab run copy command" + +echo "strict-harness: PASS" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index 3fcb2dc2..88076616 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -1,7 +1,8 @@ use anyhow::{anyhow, Context, Result}; use clap::{Args, Parser, Subcommand}; +use std::collections::{BTreeSet, VecDeque}; use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::{Instant, SystemTime, UNIX_EPOCH}; use crate::config::{ @@ -10,6 +11,7 @@ use crate::config::{ }; use crate::db; use crate::generate; +use crate::overlay::OverlayStore; use crate::remote::{probe_wordpress, RemoteClient}; use crate::run::{self, RunOptions}; @@ -216,7 +218,10 @@ fn clone_site(args: CloneArgs) -> Result<()> { db::write_state(&paths, &db::DbState::default())?; if !args.skip_schema && !args.no_probe { - let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); remote.ensure_master()?; db::export_schema(&remote, &paths).context("export schema")?; } @@ -350,7 +355,10 @@ fn serve_site(args: ServeArgs) -> Result<()> { "schema is missing and --no-probe prevents discovering remote DB settings" )); } - let remote = RemoteClient::new(manifest.clone(), 
Some(paths.run.join("ssh-control.sock"))); + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); remote.ensure_master()?; db::export_schema(&remote, &paths).context("export schema")?; println!( @@ -404,7 +412,10 @@ fn export_schema(args: NameArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); let manifest = load_manifest(&paths.manifest)?; - let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); remote.ensure_master()?; db::export_schema(&remote, &paths)?; println!("exported remote schema for '{}'", manifest.name); @@ -415,7 +426,10 @@ fn materialize(args: MaterializeArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); let manifest = load_manifest(&paths.manifest)?; - let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); remote.ensure_master()?; let materialized = db::materialize_tables(&remote, &manifest, &paths, &args.tables)?; println!("{}", serde_json::to_string_pretty(&materialized)?); @@ -427,7 +441,10 @@ fn sever(args: SeverArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); let manifest = load_manifest(&paths.manifest)?; - let remote = RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); remote.ensure_master()?; if !paths.db.join("schema.sql").exists() { @@ -440,12 +457,27 @@ fn sever(args: SeverArgs) -> Result<()> { ); } - let requested_tables 
= db::wordpress_offline_table_names(&manifest.probe.table_prefix); + let refreshed_options = db::refresh_option_bootstrap_for_offline(&remote, &manifest, &paths) + .context("refresh remote option bootstrap rows for offline use")?; + println!( + "refreshed {} WordPress option bootstrap rows for local/offline use", + refreshed_options.len() + ); + + let mut requested_tables = db::load_state(&paths)? + .materialized_tables + .into_iter() + .collect::>(); + if args.admin_password.is_some() { + requested_tables.insert(format!("{}users", manifest.probe.table_prefix)); + requested_tables.insert(format!("{}usermeta", manifest.probe.table_prefix)); + } + let requested_tables = requested_tables.into_iter().collect::>(); let tables = db::existing_local_tables(&manifest, &requested_tables)?; let skipped = requested_tables.len().saturating_sub(tables.len()); if skipped > 0 { println!( - "skipping {} WordPress tables that are not present in the local schema", + "skipping {} previously materialized WordPress tables that are not present in the local schema", skipped ); } @@ -456,10 +488,6 @@ fn sever(args: SeverArgs) -> Result<()> { materialized.len() ); - println!("caching WordPress admin/runtime program files for offline use"); - run::prefetch_runtime_files(&manifest, &paths, &remote) - .context("cache WordPress admin/runtime files")?; - let admin = if let Some(password) = args.admin_password.as_deref() { let admin = db::set_local_admin_password(&manifest, args.admin_login.as_deref(), password) .context("set local administrator password")?; @@ -472,6 +500,12 @@ fn sever(args: SeverArgs) -> Result<()> { None }; + if admin.is_some() { + let cached = cache_offline_core_runtime(&remote, &manifest, &paths) + .context("cache WordPress core/admin runtime for offline login")?; + println!("cached {cached} WordPress core/admin runtime files for offline login"); + } + let marker = OfflineMarker { severed_at_unix: SystemTime::now() .duration_since(UNIX_EPOCH) @@ -494,6 +528,92 @@ fn 
sever(args: SeverArgs) -> Result<()> { Ok(()) } +fn cache_offline_core_runtime( + remote: &RemoteClient, + manifest: &Manifest, + paths: &crate::config::ClonePaths, +) -> Result { + let overlay = OverlayStore::new(paths); + let mut queue = VecDeque::from([PathBuf::new()]); + let mut cached = 0_usize; + + while let Some(dir) = queue.pop_front() { + let entries = match remote.readdir(&dir) { + Ok(entries) => entries, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue, + Err(err) => { + return Err(err).with_context(|| { + format!( + "read remote runtime directory {}", + OverlayStore::rel_string(&dir) + ) + }) + } + }; + + for entry in entries { + let rel = dir.join(&entry.name); + let _ = overlay.put_cached_entry(&rel, &entry); + + if entry.kind == "dir" && should_descend_offline_core_runtime_dir(&rel) { + queue.push_back(rel); + continue; + } + + if !should_cache_offline_core_runtime_file(&rel, &entry.kind) { + continue; + } + if entry.size > manifest.cache_max_file_bytes { + continue; + } + + overlay + .read_cached_or_remote_with_entry( + remote, + &rel, + 0, + 1, + manifest.cache_max_file_bytes, + Some(entry), + ) + .with_context(|| { + format!( + "cache remote runtime file {}", + OverlayStore::rel_string(&rel) + ) + })?; + cached += 1; + } + } + + Ok(cached) +} + +fn should_descend_offline_core_runtime_dir(rel: &Path) -> bool { + rel == Path::new("wp-admin") + || rel.starts_with(Path::new("wp-admin/")) + || rel == Path::new("wp-includes") + || rel.starts_with(Path::new("wp-includes/")) +} + +fn should_cache_offline_core_runtime_file(rel: &Path, kind: &str) -> bool { + if kind != "file" { + return false; + } + if rel.starts_with(Path::new("wp-content")) { + return false; + } + if rel.starts_with(Path::new("wp-admin")) || rel.starts_with(Path::new("wp-includes")) { + return true; + } + let Some(name) = rel.file_name().and_then(|name| name.to_str()) else { + return false; + }; + rel.parent() + .is_none_or(|parent| parent.as_os_str().is_empty()) + && 
(name == "index.php" || (name.starts_with("wp-") && name.ends_with(".php"))) +} + fn mount(args: MountArgs) -> Result<()> { let state_dir = args.state_dir.unwrap_or(default_state_dir()?); let paths = clone_paths(&state_dir, &args.name); @@ -518,3 +638,46 @@ fn run_clone(args: RunArgs) -> Result<()> { }; run::run_site(manifest, paths, options) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn offline_core_runtime_cache_is_bounded_to_wordpress_core() { + assert!(should_cache_offline_core_runtime_file( + Path::new("wp-login.php"), + "file" + )); + assert!(should_cache_offline_core_runtime_file( + Path::new("wp-admin/admin.php"), + "file" + )); + assert!(should_cache_offline_core_runtime_file( + Path::new("wp-includes/version.php"), + "file" + )); + assert!(should_descend_offline_core_runtime_dir(Path::new( + "wp-admin/includes" + ))); + assert!(should_descend_offline_core_runtime_dir(Path::new( + "wp-includes/blocks" + ))); + + assert!(!should_cache_offline_core_runtime_file( + Path::new("wp-content/uploads/2026/05/large.mov"), + "file" + )); + assert!(!should_cache_offline_core_runtime_file( + Path::new("wp-content/plugins/woocommerce/woocommerce.php"), + "file" + )); + assert!(!should_cache_offline_core_runtime_file( + Path::new("wp-content/themes/neve/functions.php"), + "file" + )); + assert!(!should_descend_offline_core_runtime_dir(Path::new( + "wp-content/uploads" + ))); + } +} diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index 24247003..64eaff42 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -1,6 +1,8 @@ use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; +use std::collections::hash_map::DefaultHasher; use std::fs::{self, File, OpenOptions}; +use std::hash::{Hash, Hasher}; use std::io::{Read, Write}; use std::os::unix::fs::OpenOptionsExt; use std::path::{Path, PathBuf}; @@ -182,6 +184,16 @@ pub fn clone_paths(state_dir: 
&Path, name: &str) -> ClonePaths { } } +pub fn ssh_control_path(paths: &ClonePaths) -> PathBuf { + let mut hasher = DefaultHasher::new(); + paths.root.hash(&mut hasher); + let hash = hasher.finish(); + let dir = std::env::var_os("WPCOW_SSH_CONTROL_DIR") + .map(PathBuf::from) + .unwrap_or_else(std::env::temp_dir); + dir.join(format!("wp-cow-ssh-{hash:016x}.sock")) +} + pub fn offline_marker_path(paths: &ClonePaths) -> PathBuf { paths.run.join(OFFLINE_MARKER) } @@ -306,4 +318,18 @@ mod tests { assert_eq!(sanitize_name("Example Site_1"), "example-site-1"); assert_eq!(sanitize_name("...Cow!!!"), "cow"); } + + #[test] + fn ssh_control_path_stays_short_for_long_clone_paths() { + let paths = clone_paths( + Path::new("/tmp/wp-cow-live-acceptance.with-a-long-random-name/state"), + "calm-cottage-core-live-with-a-long-name", + ); + let path = ssh_control_path(&paths); + assert!( + path.to_string_lossy().len() < 100, + "OpenSSH Unix-domain control sockets need a short path: {}", + path.display() + ); + } } diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 49221494..eadfd218 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -21,6 +21,8 @@ pub struct DbState { pub option_bootstrap_tables: BTreeSet, #[serde(default)] pub option_rows: BTreeSet, + #[serde(default)] + pub dirty_option_rows: BTreeSet, } #[derive(Debug, Clone, Serialize)] @@ -162,25 +164,6 @@ pub fn materialize_tables( Ok(changed) } -pub fn wordpress_offline_table_names(table_prefix: &str) -> Vec { - [ - "options", - "users", - "usermeta", - "posts", - "postmeta", - "terms", - "term_taxonomy", - "term_relationships", - "comments", - "commentmeta", - "links", - ] - .into_iter() - .map(|suffix| format!("{table_prefix}{suffix}")) - .collect() -} - pub fn existing_local_tables(manifest: &Manifest, tables: &[String]) -> Result> { for table in tables { validate_table_name(table)?; @@ -295,9 +278,11 @@ pub fn route_for_query( 
option_bootstrap_table_for_sql(&manifest.probe.table_prefix, sql_text, &expanded) { if !state.option_bootstrap_tables.contains(&options_table) { - materialize_option_bootstrap(remote, manifest, &options_table).with_context(|| { - format!("materialize option bootstrap rows for {}", options_table) - })?; + let excluded = dirty_option_names_for_table(&state, &options_table); + materialize_option_bootstrap(remote, manifest, &options_table, &excluded) + .with_context(|| { + format!("materialize option bootstrap rows for {}", options_table) + })?; state.option_bootstrap_tables.insert(options_table); write_state(paths, &state)?; } @@ -329,30 +314,31 @@ pub fn remote_readonly_query(remote: &RemoteClient, sql_text: &str) -> Result Result> { +pub fn refresh_option_bootstrap_for_offline( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, +) -> Result> { let table = format!("{}options", manifest.probe.table_prefix); validate_table_name(&table)?; - let sql_text = format!( - "SELECT option_value FROM {} WHERE option_name='{}' LIMIT 1;", - qualified_table(manifest, &table), - mysql_string_literal(name) - ); - let output = local_mysql_command(manifest) - .arg("--batch") - .arg("--raw") - .arg("--skip-column-names") - .arg("--execute") - .arg(sql_text) - .output() - .context("query local option value")?; - if !output.status.success() { - return Ok(None); + + let mut state = load_state(paths)?; + let excluded = dirty_option_names_for_table(&state, &table); + materialize_option_bootstrap(remote, manifest, &table, &excluded) + .with_context(|| format!("refresh option bootstrap rows for {}", table))?; + + state.option_bootstrap_tables.insert(table.clone()); + for name in option_bootstrap_names() { + if !excluded.iter().any(|excluded_name| excluded_name == name) { + state.option_rows.insert(option_row_key(&table, name)); + } } - let value = String::from_utf8_lossy(&output.stdout) - .lines() - .next() - .map(|line| line.to_string()); - Ok(value) + write_state(paths, 
&state)?; + Ok(option_bootstrap_names() + .iter() + .filter(|name| !excluded.iter().any(|excluded_name| excluded_name == *name)) + .map(|name| (*name).to_string()) + .collect()) } #[derive(Debug, Serialize)] @@ -388,7 +374,29 @@ pub fn row_cow_query( fallback: None, result: Some(result), }), - RowCowExecution::PreparedLocalWrite { .. } | RowCowExecution::LocalOnlyInsert { .. } => { + RowCowExecution::PreparedLocalWrite { + table, + pk_column, + pk_values, + .. + } => { + mark_dirty_option_rows_for_write( + manifest, + paths, + &table, + pk_column.as_deref(), + &pk_values, + )?; + Ok(RowCowResponse { + handled: true, + backend: "local".to_string(), + materialized: Vec::new(), + fallback: None, + result: None, + }) + } + RowCowExecution::LocalOnlyInsert { table } => { + mark_dirty_option_rows_from_sql(manifest, paths, sql_text, &table)?; Ok(RowCowResponse { handled: true, backend: "local".to_string(), @@ -399,10 +407,7 @@ pub fn row_cow_query( } RowCowExecution::Fallback(plan) => { let (fallback, plan_tables) = fallback_name_and_tables(plan); - if fallback == "PromoteTable" - && !plan_tables.is_empty() - && (sql::is_write_sql(sql_text) || sql::is_safe_read_sql(sql_text)) - { + if should_materialize_row_cow_fallback(sql_text, &fallback, &plan_tables) { let materialized = materialize_tables(remote, manifest, paths, &plan_tables)?; return Ok(RowCowResponse { handled: false, @@ -435,6 +440,59 @@ pub fn row_cow_query( } } +fn mark_dirty_option_rows_for_write( + manifest: &Manifest, + paths: &ClonePaths, + table: &str, + pk_column: Option<&str>, + pk_values: &[PkValue], +) -> Result<()> { + let options_table = format!("{}options", manifest.probe.table_prefix); + if table != options_table + || !pk_column.is_some_and(|column| column.eq_ignore_ascii_case("option_name")) + { + return Ok(()); + } + + let mut state = load_state(paths)?; + for value in pk_values { + state + .dirty_option_rows + .insert(option_row_key(table, &value.0)); + } + write_state(paths, &state) +} + +fn 
mark_dirty_option_rows_from_sql( + manifest: &Manifest, + paths: &ClonePaths, + sql_text: &str, + table: &str, +) -> Result<()> { + let options_table = format!("{}options", manifest.probe.table_prefix); + if table != options_table { + return Ok(()); + } + let names = option_write_names_for_sql(sql_text, &options_table, &[options_table.clone()]); + if names.is_empty() { + return Ok(()); + } + + let mut state = load_state(paths)?; + for name in names { + state.dirty_option_rows.insert(option_row_key(table, &name)); + } + write_state(paths, &state) +} + +fn should_materialize_row_cow_fallback( + sql_text: &str, + fallback: &str, + plan_tables: &[String], +) -> bool { + fallback == "PromoteTable" && !plan_tables.is_empty() && sql::is_write_sql(sql_text) +} + fn fallback_name_and_tables(plan: RowCowPlan) -> (String, Vec) { match plan { RowCowPlan::PromoteTable { tables, .. } => ("PromoteTable".to_string(), tables), @@ -590,6 +648,16 @@ impl RowCowBackend for MysqlRowCowBackend<'_> { Ok(pk_values.len()) } + fn local_reserve_insert_pk(&mut self, table: &str, pk_column: Option<&str>) -> Result<()> { + let Some(pk_column) = pk_column else { + return Ok(()); + }; + if !row_cow::is_auto_increment_pk_for_table(table, pk_column) { + return Ok(()); + } + reserve_local_auto_increment(self.remote, self.manifest, table, pk_column) + } + fn local_tombstones_by_pk( &mut self, table: &str, @@ -655,6 +723,78 @@ fn remote_query_to_cow_result(result: RemoteQueryResult) -> CowQueryResult { } } +fn reserve_local_auto_increment( + remote: &RemoteClient, + manifest: &Manifest, + table: &str, + pk_column: &str, +) -> Result<()> { + validate_table_name(table)?; + validate_table_name(pk_column)?; + let remote_max = remote_max_pk(remote, table, pk_column) + .with_context(|| format!("read remote max primary key for {}", table))?; + let local_max = local_max_pk(manifest, table, pk_column) + .with_context(|| format!("read local max primary key for {}", table))?; + let Some(next_id) = 
remote_max.max(local_max).checked_add(1) else { + return Ok(()); + }; + if next_id <= 1 { + return Ok(()); + } + let sql_text = format!( + "ALTER TABLE {} AUTO_INCREMENT = {};", + qualified_table(manifest, table), + next_id + ); + run_mysql_exec(manifest, &sql_text) +} + +fn remote_max_pk(remote: &RemoteClient, table: &str, pk_column: &str) -> Result { + let sql_text = format!( + "SELECT MAX({}) AS max_pk FROM {};", + row_cow::quote_identifier(pk_column)?, + row_cow::quote_identifier(table)? + ); + let result = remote_readonly_query(remote, &sql_text)?; + if !result.ok { + return Err(anyhow!( + "remote max primary key query failed: {}", + result.error + )); + } + max_pk_from_rows(&result.rows, "max_pk") +} + +fn local_max_pk(manifest: &Manifest, table: &str, pk_column: &str) -> Result { + let sql_text = format!( + "SELECT MAX({}) AS max_pk FROM {};", + row_cow::quote_identifier(pk_column)?, + qualified_table(manifest, table) + ); + let result = local_query_result(manifest, &sql_text)?; + max_pk_from_rows(&result.rows, "max_pk") +} + +fn max_pk_from_rows(rows: &[Row], field: &str) -> Result { + let Some(value) = rows.first().and_then(|row| row.get(field)) else { + return Ok(0); + }; + match value { + serde_json::Value::Null => Ok(0), + serde_json::Value::Number(number) => Ok(number.as_u64().unwrap_or(0)), + serde_json::Value::String(raw) => { + let raw = raw.trim(); + if raw.is_empty() || raw.eq_ignore_ascii_case("null") { + Ok(0) + } else { + raw.parse::() + .with_context(|| format!("parse max primary key value from {}", raw)) + } + } + _ => Ok(0), + } +} + fn mysql_json_value(value: Option<&serde_json::Value>) -> String { match value { None | Some(serde_json::Value::Null) => "NULL".to_string(), @@ -940,12 +1080,13 @@ fn materialize_option_bootstrap( remote: &RemoteClient, manifest: &Manifest, table: &str, + excluded_names: &[String], ) -> Result<()> { let probe = &manifest.probe; ensure_probe_has_db(probe)?; validate_table_name(table)?; - let where_sql = 
option_bootstrap_where_sql(); + let where_sql = option_bootstrap_where_sql_excluding(excluded_names); let delete_sql = format!( "DELETE FROM {} WHERE {};", qualified_table(manifest, table), @@ -1012,6 +1153,11 @@ fn materialize_option_rows( let missing = names .iter() .filter(|name| !state.option_rows.contains(&option_row_key(table, name))) + .filter(|name| { + !state + .dirty_option_rows + .contains(&option_row_key(table, name)) + }) .cloned() .collect::>(); if missing.is_empty() { @@ -1111,6 +1257,25 @@ fn option_names_for_sql(sql_text: &str, options_table: &str, tables: &[String]) if !sql::is_safe_read_sql(sql_text) || sql::is_write_sql(sql_text) { return Vec::new(); } + option_names_for_option_predicate(sql_text, options_table, tables) +} + +fn option_write_names_for_sql( + sql_text: &str, + options_table: &str, + tables: &[String], +) -> Vec { + if !sql::is_write_sql(sql_text) { + return Vec::new(); + } + option_names_for_option_predicate(sql_text, options_table, tables) +} + +fn option_names_for_option_predicate( + sql_text: &str, + options_table: &str, + tables: &[String], +) -> Vec { if !tables.iter().any(|table| table == options_table) { return Vec::new(); } @@ -1149,6 +1314,20 @@ fn option_bootstrap_where_sql() -> String { format!("autoload IN ('yes', 'on', 'auto-on', 'auto') OR option_name IN ({names})") } +fn option_bootstrap_where_sql_excluding(excluded_names: &[String]) -> String { + let base = option_bootstrap_where_sql(); + if excluded_names.is_empty() { + return base; + } + + let excluded = excluded_names + .iter() + .map(|name| format!("'{}'", mysql_string_literal(name))) + .collect::>() + .join(", "); + format!("({base}) AND option_name NOT IN ({excluded})") +} + fn option_names_where_sql(names: &[String]) -> String { let names = names .iter() @@ -1162,6 +1341,15 @@ fn option_row_key(table: &str, name: &str) -> String { format!("{table}:{name}") } +fn dirty_option_names_for_table(state: &DbState, table: &str) -> Vec { + let prefix = 
format!("{table}:"); + state + .dirty_option_rows + .iter() + .filter_map(|key| key.strip_prefix(&prefix).map(str::to_string)) + .collect() +} + fn option_bootstrap_names() -> &'static [&'static str] { &[ "siteurl", @@ -1465,6 +1653,37 @@ mod tests { ); } + #[test] + fn extracts_dirty_option_write_names() { + let tables = vec!["ady_options".to_string()]; + assert_eq!( + option_write_names_for_sql( + "UPDATE ady_options SET option_value = 'neve' WHERE option_name = 'template'", + "ady_options", + &tables, + ), + vec!["template".to_string()] + ); + assert_eq!( + option_write_names_for_sql( + "DELETE FROM ady_options WHERE option_name IN ('template', 'stylesheet')", + "ady_options", + &tables, + ), + vec!["template".to_string(), "stylesheet".to_string()] + ); + } + + #[test] + fn option_bootstrap_refresh_can_preserve_dirty_rows() { + let where_sql = option_bootstrap_where_sql_excluding(&[ + "template".to_string(), + "stylesheet".to_string(), + ]); + assert!(where_sql.contains("autoload IN")); + assert!(where_sql.contains("option_name NOT IN ('template', 'stylesheet')")); + } + #[test] fn qualifies_local_tables_for_exec_without_selected_database() { let manifest = test_manifest(); @@ -1492,4 +1711,41 @@ mod tests { vec!["DELETE FROM `cow_calm`.`wp_posts` WHERE `ID` IN ('7', '9');"] ); } + + #[test] + fn parses_max_primary_key_rows_for_auto_increment_reservation() { + let mut row = Row::new(); + row.insert( + "max_pk".to_string(), + serde_json::Value::String("184".to_string()), + ); + assert_eq!(max_pk_from_rows(&[row], "max_pk").unwrap(), 184); + + let mut null_row = Row::new(); + null_row.insert("max_pk".to_string(), serde_json::Value::Null); + assert_eq!(max_pk_from_rows(&[null_row], "max_pk").unwrap(), 0); + + assert_eq!(max_pk_from_rows(&[], "max_pk").unwrap(), 0); + } + + #[test] + fn row_cow_safe_read_fallbacks_do_not_promote_tables() { + let tables = vec!["ady_options".to_string()]; + assert!( + !should_materialize_row_cow_fallback( + "SELECT option_name, 
option_value FROM ady_options WHERE autoload IN ('yes', 'on')", + "PromoteTable", + &tables, + ), + "safe live-lower reads should route to the remote lower layer instead of dumping full tables" + ); + assert!( + should_materialize_row_cow_fallback( + "UPDATE ady_options SET option_value='x' WHERE autoload='yes'", + "PromoteTable", + &tables, + ), + "write fallbacks still need local table promotion before the write executes" + ); + } } diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 224aa6f4..04d9a3b6 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -177,6 +177,7 @@ impl CowFs { Err(err) => return Err(err), }; self.remote_missing_cache.remove(rel); + let _ = self.overlay.put_cached_entry(rel, &entry); self.remote_stat_cache.insert( rel.to_path_buf(), Timed { @@ -189,7 +190,10 @@ impl CowFs { fn remote_readdir(&mut self, rel: &Path) -> io::Result> { if self.offline { - return Ok(Vec::new()); + return self + .overlay + .list_cached_metadata_dir(rel) + .map_err(anyhow_to_io); } if let Some(cached) = self.remote_readdir_cache.get(rel) { @@ -418,9 +422,7 @@ impl Filesystem for CowFs { if entry.kind == "dir" { return Err(io::Error::from_raw_os_error(ENOTSUP)); } - self.overlay - .copy_up(&self.remote, &old_rel) - .map_err(anyhow_to_io)?; + self.copy_up_for_write(&old_rel)?; } fs::rename(&old_upper, &new_upper)?; @@ -448,10 +450,7 @@ impl Filesystem for CowFs { .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown inode"))?; trace_fuse("open", &rel); if wants_write(flags) { - let upper = self - .overlay - .copy_up(&self.remote, &rel) - .map_err(anyhow_to_io)?; + let upper = self.copy_up_for_write(&rel)?; let mut opts = OpenOptions::new(); opts.read(true).write(true).create(true); if flags & libc::O_TRUNC != 0 { @@ -514,17 +513,20 @@ impl Filesystem for CowFs { } } Some(Handle::Remote(rel)) => { + let rel = rel.clone(); if self.offline { return 
reply.error(ENOENT); } - trace_fuse("read-remote", rel); + trace_fuse("read-remote", &rel); + let entry = self.remote_stat(&rel).ok(); self.overlay - .read_cached_or_remote( + .read_cached_or_remote_with_entry( &self.remote, - rel, + &rel, offset, size, self.manifest.cache_max_file_bytes, + entry, ) .map_err(anyhow_to_io) } @@ -734,12 +736,29 @@ impl CowFs { Ok(entries) } + fn copy_up_for_write(&self, rel: &Path) -> io::Result { + if self.offline { + self.overlay + .copy_up_cached_only(rel) + .map_err(|err| io::Error::new(io::ErrorKind::NotFound, err.to_string())) + } else { + self.overlay + .copy_up(&self.remote, rel) + .map_err(anyhow_to_io) + } + } + fn is_opaque_dir_active(&self, rel: &Path) -> io::Result { let is_opaque = self.overlay.is_opaque_dir(rel).map_err(anyhow_to_io)?; if !is_opaque { return Ok(false); } - if rel.starts_with(Path::new("wp-content/plugins")) && env_bool("WPCOW_ENABLE_PLUGINS") { + if rel.starts_with(Path::new("wp-content/plugins")) + && !env_is_explicit_false("WPCOW_ENABLE_PLUGINS") + { + return Ok(false); + } + if rel.starts_with(Path::new("wp-content/languages")) { return Ok(false); } Ok(true) @@ -759,7 +778,7 @@ impl CowFs { pub fn mount_foreground(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> Result<()> { fs::create_dir_all(mountpoint)?; - let control_path = paths.run.join("ssh-control.sock"); + let control_path = config::ssh_control_path(&paths); let remote = RemoteClient::new(manifest.clone(), Some(control_path)); if !config::is_offline(&paths) { remote.ensure_master()?; @@ -817,13 +836,13 @@ fn wants_write(flags: i32) -> bool { || flags & libc::O_APPEND != 0 } -fn env_bool(name: &str) -> bool { +fn env_is_explicit_false(name: &str) -> bool { std::env::var(name) .ok() .map(|raw| { matches!( raw.to_ascii_lowercase().as_str(), - "1" | "true" | "yes" | "on" + "0" | "false" | "no" | "off" ) }) .unwrap_or(false) @@ -847,3 +866,267 @@ fn io_errno(err: &io::Error) -> i32 { fn anyhow_to_io(err: anyhow::Error) -> io::Error { 
io::Error::new(io::ErrorKind::Other, err.to_string()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{ensure_clone_dirs, write_offline_marker, Manifest, OfflineMarker, Probe}; + use std::sync::{Mutex, OnceLock}; + + fn test_manifest() -> Manifest { + Manifest::new( + "example".to_string(), + "unreachable-host".to_string(), + "/remote/wp".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ) + } + + #[test] + fn offline_write_copy_up_uses_cached_lower_without_remote() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: Vec::new(), + admin_user: None, + }, + ) + .unwrap(); + + let manifest = test_manifest(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-admin/index.php"); + let cache_path = store.cache_path(rel); + fs::create_dir_all(cache_path.parent().unwrap()).unwrap(); + fs::write(&cache_path, b"cached admin runtime\n").unwrap(); + store + .put_cached_entry( + rel, + &RemoteEntry { + name: "index.php".to_string(), + kind: "file".to_string(), + size: 21, + mode: 0o100644, + mtime: 42, + }, + ) + .unwrap(); + + let fs = CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + let upper = fs.copy_up_for_write(rel).unwrap(); + assert_eq!(std::fs::read(&upper).unwrap(), b"cached admin runtime\n"); + + let err = fs + .copy_up_for_write(Path::new("wp-admin/missing.php")) + .unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::NotFound); + assert!( + err.to_string() + .contains("clone is severed and writable lower file is not cached locally"), + "unexpected error: {err}" + ); + } + + #[test] + fn offline_readdir_uses_cached_remote_metadata_without_remote() { + let temp = tempfile::tempdir().unwrap(); + let paths = 
crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: Vec::new(), + admin_user: None, + }, + ) + .unwrap(); + + let manifest = test_manifest(); + let store = OverlayStore::new(&paths); + store + .put_cached_entry( + Path::new("wp-content/plugins/hello.php"), + &RemoteEntry { + name: "hello.php".to_string(), + kind: "file".to_string(), + size: 18, + mode: 0o100644, + mtime: 42, + }, + ) + .unwrap(); + store + .put_cached_entry( + Path::new("wp-content/plugins/sample"), + &RemoteEntry { + name: "sample".to_string(), + kind: "dir".to_string(), + size: 0, + mode: 0o40755, + mtime: 42, + }, + ) + .unwrap(); + + let mut fs = CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + let entries = fs.remote_readdir(Path::new("wp-content/plugins")).unwrap(); + let names = entries + .into_iter() + .map(|entry| entry.name) + .collect::>(); + + assert_eq!( + names, + vec!["hello.php".to_string(), "sample".to_string()], + "offline readdir should use cached remote metadata without touching SSH" + ); + } + + #[test] + fn remote_stat_metadata_survives_severed_mode_without_remote() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let fake_bin = temp.path().join("bin"); + fs::create_dir_all(remote_root.join("wp-content/themes/neve/assets/js/build/modern")) + .unwrap(); + fs::create_dir_all(&fake_bin).unwrap(); + fs::write( + remote_root.join("wp-content/themes/neve/assets/js/build/modern/frontend.js"), + b"/* theme build asset */", + ) + .unwrap(); + let fake_ssh = fake_bin.join("ssh"); + fs::write( + &fake_ssh, + r#"#!/usr/bin/env bash +set -euo pipefail 
+cmd="${@: -1}" +exec bash -lc "$cmd" +"#, + ) + .unwrap(); + let mut perms = fs::metadata(&fake_ssh).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&fake_ssh, perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", fake_bin.display(), old.to_string_lossy()), + None => fake_bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let rel = Path::new("wp-content/themes/neve/assets/js/build/modern/frontend.js"); + + let mut fs = CowFs::new( + manifest.clone(), + &paths, + RemoteClient::new(manifest.clone(), None), + ); + let entry = fs.remote_stat(rel).unwrap(); + assert_eq!(entry.size, 23); + assert_eq!( + OverlayStore::new(&paths) + .cached_entry(rel) + .unwrap() + .unwrap() + .size, + 23, + "successful stat-only lookups must persist metadata for later offline theme checks" + ); + + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: Vec::new(), + admin_user: None, + }, + ) + .unwrap(); + fs::remove_file(remote_root.join(rel)).unwrap(); + let mut offline_fs = + CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + assert_eq!( + offline_fs.remote_stat(rel).unwrap().size, + 23, + "severed clones need stat-only metadata without consulting SSH" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => 
std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } + + #[test] + fn legacy_opaque_runtime_markers_stay_transparent_by_default() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + std::env::remove_var("WPCOW_ENABLE_PLUGINS"); + + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + for rel in ["wp-content/plugins", "wp-content/languages"] { + let dir = paths.upper.join(rel); + fs::create_dir_all(&dir).unwrap(); + fs::write(dir.join(crate::overlay::OPAQUE_MARKER), b"legacy marker\n").unwrap(); + } + + let manifest = test_manifest(); + let fs = CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + assert!(!fs + .is_opaque_dir_active(Path::new("wp-content/plugins")) + .unwrap()); + assert!(!fs + .is_opaque_dir_active(Path::new("wp-content/languages")) + .unwrap()); + + std::env::set_var("WPCOW_ENABLE_PLUGINS", "0"); + assert!(fs + .is_opaque_dir_active(Path::new("wp-content/plugins")) + .unwrap()); + assert!(!fs + .is_opaque_dir_active(Path::new("wp-content/languages")) + .unwrap()); + + match old { + Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), + None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), + } + } +} diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index c8ecb5ba..9aab8763 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -9,8 +9,8 @@ pub const ROUTER_BASENAME: &str = ".wp-cow-router.php"; pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { fs::create_dir_all(paths.upper.join("wp-content/mu-plugins"))?; - write_opaque_dir(paths.upper.join("wp-content/plugins"))?; - write_opaque_dir(paths.upper.join("wp-content/languages"))?; + 
remove_opaque_marker(paths.upper.join("wp-content/plugins"))?; + remove_opaque_marker(paths.upper.join("wp-content/languages"))?; let router = router_php(paths, manifest); fs::write( paths.upper.join("wp-config.php"), @@ -26,12 +26,11 @@ pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Res Ok(()) } -fn write_opaque_dir(path: impl AsRef) -> Result<()> { - fs::create_dir_all(path.as_ref())?; - fs::write( - path.as_ref().join(OPAQUE_MARKER), - b"local overlay hides remote lower\n", - )?; +fn remove_opaque_marker(path: impl AsRef) -> Result<()> { + let marker = path.as_ref().join(OPAQUE_MARKER); + if marker.exists() { + fs::remove_file(marker)?; + } Ok(()) } @@ -125,9 +124,47 @@ function cow_is_write_sql( $sql ) { return (bool) preg_match( '/^(INSERT|UPDATE|DELETE|REPLACE|ALTER|CREATE|DROP|TRUNCATE|RENAME|LOAD|LOCK|UNLOCK|GRANT|REVOKE|OPTIMIZE|ANALYZE|REPAIR)\b/i', $sql ); } +function cow_sql_without_literals_and_comments( $sql ) { + return preg_replace( + array( + '/\/\*.*?\*\//s', + '/--[^\n]*(?:\n|$)/', + '/#[^\n]*(?:\n|$)/', + '/\'(?:\\\\.|\'\'|[^\'\\\\])*\'/s', + '/"(?:\\\\.|""|[^"\\\\])*"/s', + ), + ' ', + $sql + ); +} + +function cow_select_has_remote_side_effect_clause( $sql ) { + $sql = cow_sql_without_literals_and_comments( $sql ); + return (bool) preg_match( '/\bINTO\b|\bFOR\s+UPDATE\b|\bLOCK\s+IN\s+SHARE\s+MODE\b/i', $sql ); +} + function cow_is_safe_read_sql( $sql ) { $sql = ltrim( preg_replace( '/^\s*(?:\/\*.*?\*\/\s*|--[^\n]*\n\s*|#[^\n]*\n\s*)*/s', '', $sql ) ); - return (bool) preg_match( '/^(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i', $sql ); + if ( ! 
preg_match( '/^(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i', $sql, $matches ) ) { + return false; + } + if ( 0 === strcasecmp( $matches[1], 'SELECT' ) && cow_select_has_remote_side_effect_clause( $sql ) ) { + return false; + } + return true; +} + +function cow_is_protected_theme_option_write( $sql ) { + if ( '0' === getenv( 'WPCOW_PROTECT_THEME_OPTIONS' ) ) { + return false; + } + if ( ! cow_is_write_sql( $sql ) ) { + return false; + } + if ( ! preg_match( '/\boption_name\b/i', $sql ) ) { + return false; + } + return (bool) preg_match( "/'(?:template|stylesheet|current_theme)'/i", $sql ); } function cow_tables_from_sql( $sql ) { @@ -228,6 +265,11 @@ function cow_row_cow_enabled() { return '0' !== getenv( 'WPCOW_ROW_COW' ); } +function cow_offline() { + $value = strtolower( (string) getenv( 'WPCOW_OFFLINE' ) ); + return in_array( $value, array( '1', 'true', 'yes', 'on' ), true ); +} + function cow_remote_query_cache_file( $query ) { if ( ! cow_remote_query_cache_enabled() ) { return ''; @@ -292,6 +334,14 @@ class Cow_DB extends wpdb { $tables = cow_tables_from_sql( $query ); if ( cow_is_write_sql( $query ) ) { + if ( in_array( $this->options, $tables, true ) && cow_is_protected_theme_option_write( $query ) ) { + $this->rows_affected = 0; + $this->last_error = ''; + return 0; + } + if ( cow_offline() ) { + return parent::query( $query ); + } if ( cow_row_cow_enabled() ) { $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); if ( empty( $row_cow['ok'] ) ) { @@ -313,6 +363,9 @@ class Cow_DB extends wpdb { } if ( cow_is_safe_read_sql( $query ) ) { + if ( cow_offline() ) { + return parent::query( $query ); + } if ( cow_row_cow_enabled() ) { $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); if ( empty( $row_cow['ok'] ) ) { @@ -491,11 +544,13 @@ if ( ! 
defined( 'DISABLE_WP_CRON' ) ) { define( 'DISABLE_WP_CRON', true ); } -if ( '1' !== getenv( 'WPCOW_ENABLE_PLUGINS' ) ) { +if ( '0' === getenv( 'WPCOW_ENABLE_PLUGINS' ) ) { add_filter( 'option_active_plugins', '__return_empty_array', PHP_INT_MAX ); add_filter( 'site_option_active_sitewide_plugins', '__return_empty_array', PHP_INT_MAX ); } +add_filter( 'validate_current_theme', '__return_false', PHP_INT_MAX ); + add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { if ( defined( 'WPCOW_ALLOW_OUTBOUND_HTTP' ) && WPCOW_ALLOW_OUTBOUND_HTTP ) { return $preempt; @@ -577,7 +632,7 @@ function wp_cow_is_frontend_get( $path ) { } function wp_cow_proxy_remote_frontend( $remote_url, $local_url, $path ) { - if ( '0' === getenv( 'WPCOW_PROXY_FRONTEND' ) || isset( $_GET['__wp_cow_local'] ) || ! wp_cow_is_frontend_get( $path ) ) { + if ( '1' !== getenv( 'WPCOW_PROXY_FRONTEND' ) || isset( $_GET['__wp_cow_local'] ) || ! wp_cow_is_frontend_get( $path ) ) { return false; } @@ -669,7 +724,7 @@ function wp_cow_render_wordpress( $ready_file ) { return true; } -if ( in_array( $path, array( '/wp-admin/install.php', '/wp-admin/setup-config.php' ), true ) ) { +if ( isset( $_GET['__wp_cow_installer_guard'] ) || in_array( $path, array( '/wp-admin/install.php', '/wp-admin/setup-config.php' ), true ) ) { wp_cow_runtime_error_page( 'wp-cow did not load the remote site', 'WordPress tried to show an installation/setup path. This clone refuses to treat an empty or unavailable database lower layer as a successful site load.', @@ -678,6 +733,14 @@ if ( in_array( $path, array( '/wp-admin/install.php', '/wp-admin/setup-config.ph return true; } +$wp_cow_docroot = rtrim( $_SERVER['DOCUMENT_ROOT'], '/' ); +if ( in_array( $path, array( '/wp-admin', '/wp-admin/' ), true ) && is_file( $wp_cow_docroot . '/wp-admin/index.php' ) ) { + $_SERVER['SCRIPT_NAME'] = '/wp-admin/index.php'; + $_SERVER['SCRIPT_FILENAME'] = $wp_cow_docroot . '/wp-admin/index.php'; + require $wp_cow_docroot . 
'/wp-admin/index.php'; + return true; +} + if ( '/' !== $path && is_file( $file ) ) { return false; } @@ -838,11 +901,17 @@ pub fn generated_file_paths(root: &Path) -> Vec { mod tests { use super::*; use crate::config::{ - clone_paths, DbProxy, LocalDb, Manifest, Probe, RemoteDbTunnel, MANIFEST_VERSION, + clone_paths, ensure_clone_dirs, write_manifest, write_offline_marker, DbProxy, LocalDb, + Manifest, OfflineMarker, Probe, RemoteDbTunnel, MANIFEST_VERSION, }; + use std::ffi::OsString; use std::io::{Read, Write}; use std::net::{TcpListener, TcpStream}; - use std::process::Command; + use std::os::unix::fs::PermissionsExt; + use std::path::{Path, PathBuf}; + use std::process::{Child, Command, Stdio}; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::{Arc, Mutex, OnceLock}; use std::thread; use std::time::{Duration, Instant}; @@ -883,6 +952,14 @@ mod tests { } } + fn php_single_quoted_path(path: &Path) -> String { + let mut value = path.to_string_lossy().into_owned(); + if path.is_dir() && !value.ends_with('/') { + value.push('/'); + } + value.replace('\\', "\\\\").replace('\'', "\\'") + } + #[test] fn generated_config_shadows_urls_and_database() { let temp = tempfile::tempdir().unwrap(); @@ -905,26 +982,123 @@ mod tests { fn db_dropin_blocks_write_classes() { let php = db_dropin_php(); assert!(php.contains("cow_is_write_sql")); + assert!(php.contains("cow_select_has_remote_side_effect_clause")); assert!(php.contains("/materialize")); assert!(php.contains("cow_remote_mysqli")); assert!(php.contains("cow_remote_query_cache_get")); assert!(php.contains("cow_remote_query_cache_set")); + assert!(php.contains("cow_is_protected_theme_option_write")); + assert!(php.contains("WPCOW_PROTECT_THEME_OPTIONS")); assert!(php.contains("cow_db_runtime_fail")); assert!(php.contains("will not fall back to the empty local schema")); assert!(php.contains("'sql' => $query")); assert!(php.contains("INSERT|REPLACE")); } + #[test] + fn 
db_dropin_rejects_read_shaped_remote_writes() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP SQL safety test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let wp_includes = temp.path().join("wp-includes"); + fs::create_dir_all(&wp_includes).unwrap(); + fs::write( + wp_includes.join("class-wpdb.php"), + " true, + "SELECT * FROM wp_posts WHERE post_title = 'FOR UPDATE'" => true, + 'SELECT * FROM wp_posts /* FOR UPDATE */ WHERE ID = 1' => true, + "SELECT * FROM wp_posts INTO OUTFILE '/tmp/wp-cow-leak'" => false, + 'SELECT * FROM wp_posts WHERE ID = 1 FOR UPDATE' => false, + 'SELECT * FROM wp_posts WHERE ID = 1 LOCK IN SHARE MODE' => false, +); +foreach ( $cases as $sql => $expected ) {{ + $actual = cow_is_safe_read_sql( $sql ); + if ( $actual !== $expected ) {{ + fwrite( STDERR, $sql . ' expected ' . ( $expected ? 'safe' : 'unsafe' ) . ' got ' . ( $actual ? 'safe' : 'unsafe' ) . PHP_EOL ); + exit( 1 ); + }} +}} +"#, + php_single_quoted_path(temp.path()), + php_single_quoted_path(&db_dropin) + ); + fs::write(&check, script).unwrap(); + + let output = Command::new("php") + .arg(&check) + .output() + .unwrap_or_else(|err| panic!("run PHP SQL safety check: {err}")); + assert!( + output.status.success(), + "PHP SQL safety check failed: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + #[test] fn safety_plugin_blocks_side_effects() { let php = safety_mu_plugin_php(); assert!(php.contains("pre_wp_mail")); assert!(php.contains("X-Robots-Tag")); assert!(php.contains("pre_http_request")); + assert!(php.contains("validate_current_theme")); assert!(php.contains("WPCOW_ENABLE_PLUGINS")); + assert!(php.contains("'0' === getenv( 'WPCOW_ENABLE_PLUGINS' )")); assert!(php.contains("option_active_plugins")); } + #[test] + fn generated_overrides_keep_remote_runtime_dirs_visible_by_default() { + let temp = tempfile::tempdir().unwrap(); + let paths 
= clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let plugins = paths.upper.join("wp-content/plugins"); + let languages = paths.upper.join("wp-content/languages"); + fs::create_dir_all(&plugins).unwrap(); + fs::create_dir_all(&languages).unwrap(); + fs::write( + plugins.join(crate::overlay::OPAQUE_MARKER), + b"legacy opaque marker\n", + ) + .unwrap(); + fs::write( + languages.join(crate::overlay::OPAQUE_MARKER), + b"legacy opaque marker\n", + ) + .unwrap(); + + write_wordpress_overrides(&paths, &manifest()).unwrap(); + + assert!( + !plugins.join(crate::overlay::OPAQUE_MARKER).exists(), + "plugin files should remain backed by the lazy remote lower layer by default" + ); + assert!( + !languages.join(crate::overlay::OPAQUE_MARKER).exists(), + "language files should remain backed by the lazy remote lower layer by default" + ); + } + #[test] fn router_exposes_splash_and_progress_endpoint() { let temp = tempfile::tempdir().unwrap(); @@ -936,6 +1110,7 @@ mod tests { assert!(php.contains("wp_cow_proxy_remote_frontend")); assert!(php.contains("X-WP-COW-Frontend-Proxy")); assert!(php.contains("WordPress tried to show the installation wizard")); + assert!(php.contains("__wp_cow_installer_guard")); assert!(php.contains("Cache-Control: no-store")); assert!(!php.contains("__WPCOW_PROGRESS_FILE__")); assert!(!php.contains("__WPCOW_READY_FILE__")); @@ -1100,6 +1275,565 @@ mod tests { let _ = child.wait(); } + #[test] + #[ignore = "strict harness only: starts temporary MariaDB and PHP servers"] + fn runtime_cow_harness_proves_admin_login_local_mutation_and_offline_refresh() { + require_command("php"); + require_command("mariadbd"); + require_command("mariadb-install-db"); + require_command("mysql"); + + let temp = tempfile::tempdir().unwrap(); + let mysql_port = free_tcp_port(); + let mysql = ChildGuard::new(start_mariadb(temp.path(), mysql_port)); + wait_for_mysql(mysql_port); + create_harness_databases(mysql_port); + + let state_dir = 
temp.path().join("state"); + let paths = clone_paths(&state_dir, "example"); + fs::create_dir_all(&paths.generated).unwrap(); + fs::create_dir_all(&paths.file_cache).unwrap(); + fs::create_dir_all(&paths.run).unwrap(); + + let remote_public = HarnessHttpServer::start("REMOTE PUBLIC BYPASS"); + let control_port = free_tcp_port(); + let site_port = free_tcp_port(); + let mut harness_manifest = manifest(); + harness_manifest.remote_url = format!("http://127.0.0.1:{}", remote_public.port); + harness_manifest.local_url = format!("http://127.0.0.1:{site_port}"); + harness_manifest.control_url = format!("http://127.0.0.1:{control_port}"); + harness_manifest.probe.db_name = "remote_wp".to_string(); + harness_manifest.probe.db_host = format!("127.0.0.1:{mysql_port}"); + harness_manifest.probe.db_user = "root".to_string(); + harness_manifest.probe.db_password = String::new(); + harness_manifest.local_db = LocalDb { + name: "local_wp".to_string(), + user: "root".to_string(), + password: String::new(), + host: "127.0.0.1".to_string(), + port: mysql_port, + }; + harness_manifest.remote_db_tunnel = RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: mysql_port, + }; + + crate::db::set_local_admin_password(&harness_manifest, Some("admin"), "local-pass") + .unwrap(); + + let docroot = temp.path().join("docroot"); + write_runtime_harness_docroot(&docroot, &paths, &harness_manifest); + let router = paths.generated.join("router.php"); + fs::write(&router, router_php(&paths, &harness_manifest)).unwrap(); + + let control_docroot = temp.path().join("control"); + fs::create_dir_all(&control_docroot).unwrap(); + let control_router = control_docroot.join("control.php"); + fs::write(&control_router, runtime_harness_control_php()).unwrap(); + let mut control = ChildGuard::new(start_php_server( + &control_docroot, + &control_router, + control_port, + &[("WPCOW_HARNESS_MYSQL_PORT", mysql_port.to_string())], + )); + wait_for_port(control_port); + + let mut site = 
ChildGuard::new(start_php_server( + &docroot, + &router, + site_port, + &[("WPCOW_SPLASH", "0".to_string())], + )); + wait_for_port(site_port); + + let first = http_get_nonempty(site_port, "/", Duration::from_secs(5)); + assert!( + first.contains("Remote Harness Page"), + "unexpected first page response: {}", + first + ); + assert!( + !first.contains("REMOTE PUBLIC BYPASS"), + "frontend request was served by the remote public proxy instead of local WordPress: {}", + first + ); + assert!( + !remote_public.was_hit(), + "default frontend handling must not call the remote public URL" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT post_title FROM local_wp.wp_posts WHERE ID=1;" + ), + "Remote Harness Page", + "row-level frontend read must materialize the page locally" + ); + + let login = http_post( + site_port, + "/wp-login.php", + "log=admin&pwd=local-pass", + Duration::from_secs(5), + ); + assert!( + login.contains("LOGIN OK"), + "local admin password override did not log in: {}", + login + ); + let cookie = response_cookie(&login).expect("login response should set an auth cookie"); + let admin = http_get_with_headers( + site_port, + "/wp-admin/", + &[("Cookie", cookie.as_str())], + Duration::from_secs(5), + ); + assert!( + admin.contains("WP ADMIN LOCAL DASHBOARD"), + "wp-admin did not render after local login: {}", + admin + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT user_pass = MD5('remote-pass') FROM remote_wp.wp_users WHERE ID=1;" + ), + "1", + "local admin password override must not change the remote password" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT user_pass = MD5('local-pass') FROM local_wp.wp_users WHERE ID=1;" + ), + "1", + "local admin password override must update only the local DB" + ); + + let created = http_post( + site_port, + "/wp-admin/post-new.php", + "title=Local+Only", + Duration::from_secs(5), + ); + assert!( + created.contains("LOCAL POST CREATED"), + "local post creation failed: {}", + created + ); + 
let local_only = http_get_nonempty(site_port, "/local-only-page", Duration::from_secs(5)); + assert!( + local_only.contains("Local Only Harness Page"), + "local-only page was not visible locally: {}", + local_only + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT COUNT(*) FROM remote_wp.wp_posts WHERE ID=99;" + ), + "0", + "local page creation must not write to remote" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT COUNT(*) FROM local_wp.wp_posts WHERE ID=99;" + ), + "1", + "local page creation must write to the local overlay" + ); + + let edited = http_post( + site_port, + "/wp-admin/post.php", + "post_ID=1", + Duration::from_secs(5), + ); + assert!( + edited.contains("LOCAL POST EDITED"), + "local post edit failed: {}", + edited + ); + let edited_page = http_get_nonempty(site_port, "/", Duration::from_secs(5)); + assert!( + edited_page.contains("Locally Edited Harness Page"), + "edited local page did not render: {}", + edited_page + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT post_title FROM remote_wp.wp_posts WHERE ID=1;" + ), + "Remote Harness Page", + "local edit must not change the remote row" + ); + + site.kill_wait(); + control.kill_wait(); + mysql_exec( + mysql_port, + "UPDATE remote_wp.wp_posts SET post_title='Remote Changed After Sever' WHERE ID=1;", + ); + + let mut offline_site = ChildGuard::new(start_php_server( + &docroot, + &router, + site_port, + &[ + ("WPCOW_SPLASH", "0".to_string()), + ("WPCOW_OFFLINE", "1".to_string()), + ("WPCOW_REMOTE_DB_TUNNEL", "0".to_string()), + ], + )); + wait_for_port(site_port); + let offline = http_get_nonempty(site_port, "/", Duration::from_secs(5)); + assert!( + offline.contains("Locally Edited Harness Page"), + "offline refresh did not use local materialized state: {}", + offline + ); + assert!( + !offline.contains("Remote Changed After Sever"), + "offline refresh read from the remote lower layer: {}", + offline + ); + + offline_site.kill_wait(); + drop(mysql); + } + + #[test] + 
#[ignore = "strict harness only: starts temporary MariaDB, PHP, FUSE, and fake SSH"] + fn production_run_harness_proves_fuse_rust_control_and_offline_refresh() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _lock = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + require_command("php"); + require_command("mariadbd"); + require_command("mariadb-install-db"); + require_command("mysql"); + require_command("fusermount3"); + assert!( + Path::new("/dev/fuse").exists(), + "strict production harness requires /dev/fuse" + ); + + let temp = tempfile::tempdir().unwrap(); + let mysql_port = free_tcp_port(); + let mysql = ChildGuard::new(start_mariadb(temp.path(), mysql_port)); + wait_for_mysql(mysql_port); + create_harness_databases(mysql_port); + + let state_dir = temp.path().join("state"); + let paths = clone_paths(&state_dir, "example"); + ensure_clone_dirs(&paths).unwrap(); + + let remote_public = HarnessHttpServer::start("REMOTE PUBLIC BYPASS"); + let control_port = free_tcp_port(); + let site_port = free_tcp_port(); + let mut harness_manifest = manifest(); + harness_manifest.ssh = "fake-host".to_string(); + harness_manifest.remote_url = format!("http://127.0.0.1:{}", remote_public.port); + harness_manifest.local_url = format!("http://127.0.0.1:{site_port}"); + harness_manifest.control_url = format!("http://127.0.0.1:{control_port}"); + harness_manifest.probe.db_name = "remote_wp".to_string(); + harness_manifest.probe.db_host = format!("127.0.0.1:{mysql_port}"); + harness_manifest.probe.db_user = "root".to_string(); + harness_manifest.probe.db_password = String::new(); + harness_manifest.local_db = LocalDb { + name: "local_wp".to_string(), + user: "root".to_string(), + password: String::new(), + host: "127.0.0.1".to_string(), + port: mysql_port, + }; + harness_manifest.remote_db_tunnel = RemoteDbTunnel { + host: "127.0.0.1".to_string(), + port: mysql_port, + }; + harness_manifest.cache_max_file_bytes = 1024 * 1024; + 
harness_manifest.remote_metadata_cache_ttl_secs = 60; + + let remote_docroot = temp.path().join("remote-docroot"); + harness_manifest.remote_path = remote_docroot.to_string_lossy().to_string(); + write_runtime_harness_docroot(&remote_docroot, &paths, &harness_manifest); + fs::create_dir_all(remote_docroot.join("wp-content/uploads/2026")).unwrap(); + fs::write( + remote_docroot.join("wp-content/uploads/2026/huge-file.txt"), + b"uploads must stay lazy", + ) + .unwrap(); + write_manifest(&paths.manifest, &harness_manifest).unwrap(); + write_wordpress_overrides(&paths, &harness_manifest).unwrap(); + crate::db::set_local_admin_password(&harness_manifest, Some("admin"), "local-pass") + .unwrap(); + + let fake_bin = temp.path().join("fake-bin"); + let fake_ssh_log = temp.path().join("fake-ssh.log"); + install_fake_ssh(&fake_bin, &fake_ssh_log); + let _env = EnvVarGuard::set(&[ + ( + "PATH", + prepend_path(&fake_bin, std::env::var_os("PATH").as_ref()), + ), + ( + "WPCOW_FAKE_SSH_LOG", + fake_ssh_log.to_string_lossy().into_owned(), + ), + ("WPCOW_WEB_SERVER", "php".to_string()), + ("WPCOW_SPLASH", "0".to_string()), + ("WPCOW_PROXY_FRONTEND", "0".to_string()), + ("WPCOW_REMOTE_DB_TUNNEL", "0".to_string()), + ("WPCOW_REMOTE_FILE_HELPER", "0".to_string()), + ("WPCOW_CONTROL_REQUEST_TIMEOUT_SECS", "10".to_string()), + ("WPCOW_FUSE_TTL_SECS", "1".to_string()), + ("WPCOW_PHP_WORKERS", "1".to_string()), + ]); + + let mountpoint = temp.path().join("mount"); + let shutdown = Arc::new(AtomicBool::new(false)); + let run_manifest = harness_manifest.clone(); + let run_paths = paths.clone(); + let run_mountpoint = mountpoint.clone(); + let run_shutdown = shutdown.clone(); + let run_thread = thread::spawn(move || { + crate::run::run_site_with_shutdown( + run_manifest, + run_paths, + crate::run::RunOptions { + mountpoint: run_mountpoint, + http_addr: format!("127.0.0.1:{site_port}"), + skip_php: false, + }, + run_shutdown, + ) + }); + wait_for_port(site_port); + + let first = 
http_get_nonempty(site_port, "/", Duration::from_secs(10)); + assert!( + first.contains("Remote Harness Page"), + "production run did not render the remote page through local WordPress: {}", + first + ); + assert!( + !remote_public.was_hit(), + "production run must not proxy the frontend public URL by default" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT post_title FROM local_wp.wp_posts WHERE ID=1;" + ), + "Remote Harness Page", + "Rust control row-COW should materialize the rendered page locally" + ); + assert!( + cached_file_count(&paths) > 0, + "FUSE run should cache requested runtime files" + ); + assert!( + !paths.file_cache.join("mirror/wp-content/uploads").exists(), + "uploads must not be mirrored or prefetched" + ); + let ssh_log = fs::read_to_string(&fake_ssh_log).unwrap_or_default(); + assert!( + !ssh_log.contains("wp-content/uploads"), + "production run should not touch uploads unless requested:\n{}", + ssh_log + ); + assert!( + !ssh_log.contains("tar -cf -"), + "production run must not recursively tar runtime files:\n{}", + ssh_log + ); + + let installer = + http_get_nonempty(site_port, "/wp-admin/install.php", Duration::from_secs(5)); + assert!( + installer.starts_with("HTTP/1.1 500"), + "installer path must be reported as a runtime failure: {}", + installer + ); + assert!( + installer.contains("wp-cow did not load the remote site"), + "installer path did not use the wp-cow runtime guard: {}", + installer + ); + + let login = http_post( + site_port, + "/wp-login.php", + "log=admin&pwd=local-pass", + Duration::from_secs(10), + ); + assert!( + login.contains("LOGIN OK"), + "local-only admin password did not authenticate through production run: {}", + login + ); + let cookie = response_cookie(&login).expect("login response should set an auth cookie"); + let admin = http_get_with_headers( + site_port, + "/wp-admin/", + &[("Cookie", cookie.as_str())], + Duration::from_secs(10), + ); + assert!( + admin.contains("WP ADMIN LOCAL DASHBOARD"), + 
"wp-admin did not render after local login through production run: {}", + admin + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT user_pass = MD5('remote-pass') FROM remote_wp.wp_users WHERE ID=1;" + ), + "1", + "production local admin override must not update the remote password" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT user_pass = MD5('local-pass') FROM local_wp.wp_users WHERE ID=1;" + ), + "1", + "production local admin override must update only the local DB" + ); + + let created = http_post( + site_port, + "/wp-admin/post-new.php", + "title=Local+Only", + Duration::from_secs(10), + ); + assert!( + created.contains("LOCAL POST CREATED"), + "production local post creation failed: {}", + created + ); + let local_only = http_get_nonempty(site_port, "/local-only-page", Duration::from_secs(10)); + assert!( + local_only.contains("Local Only Harness Page"), + "production local-only page was not visible locally: {}", + local_only + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT COUNT(*) FROM remote_wp.wp_posts WHERE ID=99;" + ), + "0", + "production local page creation must not write to remote" + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT COUNT(*) FROM local_wp.wp_posts WHERE ID=99;" + ), + "1", + "production local page creation must write to the local overlay" + ); + + let edited = http_post( + site_port, + "/wp-admin/post.php", + "post_ID=1", + Duration::from_secs(10), + ); + assert!( + edited.contains("LOCAL POST EDITED"), + "production local post edit failed: {}", + edited + ); + let edited_page = http_get_nonempty(site_port, "/", Duration::from_secs(10)); + assert!( + edited_page.contains("Locally Edited Harness Page"), + "production edited local page did not render: {}", + edited_page + ); + assert_eq!( + mysql_scalar( + mysql_port, + "SELECT post_title FROM remote_wp.wp_posts WHERE ID=1;" + ), + "Remote Harness Page", + "production local edit must not change the remote row" + ); + + shutdown.store(true, 
Ordering::SeqCst); + match run_thread.join() { + Ok(result) => result.unwrap(), + Err(_) => panic!("production run thread panicked"), + } + wait_for_port_closed(site_port); + + write_offline_marker( + &paths, + &OfflineMarker { + severed_at_unix: 1, + materialized_tables: vec![ + "wp_posts".to_string(), + "wp_users".to_string(), + "wp_usermeta".to_string(), + ], + admin_user: Some("admin".to_string()), + }, + ) + .unwrap(); + mysql_exec( + mysql_port, + "UPDATE remote_wp.wp_posts SET post_title='Remote Changed After Sever' WHERE ID=1;", + ); + fs::rename(&remote_docroot, temp.path().join("remote-docroot-gone")).unwrap(); + let ssh_lines_before_offline = read_line_count(&fake_ssh_log); + + let offline_shutdown = Arc::new(AtomicBool::new(false)); + let offline_manifest = harness_manifest.clone(); + let offline_paths = paths.clone(); + let offline_mountpoint = mountpoint.clone(); + let offline_thread_shutdown = offline_shutdown.clone(); + let offline_thread = thread::spawn(move || { + crate::run::run_site_with_shutdown( + offline_manifest, + offline_paths, + crate::run::RunOptions { + mountpoint: offline_mountpoint, + http_addr: format!("127.0.0.1:{site_port}"), + skip_php: false, + }, + offline_thread_shutdown, + ) + }); + wait_for_port(site_port); + let offline = http_get_nonempty(site_port, "/", Duration::from_secs(10)); + assert!( + offline.contains("Locally Edited Harness Page"), + "offline production refresh did not use local materialized state: {}", + offline + ); + assert!( + !offline.contains("Remote Changed After Sever"), + "offline production refresh read from the remote lower layer: {}", + offline + ); + assert_eq!( + read_line_count(&fake_ssh_log), + ssh_lines_before_offline, + "offline production run must not invoke SSH" + ); + + offline_shutdown.store(true, Ordering::SeqCst); + match offline_thread.join() { + Ok(result) => result.unwrap(), + Err(_) => panic!("offline production run thread panicked"), + } + wait_for_port_closed(site_port); + drop(mysql); 
+ } + fn free_tcp_port() -> u16 { let listener = TcpListener::bind(("127.0.0.1", 0)).unwrap(); listener.local_addr().unwrap().port() @@ -1132,4 +1866,769 @@ mod tests { } } } + + fn http_get_with_headers( + port: u16, + path: &str, + headers: &[(&str, &str)], + timeout: Duration, + ) -> String { + http_request(port, "GET", path, headers, "", timeout) + .unwrap_or_else(|err| panic!("GET {path} failed: {err}")) + } + + fn http_post(port: u16, path: &str, body: &str, timeout: Duration) -> String { + http_request( + port, + "POST", + path, + &[("Content-Type", "application/x-www-form-urlencoded")], + body, + timeout, + ) + .unwrap_or_else(|err| panic!("POST {path} failed: {err}")) + } + + fn http_request( + port: u16, + method: &str, + path: &str, + headers: &[(&str, &str)], + body: &str, + timeout: Duration, + ) -> std::io::Result { + let mut stream = TcpStream::connect(("127.0.0.1", port))?; + stream.set_read_timeout(Some(timeout))?; + stream.set_write_timeout(Some(timeout))?; + let mut request = format!( + "{method} {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\nContent-Length: {}\r\n", + body.len() + ); + for (name, value) in headers { + request.push_str(&format!("{name}: {value}\r\n")); + } + request.push_str("\r\n"); + request.push_str(body); + stream.write_all(request.as_bytes())?; + + let mut response = String::new(); + stream.read_to_string(&mut response)?; + Ok(response) + } + + fn response_cookie(response: &str) -> Option { + response.lines().find_map(|line| { + let (name, value) = line.split_once(':')?; + if !name.eq_ignore_ascii_case("set-cookie") { + return None; + } + value.trim().split(';').next().map(str::to_string) + }) + } + + fn prepend_path(dir: &Path, old_path: Option<&OsString>) -> String { + match old_path { + Some(old) if !old.is_empty() => format!("{}:{}", dir.display(), old.to_string_lossy()), + _ => dir.display().to_string(), + } + } + + fn install_fake_ssh(bin: &Path, log: &Path) { + fs::create_dir_all(bin).unwrap(); + let 
script = bin.join("ssh"); + fs::write( + &script, + r#"#!/usr/bin/env bash +set -euo pipefail +printf '%s\n' "$*" >> "${WPCOW_FAKE_SSH_LOG:?}" + +for arg in "$@"; do + if [ "$arg" = "-O" ]; then + exit 0 + fi +done + +for arg in "$@"; do + if [ "$arg" = "-MNf" ]; then + exit 0 + fi + if [ "$arg" = "-N" ]; then + while true; do sleep 60; done + fi +done + +last="${!#}" +if [ "$last" = "fake-host" ]; then + exit 0 +fi +exec bash -lc "$last" +"#, + ) + .unwrap(); + let mut perms = fs::metadata(&script).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&script, perms).unwrap(); + fs::write(log, b"").unwrap(); + } + + fn read_line_count(path: &Path) -> usize { + fs::read_to_string(path).unwrap_or_default().lines().count() + } + + fn cached_file_count(paths: &crate::config::ClonePaths) -> usize { + fn visit(path: &Path, count: &mut usize) { + let Ok(entries) = fs::read_dir(path) else { + return; + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + visit(&path, count); + } else if path.is_file() { + *count += 1; + } + } + } + + let mut count = 0; + visit(&paths.file_cache, &mut count); + count + } + + struct EnvVarGuard { + old: Vec<(&'static str, Option)>, + } + + impl EnvVarGuard { + fn set(vars: &[(&'static str, String)]) -> Self { + let old = vars + .iter() + .map(|(name, _)| (*name, std::env::var_os(name))) + .collect::>(); + for (name, value) in vars { + std::env::set_var(name, value); + } + Self { old } + } + } + + impl Drop for EnvVarGuard { + fn drop(&mut self) { + for (name, value) in self.old.drain(..).rev() { + match value { + Some(value) => std::env::set_var(name, value), + None => std::env::remove_var(name), + } + } + } + } + + struct ChildGuard { + child: Option, + } + + impl ChildGuard { + fn new(child: Child) -> Self { + Self { child: Some(child) } + } + + fn kill_wait(&mut self) { + if let Some(mut child) = self.child.take() { + let _ = child.kill(); + let _ = child.wait(); + } + } + } + + impl 
Drop for ChildGuard { + fn drop(&mut self) { + self.kill_wait(); + } + } + + struct HarnessHttpServer { + port: u16, + hit: Arc, + shutdown: Arc, + handle: Option>, + } + + impl HarnessHttpServer { + fn start(body: &'static str) -> Self { + let listener = TcpListener::bind(("127.0.0.1", 0)).unwrap(); + let port = listener.local_addr().unwrap().port(); + listener.set_nonblocking(true).unwrap(); + let hit = Arc::new(AtomicBool::new(false)); + let shutdown = Arc::new(AtomicBool::new(false)); + let thread_hit = hit.clone(); + let thread_shutdown = shutdown.clone(); + let handle = thread::spawn(move || { + while !thread_shutdown.load(Ordering::SeqCst) { + match listener.accept() { + Ok((mut stream, _)) => { + thread_hit.store(true, Ordering::SeqCst); + let _ = stream.set_read_timeout(Some(Duration::from_millis(200))); + let mut buf = [0_u8; 1024]; + let _ = stream.read(&mut buf); + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => { + thread::sleep(Duration::from_millis(20)); + } + Err(_) => break, + } + } + }); + Self { + port, + hit, + shutdown, + handle: Some(handle), + } + } + + fn was_hit(&self) -> bool { + self.hit.load(Ordering::SeqCst) + } + } + + impl Drop for HarnessHttpServer { + fn drop(&mut self) { + self.shutdown.store(true, Ordering::SeqCst); + let _ = TcpStream::connect(("127.0.0.1", self.port)); + if let Some(handle) = self.handle.take() { + let _ = handle.join(); + } + } + } + + fn require_command(name: &str) { + assert!( + command_path(name).is_some(), + "strict runtime harness requires {name} on PATH" + ); + } + + fn command_path(name: &str) -> Option { + if name.contains('/') { + let path = PathBuf::from(name); + return path.is_file().then_some(path); + } + std::env::var_os("PATH").and_then(|paths| { + std::env::split_paths(&paths) + 
.map(|dir| dir.join(name)) + .find(|path| path.is_file()) + }) + } + + fn start_mariadb(temp: &Path, port: u16) -> Child { + let datadir = temp.join("mysql-data"); + let basedir = command_path("mariadbd") + .and_then(|path| fs::canonicalize(path).ok()) + .and_then(|path| { + path.parent() + .and_then(|bin| bin.parent()) + .map(Path::to_path_buf) + }) + .expect("resolve mariadbd basedir"); + + let mut install = Command::new("mariadb-install-db"); + install + .arg(format!("--basedir={}", basedir.display())) + .arg(format!("--datadir={}", datadir.display())) + .arg("--auth-root-authentication-method=normal") + .arg("--skip-test-db"); + if let Ok(user) = std::env::var("USER") { + install.arg(format!("--user={user}")); + } + let output = install + .output() + .unwrap_or_else(|err| panic!("run mariadb-install-db: {err}")); + assert!( + output.status.success(), + "mariadb-install-db failed: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + Command::new("mariadbd") + .arg("--no-defaults") + .arg(format!("--basedir={}", basedir.display())) + .arg(format!("--datadir={}", datadir.display())) + .arg(format!("--socket={}", temp.join("mysql.sock").display())) + .arg(format!("--port={port}")) + .arg("--bind-address=127.0.0.1") + .arg(format!("--pid-file={}", temp.join("mysql.pid").display())) + .arg("--skip-networking=0") + .arg("--skip-grant-tables") + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .unwrap_or_else(|err| panic!("start mariadbd: {err}")) + } + + fn wait_for_mysql(port: u16) { + let started = Instant::now(); + while started.elapsed() < Duration::from_secs(10) { + let output = Command::new("mysql") + .arg("--protocol=TCP") + .arg("-h127.0.0.1") + .arg(format!("-P{port}")) + .arg("-uroot") + .arg("--execute") + .arg("SELECT 1;") + .output(); + if matches!(output, Ok(output) if output.status.success()) { + return; + } + thread::sleep(Duration::from_millis(100)); + } + panic!("temporary MariaDB did not 
become ready on port {port}"); + } + + fn mysql_exec(port: u16, sql: &str) { + let output = Command::new("mysql") + .arg("--protocol=TCP") + .arg("-h127.0.0.1") + .arg(format!("-P{port}")) + .arg("-uroot") + .arg("--execute") + .arg(sql) + .output() + .unwrap_or_else(|err| panic!("run mysql: {err}")); + assert!( + output.status.success(), + "mysql failed for SQL:\n{sql}\n{}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + + fn mysql_scalar(port: u16, sql: &str) -> String { + let output = Command::new("mysql") + .arg("--protocol=TCP") + .arg("-h127.0.0.1") + .arg(format!("-P{port}")) + .arg("-uroot") + .arg("--batch") + .arg("--raw") + .arg("--skip-column-names") + .arg("--execute") + .arg(sql) + .output() + .unwrap_or_else(|err| panic!("run mysql scalar: {err}")); + assert!( + output.status.success(), + "mysql scalar failed for SQL:\n{sql}\n{}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8_lossy(&output.stdout) + .trim_end_matches(['\r', '\n']) + .to_string() + } + + fn create_harness_databases(port: u16) { + mysql_exec( + port, + r#" +CREATE DATABASE remote_wp DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; +CREATE DATABASE local_wp DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; +CREATE TABLE remote_wp.wp_posts ( + ID bigint unsigned NOT NULL, + post_title text NOT NULL, + post_content longtext NOT NULL, + post_name varchar(200) NOT NULL DEFAULT '', + post_status varchar(20) NOT NULL DEFAULT 'publish', + post_type varchar(20) NOT NULL DEFAULT 'post', + PRIMARY KEY (ID) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +CREATE TABLE local_wp.wp_posts LIKE remote_wp.wp_posts; +CREATE TABLE remote_wp.wp_users ( + ID bigint unsigned NOT NULL, + user_login varchar(60) NOT NULL, + user_pass varchar(255) NOT NULL, + user_nicename varchar(50) NOT NULL DEFAULT '', + user_email varchar(100) NOT NULL DEFAULT '', + user_registered datetime NOT 
NULL DEFAULT CURRENT_TIMESTAMP, + user_activation_key varchar(255) NOT NULL DEFAULT '', + user_status int NOT NULL DEFAULT 0, + display_name varchar(250) NOT NULL DEFAULT '', + PRIMARY KEY (ID), + KEY user_login_key (user_login) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +CREATE TABLE local_wp.wp_users LIKE remote_wp.wp_users; +CREATE TABLE remote_wp.wp_usermeta ( + umeta_id bigint unsigned NOT NULL, + user_id bigint unsigned NOT NULL DEFAULT 0, + meta_key varchar(255) DEFAULT NULL, + meta_value longtext, + PRIMARY KEY (umeta_id), + KEY user_id (user_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +CREATE TABLE local_wp.wp_usermeta LIKE remote_wp.wp_usermeta; +INSERT INTO remote_wp.wp_posts (ID, post_title, post_content, post_name, post_status, post_type) +VALUES (1, 'Remote Harness Page', 'remote lower content', 'remote-harness-page', 'publish', 'page'); +INSERT INTO remote_wp.wp_users (ID, user_login, user_pass, user_nicename, user_email, display_name) +VALUES (1, 'admin', MD5('remote-pass'), 'admin', 'admin@example.test', 'Admin'); +INSERT INTO remote_wp.wp_usermeta (umeta_id, user_id, meta_key, meta_value) +VALUES (1, 1, 'wp_capabilities', 'a:1:{s:13:"administrator";b:1;}'); +INSERT INTO local_wp.wp_users SELECT * FROM remote_wp.wp_users WHERE ID=1; +INSERT INTO local_wp.wp_usermeta SELECT * FROM remote_wp.wp_usermeta WHERE user_id=1; +"#, + ); + } + + fn start_php_server( + docroot: &Path, + router: &Path, + port: u16, + envs: &[(&str, String)], + ) -> Child { + let mut command = Command::new("php"); + command + .env("PHP_CLI_SERVER_WORKERS", "4") + .current_dir(docroot) + .arg("-S") + .arg(format!("127.0.0.1:{port}")) + .arg("-t") + .arg(docroot) + .arg(router) + .stdout(Stdio::null()) + .stderr(if std::env::var_os("WPCOW_HARNESS_PHP_STDERR").is_some() { + Stdio::inherit() + } else { + Stdio::null() + }); + for (name, value) in envs { + command.env(name, value); + } + command + .spawn() + .unwrap_or_else(|err| panic!("start php server on {port}: {err}")) + } + + 
fn wait_for_port(port: u16) { + let started = Instant::now(); + while started.elapsed() < Duration::from_secs(5) { + if TcpStream::connect(("127.0.0.1", port)).is_ok() { + return; + } + thread::sleep(Duration::from_millis(50)); + } + panic!("port {port} did not open"); + } + + fn wait_for_port_closed(port: u16) { + let started = Instant::now(); + while started.elapsed() < Duration::from_secs(5) { + if TcpStream::connect(("127.0.0.1", port)).is_err() { + return; + } + thread::sleep(Duration::from_millis(50)); + } + panic!("port {port} did not close"); + } + + fn write_runtime_harness_docroot( + docroot: &Path, + paths: &crate::config::ClonePaths, + manifest: &Manifest, + ) { + fs::create_dir_all(docroot.join("wp-includes")).unwrap(); + fs::create_dir_all(docroot.join("wp-content")).unwrap(); + fs::create_dir_all(docroot.join("wp-admin")).unwrap(); + fs::write( + docroot.join("wp-config.php"), + wp_config_php(manifest, paths), + ) + .unwrap(); + fs::write(docroot.join("wp-content/db.php"), db_dropin_php()).unwrap(); + fs::write( + docroot.join("wp-includes/class-wpdb.php"), + runtime_harness_wpdb_php(), + ) + .unwrap(); + fs::write( + docroot.join("wp-settings.php"), + runtime_harness_settings_php(), + ) + .unwrap(); + fs::write( + docroot.join("index.php"), + " &'static str { + r#"dbh = mysqli_connect( $host, $dbuser, $dbpassword, $dbname, $port ); + if ( ! $this->dbh ) { + throw new RuntimeException( 'mysqli connect failed: ' . 
mysqli_connect_error() ); + } + mysqli_set_charset( $this->dbh, $this->charset ); + } + + public function flush() { + $this->last_error = ''; + $this->last_result = array(); + $this->col_info = array(); + $this->num_rows = 0; + $this->rows_affected = 0; + $this->insert_id = 0; + } + + public function query( $query ) { + $this->flush(); + $this->last_query = $query; + $result = mysqli_query( $this->dbh, $query ); + if ( false === $result ) { + $this->last_error = mysqli_error( $this->dbh ); + return false; + } + if ( true === $result ) { + $this->rows_affected = mysqli_affected_rows( $this->dbh ); + $this->insert_id = mysqli_insert_id( $this->dbh ); + return $this->rows_affected; + } + foreach ( mysqli_fetch_fields( $result ) as $field ) { + $this->col_info[] = (object) array( 'name' => $field->name ); + } + while ( $row = mysqli_fetch_object( $result ) ) { + $this->last_result[] = $row; + } + $this->num_rows = count( $this->last_result ); + mysqli_free_result( $result ); + return $this->num_rows; + } +} +"# + } + + fn runtime_harness_settings_php() -> &'static str { + r#"query( 'SELECT * FROM wp_posts WHERE ID = ' . (int) $id ); + return $wpdb->last_result ? $wpdb->last_result[0] : null; +} + +function wpcow_harness_user() { + global $wpdb; + $wpdb->query( 'SELECT * FROM wp_users WHERE ID = 1' ); + return $wpdb->last_result ? 
$wpdb->last_result[0] : null; +} + +$path = parse_url( $_SERVER['REQUEST_URI'], PHP_URL_PATH ); + +if ( '/wp-login.php' === $path ) { + if ( 'POST' === $_SERVER['REQUEST_METHOD'] ) { + $user = wpcow_harness_user(); + if ( $user && isset( $_POST['log'], $_POST['pwd'] ) && 'admin' === $_POST['log'] && md5( $_POST['pwd'] ) === $user->user_pass ) { + setcookie( 'wp_cow_harness_auth', '1', 0, '/' ); + echo "LOGIN OK\n"; + return; + } + http_response_code( 403 ); + echo "LOGIN FAILED\n"; + return; + } + echo "LOGIN FORM\n"; + return; +} + +if ( in_array( $path, array( '/wp-admin', '/wp-admin/', '/wp-admin/index.php' ), true ) ) { + if ( empty( $_COOKIE['wp_cow_harness_auth'] ) ) { + http_response_code( 403 ); + echo "AUTH REQUIRED\n"; + return; + } + echo "WP ADMIN LOCAL DASHBOARD\n"; + return; +} + +if ( '/wp-admin/post-new.php' === $path ) { + global $wpdb; + $wpdb->query( "INSERT INTO wp_posts (ID, post_title, post_content, post_name, post_status, post_type) VALUES (99, 'Local Only Harness Page', 'local only content', 'local-only-page', 'publish', 'page')" ); + echo "LOCAL POST CREATED\n"; + return; +} + +if ( '/wp-admin/post.php' === $path ) { + global $wpdb; + $wpdb->query( "UPDATE wp_posts SET post_title = 'Locally Edited Harness Page', post_content = 'edited local content' WHERE ID = 1" ); + echo "LOCAL POST EDITED\n"; + return; +} + +if ( '/local-only-page' === $path ) { + $post = wpcow_harness_post( 99 ); + if ( ! $post ) { + http_response_code( 404 ); + echo "LOCAL ONLY MISSING\n"; + return; + } + echo "

" . htmlspecialchars( $post->post_title, ENT_QUOTES, 'UTF-8' ) . "

\n"; + echo "
" . htmlspecialchars( $post->post_content, ENT_QUOTES, 'UTF-8' ) . "
\n"; + return; +} + +$post = wpcow_harness_post( 1 ); +if ( ! $post ) { + http_response_code( 500 ); + echo "REMOTE PAGE MISSING\n"; + return; +} +echo "

" . htmlspecialchars( $post->post_title, ENT_QUOTES, 'UTF-8' ) . "

\n"; +echo "
" . htmlspecialchars( $post->post_content, ENT_QUOTES, 'UTF-8' ) . "
\n"; +"# + } + + fn runtime_harness_control_php() -> &'static str { + r#" false, 'error' => mysqli_connect_error() ) ); + exit; + } + mysqli_set_charset( $db, 'utf8mb4' ); + $dbs[ $name ] = $db; + return $db; +} + +function hrows( $db, $sql ) { + $result = mysqli_query( $db, $sql ); + if ( false === $result ) { + return array(); + } + $rows = array(); + while ( $row = mysqli_fetch_assoc( $result ) ) { + $rows[] = $row; + } + return $rows; +} + +function hresult( $rows ) { + $fields = array(); + foreach ( $rows as $row ) { + foreach ( array_keys( $row ) as $field ) { + if ( ! in_array( $field, $fields, true ) ) { + $fields[] = $field; + } + } + } + return array( + 'ok' => true, + 'error' => '', + 'rows' => array_values( $rows ), + 'fields' => $fields, + 'affected' => count( $rows ), + ); +} + +function hrespond( $payload ) { + header( 'Content-Type: application/json' ); + echo json_encode( $payload ); + exit; +} + +function hselect_id( $table, $id ) { + $remote = hrows( hdb( 'remote_wp' ), "SELECT * FROM `$table` WHERE ID = " . (int) $id ); + $local = hrows( hdb( 'local_wp' ), "SELECT * FROM `$table` WHERE ID = " . (int) $id ); + if ( $remote && ! $local ) { + mysqli_query( hdb( 'local_wp' ), "REPLACE INTO local_wp.`$table` SELECT * FROM remote_wp.`$table` WHERE ID = " . (int) $id ); + $local = hrows( hdb( 'local_wp' ), "SELECT * FROM `$table` WHERE ID = " . (int) $id ); + } + $merged = array(); + foreach ( $remote as $row ) { + $merged[ $row['ID'] ] = $row; + } + foreach ( $local as $row ) { + $merged[ $row['ID'] ] = $row; + } + return array_values( $merged ); +} + +$path = parse_url( $_SERVER['REQUEST_URI'], PHP_URL_PATH ); +$payload = json_decode( file_get_contents( 'php://input' ), true ); +$sql = is_array( $payload ) && isset( $payload['sql'] ) ? 
$payload['sql'] : ''; + +if ( '/row-cow' === $path ) { + if ( preg_match( '/^SELECT\s+\*\s+FROM\s+`?(wp_posts|wp_users)`?\s+WHERE\s+`?ID`?\s*=\s*([0-9]+)/i', $sql, $matches ) ) { + hrespond( array( 'ok' => true, 'handled' => true, 'backend' => 'cow', 'result' => hresult( hselect_id( $matches[1], (int) $matches[2] ) ) ) ); + } + if ( preg_match( '/^UPDATE\s+`?(wp_posts|wp_users)`?.*WHERE\s+`?ID`?\s*=\s*([0-9]+)/i', $sql, $matches ) ) { + mysqli_query( hdb( 'local_wp' ), "REPLACE INTO local_wp.`{$matches[1]}` SELECT * FROM remote_wp.`{$matches[1]}` WHERE ID = " . (int) $matches[2] ); + hrespond( array( 'ok' => true, 'handled' => true, 'backend' => 'local' ) ); + } + if ( preg_match( '/^(INSERT|REPLACE)\s+(?:IGNORE\s+)?INTO\s+`?(wp_posts|wp_users)`?/i', $sql ) ) { + hrespond( array( 'ok' => true, 'handled' => true, 'backend' => 'local' ) ); + } + hrespond( array( 'ok' => true, 'handled' => false, 'backend' => 'local' ) ); +} + +if ( '/route' === $path || '/materialize' === $path ) { + hrespond( array( 'ok' => true, 'backend' => 'local', 'materialized' => array() ) ); +} + +if ( '/query' === $path ) { + hrespond( array( 'ok' => false, 'error' => 'strict harness does not allow fallback remote queries', 'rows' => array(), 'fields' => array(), 'affected' => 0 ) ); +} + +hrespond( array( 'ok' => false, 'error' => 'unknown harness control path ' . 
$path ) ); +"# + } } diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index 5a3fcb20..bc2aec4a 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -104,12 +104,56 @@ impl OverlayStore { Ok(metadata.entries.get(&Self::rel_string(rel)).cloned()) } + pub fn list_cached_metadata_dir(&self, rel: &Path) -> Result> { + let metadata = self.load_metadata()?; + let rel = Self::clean_rel(rel)?; + let mut out = Vec::new(); + + for (entry_rel, entry) in metadata.entries { + let entry_path = PathBuf::from(&entry_rel); + let parent = entry_path.parent().unwrap_or_else(|| Path::new("")); + if parent == rel { + out.push(entry); + } + } + + out.sort_by(|a, b| a.name.cmp(&b.name)); + Ok(out) + } + pub fn put_cached_entry(&self, rel: &Path, entry: &RemoteEntry) -> Result<()> { let mut metadata = self.load_metadata()?; - metadata - .entries - .insert(Self::rel_string(rel), entry.clone()); - self.write_metadata(&metadata) + let rel = Self::clean_rel(rel)?; + let mut journal_entries = Vec::new(); + let rel_string = Self::rel_string(&rel); + metadata.entries.insert(rel_string.clone(), entry.clone()); + journal_entries.push((rel_string, Some(entry.clone()))); + let mut current = rel.parent(); + while let Some(parent) = current { + if parent.as_os_str().is_empty() { + break; + } + let Some(name) = parent.file_name() else { + break; + }; + let parent_string = Self::rel_string(parent); + if !metadata.entries.contains_key(&parent_string) { + let parent_entry = RemoteEntry { + name: name.to_string_lossy().to_string(), + kind: "dir".to_string(), + size: 0, + mode: 0o40755, + mtime: entry.mtime, + }; + metadata + .entries + .insert(parent_string.clone(), parent_entry.clone()); + journal_entries.push((parent_string, Some(parent_entry))); + } + current = parent.parent(); + } + *self.metadata.borrow_mut() = Some(metadata); + self.append_metadata_journal(&journal_entries) } pub fn remove_cached(&self, 
rel: &Path) -> Result<()> { @@ -118,8 +162,10 @@ impl OverlayStore { fs::remove_file(path)?; } let mut metadata = self.load_metadata()?; - metadata.entries.remove(&Self::rel_string(rel)); - self.write_metadata(&metadata) + let rel_string = Self::rel_string(rel); + metadata.entries.remove(&rel_string); + *self.metadata.borrow_mut() = Some(metadata); + self.append_metadata_journal(&[(rel_string, None)]) } pub fn is_whiteout(&self, rel: &Path) -> Result { @@ -193,6 +239,48 @@ impl OverlayStore { Ok(upper) } + pub fn copy_up_cached_only(&self, rel: &Path) -> Result { + let upper = self.upper_path(rel)?; + if upper.exists() { + return Ok(upper); + } + + if let Some(parent) = upper.parent() { + fs::create_dir_all(parent)?; + } + + if let Some(cached) = self.cached_file_path(rel) { + fs::copy(cached, &upper)?; + return Ok(upper); + } + + let mirror = self.mirror_path(rel)?; + if let Ok(metadata) = fs::symlink_metadata(&mirror) { + if metadata.file_type().is_dir() { + fs::create_dir_all(&upper)?; + return Ok(upper); + } + if metadata.file_type().is_symlink() { + let target = fs::read_link(&mirror)?; + std::os::unix::fs::symlink(target, &upper)?; + return Ok(upper); + } + } + + if let Some(entry) = self.cached_entry(rel)? 
{ + if entry.kind == "dir" { + fs::create_dir_all(&upper)?; + return Ok(upper); + } + } + + Err(anyhow!( + "clone is severed and writable lower file is not cached locally: {}", + Self::rel_string(rel) + )) + } + + #[cfg(test)] pub fn read_cached_or_remote( &self, remote: &RemoteClient, @@ -200,6 +288,18 @@ impl OverlayStore { offset: i64, size: u32, cache_limit: u64, + ) -> Result> { + self.read_cached_or_remote_with_entry(remote, rel, offset, size, cache_limit, None) + } + + pub fn read_cached_or_remote_with_entry( + &self, + remote: &RemoteClient, + rel: &Path, + offset: i64, + size: u32, + cache_limit: u64, + entry: Option, ) -> Result> { if offset < 0 { return Ok(Vec::new()); @@ -210,7 +310,13 @@ impl OverlayStore { return read_range_from_file(&cache_path, offset as u64, size as usize); } - let entry = remote.stat(rel)?; + let entry = match entry { + Some(entry) => entry, + None => match self.cached_entry(rel)? { + Some(entry) => entry, + None => remote.stat(rel)?, + }, + }; if entry.kind == "file" && entry.size <= cache_limit { if let Some(parent) = cache_path.parent() { fs::create_dir_all(parent)?; @@ -337,6 +443,10 @@ impl OverlayStore { self.file_cache.join("metadata.json") } + fn metadata_journal_path(&self) -> PathBuf { + self.file_cache.join("metadata.jsonl") + } + fn progress_path(&self) -> PathBuf { self.file_cache.join("progress.json") } @@ -347,17 +457,20 @@ impl OverlayStore { } let path = self.metadata_path(); if !path.exists() { - let metadata = MetadataFile::default(); + let mut metadata = MetadataFile::default(); + self.apply_metadata_journal(&mut metadata)?; *self.metadata.borrow_mut() = Some(metadata.clone()); return Ok(metadata); } let mut json = String::new(); File::open(path)?.read_to_string(&mut json)?; - let metadata: MetadataFile = serde_json::from_str(&json)?; + let mut metadata: MetadataFile = serde_json::from_str(&json)?; + self.apply_metadata_journal(&mut metadata)?; *self.metadata.borrow_mut() = Some(metadata.clone()); Ok(metadata) 
} + #[allow(dead_code)] fn write_metadata(&self, metadata: &MetadataFile) -> Result<()> { fs::create_dir_all(&self.file_cache)?; let json = serde_json::to_vec_pretty(metadata)?; @@ -371,10 +484,65 @@ impl OverlayStore { file.write_all(b"\n")?; drop(file); fs::rename(tmp, self.metadata_path())?; + let _ = fs::remove_file(self.metadata_journal_path()); *self.metadata.borrow_mut() = Some(metadata.clone()); Ok(()) } + fn apply_metadata_journal(&self, metadata: &mut MetadataFile) -> Result<()> { + let path = self.metadata_journal_path(); + if !path.exists() { + return Ok(()); + } + + let mut jsonl = String::new(); + File::open(path)?.read_to_string(&mut jsonl)?; + for line in jsonl.lines().filter(|line| !line.trim().is_empty()) { + let value: serde_json::Value = serde_json::from_str(line)?; + let Some(path) = value.get("path").and_then(|value| value.as_str()) else { + continue; + }; + match value.get("op").and_then(|value| value.as_str()) { + Some("put") => { + let Some(entry) = value.get("entry") else { + continue; + }; + metadata + .entries + .insert(path.to_string(), serde_json::from_value(entry.clone())?); + } + Some("delete") => { + metadata.entries.remove(path); + } + _ => {} + } + } + Ok(()) + } + + fn append_metadata_journal(&self, entries: &[(String, Option)]) -> Result<()> { + if entries.is_empty() { + return Ok(()); + } + + fs::create_dir_all(&self.file_cache)?; + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(self.metadata_journal_path())?; + for (path, entry) in entries { + let value = match entry { + Some(entry) => { + serde_json::json!({ "op": "put", "path": path, "entry": entry }) + } + None => serde_json::json!({ "op": "delete", "path": path }), + }; + serde_json::to_writer(&mut file, &value)?; + file.write_all(b"\n")?; + } + Ok(()) + } + fn load_progress(&self) -> Result { let path = self.progress_path(); if !path.exists() { @@ -488,7 +656,12 @@ impl MetadataMode for std::fs::Metadata { #[cfg(test)] mod tests { use super::*; 
- use crate::config::ClonePaths; + use crate::config::{ensure_clone_dirs, ClonePaths, Manifest, Probe}; + use crate::remote::RemoteClient; + use std::os::unix::fs::PermissionsExt; + use std::sync::{Mutex, OnceLock}; + + static ENV_LOCK: OnceLock> = OnceLock::new(); #[test] fn stores_whiteouts() { @@ -548,7 +721,252 @@ mod tests { store.put_cached_entry(rel, &entry).unwrap(); assert_eq!(store.cached_entry(rel).unwrap().unwrap().size, 123); + let reloaded = OverlayStore::new(&paths); + assert_eq!( + reloaded.cached_entry(rel).unwrap().unwrap().size, + 123, + "metadata journal must be enough to rebuild cached entries after restart" + ); + assert_eq!( + store + .cached_entry(Path::new("wp-includes")) + .unwrap() + .unwrap() + .kind, + "dir", + "offline lookups need cached parent directory metadata" + ); store.remove_cached(rel).unwrap(); assert!(store.cached_entry(rel).unwrap().is_none()); + let reloaded = OverlayStore::new(&paths); + assert!(reloaded.cached_entry(rel).unwrap().is_none()); + } + + #[test] + fn cached_only_copy_up_uses_materialized_files_without_remote() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + + let rel = Path::new("wp-includes/version.php"); + let entry = RemoteEntry { + name: "version.php".to_string(), + kind: "file".to_string(), + size: 15, + mode: 0o100644, + mtime: 42, + }; + let cache_path = store.cache_path(rel); + fs::create_dir_all(cache_path.parent().unwrap()).unwrap(); + fs::write(&cache_path, b"cached runtime\n").unwrap(); + store.put_cached_entry(rel, &entry).unwrap(); + + let upper = store.copy_up_cached_only(rel).unwrap(); + assert_eq!(fs::read(&upper).unwrap(), b"cached runtime\n"); + + let mirror_rel = Path::new("wp-admin/index.php"); + let mirror_path = store.mirror_path(mirror_rel).unwrap(); + fs::create_dir_all(mirror_path.parent().unwrap()).unwrap(); + fs::write(&mirror_path, 
b"mirror runtime\n").unwrap(); + let upper = store.copy_up_cached_only(mirror_rel).unwrap(); + assert_eq!(fs::read(&upper).unwrap(), b"mirror runtime\n"); + + let dir_rel = Path::new("wp-content/themes/example"); + store + .put_cached_entry( + dir_rel, + &RemoteEntry { + name: "example".to_string(), + kind: "dir".to_string(), + size: 0, + mode: 0o40755, + mtime: 42, + }, + ) + .unwrap(); + assert!(store.copy_up_cached_only(dir_rel).unwrap().is_dir()); + + let err = store + .copy_up_cached_only(Path::new("wp-content/missing.php")) + .unwrap_err() + .to_string(); + assert!( + err.contains("clone is severed and writable lower file is not cached locally"), + "unexpected error: {err}" + ); + } + + #[test] + fn lazy_remote_file_is_cached_and_survives_remote_loss() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_log = std::env::var_os("WPCOW_FAKE_SSH_LOG"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + let log = temp.path().join("ssh.log"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b"remote wordpress").unwrap(); + + let fake_ssh = bin.join("ssh"); + fs::write( + &fake_ssh, + r#"#!/usr/bin/env bash +set -euo pipefail +printf 'CALL\n' >> "$WPCOW_FAKE_SSH_LOG" +cmd="${@: -1}" +exec bash -lc "$cmd" +"#, + ) + .unwrap(); + let mut perms = fs::metadata(&fake_ssh).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&fake_ssh, perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_FAKE_SSH_LOG", &log); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let mut 
manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + manifest.cache_max_file_bytes = 1024; + let remote = RemoteClient::new(manifest, None); + let store = OverlayStore::new(&paths); + let rel = Path::new("index.php"); + + let first = store + .read_cached_or_remote(&remote, rel, 0, 1024, 1024) + .unwrap(); + assert_eq!(first, b"remote wordpress"); + fs::remove_file(remote_root.join("index.php")).unwrap(); + let ssh_after_first = fs::read_to_string(&log).unwrap().lines().count(); + + let second = store + .read_cached_or_remote(&remote, rel, 0, 1024, 1024) + .unwrap(); + assert_eq!(second, b"remote wordpress"); + let ssh_after_second = fs::read_to_string(&log).unwrap().lines().count(); + assert_eq!( + ssh_after_second, ssh_after_first, + "cached read must not invoke ssh after the remote file disappears" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_log { + Some(value) => std::env::set_var("WPCOW_FAKE_SSH_LOG", value), + None => std::env::remove_var("WPCOW_FAKE_SSH_LOG"), + } + } + + #[test] + fn supplied_metadata_skips_remote_stat_before_caching_file() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_log = std::env::var_os("WPCOW_FAKE_SSH_LOG"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + let log = temp.path().join("ssh.log"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b"remote wordpress").unwrap(); + + let fake_ssh = bin.join("ssh"); + fs::write( + 
&fake_ssh, + r#"#!/usr/bin/env bash + set -euo pipefail + printf 'CALL\n' >> "$WPCOW_FAKE_SSH_LOG" + cmd="${@: -1}" + exec bash -lc "$cmd" + "#, + ) + .unwrap(); + let mut perms = fs::metadata(&fake_ssh).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&fake_ssh, perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_FAKE_SSH_LOG", &log); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + let store = OverlayStore::new(&paths); + let rel = Path::new("index.php"); + let entry = RemoteEntry { + name: "index.php".to_string(), + kind: "file".to_string(), + size: 16, + mode: 0o100644, + mtime: 42, + }; + + let first = store + .read_cached_or_remote_with_entry(&remote, rel, 0, 1024, 1024, Some(entry)) + .unwrap(); + assert_eq!(first, b"remote wordpress"); + let ssh_lines = fs::read_to_string(&log) + .unwrap() + .lines() + .filter(|line| *line == "CALL") + .count(); + assert_eq!( + ssh_lines, 1, + "FUSE lookup metadata should let the first read fetch file bytes without a second remote stat command" + ); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_log { + Some(value) => std::env::set_var("WPCOW_FAKE_SSH_LOG", value), + None => std::env::remove_var("WPCOW_FAKE_SSH_LOG"), + } + match old_helper { + Some(value) => 
std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } } } diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 762f69ef..b110080b 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -2,12 +2,13 @@ use anyhow::{anyhow, Context, Result}; use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; use serde::{Deserialize, Serialize}; use std::ffi::OsStr; -use std::io::{self, BufRead, BufReader, Write}; +use std::io::{self, Read, Write}; +use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; use std::sync::{Arc, Mutex}; use std::thread; -use std::time::Duration; +use std::time::{Duration, Instant}; use crate::config::{Manifest, Probe}; use crate::overlay::OverlayStore; @@ -193,11 +194,6 @@ impl RemoteClient { remote_port ); let mut command = Command::new("ssh"); - if let Some(control_path) = &self.control_path { - command.arg("-S").arg(control_path); - command.arg("-o").arg("ControlMaster=auto"); - command.arg("-o").arg("ControlPersist=600"); - } self.add_ssh_safety_options(&mut command); command .arg("-o") @@ -464,14 +460,7 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a helper.stdin.write_all(b"\n")?; helper.stdin.flush()?; - let mut line = String::new(); - let read = helper.stdout.read_line(&mut line)?; - if read == 0 { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "remote file helper closed", - )); - } + let line = read_helper_line(&mut helper.stdout)?; let response: serde_json::Value = serde_json::from_str(&line) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; if response.get("ok").and_then(|value| value.as_bool()) == Some(true) { @@ -509,7 +498,7 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a 
Ok(RemoteFileHelper { child, stdin, - stdout: BufReader::new(stdout), + stdout, }) } @@ -552,7 +541,7 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a struct RemoteFileHelper { child: Child, stdin: ChildStdin, - stdout: BufReader, + stdout: ChildStdout, } impl Drop for RemoteFileHelper { @@ -581,6 +570,61 @@ fn decode_helper_data(response: serde_json::Value) -> io::Result> { .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) } +fn read_helper_line(stdout: &mut ChildStdout) -> io::Result { + let timeout = Duration::from_secs(remote_file_helper_timeout_secs()); + let deadline = Instant::now() + timeout; + let fd = stdout.as_raw_fd(); + let mut out = Vec::new(); + + loop { + let now = Instant::now(); + if now >= deadline { + return Err(io::Error::new( + io::ErrorKind::TimedOut, + format!( + "remote file helper did not respond within {} seconds", + timeout.as_secs() + ), + )); + } + + let remaining = deadline.saturating_duration_since(now); + let timeout_ms = remaining.as_millis().min(i32::MAX as u128) as i32; + let mut fd_set = libc::pollfd { + fd, + events: libc::POLLIN, + revents: 0, + }; + let ready = unsafe { libc::poll(&mut fd_set, 1, timeout_ms) }; + if ready < 0 { + return Err(io::Error::last_os_error()); + } + if ready == 0 { + continue; + } + if fd_set.revents & libc::POLLIN == 0 { + return Err(io::Error::new( + io::ErrorKind::BrokenPipe, + "remote file helper pipe closed", + )); + } + + let mut chunk = [0_u8; 8192]; + let read = stdout.read(&mut chunk)?; + if read == 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "remote file helper closed", + )); + } + out.extend_from_slice(&chunk[..read]); + if out.last() == Some(&b'\n') || out.contains(&b'\n') { + return String::from_utf8(out) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)); + } + } +} + fn remote_file_helper_enabled() -> bool { env_bool("WPCOW_REMOTE_FILE_HELPER", true).unwrap_or(true) } @@ -750,6 +794,13 @@ fn 
remote_command_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_COMMAND_TIMEOUT_SECS", 20) } +fn remote_file_helper_timeout_secs() -> u64 { + env_u64( + "WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", + remote_command_timeout_secs(), + ) +} + fn remote_db_query_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS", 10) } diff --git a/experiments/remote-wp-cow/src/row_cow.rs b/experiments/remote-wp-cow/src/row_cow.rs index 5d7952cc..4c6d65c8 100644 --- a/experiments/remote-wp-cow/src/row_cow.rs +++ b/experiments/remote-wp-cow/src/row_cow.rs @@ -125,6 +125,10 @@ pub trait RowCowBackend { pk_values: &[PkValue], ) -> Result; + fn local_reserve_insert_pk(&mut self, _table: &str, _pk_column: Option<&str>) -> Result<()> { + Ok(()) + } + fn local_tombstones_by_pk( &mut self, table: &str, @@ -179,6 +183,9 @@ pub fn execute_row_cow( }) } RowCowPlan::RowLevel(RowCowOp::Insert(insert)) => { + if insert.pk_values.is_empty() { + backend.local_reserve_insert_pk(&insert.table, insert.pk_column.as_deref())?; + } if let Some(pk_column) = &insert.pk_column { backend.local_clear_tombstone_by_pk(&insert.table, pk_column, &insert.pk_values)?; } @@ -193,9 +200,37 @@ pub fn execute_row_cow( fn execute_select(backend: &mut B, select: &RowSelect) -> Result { let tombstones = backend.local_tombstones_by_pk(&select.table, &select.pk_column, &select.pk_values)?; - let remote = - backend.remote_select_by_pk(&select.table, &select.pk_column, &select.pk_values)?; let local = backend.local_select_by_pk(&select.table, &select.pk_column, &select.pk_values)?; + let local_pks = local + .rows + .iter() + .filter_map(|row| row_pk_value(row, &select.pk_column)) + .collect::>(); + let missing_values = select + .pk_values + .iter() + .filter(|value| !tombstones.contains(*value) && !local_pks.contains(*value)) + .cloned() + .collect::>(); + + let remote = if missing_values.is_empty() { + CowQueryResult::ok(Vec::new(), Vec::new()) + } else { + let remote = + backend.remote_select_by_pk(&select.table, 
&select.pk_column, &missing_values)?; + let rows_to_materialize = remote + .rows + .iter() + .filter(|row| { + row_pk_value(row, &select.pk_column) + .map(|pk| !tombstones.contains(&pk) && !local_pks.contains(&pk)) + .unwrap_or(false) + }) + .cloned() + .collect::>(); + backend.local_upsert_rows(&select.table, &rows_to_materialize)?; + remote + }; let mut merged = BTreeMap::::new(); for row in remote.rows { @@ -936,6 +971,7 @@ pub fn is_supported_pk_column(column: &str) -> bool { [ "ID", "option_id", + "option_name", "umeta_id", "meta_id", "term_id", @@ -989,7 +1025,29 @@ pub fn expected_pk_for_table(table: &str) -> Option<&'static str> { None } +pub fn auto_increment_pk_for_table(table: &str) -> Option<&'static str> { + let pk = expected_pk_for_table(table)?; + let lower = table.to_ascii_lowercase(); + if lower == "term_relationships" || lower.ends_with("_term_relationships") { + return None; + } + Some(pk) +} + +pub fn is_auto_increment_pk_for_table(table: &str, pk_column: &str) -> bool { + auto_increment_pk_for_table(table) + .map(|expected| expected.eq_ignore_ascii_case(pk_column)) + .unwrap_or(false) +} + fn canonical_pk_column(table: &str, column: &str) -> Option { + let lower = table.to_ascii_lowercase(); + if (lower == "options" || lower.ends_with("_options")) + && column.eq_ignore_ascii_case("option_name") + { + return Some("option_name".to_string()); + } + if let Some(expected) = expected_pk_for_table(table) { if expected.eq_ignore_ascii_case(column) { return Some(expected.to_string()); @@ -1285,6 +1343,7 @@ mod tests { local: BTreeMap>, tombstones: BTreeSet<(String, String, PkValue)>, remote_calls: Vec, + reserved_inserts: Vec<(String, Option)>, } impl FakeCowBackend { @@ -1356,7 +1415,14 @@ mod tests { fn local_upsert_rows(&mut self, table: &str, rows: &[Row]) -> Result { let table_rows = self.local.entry(table.to_string()).or_default(); for row in rows { - let pk = row_pk_value(row, expected_pk_for_table(table).unwrap()).unwrap(); + let pk_column = if 
(table == "options" || table.ends_with("_options")) + && row_value_ci(row, "option_name").is_some() + { + "option_name" + } else { + expected_pk_for_table(table).unwrap() + }; + let pk = row_pk_value(row, pk_column).unwrap(); table_rows.insert(pk, row.clone()); } Ok(rows.len()) @@ -1417,6 +1483,12 @@ mod tests { Ok(removed) } + fn local_reserve_insert_pk(&mut self, table: &str, pk_column: Option<&str>) -> Result<()> { + self.reserved_inserts + .push((table.to_string(), pk_column.map(str::to_string))); + Ok(()) + } + fn local_tombstones_by_pk( &mut self, table: &str, @@ -1492,6 +1564,39 @@ mod tests { vec![PkValue("1".to_string()), PkValue("2".to_string())] ); } + + assert_eq!(auto_increment_pk_for_table("wp_posts"), Some("ID")); + assert_eq!(auto_increment_pk_for_table("wp_term_relationships"), None); + } + + #[test] + fn plans_wordpress_options_by_unique_option_name() { + let RowCowPlan::RowLevel(RowCowOp::Update(write)) = + plan_sql("UPDATE wp_options SET option_value = 'local' WHERE option_name = 'blogname'") + else { + panic!("options writes by option_name should be row-level safe"); + }; + assert_eq!(write.table, "wp_options"); + assert_eq!(write.pk_column, "option_name"); + assert_eq!(write.pk_values, vec![PkValue("blogname".to_string())]); + + let RowCowPlan::RowLevel(RowCowOp::Delete(write)) = + plan_sql("DELETE FROM wp_options WHERE option_name = '_transient_example'") + else { + panic!("options deletes by option_name should be row-level safe"); + }; + assert_eq!(write.pk_column, "option_name"); + + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = + plan_sql("SELECT option_value FROM wp_options WHERE option_name = 'siteurl'") + else { + panic!("options reads by option_name should be row-level safe"); + }; + assert_eq!(select.pk_column, "option_name"); + assert_eq!( + select.projection, + Projection::Columns(vec!["option_value".to_string()]) + ); } #[test] @@ -1569,6 +1674,45 @@ mod tests { ); } + #[test] + fn 
options_update_copy_up_fetches_only_named_option() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote( + "wp_options", + "option_name", + "blogname", + &[("option_id", "1"), ("option_value", "Remote Name")], + ); + backend.insert_remote( + "wp_options", + "option_name", + "siteurl", + &[("option_id", "2"), ("option_value", "https://example.com")], + ); + + let execution = execute_row_cow( + &mut backend, + "UPDATE wp_options SET option_value = 'Local Name' WHERE option_name = 'blogname'", + ) + .unwrap(); + + assert!(matches!( + execution, + RowCowExecution::PreparedLocalWrite { + pk_column: Some(pk_column), + copied_rows: 1, + .. + } if pk_column == "option_name" + )); + assert_eq!( + backend.remote_select_values(), + vec![vec![PkValue("blogname".to_string())]] + ); + assert!(backend.local["wp_options"].contains_key(&PkValue("blogname".to_string()))); + assert!(!backend.local["wp_options"].contains_key(&PkValue("siteurl".to_string()))); + backend.assert_no_remote_writes(); + } + #[test] fn delete_tombstone_hides_remote_row_from_merged_selects() { let mut backend = FakeCowBackend::default(); @@ -1589,6 +1733,77 @@ mod tests { backend.assert_no_remote_writes(); } + #[test] + fn select_materializes_remote_rows_for_later_offline_reads() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!(result.rows.len(), 1); + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("remote".to_string())) + ); + assert_eq!( + backend.local["wp_posts"][&PkValue("42".to_string())].get("post_title"), + Some(&Value::String("remote".to_string())), + "row-level reads must materialize remote rows so offline refresh can use local state" + ); + 
backend.assert_no_remote_writes(); + } + + #[test] + fn repeated_select_uses_materialized_local_row_without_remote_read() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + backend.remote_calls.clear(); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("remote".to_string())) + ); + assert!( + backend.remote_calls.is_empty(), + "materialized row-level reads should be served from local COW state" + ); + } + + #[test] + fn select_materialization_preserves_local_overlay_rows() { + let mut backend = FakeCowBackend::default(); + backend.insert_remote("wp_posts", "ID", "42", &[("post_title", "remote")]); + backend.insert_local("wp_posts", "ID", "42", &[("post_title", "local")]); + + let execution = + execute_row_cow(&mut backend, "SELECT * FROM wp_posts WHERE ID = 42").unwrap(); + let RowCowExecution::Select(result) = execution else { + panic!("expected row-level select"); + }; + + assert_eq!( + result.rows[0].get("post_title"), + Some(&Value::String("local".to_string())) + ); + assert_eq!( + backend.local["wp_posts"][&PkValue("42".to_string())].get("post_title"), + Some(&Value::String("local".to_string())) + ); + backend.assert_no_remote_writes(); + } + #[test] fn insert_after_delete_clears_tombstone_and_shadows_remote_row() { let mut backend = FakeCowBackend::default(); @@ -1657,6 +1872,25 @@ mod tests { backend.assert_no_remote_writes(); } + #[test] + fn local_insert_without_pk_reserves_auto_increment_before_write() { + let mut backend = FakeCowBackend::default(); + let execution = execute_row_cow( + &mut backend, + "INSERT INTO wp_posts (post_title) VALUES ('local auto id')", + ) + 
.unwrap(); + assert!(matches!( + execution, + RowCowExecution::LocalOnlyInsert { table } if table == "wp_posts" + )); + assert_eq!( + backend.reserved_inserts, + vec![("wp_posts".to_string(), Some("ID".to_string()))] + ); + backend.assert_no_remote_writes(); + } + #[test] fn ambiguous_sql_is_never_row_level_safe() { assert_not_row_level( diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index ae03f369..dc143f03 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -1,21 +1,19 @@ use anyhow::{anyhow, Context, Result}; -use std::collections::BTreeSet; use std::fs; -use std::io::{Read, Write}; +use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Child, Command, Stdio}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread::{self, JoinHandle}; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::Duration; use crate::config::{self, ClonePaths, Manifest}; use crate::control; -use crate::db; use crate::fusefs; use crate::generate::ROUTER_BASENAME; use crate::mysql_proxy; -use crate::remote::{shell_quote, RemoteClient}; +use crate::remote::RemoteClient; pub struct RunOptions { pub mountpoint: PathBuf, @@ -26,9 +24,27 @@ pub struct RunOptions { pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> Result<()> { let shutdown = Arc::new(AtomicBool::new(false)); install_signal_handler(shutdown.clone())?; + run_site_until_shutdown(manifest, paths, options, shutdown) +} + +#[cfg(test)] +pub(crate) fn run_site_with_shutdown( + manifest: Manifest, + paths: ClonePaths, + options: RunOptions, + shutdown: Arc, +) -> Result<()> { + run_site_until_shutdown(manifest, paths, options, shutdown) +} +fn run_site_until_shutdown( + manifest: Manifest, + paths: ClonePaths, + options: RunOptions, + shutdown: Arc, +) -> Result<()> { let control_addr = control_addr_from_url(&manifest.control_url)?; - let remote = 
RemoteClient::new(manifest.clone(), Some(paths.run.join("ssh-control.sock"))); + let remote = RemoteClient::new(manifest.clone(), Some(config::ssh_control_path(&paths))); let offline = config::is_offline(&paths); let mut db_tunnel = if offline { eprintln!( @@ -118,17 +134,6 @@ pub fn run_site(manifest: Manifest, paths: ClonePaths, options: RunOptions) -> R )?) }; - if !offline && env_bool("WPCOW_PREFETCH_RUNTIME", false) { - let warm_manifest = manifest.clone(); - let warm_paths = paths.clone(); - let warm_remote = remote.clone(); - thread::spawn(move || { - if let Err(err) = prefetch_runtime_files(&warm_manifest, &warm_paths, &warm_remote) { - eprintln!("wp-cow runtime prefetch skipped: {err:#}"); - } - }); - } - eprintln!( "wp-cow running clone '{}' at {} from {}", manifest.name, @@ -182,210 +187,54 @@ pub fn mount_only(manifest: Manifest, paths: ClonePaths, mountpoint: &Path) -> R fusefs::mount_foreground(manifest, paths, mountpoint) } -pub(crate) fn prefetch_runtime_files( - manifest: &Manifest, - paths: &ClonePaths, - remote: &RemoteClient, -) -> Result<()> { - let mirror = paths.file_cache.join("mirror"); - fs::create_dir_all(&mirror)?; - let stamp = mirror.join(".wp-cow-runtime-prefetch-v3"); - if stamp.is_file() { - return Ok(()); - } - - let mut rels = [ - "index.php", - "wp-activate.php", - "wp-blog-header.php", - "wp-comments-post.php", - "wp-cron.php", - "wp-load.php", - "wp-login.php", - "wp-mail.php", - "wp-settings.php", - "wp-signup.php", - "wp-trackback.php", - "xmlrpc.php", - "wp-admin", - "wp-includes", - ] - .into_iter() - .map(|rel| rel.to_string()) - .collect::>(); - let mut themes = BTreeSet::new(); - for option in ["template", "stylesheet"] { - if let Some(theme) = db::local_option_value(manifest, option)? { - if let Some(theme) = clean_theme_name(&theme) { - themes.insert(theme); - } - } - } - if themes.is_empty() { - for theme in remote_active_theme_names(manifest, remote)? 
{ - themes.insert(theme); - } - } - for theme in themes { - rels.push(format!("wp-content/themes/{theme}")); - } - - eprintln!( - "wp-cow warming runtime file cache in background: {}", - rels.join(", ") - ); - let _ = write_prefetch_progress(paths, "prefetching-runtime", &rels.join(", "), 0, 0, 0); - let remote_paths = rels.iter().map(shell_quote).collect::>().join(" "); - let remote_command = format!( - "cd {} && tar -cf - --ignore-failed-read {}", - shell_quote(&manifest.remote_path), - remote_paths - ); - let mut ssh = remote - .command(&remote_command) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .context("start remote theme tar")?; - let mut tar = Command::new("tar") - .arg("-C") - .arg(&mirror) - .arg("-xf") - .arg("-") - .stdin(Stdio::piped()) - .spawn() - .context("start local theme tar")?; - +fn start_web_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { + match std::env::var("WPCOW_WEB_SERVER") + .unwrap_or_else(|_| "auto".to_string()) + .to_ascii_lowercase() + .as_str() { - let mut ssh_stdout = ssh.stdout.take().expect("ssh stdout piped"); - let mut tar_stdin = tar.stdin.take().expect("tar stdin piped"); - let mut buf = [0_u8; 64 * 1024]; - let mut bytes = 0_u64; - loop { - let read = ssh_stdout.read(&mut buf)?; - if read == 0 { - break; - } - tar_stdin.write_all(&buf[..read])?; - bytes = bytes.saturating_add(read as u64); - if bytes == read as u64 || bytes % (1024 * 1024) < read as u64 { - let _ = write_prefetch_progress( - paths, - "prefetching-runtime", - &rels.join(", "), - bytes, - 0, - bytes, - ); + "php" | "php-dev" | "php-dev-server" => start_php_dev_server(paths, mountpoint, http_addr), + "auto" | "frankenphp" => { + let bin = frankenphp_bin(); + if command_exists(&bin) { + start_frankenphp_server(paths, mountpoint, http_addr) + } else { + eprintln!("wp-cow FrankenPHP binary '{bin}' was not found; falling back to PHP's development server"); + start_php_dev_server(paths, mountpoint, http_addr) } } + 
other => Err(anyhow!( + "unsupported WPCOW_WEB_SERVER={other}; expected auto, frankenphp, or php" + )), } - - let ssh_output = ssh.wait_with_output()?; - let tar_status = tar.wait()?; - if !ssh_output.status.success() { - return Err(anyhow!( - "remote theme tar failed: {}", - String::from_utf8_lossy(&ssh_output.stderr) - )); - } - if !tar_status.success() { - return Err(anyhow!("local theme tar failed with status {}", tar_status)); - } - fs::write(&stamp, b"ok\n")?; - let _ = write_prefetch_progress(paths, "cached", "", 0, 0, 0); - Ok(()) } -fn remote_active_theme_names( - manifest: &Manifest, - remote: &RemoteClient, -) -> Result> { - let mut out = BTreeSet::new(); - let Some(table) = safe_mysql_identifier(&format!("{}options", manifest.probe.table_prefix)) - else { - return Ok(out); - }; - let sql = format!( - "SELECT option_name, option_value FROM `{table}` WHERE option_name IN ('template','stylesheet')" - ); - let result = db::remote_readonly_query(remote, &sql)?; - if !result.ok { - return Ok(out); - } - for row in result.rows { - let Some(value) = row.get("option_value").and_then(|value| value.as_str()) else { - continue; - }; - if let Some(theme) = clean_theme_name(value) { - out.insert(theme); - } - } - Ok(out) +fn frankenphp_bin() -> String { + std::env::var("WPCOW_FRANKENPHP_BIN").unwrap_or_else(|_| "frankenphp".to_string()) } -fn safe_mysql_identifier(value: &str) -> Option { - if value.is_empty() - || !value - .chars() - .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$') - { - return None; +fn command_exists(bin: &str) -> bool { + let path = Path::new(bin); + if path.components().count() > 1 { + return is_executable_file(path); } - Some(value.to_string()) -} -fn write_prefetch_progress( - paths: &ClonePaths, - phase: &str, - active_path: &str, - active_bytes: u64, - active_total: u64, - bytes_cached: u64, -) -> Result<()> { - fs::create_dir_all(&paths.file_cache)?; - let progress = serde_json::json!({ - "phase": phase, - "active_path": 
active_path, - "active_bytes": active_bytes, - "active_total": active_total, - "files_cached": 0, - "bytes_cached": bytes_cached, - "last_cached_path": active_path, - "updated_at_unix_ms": now_unix_ms(), - }); - let progress_path = paths.file_cache.join("progress.json"); - let tmp = paths - .file_cache - .join(format!("progress.json.prefetch.{}.tmp", std::process::id())); - fs::write(&tmp, serde_json::to_vec_pretty(&progress)?)?; - fs::rename(tmp, progress_path)?; - Ok(()) + std::env::var_os("PATH") + .map(|paths| std::env::split_paths(&paths).any(|dir| is_executable_file(&dir.join(bin)))) + .unwrap_or(false) } -fn clean_theme_name(value: &str) -> Option { - let value = value.trim(); - if value.is_empty() - || !value - .chars() - .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' || ch == '.') - { - return None; - } - Some(value.to_string()) +fn is_executable_file(path: &Path) -> bool { + fs::metadata(path) + .map(|metadata| metadata.is_file() && metadata.permissions().mode() & 0o111 != 0) + .unwrap_or(false) } -fn start_web_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { - match std::env::var("WPCOW_WEB_SERVER") - .unwrap_or_else(|_| "frankenphp".to_string()) - .to_ascii_lowercase() - .as_str() - { - "php" | "php-dev" | "php-dev-server" => start_php_dev_server(paths, mountpoint, http_addr), - "frankenphp" => start_frankenphp_server(paths, mountpoint, http_addr), - other => Err(anyhow!( - "unsupported WPCOW_WEB_SERVER={other}; expected frankenphp or php" - )), +fn apply_web_server_env(command: &mut Command, paths: &ClonePaths) { + if config::is_offline(paths) { + command + .env("WPCOW_OFFLINE", "1") + .env("WPCOW_REMOTE_DB_TUNNEL", "0"); } } @@ -400,7 +249,9 @@ fn start_frankenphp_server( &caddyfile, frankenphp_caddyfile(paths, mountpoint, http_addr), )?; - Command::new(std::env::var("WPCOW_FRANKENPHP_BIN").unwrap_or_else(|_| "frankenphp".to_string())) + let mut command = Command::new(frankenphp_bin()); + 
apply_web_server_env(&mut command, paths); + command .arg("run") .arg("--config") .arg(&caddyfile) @@ -412,11 +263,13 @@ fn start_frankenphp_server( } fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) -> Result { - Command::new("php") - .env( - "PHP_CLI_SERVER_WORKERS", - env_u64("WPCOW_PHP_WORKERS", 4).to_string(), - ) + let mut command = Command::new("php"); + apply_web_server_env(&mut command, paths); + let workers = env_u64("WPCOW_PHP_WORKERS", 4); + if workers > 1 { + command.env("PHP_CLI_SERVER_WORKERS", workers.to_string()); + } + command .arg("-d") .arg(format!( "max_execution_time={}", @@ -432,6 +285,14 @@ fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) "mysqlnd.net_read_timeout={}", env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15) )) + .arg("-d") + .arg("opcache.enable_cli=1") + .arg("-d") + .arg("opcache.memory_consumption=192") + .arg("-d") + .arg("opcache.max_accelerated_files=20000") + .arg("-d") + .arg("opcache.validate_timestamps=1") .arg("-S") .arg(http_addr) .arg("-t") @@ -486,6 +347,12 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) php }} + @wpCowInstaller path /wp-admin/install.php /wp-admin/setup-config.php + handle @wpCowInstaller {{ + rewrite * {router}?__wp_cow_installer_guard=1 + php + }} + @static {{ file not path *.php @@ -564,25 +431,6 @@ fn env_u64(name: &str, default: u64) -> u64 { .unwrap_or(default) } -fn now_unix_ms() -> u128 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|duration| duration.as_millis()) - .unwrap_or_default() -} - -fn env_bool(name: &str, default: bool) -> bool { - std::env::var(name) - .ok() - .map(|raw| { - matches!( - raw.to_ascii_lowercase().as_str(), - "1" | "true" | "yes" | "on" - ) - }) - .unwrap_or(default) -} - #[cfg(test)] mod tests { use super::*; @@ -606,6 +454,40 @@ mod tests { assert!(caddyfile.contains("@wpAdminIndex path /wp-admin /wp-admin/")); assert!(caddyfile.contains("rewrite * 
/wp-admin/index.php")); } + + #[test] + fn frankenphp_routes_installer_paths_through_runtime_guard() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile + .contains("@wpCowInstaller path /wp-admin/install.php /wp-admin/setup-config.php")); + assert!(caddyfile.contains("rewrite * /.wp-cow-router.php?__wp_cow_installer_guard=1")); + assert!( + caddyfile.find("@wpCowInstaller").unwrap() < caddyfile.find("@phpFiles").unwrap(), + "installer guard must run before the generic PHP file handler" + ); + } + + #[test] + fn command_exists_requires_an_executable_file() { + let temp = tempfile::tempdir().unwrap(); + let fake = temp.path().join("frankenphp"); + fs::write(&fake, b"#!/bin/sh\nexit 0\n").unwrap(); + + let mut permissions = fs::metadata(&fake).unwrap().permissions(); + permissions.set_mode(0o644); + fs::set_permissions(&fake, permissions).unwrap(); + assert!( + !command_exists(fake.to_str().unwrap()), + "a non-executable FrankenPHP file must not suppress the PHP fallback" + ); + + let mut permissions = fs::metadata(&fake).unwrap().permissions(); + permissions.set_mode(0o755); + fs::set_permissions(&fake, permissions).unwrap(); + assert!(command_exists(fake.to_str().unwrap())); + } } fn wait_for_mount(mountpoint: &Path, mount_thread: &JoinHandle>) -> Result<()> { diff --git a/experiments/remote-wp-cow/src/sql.rs b/experiments/remote-wp-cow/src/sql.rs index 0fc6e867..f7d6a3fd 100644 --- a/experiments/remote-wp-cow/src/sql.rs +++ b/experiments/remote-wp-cow/src/sql.rs @@ -24,10 +24,11 @@ pub fn is_write_sql(sql: &str) -> bool { } pub fn is_safe_read_sql(sql: &str) -> bool { - matches!( - first_keyword(sql).as_deref(), - Some("SELECT") | Some("SHOW") | Some("DESCRIBE") | Some("DESC") | Some("EXPLAIN") - ) + match first_keyword(sql).as_deref() { + Some("SELECT") => 
!select_has_remote_side_effect_clause(sql), + Some("SHOW") | Some("DESCRIBE") | Some("DESC") | Some("EXPLAIN") => true, + _ => false, + } } #[allow(dead_code)] @@ -37,7 +38,7 @@ pub fn extract_tables(sql: &str) -> Vec { let table_markers = ["FROM", "JOIN", "UPDATE", "INTO", "TABLE"]; let mut i = 0; while i < tokens.len() { - if table_markers.contains(&tokens[i].as_str()) { + if table_markers.contains(&tokens[i].to_ascii_uppercase().as_str()) { if let Some(next) = tokens.get(i + 1) { if !is_keyword(next) { tables.insert(next.trim_matches('`').to_string()); @@ -82,6 +83,19 @@ fn first_keyword(sql: &str) -> Option { .map(|part| part.trim_matches('`').to_ascii_uppercase()) } +fn select_has_remote_side_effect_clause(sql: &str) -> bool { + let tokens = tokenize(strip_leading_comments(sql)) + .into_iter() + .map(|token| token.to_ascii_uppercase()) + .collect::>(); + + tokens.iter().any(|token| token == "INTO") + || tokens.windows(2).any(|window| window == ["FOR", "UPDATE"]) + || tokens + .windows(4) + .any(|window| window == ["LOCK", "IN", "SHARE", "MODE"]) +} + fn strip_leading_comments(mut sql: &str) -> &str { loop { let trimmed = sql.trim_start(); @@ -115,29 +129,82 @@ fn tokenize(sql: &str) -> Vec { let mut tokens = Vec::new(); let mut current = String::new(); let mut quote = None; + let mut chars = sql.chars().peekable(); - for ch in sql.chars() { + while let Some(ch) = chars.next() { if let Some(q) = quote { + if ch == '\\' { + let _ = chars.next(); + continue; + } if ch == q { + if q == '\'' && chars.peek() == Some(&'\'') { + let _ = chars.next(); + continue; + } quote = None; } continue; } if ch == '\'' || ch == '"' { + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } quote = Some(ch); continue; } + if ch == '-' && chars.peek() == Some(&'-') { + let _ = chars.next(); + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + for skipped in chars.by_ref() { + if 
skipped == '\n' { + break; + } + } + continue; + } + if ch == '#' { + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + for skipped in chars.by_ref() { + if skipped == '\n' { + break; + } + } + continue; + } + if ch == '/' && chars.peek() == Some(&'*') { + let _ = chars.next(); + if !current.is_empty() { + tokens.push(current.trim_matches('`').to_string()); + current.clear(); + } + let mut prev = '\0'; + for skipped in chars.by_ref() { + if prev == '*' && skipped == '/' { + break; + } + prev = skipped; + } + continue; + } if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' || ch == '`' { current.push(ch); } else if !current.is_empty() { - tokens.push(current.trim_matches('`').to_ascii_uppercase()); + tokens.push(current.trim_matches('`').to_string()); current.clear(); } } if !current.is_empty() { - tokens.push(current.trim_matches('`').to_ascii_uppercase()); + tokens.push(current.trim_matches('`').to_string()); } tokens } @@ -145,7 +212,7 @@ fn tokenize(sql: &str) -> Vec { #[allow(dead_code)] fn is_keyword(token: &str) -> bool { matches!( - token, + token.to_ascii_uppercase().as_str(), "SELECT" | "WHERE" | "SET" | "ON" | "USING" | "VALUES" | "INNER" | "LEFT" | "RIGHT" ) } @@ -161,6 +228,21 @@ mod tests { "UPDATE wp_posts SET post_title = 'x' WHERE ID = 1" )); assert!(is_write_sql("LOAD DATA INFILE 'x' INTO TABLE wp_posts")); + assert!(!is_safe_read_sql( + "SELECT * FROM wp_posts INTO OUTFILE '/tmp/wp-cow-leak'" + )); + assert!(!is_safe_read_sql( + "SELECT post_title FROM wp_posts WHERE ID = 1 FOR UPDATE" + )); + assert!(!is_safe_read_sql( + "SELECT post_title FROM wp_posts WHERE ID = 1 LOCK IN SHARE MODE" + )); + assert!(is_safe_read_sql( + "SELECT * FROM wp_posts WHERE post_title = 'FOR UPDATE'" + )); + assert!(is_safe_read_sql( + "SELECT * FROM wp_posts /* FOR UPDATE */ WHERE ID = 1" + )); } #[test] @@ -170,4 +252,22 @@ mod tests { assert!(expanded.contains(&"wp_postmeta".to_string())); 
assert!(expanded.contains(&"wp_term_relationships".to_string())); } + + #[test] + fn extract_tables_preserves_wordpress_table_case_for_proxy_cow() { + assert_eq!( + extract_tables("UPDATE wp_posts SET post_title = 'x' WHERE ID = 1"), + vec!["wp_posts".to_string()] + ); + assert_eq!( + extract_tables("INSERT INTO `wp_postmeta` (`post_id`) VALUES (1)"), + vec!["wp_postmeta".to_string()] + ); + assert_eq!( + extract_tables( + "SELECT * FROM wp_posts JOIN wp_postmeta ON wp_postmeta.post_id = wp_posts.ID" + ), + vec!["wp_postmeta".to_string(), "wp_posts".to_string()] + ); + } } From c68af249e27e4f4d248c7470dca944e970a7c5b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 00:46:10 +0200 Subject: [PATCH 28/39] Speed safe COW rerenders --- .../remote-wp-cow/scripts/strict-harness.sh | 7 + experiments/remote-wp-cow/src/control.rs | 2 +- experiments/remote-wp-cow/src/db.rs | 168 ++++++++++++++++- experiments/remote-wp-cow/src/generate.rs | 177 +++++++++++++++++- experiments/remote-wp-cow/src/mysql_proxy.rs | 2 +- 5 files changed, 344 insertions(+), 12 deletions(-) diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index 3b84af8b..cfea7572 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -45,6 +45,8 @@ run_exact_test overlay::tests::cached_only_copy_up_uses_materialized_files_witho run_exact_test fusefs::tests::offline_readdir_uses_cached_remote_metadata_without_remote run_exact_test fusefs::tests::remote_stat_metadata_survives_severed_mode_without_remote run_exact_test generate::tests::router_splash_and_progress_smoke_responds_quickly +run_exact_test db::tests::remote_query_cache_round_trips_safe_read_results +run_exact_test db::tests::dirty_row_overlay_tables_are_local_state run_exact_test row_cow::tests::select_materializes_remote_rows_for_later_offline_reads run_exact_test 
row_cow::tests::local_insert_is_not_sent_to_remote_and_appears_in_merged_select run_exact_test row_cow::tests::update_copy_up_fetches_only_affected_primary_keys @@ -75,6 +77,10 @@ need_pattern src/generate.rs 'will not fall back to the empty local schema' "ins need_pattern src/generate.rs 'wp_cow_looks_like_installer' "installer response detector" need_pattern src/generate.rs '__wp_cow_installer_guard' "direct installer route guard" need_pattern src/generate.rs "'1' !== getenv\\( 'WPCOW_PROXY_FRONTEND' \\)" "local-first frontend default" +need_pattern src/db.rs 'cached_remote_readonly_query' "Rust remote read cache for MySQL proxy/control" +need_pattern src/db.rs 'dirty_tables' "dirty row-overlay table routing state" +need_pattern src/generate.rs 'cow_cached_remote_read_is_safe_without_control' "PHP cached remote read fast path" +need_pattern src/generate.rs 'cow_safe_local_read_without_control' "PHP local read fast path for materialized runtime data" need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" need_pattern src/row_cow.rs 'LocalOnlyInsert' "local-only content mutation path" @@ -99,6 +105,7 @@ deny_pattern src/cli.rs 'wordpress_offline_table_names' "full core-table sever m deny_pattern src/cli.rs 'prefetch_runtime_files' "sever-triggered runtime prefetch" deny_pattern src/run.rs 'WPCOW_PREFETCH_RUNTIME|prefetch_runtime_files|wp-cow-runtime-prefetch' "background runtime prefetch" deny_pattern src/run.rs 'tar[[:space:]]+-cf[[:space:]]+-' "recursive remote tar runtime prefetch" +deny_pattern src/generate.rs 'function cow_remote_query_cache_clear|cow_remote_query_cache_clear\(\);' "global remote query cache invalidation" deny_pattern docker/Dockerfile 'rsync|scp[[:space:]]+-r' "eager copy tooling" deny_pattern docker/wp-cow-lab-serve 'rsync|scp[[:space:]]+-r' "eager lab serve copy command" deny_pattern docker/wp-cow-lab-run 'rsync|scp[[:space:]]+-r' 
"eager lab run copy command" diff --git a/experiments/remote-wp-cow/src/control.rs b/experiments/remote-wp-cow/src/control.rs index 8a47b111..180e970c 100644 --- a/experiments/remote-wp-cow/src/control.rs +++ b/experiments/remote-wp-cow/src/control.rs @@ -157,7 +157,7 @@ fn control_response( })); } let sql = input.sql.ok_or_else(|| anyhow!("missing sql"))?; - let result = db::remote_readonly_query(remote, &sql)?; + let result = db::cached_remote_readonly_query(remote, paths, &sql)?; Ok(json!({ "ok": result.ok, "error": result.error, diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index eadfd218..736a494f 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -1,10 +1,12 @@ use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; use std::collections::BTreeSet; use std::fs::{self, File}; use std::io::{self, Read}; use std::path::PathBuf; use std::process::{Command, Stdio}; +use std::time::{SystemTime, UNIX_EPOCH}; use crate::config::{parse_host_port, ClonePaths, Manifest}; use crate::remote::{shell_quote, RemoteClient, RemoteQueryResult}; @@ -18,6 +20,8 @@ pub struct DbState { #[serde(default)] pub materialized_tables: BTreeSet, #[serde(default)] + pub dirty_tables: BTreeSet, + #[serde(default)] pub option_bootstrap_tables: BTreeSet, #[serde(default)] pub option_rows: BTreeSet, @@ -248,7 +252,7 @@ pub fn route_for_tables( let expanded = sql::expand_wordpress_groups(&manifest.probe.table_prefix, tables); let touches_local = expanded .iter() - .any(|table| state.materialized_tables.contains(table)); + .any(|table| table_has_local_state(&state, table)); if touches_local { let materialized = materialize_tables(remote, manifest, paths, &expanded)?; @@ -264,6 +268,10 @@ pub fn route_for_tables( } } +fn table_has_local_state(state: &DbState, table: &str) -> bool { + state.materialized_tables.contains(table) || state.dirty_tables.contains(table) +} + 
pub fn route_for_query( remote: &RemoteClient, manifest: &Manifest, @@ -314,6 +322,103 @@ pub fn remote_readonly_query(remote: &RemoteClient, sql_text: &str) -> Result Result { + if let Some(result) = remote_query_cache_get(paths, sql_text)? { + return Ok(result); + } + + let result = remote_readonly_query(remote, sql_text)?; + remote_query_cache_set(paths, sql_text, &result)?; + Ok(result) +} + +#[derive(Debug, Serialize, Deserialize)] +struct RemoteQueryCacheEntry { + sql: String, + result: RemoteQueryResult, +} + +fn remote_query_cache_enabled() -> bool { + std::env::var("WPCOW_REMOTE_QUERY_CACHE") + .ok() + .map(|raw| { + !matches!( + raw.to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" + ) + }) + .unwrap_or(true) +} + +fn remote_query_cache_max_rows() -> usize { + std::env::var("WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS") + .ok() + .and_then(|raw| raw.parse::().ok()) + .filter(|rows| *rows > 0) + .unwrap_or(5000) +} + +fn remote_query_cache_dir(paths: &ClonePaths) -> PathBuf { + paths.db.join("query-cache") +} + +fn remote_query_cache_file(paths: &ClonePaths, sql_text: &str) -> PathBuf { + let digest = Sha256::digest(sql_text.as_bytes()); + remote_query_cache_dir(paths).join(format!("{}.json", hex::encode(digest))) +} + +fn remote_query_cache_get(paths: &ClonePaths, sql_text: &str) -> Result> { + if !remote_query_cache_enabled() { + return Ok(None); + } + let path = remote_query_cache_file(paths, sql_text); + let Ok(bytes) = fs::read(&path) else { + return Ok(None); + }; + let entry: RemoteQueryCacheEntry = match serde_json::from_slice(&bytes) { + Ok(entry) => entry, + Err(_) => return Ok(None), + }; + if entry.sql == sql_text { + Ok(Some(entry.result)) + } else { + Ok(None) + } +} + +fn remote_query_cache_set( + paths: &ClonePaths, + sql_text: &str, + result: &RemoteQueryResult, +) -> Result<()> { + if !remote_query_cache_enabled() || !result.ok { + return Ok(()); + } + if result.rows.len() > remote_query_cache_max_rows() { + return Ok(()); + } + + let 
dir = remote_query_cache_dir(paths); + fs::create_dir_all(&dir)?; + let path = remote_query_cache_file(paths, sql_text); + let nonce = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_nanos()) + .unwrap_or_default(); + let tmp = path.with_extension(format!("{}.{}.tmp", std::process::id(), nonce)); + let entry = RemoteQueryCacheEntry { + sql: sql_text.to_string(), + result: result.clone(), + }; + fs::write(&tmp, serde_json::to_vec(&entry)?)?; + fs::rename(tmp, path)?; + Ok(()) +} + pub fn refresh_option_bootstrap_for_offline( remote: &RemoteClient, manifest: &Manifest, @@ -380,6 +485,7 @@ pub fn row_cow_query( pk_values, .. } => { + mark_dirty_table(paths, &table)?; mark_dirty_option_rows_for_write( manifest, paths, @@ -396,6 +502,7 @@ pub fn row_cow_query( }) } RowCowExecution::LocalOnlyInsert { table } => { + mark_dirty_table(paths, &table)?; mark_dirty_option_rows_from_sql(manifest, paths, sql_text, &table)?; Ok(RowCowResponse { handled: true, @@ -440,6 +547,16 @@ pub fn row_cow_query( } } +fn mark_dirty_table(paths: &ClonePaths, table: &str) -> Result<()> { + validate_table_name(table)?; + let mut state = load_state(paths)?; + if !state.materialized_tables.contains(table) { + state.dirty_tables.insert(table.to_string()); + write_state(paths, &state)?; + } + Ok(()) +} + fn mark_dirty_option_rows_for_write( manifest: &Manifest, paths: &ClonePaths, @@ -1748,4 +1865,53 @@ mod tests { "write fallbacks still need local table promotion before the write executes" ); } + + #[test] + fn dirty_row_overlay_tables_are_local_state() { + let mut state = DbState::default(); + assert!(!table_has_local_state(&state, "wp_posts")); + state.dirty_tables.insert("wp_posts".to_string()); + assert!( + table_has_local_state(&state, "wp_posts"), + "complex plugin reads must not route to remote after local row overlays or tombstones" + ); + } + + #[test] + fn remote_query_cache_round_trips_safe_read_results() { + let temp = tempfile::tempdir().unwrap(); + let 
paths = crate::config::clone_paths(temp.path(), "example"); + crate::config::ensure_clone_dirs(&paths).unwrap(); + let sql = "SELECT ID, post_title FROM wp_posts WHERE post_status = 'publish'"; + + let mut row = serde_json::Map::new(); + row.insert("ID".to_string(), serde_json::Value::String("7".to_string())); + row.insert( + "post_title".to_string(), + serde_json::Value::String("Cached".to_string()), + ); + let result = RemoteQueryResult { + ok: true, + error: String::new(), + rows: vec![row], + fields: vec!["ID".to_string(), "post_title".to_string()], + affected: 1, + }; + + remote_query_cache_set(&paths, sql, &result).unwrap(); + assert_eq!( + remote_query_cache_get(&paths, sql) + .unwrap() + .unwrap() + .rows + .len(), + 1 + ); + assert!( + remote_query_cache_get(&paths, "SELECT ID FROM wp_posts") + .unwrap() + .is_none(), + "cache files are keyed and verified by SQL text" + ); + } } diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 9aab8763..8978aabd 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -59,6 +59,7 @@ define( 'WPCOW_REMOTE_DB_USER', {remote_db_user} ); define( 'WPCOW_REMOTE_DB_PASSWORD', {remote_db_password} ); define( 'WPCOW_REMOTE_DB_HOST', {remote_db_host} ); define( 'WPCOW_QUERY_CACHE_DIR', {query_cache_dir} ); +define( 'WPCOW_DB_STATE_FILE', {db_state_file} ); define( 'FS_METHOD', 'direct' ); define( 'DISABLE_WP_CRON', true ); @@ -102,6 +103,7 @@ require_once ABSPATH . 'wp-settings.php'; manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port )), query_cache_dir = php_string(paths.db.join("query-cache").to_string_lossy().as_ref()), + db_state_file = php_string(paths.db.join("state.json").to_string_lossy().as_ref()), ) } @@ -305,18 +307,161 @@ function cow_remote_query_cache_set( $query, $result ) { if ( ! is_dir( dirname( $file ) ) && ! mkdir( dirname( $file ), 0777, true ) && ! 
is_dir( dirname( $file ) ) ) { return; } - $tmp = $file . '.' . getmypid() . '.tmp'; + $tmp = $file . '.' . getmypid() . '.' . str_replace( array( ' ', '.' ), '', microtime() . uniqid( '', true ) ) . '.tmp'; file_put_contents( $tmp, json_encode( array( 'sql' => $query, 'result' => $result ) ) ); @rename( $tmp, $file ); } -function cow_remote_query_cache_clear() { - if ( ! cow_remote_query_cache_enabled() || ! is_dir( WPCOW_QUERY_CACHE_DIR ) ) { - return; +function cow_local_state() { + static $cached = null; + static $cached_mtime = null; + + if ( ! defined( 'WPCOW_DB_STATE_FILE' ) || '' === WPCOW_DB_STATE_FILE || ! is_file( WPCOW_DB_STATE_FILE ) ) { + return array( + 'materialized_tables' => array(), + 'dirty_tables' => array(), + 'option_bootstrap_tables' => array(), + 'option_rows' => array(), + 'dirty_option_rows' => array(), + ); + } + + $mtime = @filemtime( WPCOW_DB_STATE_FILE ); + if ( is_array( $cached ) && $cached_mtime === $mtime ) { + return $cached; + } + + $decoded = json_decode( file_get_contents( WPCOW_DB_STATE_FILE ), true ); + $state = array( + 'materialized_tables' => array(), + 'dirty_tables' => array(), + 'option_bootstrap_tables' => array(), + 'option_rows' => array(), + 'dirty_option_rows' => array(), + ); + if ( is_array( $decoded ) ) { + foreach ( array( 'materialized_tables', 'dirty_tables', 'option_bootstrap_tables', 'option_rows', 'dirty_option_rows' ) as $key ) { + if ( isset( $decoded[ $key ] ) && is_array( $decoded[ $key ] ) ) { + foreach ( $decoded[ $key ] as $value ) { + $state[ $key ][ strtolower( (string) $value ) ] = true; + } + } + } + } + + $cached = $state; + $cached_mtime = $mtime; + return $cached; +} + +function cow_local_state_tables() { + $state = cow_local_state(); + $tables = array(); + foreach ( array( 'materialized_tables', 'dirty_tables', 'option_bootstrap_tables' ) as $key ) { + foreach ( $state[ $key ] as $table => $_present ) { + $tables[ $table ] = true; + } + } + foreach ( array( 'option_rows', 
'dirty_option_rows' ) as $key ) { + foreach ( $state[ $key ] as $row_key => $_present ) { + $parts = explode( ':', (string) $row_key, 2 ); + if ( '' !== $parts[0] ) { + $tables[ strtolower( $parts[0] ) ] = true; + } + } + } + return $tables; +} + +function cow_all_tables_in_state_set( $tables, $state_key ) { + if ( empty( $tables ) ) { + return false; + } + $state = cow_local_state(); + foreach ( $tables as $table ) { + if ( ! isset( $state[ $state_key ][ strtolower( (string) $table ) ] ) ) { + return false; + } + } + return true; +} + +function cow_option_bootstrap_names() { + return array( + 'siteurl', + 'home', + 'blogname', + 'blogdescription', + 'admin_email', + 'active_plugins', + 'template', + 'stylesheet', + 'current_theme', + 'permalink_structure', + 'rewrite_rules', + 'sidebars_widgets', + 'stylesheet_root', + 'template_root', + 'upload_path', + 'upload_url_path', + ); +} + +function cow_query_matches_option_bootstrap( $query, $tables, $options_table ) { + if ( ! in_array( $options_table, $tables, true ) ) { + return false; + } + $state = cow_local_state(); + if ( ! isset( $state['option_bootstrap_tables'][ strtolower( $options_table ) ] ) ) { + return false; + } + + $lower = strtolower( $query ); + if ( false !== strpos( $lower, 'autoload' ) ) { + return true; + } + if ( false !== strpos( $lower, 'option_name' ) ) { + foreach ( cow_option_bootstrap_names() as $name ) { + if ( false !== strpos( $lower, "'" . $name . "'" ) ) { + return true; + } + } + } + return false; +} + +function cow_safe_local_read_without_control( $query, $tables, $options_table ) { + if ( empty( $tables ) ) { + return false; + } + if ( cow_all_tables_in_state_set( $tables, 'materialized_tables' ) ) { + return true; } - foreach ( glob( rtrim( WPCOW_QUERY_CACHE_DIR, '/' ) . 
'/*.json' ) as $file ) { - @unlink( $file ); + if ( cow_query_matches_option_bootstrap( $query, $tables, $options_table ) ) { + return true; } + return false; +} + +function cow_cached_remote_read_is_safe_without_control( $tables ) { + if ( empty( $tables ) ) { + return false; + } + $local_tables = cow_local_state_tables(); + foreach ( $tables as $table ) { + if ( isset( $local_tables[ strtolower( (string) $table ) ] ) ) { + return false; + } + } + return true; +} + +function cow_options_table_name( $wpdb ) { + if ( isset( $wpdb->options ) && '' !== $wpdb->options ) { + return $wpdb->options; + } + global $table_prefix; + return (string) $table_prefix . 'options'; } class Cow_DB extends wpdb { @@ -332,9 +477,10 @@ class Cow_DB extends wpdb { $this->last_query = $query; $tables = cow_tables_from_sql( $query ); + $options_table = cow_options_table_name( $this ); if ( cow_is_write_sql( $query ) ) { - if ( in_array( $this->options, $tables, true ) && cow_is_protected_theme_option_write( $query ) ) { + if ( in_array( $options_table, $tables, true ) && cow_is_protected_theme_option_write( $query ) ) { $this->rows_affected = 0; $this->last_error = ''; return 0; @@ -349,7 +495,6 @@ class Cow_DB extends wpdb { cow_db_runtime_fail( 'control /row-cow failed: ' . $this->last_error . "\n\nSQL:\n" . $query ); } if ( ! empty( $row_cow['handled'] ) || ( isset( $row_cow['backend'] ) && 'local' === $row_cow['backend'] ) ) { - cow_remote_query_cache_clear(); return parent::query( $query ); } } @@ -358,7 +503,6 @@ class Cow_DB extends wpdb { $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow materialization failed'; cow_db_runtime_fail( 'control /materialize failed: ' . $this->last_error . "\n\nSQL:\n" . 
$query ); } - cow_remote_query_cache_clear(); return parent::query( $query ); } @@ -366,6 +510,15 @@ class Cow_DB extends wpdb { if ( cow_offline() ) { return parent::query( $query ); } + if ( cow_safe_local_read_without_control( $query, $tables, $options_table ) ) { + return parent::query( $query ); + } + if ( cow_cached_remote_read_is_safe_without_control( $tables ) ) { + $cached = cow_remote_query_cache_get( $query ); + if ( is_array( $cached ) ) { + return $this->cow_apply_remote_result( $cached ); + } + } if ( cow_row_cow_enabled() ) { $row_cow = cow_control_request( '/row-cow', array( 'tables' => $tables, 'sql' => $query ) ); if ( empty( $row_cow['ok'] ) ) { @@ -973,6 +1126,7 @@ mod tests { assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_REMOTE_DB_HOST")); assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); + assert!(php.contains("WPCOW_DB_STATE_FILE")); assert!(php.contains("wp-cow DB/runtime error")); assert!(php.contains("wp-content/db.php")); assert!(db_dropin_php().contains("WPCOW_LOCAL_DB_HOST")); @@ -987,6 +1141,11 @@ mod tests { assert!(php.contains("cow_remote_mysqli")); assert!(php.contains("cow_remote_query_cache_get")); assert!(php.contains("cow_remote_query_cache_set")); + assert!(php.contains("cow_cached_remote_read_is_safe_without_control")); + assert!(php.contains("cow_safe_local_read_without_control")); + assert!(php.contains("cow_query_matches_option_bootstrap")); + assert!(php.contains("dirty_tables")); + assert!(!php.contains("cow_remote_query_cache_clear")); assert!(php.contains("cow_is_protected_theme_option_write")); assert!(php.contains("WPCOW_PROTECT_THEME_OPTIONS")); assert!(php.contains("cow_db_runtime_fail")); diff --git a/experiments/remote-wp-cow/src/mysql_proxy.rs b/experiments/remote-wp-cow/src/mysql_proxy.rs index 11c5e91a..4bc85729 100644 --- a/experiments/remote-wp-cow/src/mysql_proxy.rs +++ b/experiments/remote-wp-cow/src/mysql_proxy.rs @@ -109,7 +109,7 @@ impl ProxyBackend { if route.backend == "local" 
{ self.local_query(query) } else { - let result = db::remote_readonly_query(&self.remote, query)?; + let result = db::cached_remote_readonly_query(&self.remote, &self.paths, query)?; Ok(ProxyReply::Result(CowQueryResult { ok: result.ok, error: result.error, From c247a3d47daa84b940ffcde5052ca83c0a62b816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 00:56:53 +0200 Subject: [PATCH 29/39] Keep production DB secrets out of PHP --- experiments/remote-wp-cow/.env.example | 2 +- experiments/remote-wp-cow/README.md | 14 ++- experiments/remote-wp-cow/compose.yaml | 2 +- .../remote-wp-cow/scripts/strict-harness.sh | 3 + experiments/remote-wp-cow/src/generate.rs | 109 +----------------- experiments/remote-wp-cow/src/remote.rs | 2 +- 6 files changed, 20 insertions(+), 112 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 06411e67..5344867d 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -24,7 +24,7 @@ WPCOW_DNS2=8.8.8.8 # Runtime defaults. WPCOW_WEB_SERVER=frankenphp WPCOW_SPLASH=1 -WPCOW_REMOTE_DB_TUNNEL=1 +WPCOW_REMOTE_DB_TUNNEL=0 WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 WPCOW_ENABLE_PLUGINS=1 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 43bbdd28..cdc3b8a6 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -156,11 +156,12 @@ polling can continue while the warm request is running. Set `WPCOW_WEB_SERVER=php` only when you explicitly want the old PHP built-in development server fallback. -The lab also starts a persistent SSH tunnel for remote database reads when the -remote `DB_HOST` is TCP-reachable from the SSH host. This avoids one SSH/PHP -subprocess per WordPress read query. Write-class SQL is still blocked from the -remote database and materialized locally first. 
Set `WPCOW_REMOTE_DB_TUNNEL=0` -to fall back to daemon-mediated remote reads. +Remote database reads are mediated by the local daemon by default. Generated +PHP does not contain the production DB name, user, password, or host, so plugins +using the normal `DB_*` constants see only the local COW proxy. Write-class SQL +is blocked from the remote database and materialized locally first. +`WPCOW_REMOTE_DB_TUNNEL=1` is an opt-in debugging/performance mode for hosts +where you explicitly accept opening a local SSH tunnel to the remote DB. `wp-cow run` also starts a local MySQL protocol proxy on the generated `DB_HOST` port. Core WordPress still uses the generated `db.php` drop-in with a direct @@ -181,7 +182,8 @@ Remote read queries that still need the lower database are cached under loads reuse local query results instead of crossing SSH/remote MySQL again. Set `WPCOW_REMOTE_QUERY_CACHE=0` to disable it or adjust `WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS` for large result sets. Local write-class SQL -clears this cache before executing. +does not globally clear this cache; cached remote reads are used only while the +referenced tables have no local overlay state. The FUSE mount also keeps warmed path metadata live long enough for repeat renders to reuse the program files WordPress just touched. 
The Docker lab diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 87915159..aabaaff2 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -29,7 +29,7 @@ services: WPCOW_DNS2: "${WPCOW_DNS2:-8.8.8.8}" WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" - WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-1}" + WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-0}" WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-60}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index cfea7572..d7d4c57a 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -81,6 +81,7 @@ need_pattern src/db.rs 'cached_remote_readonly_query' "Rust remote read cache fo need_pattern src/db.rs 'dirty_tables' "dirty row-overlay table routing state" need_pattern src/generate.rs 'cow_cached_remote_read_is_safe_without_control' "PHP cached remote read fast path" need_pattern src/generate.rs 'cow_safe_local_read_without_control' "PHP local read fast path for materialized runtime data" +need_pattern src/remote.rs 'WPCOW_REMOTE_DB_TUNNEL", false' "remote DB SSH tunnel is opt-in" need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" need_pattern src/row_cow.rs 'LocalOnlyInsert' "local-only content mutation path" @@ -97,6 +98,7 @@ need_pattern docker/wp-cow-lab-serve 'wp-cow serve' "Docker one-command serve wr need_pattern docker/wp-cow-lab-sever 'WPCOW_LOCAL_ADMIN_PASSWORD' "Docker local admin override wiring" need_pattern .env.example 
'^WPCOW_HTTP_PORT=9481$' "Docker lab example host HTTP port" need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example FrankenPHP preference" +need_pattern .env.example '^WPCOW_REMOTE_DB_TUNNEL=0$' "Docker lab example disables remote DB tunnel by default" need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" need_pattern .env.example '^WPCOW_LOCAL_ADMIN_PASSWORD=$' "Docker lab example local admin override" @@ -106,6 +108,7 @@ deny_pattern src/cli.rs 'prefetch_runtime_files' "sever-triggered runtime prefet deny_pattern src/run.rs 'WPCOW_PREFETCH_RUNTIME|prefetch_runtime_files|wp-cow-runtime-prefetch' "background runtime prefetch" deny_pattern src/run.rs 'tar[[:space:]]+-cf[[:space:]]+-' "recursive remote tar runtime prefetch" deny_pattern src/generate.rs 'function cow_remote_query_cache_clear|cow_remote_query_cache_clear\(\);' "global remote query cache invalidation" +deny_pattern src/generate.rs "define\\( 'WPCOW_REMOTE_DB_(NAME|USER|PASSWORD|HOST)'|function cow_remote_mysqli|real_connect" "remote DB credentials in generated PHP" deny_pattern docker/Dockerfile 'rsync|scp[[:space:]]+-r' "eager copy tooling" deny_pattern docker/wp-cow-lab-serve 'rsync|scp[[:space:]]+-r' "eager lab serve copy command" deny_pattern docker/wp-cow-lab-run 'rsync|scp[[:space:]]+-r' "eager lab run copy command" diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 8978aabd..8fab3c6a 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -54,10 +54,6 @@ $table_prefix = {table_prefix}; define( 'WPCOW_CLONE', {clone_name} ); define( 'WPCOW_CONTROL_URL', {control_url} ); -define( 'WPCOW_REMOTE_DB_NAME', {remote_db_name} ); -define( 'WPCOW_REMOTE_DB_USER', {remote_db_user} ); -define( 'WPCOW_REMOTE_DB_PASSWORD', {remote_db_password} ); -define( 'WPCOW_REMOTE_DB_HOST', {remote_db_host} ); define( 'WPCOW_QUERY_CACHE_DIR', {query_cache_dir} ); define( 
'WPCOW_DB_STATE_FILE', {db_state_file} ); @@ -95,13 +91,6 @@ require_once ABSPATH . 'wp-settings.php'; table_prefix = php_string(&manifest.probe.table_prefix), clone_name = php_string(&manifest.name), control_url = php_string(&manifest.control_url), - remote_db_name = php_string(&manifest.probe.db_name), - remote_db_user = php_string(&manifest.probe.db_user), - remote_db_password = php_string(&manifest.probe.db_password), - remote_db_host = php_string(&format!( - "{}:{}", - manifest.remote_db_tunnel.host, manifest.remote_db_tunnel.port - )), query_cache_dir = php_string(paths.db.join("query-cache").to_string_lossy().as_ref()), db_state_file = php_string(paths.db.join("state.json").to_string_lossy().as_ref()), ) @@ -465,9 +454,6 @@ function cow_options_table_name( $wpdb ) { } class Cow_DB extends wpdb { - private $cow_remote_mysqli = null; - private $cow_remote_failed = false; - public function query( $query ) { if ( ! $query ) { return false; @@ -552,40 +538,6 @@ class Cow_DB extends wpdb { return $this->cow_apply_remote_result( $cached ); } - $remote = $this->cow_remote_mysqli(); - if ( $remote instanceof mysqli ) { - $result = $remote->query( $query, MYSQLI_STORE_RESULT ); - if ( false === $result ) { - $this->last_error = $remote->error; - cow_db_runtime_fail( 'remote mysqli query failed: ' . $this->last_error . "\n\nSQL:\n" . 
$query ); - } - - $remote_result = array( - 'ok' => true, - 'error' => '', - 'rows' => array(), - 'fields' => array(), - 'affected' => 0, - ); - - if ( true === $result ) { - $remote_result['affected'] = (int) $remote->affected_rows; - cow_remote_query_cache_set( $query, $remote_result ); - return $this->cow_apply_remote_result( $remote_result ); - } - - foreach ( $result->fetch_fields() as $field ) { - $remote_result['fields'][] = $field->name; - } - while ( $row = $result->fetch_assoc() ) { - $remote_result['rows'][] = $row; - } - $remote_result['affected'] = count( $remote_result['rows'] ); - - cow_remote_query_cache_set( $query, $remote_result ); - return $this->cow_apply_remote_result( $remote_result ); - } - $result = cow_control_request( '/query', array( 'sql' => $query ) ); if ( empty( $result['ok'] ) ) { $this->last_error = isset( $result['error'] ) ? $result['error'] : 'wp-cow remote query failed'; @@ -619,59 +571,6 @@ class Cow_DB extends wpdb { return $this->num_rows; } - private function cow_remote_mysqli() { - if ( $this->cow_remote_mysqli instanceof mysqli ) { - return $this->cow_remote_mysqli; - } - if ( $this->cow_remote_failed ) { - return null; - } - if ( '0' === getenv( 'WPCOW_REMOTE_DB_TUNNEL' ) ) { - $this->cow_remote_failed = true; - return null; - } - - if ( - ! defined( 'WPCOW_REMOTE_DB_NAME' ) || - ! defined( 'WPCOW_REMOTE_DB_USER' ) || - ! defined( 'WPCOW_REMOTE_DB_HOST' ) || - '' === WPCOW_REMOTE_DB_NAME || - '' === WPCOW_REMOTE_DB_USER - ) { - return null; - } - - $host = WPCOW_REMOTE_DB_HOST; - $port = null; - $socket = null; - if ( preg_match( '/^(.+):([0-9]+)$/', $host, $matches ) ) { - $host = $matches[1]; - $port = (int) $matches[2]; - } elseif ( preg_match( '/^([^:]+):(\/.*)$/', $host, $matches ) ) { - $host = $matches[1]; - $socket = $matches[2]; - } - - if ( function_exists( 'mysqli_report' ) ) { - mysqli_report( MYSQLI_REPORT_OFF ); - } - - $mysqli = mysqli_init(); - if ( ! 
$mysqli ) { - $this->cow_remote_failed = true; - return null; - } - - @$mysqli->options( MYSQLI_OPT_CONNECT_TIMEOUT, 2 ); - if ( ! @$mysqli->real_connect( $host, WPCOW_REMOTE_DB_USER, WPCOW_REMOTE_DB_PASSWORD, WPCOW_REMOTE_DB_NAME, $port, $socket ) ) { - $this->cow_remote_failed = true; - return null; - } - - @$mysqli->set_charset( $this->charset ? $this->charset : 'utf8mb4' ); - $this->cow_remote_mysqli = $mysqli; - return $this->cow_remote_mysqli; - } } $wpdb = new Cow_DB( DB_USER, DB_PASSWORD, DB_NAME, defined( 'WPCOW_LOCAL_DB_HOST' ) ? WPCOW_LOCAL_DB_HOST : DB_HOST ); @@ -1124,9 +1023,12 @@ mod tests { assert!(php.contains("define( 'WP_HOME', 'http://example.test' );")); assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); - assert!(php.contains("WPCOW_REMOTE_DB_HOST")); assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); assert!(php.contains("WPCOW_DB_STATE_FILE")); + assert!(!php.contains("WPCOW_REMOTE_DB_NAME")); + assert!(!php.contains("WPCOW_REMOTE_DB_USER")); + assert!(!php.contains("WPCOW_REMOTE_DB_PASSWORD")); + assert!(!php.contains("WPCOW_REMOTE_DB_HOST")); assert!(php.contains("wp-cow DB/runtime error")); assert!(php.contains("wp-content/db.php")); assert!(db_dropin_php().contains("WPCOW_LOCAL_DB_HOST")); @@ -1138,9 +1040,10 @@ mod tests { assert!(php.contains("cow_is_write_sql")); assert!(php.contains("cow_select_has_remote_side_effect_clause")); assert!(php.contains("/materialize")); - assert!(php.contains("cow_remote_mysqli")); assert!(php.contains("cow_remote_query_cache_get")); assert!(php.contains("cow_remote_query_cache_set")); + assert!(!php.contains("cow_remote_mysqli")); + assert!(!php.contains("WPCOW_REMOTE_DB_PASSWORD")); assert!(php.contains("cow_cached_remote_read_is_safe_without_control")); assert!(php.contains("cow_safe_local_read_without_control")); assert!(php.contains("cow_query_matches_option_bootstrap")); diff --git a/experiments/remote-wp-cow/src/remote.rs 
b/experiments/remote-wp-cow/src/remote.rs index b110080b..6d8c3b76 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -171,7 +171,7 @@ impl RemoteClient { } pub fn start_db_tunnel(&self) -> Result> { - if env_bool("WPCOW_REMOTE_DB_TUNNEL", true) == Some(false) { + if env_bool("WPCOW_REMOTE_DB_TUNNEL", false) != Some(true) { return Ok(None); } if self.manifest.probe.db_host.is_empty() From 1ef2ba23bc33424c0681157383770069e5e82e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 01:00:55 +0200 Subject: [PATCH 30/39] Guard plugin side-effect primitives --- experiments/remote-wp-cow/.env.example | 1 + experiments/remote-wp-cow/README.md | 9 ++++ experiments/remote-wp-cow/compose.yaml | 1 + .../remote-wp-cow/scripts/strict-harness.sh | 5 ++ experiments/remote-wp-cow/src/generate.rs | 2 +- experiments/remote-wp-cow/src/run.rs | 50 ++++++++++++++++++- 6 files changed, 66 insertions(+), 2 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 5344867d..ebc42dee 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -28,6 +28,7 @@ WPCOW_REMOTE_DB_TUNNEL=0 WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 WPCOW_ENABLE_PLUGINS=1 +WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0 WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_FUSE_TTL_SECS=60 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index cdc3b8a6..5cadfa13 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -197,6 +197,15 @@ layer by default so the local site can render the same active code as the remote site. Set `WPCOW_ENABLE_PLUGINS=0` only when you need to suppress active plugins during testing; files still remain lazy and are not copied up front. 
+Because active plugins are production code, the launched PHP runtime also +disables common side-effect escape hatches by default: process spawning, +`mail()`, raw socket clients, and URL-based includes. That is in addition to the +mu-plugin guards for WordPress mail and HTTP APIs. The generated DB drop-in +still needs local HTTP for daemon control calls, so direct plugin cURL or URL +file-wrapper calls are not fully sandboxed yet. Set +`WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=1` only when you intentionally want to +let plugin code spawn local processes or use raw sockets. + The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the browser spinning forever. Adjust the defaults with diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index aabaaff2..910776d0 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -36,6 +36,7 @@ services: WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-1}" + WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS: "${WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS:-0}" WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index d7d4c57a..891a0199 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -53,6 +53,7 @@ run_exact_test row_cow::tests::update_copy_up_fetches_only_affected_primary_keys run_exact_test row_cow::tests::delete_tombstone_hides_remote_row_from_merged_selects run_exact_test 
run::tests::frankenphp_routes_wp_admin_directory_to_index run_exact_test run::tests::frankenphp_routes_installer_paths_through_runtime_guard +run_exact_test run::tests::web_runtime_disables_common_plugin_side_effect_primitives run_exact_test sql::tests::extract_tables_preserves_wordpress_table_case_for_proxy_cow run_exact_ignored_test generate::tests::runtime_cow_harness_proves_admin_login_local_mutation_and_offline_refresh run_exact_ignored_test generate::tests::production_run_harness_proves_fuse_rust_control_and_offline_refresh @@ -82,6 +83,9 @@ need_pattern src/db.rs 'dirty_tables' "dirty row-overlay table routing state" need_pattern src/generate.rs 'cow_cached_remote_read_is_safe_without_control' "PHP cached remote read fast path" need_pattern src/generate.rs 'cow_safe_local_read_without_control' "PHP local read fast path for materialized runtime data" need_pattern src/remote.rs 'WPCOW_REMOTE_DB_TUNNEL", false' "remote DB SSH tunnel is opt-in" +need_pattern src/run.rs 'WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS' "plugin side-effect escape hatch is explicit" +need_pattern src/run.rs 'disable_functions' "PHP side-effect functions are disabled by default" +need_pattern src/run.rs 'stream_socket_client' "raw plugin socket egress is disabled by default" need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" need_pattern src/row_cow.rs 'LocalOnlyInsert' "local-only content mutation path" @@ -100,6 +104,7 @@ need_pattern .env.example '^WPCOW_HTTP_PORT=9481$' "Docker lab example host HTTP need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example FrankenPHP preference" need_pattern .env.example '^WPCOW_REMOTE_DB_TUNNEL=0$' "Docker lab example disables remote DB tunnel by default" need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" +need_pattern .env.example '^WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0$' "Docker lab example 
keeps PHP side-effect guards enabled" need_pattern .env.example '^WPCOW_LOCAL_ADMIN_PASSWORD=$' "Docker lab example local admin override" deny_pattern src 'rsync|scp[[:space:]]+-r' "eager source tree copy command" diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 8fab3c6a..592cb3d7 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -204,7 +204,7 @@ function cow_control_request( $path, $payload ) { $body = json_encode( $payload ); $timeout = cow_control_timeout_secs(); - if ( function_exists( 'curl_init' ) ) { + if ( function_exists( 'curl_init' ) && function_exists( 'curl_exec' ) ) { $ch = curl_init( $url ); curl_setopt( $ch, CURLOPT_POST, true ); curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'Content-Type: application/json' ) ); diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index dc143f03..7e4d53e4 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -238,6 +238,31 @@ fn apply_web_server_env(command: &mut Command, paths: &ClonePaths) { } } +fn php_side_effect_guards_enabled() -> bool { + !matches!( + std::env::var("WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS") + .unwrap_or_default() + .to_ascii_lowercase() + .as_str(), + "1" | "true" | "yes" | "on" + ) +} + +fn php_disabled_functions() -> &'static str { + "exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client" +} + +fn php_safety_ini_entries() -> Vec<(&'static str, String)> { + if !php_side_effect_guards_enabled() { + return Vec::new(); + } + + vec![ + ("disable_functions", php_disabled_functions().to_string()), + ("allow_url_include", "0".to_string()), + ] +} + fn start_frankenphp_server( paths: &ClonePaths, mountpoint: &Path, @@ -293,12 +318,16 @@ fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) .arg("opcache.max_accelerated_files=20000") 
.arg("-d") .arg("opcache.validate_timestamps=1") + .stdin(Stdio::null()); + for (name, value) in php_safety_ini_entries() { + command.arg("-d").arg(format!("{name}={value}")); + } + command .arg("-S") .arg(http_addr) .arg("-t") .arg(mountpoint) .arg(paths.generated.join("router.php")) - .stdin(Stdio::null()) .spawn() .context("start php built-in server") } @@ -307,6 +336,10 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) let threads = env_u64("WPCOW_PHP_WORKERS", 4); let max_execution = env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 90); let socket_timeout = env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15); + let safety_ini = php_safety_ini_entries() + .into_iter() + .map(|(name, value)| format!("\t\tphp_ini {name} {value}\n")) + .collect::(); let listen = caddy_listen(http_addr); let root = caddy_quote(&mountpoint.to_string_lossy()); let router = format!("/{ROUTER_BASENAME}"); @@ -330,6 +363,7 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) php_ini opcache.max_accelerated_files 20000 php_ini opcache.validate_timestamps 1 php_ini opcache.revalidate_freq 2 +{safety_ini} }} }} @@ -469,6 +503,20 @@ mod tests { ); } + #[test] + fn web_runtime_disables_common_plugin_side_effect_primitives() { + assert!(php_disabled_functions().contains("stream_socket_client")); + assert!(php_disabled_functions().contains("proc_open")); + assert!(php_disabled_functions().contains("mail")); + + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile.contains("php_ini disable_functions")); + assert!(caddyfile.contains("stream_socket_client")); + assert!(caddyfile.contains("php_ini allow_url_include 0")); + } + #[test] fn command_exists_requires_an_executable_file() { let temp = tempfile::tempdir().unwrap(); From 83285fc051aa8381e01a6e580844a3b53e6fae5f Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 01:55:49 +0200 Subject: [PATCH 31/39] Speed warmed COW renders --- experiments/remote-wp-cow/.env.example | 2 + experiments/remote-wp-cow/README.md | 20 ++- experiments/remote-wp-cow/compose.yaml | 2 + .../remote-wp-cow/scripts/strict-harness.sh | 8 ++ experiments/remote-wp-cow/src/config.rs | 12 +- experiments/remote-wp-cow/src/fusefs.rs | 121 +++++++++++++++++- experiments/remote-wp-cow/src/generate.rs | 61 ++++++++- experiments/remote-wp-cow/src/overlay.rs | 108 ++++++++++++++++ experiments/remote-wp-cow/src/remote.rs | 68 ++++++++++ experiments/remote-wp-cow/src/row_cow.rs | 109 +++++++++++++++- experiments/remote-wp-cow/src/run.rs | 20 ++- 11 files changed, 515 insertions(+), 16 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index ebc42dee..e4d49474 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -31,6 +31,7 @@ WPCOW_ENABLE_PLUGINS=1 WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0 WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_FUSE_TTL_SECS=60 +WPCOW_REMOTE_METADATA_CACHE_TTL_SECS=3600 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 WPCOW_REMOTE_COMMAND_TIMEOUT_SECS=20 WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS=10 @@ -38,6 +39,7 @@ WPCOW_SSH_CONNECT_TIMEOUT_SECS=8 WPCOW_PHP_MAX_EXECUTION_SECS=90 WPCOW_PHP_SOCKET_TIMEOUT_SECS=15 WPCOW_PHP_WORKERS=4 +WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0 # Optional local-only admin override used with wp-cow-lab-sever. # This updates only the local clone DB after the relevant user rows are copied. diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 5cadfa13..f51643bb 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -183,13 +183,20 @@ loads reuse local query results instead of crossing SSH/remote MySQL again. 
Set `WPCOW_REMOTE_QUERY_CACHE=0` to disable it or adjust `WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS` for large result sets. Local write-class SQL does not globally clear this cache; cached remote reads are used only while the -referenced tables have no local overlay state. +referenced tables have no dirty local overlay state. The FUSE mount also keeps warmed path metadata live long enough for repeat renders to reuse the program files WordPress just touched. The Docker lab -defaults `WPCOW_FUSE_TTL_SECS` to `60`; lower values make live remote changes -visible sooner, while higher values reduce repeated path walking. +defaults `WPCOW_FUSE_TTL_SECS` to `60` for kernel attribute caching and +`WPCOW_REMOTE_METADATA_CACHE_TTL_SECS` to `3600` for daemon-side remote metadata, +including negative lookups for files or directories that WordPress probes but +the remote site does not have. Lower values make live remote changes visible +sooner, while higher values reduce repeated path walking. FrankenPHP also enables OPcache for parsed PHP code in the local web runtime. +By default `WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0`, so warmed PHP files do not get +restatted through FUSE on every render. Restart `wp-cow run` after editing PHP +program files, or set `WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=1` while actively +working on plugin/theme code. There is no recursive runtime warm-up: PHP files, themes, plugins, and uploads are fetched only when a request touches them, then cached for repeated reads. Remote plugin and language directories stay visible through the lazy lower @@ -271,8 +278,11 @@ Remote file contents are cached separately from local mutations in `wp-cow-state` volume. Files up to `WPCOW_CACHE_MAX_FILE_MB` are cached as whole files on first read, and their remote metadata is recorded in `file-cache/metadata.json` so later runs do not need to stat those files -remotely again. Larger files are streamed by range. The Docker lab defaults that -limit to 64 MB. 
Check or clear the cache with: +remotely again. Negative lookups are recorded in `file-cache/missing.json` for +the metadata TTL so repeated renders do not keep rechecking absent plugins, +languages, template directories, or optional WordPress files. Larger files are +streamed by range. The Docker lab defaults the whole-file cache limit to 64 MB. +Check or clear the cache with: ```bash wp-cow-lab-cache status diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 910776d0..b1f86489 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -38,10 +38,12 @@ services: WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-1}" WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS: "${WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS:-0}" WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" + WPCOW_REMOTE_METADATA_CACHE_TTL_SECS: "${WPCOW_REMOTE_METADATA_CACHE_TTL_SECS:-3600}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: "${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" WPCOW_PHP_SOCKET_TIMEOUT_SECS: "${WPCOW_PHP_SOCKET_TIMEOUT_SECS:-15}" WPCOW_PHP_WORKERS: "${WPCOW_PHP_WORKERS:-4}" + WPCOW_OPCACHE_VALIDATE_TIMESTAMPS: "${WPCOW_OPCACHE_VALIDATE_TIMESTAMPS:-0}" WPCOW_WEB_SERVER: "${WPCOW_WEB_SERVER:-frankenphp}" WPCOW_SPLASH: "${WPCOW_SPLASH:-1}" WPCOW_LOCAL_ADMIN_PASSWORD: "${WPCOW_LOCAL_ADMIN_PASSWORD:-}" diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index 891a0199..f0a92bc8 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -44,16 +44,19 @@ run_exact_test cli::tests::offline_core_runtime_cache_is_bounded_to_wordpress_co run_exact_test overlay::tests::cached_only_copy_up_uses_materialized_files_without_remote run_exact_test fusefs::tests::offline_readdir_uses_cached_remote_metadata_without_remote run_exact_test 
fusefs::tests::remote_stat_metadata_survives_severed_mode_without_remote +run_exact_test fusefs::tests::remote_missing_metadata_survives_daemon_restart run_exact_test generate::tests::router_splash_and_progress_smoke_responds_quickly run_exact_test db::tests::remote_query_cache_round_trips_safe_read_results run_exact_test db::tests::dirty_row_overlay_tables_are_local_state run_exact_test row_cow::tests::select_materializes_remote_rows_for_later_offline_reads +run_exact_test row_cow::tests::primary_key_single_row_selects_allow_safe_order_and_limit_clauses run_exact_test row_cow::tests::local_insert_is_not_sent_to_remote_and_appears_in_merged_select run_exact_test row_cow::tests::update_copy_up_fetches_only_affected_primary_keys run_exact_test row_cow::tests::delete_tombstone_hides_remote_row_from_merged_selects run_exact_test run::tests::frankenphp_routes_wp_admin_directory_to_index run_exact_test run::tests::frankenphp_routes_installer_paths_through_runtime_guard run_exact_test run::tests::web_runtime_disables_common_plugin_side_effect_primitives +run_exact_test run::tests::web_runtime_defaults_to_no_opcache_timestamp_revalidation run_exact_test sql::tests::extract_tables_preserves_wordpress_table_case_for_proxy_cow run_exact_ignored_test generate::tests::runtime_cow_harness_proves_admin_login_local_mutation_and_offline_refresh run_exact_ignored_test generate::tests::production_run_harness_proves_fuse_rust_control_and_offline_refresh @@ -72,7 +75,9 @@ need_pattern src/run.rs '__wp_cow_installer_guard=1' "FrankenPHP installer guard need_pattern src/fusefs.rs 'clone is severed and file is not cached locally' "offline cached-file guard" need_pattern src/fusefs.rs 'copy_up_cached_only' "offline write-open cached-only copy-up" need_pattern src/fusefs.rs 'put_cached_entry\(rel, &entry\)' "FUSE stat metadata persistence" +need_pattern src/fusefs.rs 'put_cached_missing' "FUSE missing metadata persistence" need_pattern src/overlay.rs 'clone is severed and writable lower 
file is not cached locally' "offline write-open remote guard" +need_pattern src/overlay.rs 'missing\.json' "persistent missing metadata cache" need_pattern src/control.rs 'clone is severed from the remote database' "offline remote-DB guard" need_pattern src/generate.rs 'will not fall back to the empty local schema' "installer/runtime failure guard" need_pattern src/generate.rs 'wp_cow_looks_like_installer' "installer response detector" @@ -86,6 +91,7 @@ need_pattern src/remote.rs 'WPCOW_REMOTE_DB_TUNNEL", false' "remote DB SSH tunne need_pattern src/run.rs 'WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS' "plugin side-effect escape hatch is explicit" need_pattern src/run.rs 'disable_functions' "PHP side-effect functions are disabled by default" need_pattern src/run.rs 'stream_socket_client' "raw plugin socket egress is disabled by default" +need_pattern src/run.rs 'WPCOW_OPCACHE_VALIDATE_TIMESTAMPS' "OPcache timestamp validation is configurable" need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" need_pattern src/row_cow.rs 'LocalOnlyInsert' "local-only content mutation path" @@ -105,6 +111,8 @@ need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example Fr need_pattern .env.example '^WPCOW_REMOTE_DB_TUNNEL=0$' "Docker lab example disables remote DB tunnel by default" need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" need_pattern .env.example '^WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0$' "Docker lab example keeps PHP side-effect guards enabled" +need_pattern .env.example '^WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0$' "Docker lab example keeps warm render OPcache fast path enabled" +need_pattern .env.example '^WPCOW_REMOTE_METADATA_CACHE_TTL_SECS=3600$' "Docker lab example keeps remote metadata warm long enough for rerenders" need_pattern .env.example '^WPCOW_LOCAL_ADMIN_PASSWORD=$' "Docker lab example local admin override" 
deny_pattern src 'rsync|scp[[:space:]]+-r' "eager source tree copy command" diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index 64eaff42..73f9d224 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -12,7 +12,7 @@ use url::Url; pub const MANIFEST_VERSION: u32 = 1; const OFFLINE_MARKER: &str = "offline.json"; const DEFAULT_CACHE_MAX_FILE_BYTES: u64 = 8 * 1024 * 1024; -const DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS: u64 = 30; +const DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS: u64 = 3600; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Manifest { @@ -126,7 +126,7 @@ impl Manifest { remote_db_tunnel: default_remote_db_tunnel(), control_url: "http://127.0.0.1:39070".to_string(), cache_max_file_bytes: cache_max_file_bytes_from_env(), - remote_metadata_cache_ttl_secs: DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS, + remote_metadata_cache_ttl_secs: remote_metadata_cache_ttl_secs_from_env(), } } } @@ -139,6 +139,14 @@ fn default_remote_metadata_cache_ttl_secs() -> u64 { DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS } +fn remote_metadata_cache_ttl_secs_from_env() -> u64 { + std::env::var("WPCOW_REMOTE_METADATA_CACHE_TTL_SECS") + .ok() + .and_then(|raw| raw.parse::().ok()) + .filter(|ttl| *ttl > 0) + .unwrap_or(DEFAULT_REMOTE_METADATA_CACHE_TTL_SECS) +} + fn default_remote_db_tunnel() -> RemoteDbTunnel { RemoteDbTunnel { host: "127.0.0.1".to_string(), diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 04d9a3b6..5afd6fc7 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ b/experiments/remote-wp-cow/src/fusefs.rs @@ -159,6 +159,14 @@ impl CowFs { ); return Ok(entry); } + if self.overlay.cached_missing(rel).map_err(anyhow_to_io)? 
{ + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + return Err(io::Error::new( + io::ErrorKind::NotFound, + "cached remote miss", + )); + } if self.offline { return Err(io::Error::new( @@ -172,6 +180,9 @@ impl CowFs { Err(err) if err.kind() == io::ErrorKind::NotFound => { self.remote_missing_cache .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + let _ = self + .overlay + .put_cached_missing(rel, self.remote_cache_ttl.as_secs()); return Err(err); } Err(err) => return Err(err), @@ -201,8 +212,27 @@ impl CowFs { return Ok(cached.value.clone()); } } + if self.overlay.cached_missing(rel).map_err(anyhow_to_io)? { + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + return Err(io::Error::new( + io::ErrorKind::NotFound, + "cached remote miss", + )); + } - let entries = self.remote.readdir(rel)?; + let entries = match self.remote.readdir(rel) { + Ok(entries) => entries, + Err(err) if err.kind() == io::ErrorKind::NotFound => { + self.remote_missing_cache + .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); + let _ = self + .overlay + .put_cached_missing(rel, self.remote_cache_ttl.as_secs()); + return Err(err); + } + Err(err) => return Err(err), + }; let expires_at = Instant::now() + self.remote_cache_ttl; for entry in &entries { let _ = self.overlay.put_cached_entry(&rel.join(&entry.name), entry); @@ -1091,6 +1121,95 @@ exec bash -lc "$cmd" } } + #[test] + fn remote_missing_metadata_survives_daemon_restart() { + static ENV_LOCK: OnceLock> = OnceLock::new(); + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_path = std::env::var_os("PATH"); + let old_log = std::env::var_os("WPCOW_FAKE_SSH_LOG"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let fake_bin = temp.path().join("bin"); + let fake_ssh_log = 
temp.path().join("fake-ssh.log"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&fake_bin).unwrap(); + fs::write( + fake_bin.join("ssh"), + r#"#!/usr/bin/env bash +set -euo pipefail +printf 'CALL\n' >> "$WPCOW_FAKE_SSH_LOG" +cmd="${@: -1}" +exec bash -lc "$cmd" +"#, + ) + .unwrap(); + let mut perms = fs::metadata(fake_bin.join("ssh")).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(fake_bin.join("ssh"), perms).unwrap(); + + let path = match old_path.as_ref() { + Some(old) => format!("{}:{}", fake_bin.display(), old.to_string_lossy()), + None => fake_bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_FAKE_SSH_LOG", &fake_ssh_log); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let mut manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + manifest.remote_metadata_cache_ttl_secs = 3600; + let rel = Path::new("wp-content/missing-plugin"); + + let mut fs = CowFs::new( + manifest.clone(), + &paths, + RemoteClient::new(manifest.clone(), None), + ); + assert_eq!( + fs.remote_stat(rel).unwrap_err().kind(), + io::ErrorKind::NotFound + ); + let ssh_lines_after_first = fs::read_to_string(&fake_ssh_log).unwrap().lines().count(); + + let mut reloaded_fs = + CowFs::new(manifest.clone(), &paths, RemoteClient::new(manifest, None)); + assert_eq!( + reloaded_fs.remote_stat(rel).unwrap_err().kind(), + io::ErrorKind::NotFound + ); + assert_eq!( + fs::read_to_string(&fake_ssh_log).unwrap().lines().count(), + ssh_lines_after_first, + "cached missing metadata should avoid repeated remote stats after restart" + ); + + match old_path { + Some(value) => 
std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_log { + Some(value) => std::env::set_var("WPCOW_FAKE_SSH_LOG", value), + None => std::env::remove_var("WPCOW_FAKE_SSH_LOG"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } + #[test] fn legacy_opaque_runtime_markers_stay_transparent_by_default() { static ENV_LOCK: OnceLock> = OnceLock::new(); diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 592cb3d7..cbbf8bf7 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -432,13 +432,27 @@ function cow_safe_local_read_without_control( $query, $tables, $options_table ) return false; } +function cow_table_has_dirty_state( $table ) { + $table = strtolower( (string) $table ); + $state = cow_local_state(); + if ( isset( $state['dirty_tables'][ $table ] ) ) { + return true; + } + foreach ( $state['dirty_option_rows'] as $row_key => $_present ) { + $parts = explode( ':', (string) $row_key, 2 ); + if ( $table === strtolower( $parts[0] ) ) { + return true; + } + } + return false; +} + function cow_cached_remote_read_is_safe_without_control( $tables ) { if ( empty( $tables ) ) { return false; } - $local_tables = cow_local_state_tables(); foreach ( $tables as $table ) { - if ( isset( $local_tables[ strtolower( (string) $table ) ] ) ) { + if ( cow_table_has_dirty_state( $table ) ) { return false; } } @@ -1072,6 +1086,7 @@ mod tests { " $expected ) {{ exit( 1 ); }} }} +file_put_contents( + WPCOW_DB_STATE_FILE, + json_encode( + array( + 'option_rows' => array( 'wp_options:siteurl' ), + 'dirty_tables' => array(), + 'dirty_option_rows' => array(), + ) + ) +); +if ( ! cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) {{ + fwrite( STDERR, 'clean copied rows should not block the remote query cache' . 
PHP_EOL ); + exit( 1 ); +}} +file_put_contents( + WPCOW_DB_STATE_FILE, + json_encode( + array( + 'option_rows' => array( 'wp_options:siteurl' ), + 'dirty_tables' => array(), + 'dirty_option_rows' => array( 'wp_options:siteurl' ), + ) + ) +); +touch( WPCOW_DB_STATE_FILE, time() + 2 ); +clearstatcache( true, WPCOW_DB_STATE_FILE ); +if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) {{ + fwrite( STDERR, 'dirty copied rows must block the remote query cache' . PHP_EOL ); + exit( 1 ); +}} "#, php_single_quoted_path(temp.path()), + php_single_quoted_path(&state_file), php_single_quoted_path(&db_dropin) ); fs::write(&check, script).unwrap(); @@ -1359,6 +1406,7 @@ foreach ( $cases as $sql => $expected ) {{ let remote_public = HarnessHttpServer::start("REMOTE PUBLIC BYPASS"); let control_port = free_tcp_port(); + let db_proxy_port = free_tcp_port(); let site_port = free_tcp_port(); let mut harness_manifest = manifest(); harness_manifest.remote_url = format!("http://127.0.0.1:{}", remote_public.port); @@ -1379,6 +1427,10 @@ foreach ( $cases as $sql => $expected ) {{ host: "127.0.0.1".to_string(), port: mysql_port, }; + harness_manifest.db_proxy = DbProxy { + host: "127.0.0.1".to_string(), + port: db_proxy_port, + }; crate::db::set_local_admin_password(&harness_manifest, Some("admin"), "local-pass") .unwrap(); @@ -1594,6 +1646,7 @@ foreach ( $cases as $sql => $expected ) {{ let remote_public = HarnessHttpServer::start("REMOTE PUBLIC BYPASS"); let control_port = free_tcp_port(); + let db_proxy_port = free_tcp_port(); let site_port = free_tcp_port(); let mut harness_manifest = manifest(); harness_manifest.ssh = "fake-host".to_string(); @@ -1615,6 +1668,10 @@ foreach ( $cases as $sql => $expected ) {{ host: "127.0.0.1".to_string(), port: mysql_port, }; + harness_manifest.db_proxy = DbProxy { + host: "127.0.0.1".to_string(), + port: db_proxy_port, + }; harness_manifest.cache_max_file_bytes = 1024 * 1024; harness_manifest.remote_metadata_cache_ttl_secs = 
60; diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index bc2aec4a..fc9adde1 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -23,6 +23,11 @@ struct MetadataFile { entries: BTreeMap, } +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +struct MissingFile { + expires_at_unix: BTreeMap, +} + #[derive(Debug, Default, Serialize, Deserialize)] struct CacheProgress { phase: String, @@ -42,6 +47,7 @@ pub struct OverlayStore { whiteouts_path: PathBuf, whiteouts: RefCell>, metadata: RefCell>, + missing: RefCell>, } impl OverlayStore { @@ -52,6 +58,7 @@ impl OverlayStore { whiteouts_path: paths.whiteouts.clone(), whiteouts: RefCell::new(None), metadata: RefCell::new(None), + missing: RefCell::new(None), } } @@ -104,6 +111,38 @@ impl OverlayStore { Ok(metadata.entries.get(&Self::rel_string(rel)).cloned()) } + pub fn cached_missing(&self, rel: &Path) -> Result { + let mut missing = self.load_missing()?; + let rel_string = Self::rel_string(&Self::clean_rel(rel)?); + let now = now_unix_secs(); + let Some(expires_at) = missing.expires_at_unix.get(&rel_string).copied() else { + return Ok(false); + }; + if expires_at > now { + return Ok(true); + } + missing.expires_at_unix.remove(&rel_string); + self.write_missing(&missing)?; + Ok(false) + } + + pub fn put_cached_missing(&self, rel: &Path, ttl_secs: u64) -> Result<()> { + let mut missing = self.load_missing()?; + let rel_string = Self::rel_string(&Self::clean_rel(rel)?); + let expires_at = now_unix_secs().saturating_add(ttl_secs.max(1)); + missing.expires_at_unix.insert(rel_string, expires_at); + self.write_missing(&missing) + } + + pub fn remove_cached_missing(&self, rel: &Path) -> Result<()> { + let mut missing = self.load_missing()?; + let rel_string = Self::rel_string(&Self::clean_rel(rel)?); + if missing.expires_at_unix.remove(&rel_string).is_some() { + self.write_missing(&missing)?; + } + Ok(()) + } + pub fn 
list_cached_metadata_dir(&self, rel: &Path) -> Result> { let metadata = self.load_metadata()?; let rel = Self::clean_rel(rel)?; @@ -126,6 +165,7 @@ impl OverlayStore { let rel = Self::clean_rel(rel)?; let mut journal_entries = Vec::new(); let rel_string = Self::rel_string(&rel); + let _ = self.remove_cached_missing(&rel); metadata.entries.insert(rel_string.clone(), entry.clone()); journal_entries.push((rel_string, Some(entry.clone()))); let mut current = rel.parent(); @@ -161,6 +201,7 @@ impl OverlayStore { if path.exists() { fs::remove_file(path)?; } + let _ = self.remove_cached_missing(rel); let mut metadata = self.load_metadata()?; let rel_string = Self::rel_string(rel); metadata.entries.remove(&rel_string); @@ -447,6 +488,10 @@ impl OverlayStore { self.file_cache.join("metadata.jsonl") } + fn missing_path(&self) -> PathBuf { + self.file_cache.join("missing.json") + } + fn progress_path(&self) -> PathBuf { self.file_cache.join("progress.json") } @@ -470,6 +515,40 @@ impl OverlayStore { Ok(metadata) } + fn load_missing(&self) -> Result { + if let Some(missing) = self.missing.borrow().as_ref() { + return Ok(missing.clone()); + } + let path = self.missing_path(); + if !path.exists() { + let missing = MissingFile::default(); + *self.missing.borrow_mut() = Some(missing.clone()); + return Ok(missing); + } + let mut json = String::new(); + File::open(path)?.read_to_string(&mut json)?; + let missing: MissingFile = serde_json::from_str(&json)?; + *self.missing.borrow_mut() = Some(missing.clone()); + Ok(missing) + } + + fn write_missing(&self, missing: &MissingFile) -> Result<()> { + fs::create_dir_all(&self.file_cache)?; + let json = serde_json::to_vec_pretty(missing)?; + let tmp = self.missing_path().with_extension("json.tmp"); + let mut file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp)?; + file.write_all(&json)?; + file.write_all(b"\n")?; + drop(file); + fs::rename(tmp, self.missing_path())?; + *self.missing.borrow_mut() = 
Some(missing.clone()); + Ok(()) + } + #[allow(dead_code)] fn write_metadata(&self, metadata: &MetadataFile) -> Result<()> { fs::create_dir_all(&self.file_cache)?; @@ -631,6 +710,13 @@ fn now_unix_ms() -> u128 { .unwrap_or_default() } +fn now_unix_secs() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_secs()) + .unwrap_or_default() +} + fn read_range_from_file(path: &Path, offset: u64, size: usize) -> Result> { let mut file = File::open(path)?; file.seek(SeekFrom::Start(offset))?; @@ -742,6 +828,28 @@ mod tests { assert!(reloaded.cached_entry(rel).unwrap().is_none()); } + #[test] + fn stores_cached_remote_missing_paths() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-content/missing-plugin"); + + assert!(!store.cached_missing(rel).unwrap()); + store.put_cached_missing(rel, 3600).unwrap(); + assert!(store.cached_missing(rel).unwrap()); + + let reloaded = OverlayStore::new(&paths); + assert!( + reloaded.cached_missing(rel).unwrap(), + "negative remote metadata should survive daemon restarts" + ); + + store.remove_cached(rel).unwrap(); + assert!(!store.cached_missing(rel).unwrap()); + } + #[test] fn cached_only_copy_up_uses_materialized_files_without_remote() { let temp = tempfile::tempdir().unwrap(); diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 6d8c3b76..75d1e62a 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -227,6 +227,13 @@ impl RemoteClient { } pub fn stat(&self, rel: &Path) -> io::Result { + let started = Instant::now(); + let result = self.stat_inner(rel); + trace_remote_result("stat", &OverlayStore::rel_string(rel), started, &result); + result + } + + fn stat_inner(&self, rel: &Path) -> io::Result { let full = self.remote_full_path(rel)?; if 
remote_file_helper_enabled() { let request = serde_json::json!({ @@ -261,6 +268,13 @@ echo json_encode(array( } pub fn readdir(&self, rel: &Path) -> io::Result> { + let started = Instant::now(); + let result = self.readdir_inner(rel); + trace_remote_result("readdir", &OverlayStore::rel_string(rel), started, &result); + result + } + + fn readdir_inner(&self, rel: &Path) -> io::Result> { let full = self.remote_full_path(rel)?; if remote_file_helper_enabled() { let request = serde_json::json!({ @@ -295,6 +309,18 @@ echo json_encode($out); } pub fn read_range(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { + let started = Instant::now(); + let result = self.read_range_inner(rel, offset, length); + trace_remote_result( + "read_range", + &format!("{}@{}+{}", OverlayStore::rel_string(rel), offset, length), + started, + &result, + ); + result + } + + fn read_range_inner(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { let full = self.remote_full_path(rel)?; if remote_file_helper_enabled() { let request = serde_json::json!({ @@ -319,6 +345,18 @@ echo fread($f,$length); } pub fn read_file(&self, rel: &Path) -> io::Result> { + let started = Instant::now(); + let result = self.read_file_inner(rel); + trace_remote_result( + "read_file", + &OverlayStore::rel_string(rel), + started, + &result, + ); + result + } + + fn read_file_inner(&self, rel: &Path) -> io::Result> { let full = self.remote_full_path(rel)?; if remote_file_helper_enabled() { let request = serde_json::json!({ @@ -342,6 +380,13 @@ while(!feof($f)){ } pub fn readlink(&self, rel: &Path) -> io::Result { + let started = Instant::now(); + let result = self.readlink_inner(rel); + trace_remote_result("readlink", &OverlayStore::rel_string(rel), started, &result); + result + } + + fn readlink_inner(&self, rel: &Path) -> io::Result { let full = self.remote_full_path(rel)?; if remote_file_helper_enabled() { let request = serde_json::json!({ @@ -366,6 +411,13 @@ echo $target; } pub fn 
remote_query_readonly(&self, sql: &str) -> Result { + let started = Instant::now(); + let result = self.remote_query_readonly_inner(sql); + trace_remote_result("query", sql, started, &result); + result + } + + fn remote_query_readonly_inner(&self, sql: &str) -> Result { let probe = &self.manifest.probe; let code = r#" $host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5];$timeout=(int)$argv[6]; @@ -629,6 +681,22 @@ fn remote_file_helper_enabled() -> bool { env_bool("WPCOW_REMOTE_FILE_HELPER", true).unwrap_or(true) } +fn trace_remote_result( + op: &str, + target: &str, + started: Instant, + result: &std::result::Result, +) { + if std::env::var("WPCOW_TRACE_REMOTE").ok().as_deref() != Some("1") { + return; + } + let elapsed_ms = started.elapsed().as_millis(); + match result { + Ok(_) => eprintln!("wp-cow remote {op} ok {elapsed_ms}ms {target}"), + Err(err) => eprintln!("wp-cow remote {op} err {elapsed_ms}ms {target}: {err}"), + } +} + fn remote_file_helper_php() -> &'static str { r#" error_reporting(0); diff --git a/experiments/remote-wp-cow/src/row_cow.rs b/experiments/remote-wp-cow/src/row_cow.rs index 4c6d65c8..2d2e7f71 100644 --- a/experiments/remote-wp-cow/src/row_cow.rs +++ b/experiments/remote-wp-cow/src/row_cow.rs @@ -341,9 +341,6 @@ fn plan_select(tokens: &[Token]) -> RowCowPlan { if contains_keyword(tokens, "GROUP") || contains_keyword(tokens, "HAVING") { return promote(tables, "grouped reads need table promotion"); } - if contains_keyword(tokens, "ORDER") || contains_keyword(tokens, "LIMIT") { - return promote(tables, "ordered or limited reads need table promotion"); - } if contains_keyword(tokens, "DISTINCT") { return promote(tables, "distinct reads need table promotion"); } @@ -366,7 +363,8 @@ fn plan_select(tokens: &[Token]) -> RowCowPlan { let Some(where_idx) = find_keyword(tokens, "WHERE") else { return promote(vec![table], "SELECT without primary-key predicate"); }; - let predicate_tokens = &tokens[where_idx + 1..]; + let 
(predicate_tokens, trailing_tokens) = + split_select_predicate_and_trailing(&tokens[where_idx + 1..]); let Some(predicate) = parse_pk_predicate(predicate_tokens) else { return promote( vec![table], @@ -385,6 +383,9 @@ fn plan_select(tokens: &[Token]) -> RowCowPlan { let Some(projection) = parse_projection(&tokens[1..from_idx], &table, alias.as_deref()) else { return promote(vec![table], "SELECT projection cannot be row-merged safely"); }; + if !select_trailing_clauses_are_row_safe(trailing_tokens, predicate.values.len()) { + return promote(vec![table], "ordered or limited reads need table promotion"); + } RowCowPlan::RowLevel(RowCowOp::Select(RowSelect { table, @@ -394,6 +395,68 @@ fn plan_select(tokens: &[Token]) -> RowCowPlan { })) } +fn split_select_predicate_and_trailing(tokens: &[Token]) -> (&[Token], &[Token]) { + let mut depth = 0_i32; + for (idx, token) in tokens.iter().enumerate() { + match token_symbol(token) { + Some('(') => depth += 1, + Some(')') => depth -= 1, + _ => {} + } + if depth == 0 && (token_is(token, "ORDER") || token_is(token, "LIMIT")) { + return (&tokens[..idx], &tokens[idx..]); + } + } + (tokens, &[]) +} + +fn select_trailing_clauses_are_row_safe(tokens: &[Token], pk_values_len: usize) -> bool { + let tokens = trim_statement_semicolons(tokens); + if tokens.is_empty() { + return true; + } + if pk_values_len != 1 { + return false; + } + + let mut idx = 0; + if token_is(&tokens[idx], "ORDER") { + idx += 1; + if !tokens.get(idx).is_some_and(|token| token_is(token, "BY")) { + return false; + } + idx += 1; + while idx < tokens.len() && !token_is(&tokens[idx], "LIMIT") { + idx += 1; + } + } + + if idx == tokens.len() { + return true; + } + if !token_is(&tokens[idx], "LIMIT") { + return false; + } + idx += 1; + + let Some(first) = tokens.get(idx).and_then(token_usize) else { + return false; + }; + idx += 1; + let safe_limit = if tokens.get(idx).and_then(token_symbol) == Some(',') { + idx += 1; + let Some(count) = 
tokens.get(idx).and_then(token_usize) else { + return false; + }; + idx += 1; + first == 0 && count == 1 + } else { + first == 1 + }; + + safe_limit && trim_statement_semicolons(&tokens[idx..]).is_empty() +} + fn plan_update(tokens: &[Token]) -> RowCowPlan { let tables = extract_table_refs(tokens); if contains_keyword(tokens, "JOIN") { @@ -1120,6 +1183,17 @@ fn is_statement_end(token: &Token) -> bool { token_symbol(token) == Some(';') } +fn trim_statement_semicolons(mut tokens: &[Token]) -> &[Token] { + while tokens + .last() + .and_then(token_symbol) + .is_some_and(|symbol| symbol == ';') + { + tokens = &tokens[..tokens.len() - 1]; + } + tokens +} + #[derive(Debug, Clone, PartialEq, Eq)] enum TokenKind { Word, @@ -1152,6 +1226,14 @@ fn token_symbol(token: &Token) -> Option { } } +fn token_usize(token: &Token) -> Option { + if matches!(token.kind, TokenKind::Number) { + token.text.parse::().ok() + } else { + None + } +} + fn lex(sql: &str) -> Option> { let chars = sql.chars().collect::>(); let mut tokens = Vec::new(); @@ -1622,6 +1704,25 @@ mod tests { assert_eq!(write.pk_values, vec![PkValue("1".to_string())]); } + #[test] + fn primary_key_single_row_selects_allow_safe_order_and_limit_clauses() { + for sql in [ + "SELECT * FROM wp_posts WHERE ID = 74 LIMIT 1", + "SELECT * FROM wp_posts WHERE ID = 74 LIMIT 0, 1", + "SELECT * FROM wp_posts WHERE ID = 74 ORDER BY post_date DESC LIMIT 1", + ] { + let RowCowPlan::RowLevel(RowCowOp::Select(select)) = plan_sql(sql) else { + panic!("{sql} should be row-level safe"); + }; + assert_eq!(select.table, "wp_posts"); + assert_eq!(select.pk_column, "ID"); + assert_eq!(select.pk_values, vec![PkValue("74".to_string())]); + } + + assert_not_row_level("SELECT * FROM wp_posts WHERE ID = 74 LIMIT 1, 1"); + assert_not_row_level("SELECT * FROM wp_posts WHERE ID IN (74, 75) LIMIT 1"); + } + #[test] fn update_copy_up_fetches_only_affected_primary_keys() { let mut backend = FakeCowBackend::default(); diff --git 
a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 7e4d53e4..90ef2737 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -263,6 +263,10 @@ fn php_safety_ini_entries() -> Vec<(&'static str, String)> { ] } +fn opcache_validate_timestamps() -> u64 { + env_u64("WPCOW_OPCACHE_VALIDATE_TIMESTAMPS", 0).min(1) +} + fn start_frankenphp_server( paths: &ClonePaths, mountpoint: &Path, @@ -317,7 +321,10 @@ fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) .arg("-d") .arg("opcache.max_accelerated_files=20000") .arg("-d") - .arg("opcache.validate_timestamps=1") + .arg(format!( + "opcache.validate_timestamps={}", + opcache_validate_timestamps() + )) .stdin(Stdio::null()); for (name, value) in php_safety_ini_entries() { command.arg("-d").arg(format!("{name}={value}")); @@ -336,6 +343,7 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) let threads = env_u64("WPCOW_PHP_WORKERS", 4); let max_execution = env_u64("WPCOW_PHP_MAX_EXECUTION_SECS", 90); let socket_timeout = env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15); + let opcache_validate = opcache_validate_timestamps(); let safety_ini = php_safety_ini_entries() .into_iter() .map(|(name, value)| format!("\t\tphp_ini {name} {value}\n")) @@ -361,7 +369,7 @@ fn frankenphp_caddyfile(_paths: &ClonePaths, mountpoint: &Path, http_addr: &str) php_ini opcache.enable 1 php_ini opcache.memory_consumption 192 php_ini opcache.max_accelerated_files 20000 - php_ini opcache.validate_timestamps 1 + php_ini opcache.validate_timestamps {opcache_validate} php_ini opcache.revalidate_freq 2 {safety_ini} }} @@ -517,6 +525,14 @@ mod tests { assert!(caddyfile.contains("php_ini allow_url_include 0")); } + #[test] + fn web_runtime_defaults_to_no_opcache_timestamp_revalidation() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + let caddyfile = 
frankenphp_caddyfile(&paths, Path::new("/tmp/mount"), "127.0.0.1:9481"); + assert!(caddyfile.contains("php_ini opcache.validate_timestamps 0")); + } + #[test] fn command_exists_requires_an_executable_file() { let temp = tempfile::tempdir().unwrap(); From 00e75686174eb66e3ea62d3f461a5eddb451c5f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 15:38:33 +0200 Subject: [PATCH 32/39] Harden remote COW lower helpers --- experiments/remote-wp-cow/.env.example | 3 + experiments/remote-wp-cow/README.md | 16 +- experiments/remote-wp-cow/compose.yaml | 3 + .../scripts/live-site-acceptance.sh | 6 + .../remote-wp-cow/scripts/strict-harness.sh | 21 + experiments/remote-wp-cow/src/fusefs.rs | 98 +++- experiments/remote-wp-cow/src/overlay.rs | 92 +++ experiments/remote-wp-cow/src/remote.rs | 552 +++++++++++++++++- 8 files changed, 773 insertions(+), 18 deletions(-) diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index e4d49474..195234ba 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -25,11 +25,14 @@ WPCOW_DNS2=8.8.8.8 WPCOW_WEB_SERVER=frankenphp WPCOW_SPLASH=1 WPCOW_REMOTE_DB_TUNNEL=0 +WPCOW_REMOTE_DB_HELPER=1 WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 WPCOW_ENABLE_PLUGINS=1 WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0 WPCOW_CACHE_MAX_FILE_MB=64 +WPCOW_REMOTE_STAT_PREFETCH_MAX_KB=0 +WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB=0 WPCOW_FUSE_TTL_SECS=60 WPCOW_REMOTE_METADATA_CACHE_TTL_SECS=3600 WPCOW_CONTROL_REQUEST_TIMEOUT_SECS=60 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index f51643bb..320dddbb 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -160,6 +160,9 @@ Remote database reads are mediated by the local daemon by default. 
Generated PHP does not contain the production DB name, user, password, or host, so plugins using the normal `DB_*` constants see only the local COW proxy. Write-class SQL is blocked from the remote database and materialized locally first. +`WPCOW_REMOTE_DB_HELPER=1` keeps a read-only PHP/MySQL helper open over SSH so a +cold render does not spawn a fresh remote PHP process for every safe lower-layer +read. `WPCOW_REMOTE_DB_TUNNEL=1` is an opt-in debugging/performance mode for hosts where you explicitly accept opening a local SSH tunnel to the remote DB. @@ -199,6 +202,14 @@ program files, or set `WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=1` while actively working on plugin/theme code. There is no recursive runtime warm-up: PHP files, themes, plugins, and uploads are fetched only when a request touches them, then cached for repeated reads. +There are experimental cold-start knobs for hosts where batch reads beat +single-file SSH round trips. Set `WPCOW_REMOTE_STAT_PREFETCH_MAX_KB` to prefetch +PHP/JSON/translation file bytes during stat, and +`WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB` to batch same-directory runtime siblings. +Both default to `0` because the real SiteGround trace showed broad prefetching +can make cold start worse when WordPress stats assets it will not read during +render. These knobs never recurse into uploads and never fetch CSS, JS, or media +unless the browser asks for them. Remote plugin and language directories stay visible through the lazy lower layer by default so the local site can render the same active code as the remote site. Set `WPCOW_ENABLE_PLUGINS=0` only when you need to suppress active @@ -276,12 +287,13 @@ cat /mnt/wp-cow/example/wp-config.php Remote file contents are cached separately from local mutations in `~/.wp-cow/clones//file-cache`, which is persisted by the Docker `wp-cow-state` volume. 
Files up to `WPCOW_CACHE_MAX_FILE_MB` are cached as whole -files on first read, and their remote metadata is recorded in +files on first touch/read, and their remote metadata is recorded in `file-cache/metadata.json` so later runs do not need to stat those files remotely again. Negative lookups are recorded in `file-cache/missing.json` for the metadata TTL so repeated renders do not keep rechecking absent plugins, languages, template directories, or optional WordPress files. Larger files are -streamed by range. The Docker lab defaults the whole-file cache limit to 64 MB. +streamed by range. The Docker lab defaults the whole-file cache limit to 64 MB +and leaves experimental cold-start prefetch knobs off unless explicitly enabled. Check or clear the cache with: ```bash diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index b1f86489..414a62c5 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -30,6 +30,7 @@ services: WPCOW_CACHE_MAX_FILE_MB: "${WPCOW_CACHE_MAX_FILE_MB:-64}" WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-0}" + WPCOW_REMOTE_DB_HELPER: "${WPCOW_REMOTE_DB_HELPER:-1}" WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-60}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" @@ -38,6 +39,8 @@ services: WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-1}" WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS: "${WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS:-0}" WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" + WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" + WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" WPCOW_REMOTE_METADATA_CACHE_TTL_SECS: "${WPCOW_REMOTE_METADATA_CACHE_TTL_SECS:-3600}" WPCOW_SSH_CONNECT_TIMEOUT_SECS: 
"${WPCOW_SSH_CONNECT_TIMEOUT_SECS:-8}" WPCOW_PHP_MAX_EXECUTION_SECS: "${WPCOW_PHP_MAX_EXECUTION_SECS:-90}" diff --git a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh index 52230ce1..24f4a133 100755 --- a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh +++ b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh @@ -285,9 +285,12 @@ before_remote="$(remote_post_count "$TITLE" | tr -d '[:space:]')" WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ WPCOW_PROXY_FRONTEND=0 \ +WPCOW_REMOTE_DB_HELPER="${WPCOW_REMOTE_DB_HELPER:-1}" \ WPCOW_REMOTE_QUERY_CACHE=1 \ WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS="${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" \ WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ +WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ +WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ HOME="$SSH_HOME" \ "$WP_COW_BIN" serve \ @@ -408,7 +411,10 @@ php -r '$p=$argv[1]; $j=json_decode(file_get_contents($p), true); $j["ssh"]="wp- WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ +WPCOW_REMOTE_DB_HELPER="${WPCOW_REMOTE_DB_HELPER:-1}" \ WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ +WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ +WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ HOME="$SSH_HOME" \ "$WP_COW_BIN" run "$NAME" \ diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index f0a92bc8..a1405a2f 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -40,11 +40,15 @@ cargo test --locked echo "== 
targeted behavior proofs ==" run_exact_test overlay::tests::lazy_remote_file_is_cached_and_survives_remote_loss +run_exact_test overlay::tests::stat_prefetched_bytes_are_reused_without_remote_read +run_exact_ignored_test remote::tests::stat_prefetch_returns_small_file_bytes_from_helper +run_exact_ignored_test remote::tests::prefetch_dir_batches_only_runtime_file_types run_exact_test cli::tests::offline_core_runtime_cache_is_bounded_to_wordpress_core run_exact_test overlay::tests::cached_only_copy_up_uses_materialized_files_without_remote run_exact_test fusefs::tests::offline_readdir_uses_cached_remote_metadata_without_remote run_exact_test fusefs::tests::remote_stat_metadata_survives_severed_mode_without_remote run_exact_test fusefs::tests::remote_missing_metadata_survives_daemon_restart +run_exact_test fusefs::tests::stat_prefetch_is_limited_to_runtime_read_files run_exact_test generate::tests::router_splash_and_progress_smoke_responds_quickly run_exact_test db::tests::remote_query_cache_round_trips_safe_read_results run_exact_test db::tests::dirty_row_overlay_tables_are_local_state @@ -88,6 +92,17 @@ need_pattern src/db.rs 'dirty_tables' "dirty row-overlay table routing state" need_pattern src/generate.rs 'cow_cached_remote_read_is_safe_without_control' "PHP cached remote read fast path" need_pattern src/generate.rs 'cow_safe_local_read_without_control' "PHP local read fast path for materialized runtime data" need_pattern src/remote.rs 'WPCOW_REMOTE_DB_TUNNEL", false' "remote DB SSH tunnel is opt-in" +need_pattern src/remote.rs 'WPCOW_REMOTE_DB_HELPER", true' "remote DB lower reads use persistent helper by default" +need_pattern src/remote.rs 'remote_db_helper_php' "remote DB helper keeps one read-only mysqli session open" +need_pattern src/remote.rs 'is_remote_db_connection_lost' "remote DB helper reconnects after idle connection loss" +need_pattern src/remote.rs 'WPCOW_REFUSED_WRITE' "remote DB helper refuses write-shaped SQL" +need_pattern src/fusefs.rs 
'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB' "FUSE stat path can prefetch small file bytes" +need_pattern src/fusefs.rs 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB' "FUSE batches same-directory runtime files with a byte cap" +need_pattern src/remote.rs '\$op === "prefetch_dir"' "remote file helper supports bounded directory batch reads" +need_pattern src/overlay.rs 'put_cached_file_bytes' "stat-prefetched file bytes are stored in the normal file cache" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_REMOTE_DB_HELPER="\$\{WPCOW_REMOTE_DB_HELPER:-1\}"' "live acceptance runs through persistent remote DB helper" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "live acceptance keeps experimental stat prefetch off by default" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "live acceptance keeps experimental sibling prefetch off by default" need_pattern src/run.rs 'WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS' "plugin side-effect escape hatch is explicit" need_pattern src/run.rs 'disable_functions' "PHP side-effect functions are disabled by default" need_pattern src/run.rs 'stream_socket_client' "raw plugin socket egress is disabled by default" @@ -101,6 +116,9 @@ need_pattern src/generate.rs 'install_fake_ssh' "strict harness fake SSH remote" need_pattern src/generate.rs 'read_line_count\(&fake_ssh_log\)' "strict harness offline no-SSH assertion" need_pattern compose.yaml '\$\{WPCOW_HTTP_PORT:-8080\}:8080' "Docker host HTTP port exposure" need_pattern compose.yaml 'WPCOW_HTTP: 0\.0\.0\.0:8080' "Docker in-container HTTP listener" +need_pattern compose.yaml 'WPCOW_REMOTE_DB_HELPER: "\$\{WPCOW_REMOTE_DB_HELPER:-1\}"' "Docker compose defaults persistent DB helper on" +need_pattern compose.yaml 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "Docker compose defaults experimental stat prefetch 
off" +need_pattern compose.yaml 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "Docker compose defaults experimental sibling prefetch off" need_pattern .dockerignore '^/target/$' "Docker build context target exclusion" need_pattern .dockerignore '^/\.env$' "Docker build context local env exclusion" need_pattern .dockerignore '^!/\.env\.example$' "Docker build context env example inclusion" @@ -109,9 +127,12 @@ need_pattern docker/wp-cow-lab-sever 'WPCOW_LOCAL_ADMIN_PASSWORD' "Docker local need_pattern .env.example '^WPCOW_HTTP_PORT=9481$' "Docker lab example host HTTP port" need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example FrankenPHP preference" need_pattern .env.example '^WPCOW_REMOTE_DB_TUNNEL=0$' "Docker lab example disables remote DB tunnel by default" +need_pattern .env.example '^WPCOW_REMOTE_DB_HELPER=1$' "Docker lab example uses persistent remote DB helper" need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" need_pattern .env.example '^WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0$' "Docker lab example keeps PHP side-effect guards enabled" need_pattern .env.example '^WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0$' "Docker lab example keeps warm render OPcache fast path enabled" +need_pattern .env.example '^WPCOW_REMOTE_STAT_PREFETCH_MAX_KB=0$' "Docker lab example keeps experimental stat prefetch off by default" +need_pattern .env.example '^WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB=0$' "Docker lab example keeps experimental sibling prefetch off by default" need_pattern .env.example '^WPCOW_REMOTE_METADATA_CACHE_TTL_SECS=3600$' "Docker lab example keeps remote metadata warm long enough for rerenders" need_pattern .env.example '^WPCOW_LOCAL_ADMIN_PASSWORD=$' "Docker lab example local admin override" diff --git a/experiments/remote-wp-cow/src/fusefs.rs b/experiments/remote-wp-cow/src/fusefs.rs index 5afd6fc7..1f060ee4 100644 --- a/experiments/remote-wp-cow/src/fusefs.rs +++ 
b/experiments/remote-wp-cow/src/fusefs.rs @@ -4,7 +4,7 @@ use fuser::{ ReplyDirectory, ReplyEmpty, ReplyEntry, ReplyOpen, ReplyWrite, Request, }; use libc::{EIO, ENOENT, ENOTSUP}; -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::ffi::{OsStr, OsString}; use std::fs::{self, File, OpenOptions}; use std::io; @@ -42,6 +42,7 @@ pub struct CowFs { remote_stat_cache: HashMap>, remote_missing_cache: HashMap, remote_readdir_cache: HashMap>>, + runtime_prefetch_dirs: HashSet, remote_cache_ttl: Duration, kernel_cache_ttl: Duration, offline: bool, @@ -73,6 +74,7 @@ impl CowFs { remote_stat_cache: HashMap::new(), remote_missing_cache: HashMap::new(), remote_readdir_cache: HashMap::new(), + runtime_prefetch_dirs: HashSet::new(), remote_cache_ttl, kernel_cache_ttl, offline, @@ -175,8 +177,13 @@ impl CowFs { )); } - let entry = match self.remote.stat(rel) { - Ok(entry) => entry, + let max_prefetch = if should_prefetch_bytes_during_stat(rel) { + remote_stat_prefetch_max_bytes().min(self.manifest.cache_max_file_bytes) + } else { + 0 + }; + let stat = match self.remote.stat_prefetch(rel, max_prefetch) { + Ok(stat) => stat, Err(err) if err.kind() == io::ErrorKind::NotFound => { self.remote_missing_cache .insert(rel.to_path_buf(), Instant::now() + self.remote_cache_ttl); @@ -187,8 +194,14 @@ impl CowFs { } Err(err) => return Err(err), }; + let entry = stat.entry; self.remote_missing_cache.remove(rel); - let _ = self.overlay.put_cached_entry(rel, &entry); + if let Some(bytes) = stat.data { + let _ = self.overlay.put_cached_file_bytes(rel, &entry, &bytes); + } else { + let _ = self.overlay.put_cached_entry(rel, &entry); + } + self.prefetch_runtime_siblings(rel); self.remote_stat_cache.insert( rel.to_path_buf(), Timed { @@ -199,6 +212,41 @@ impl CowFs { Ok(entry) } + fn prefetch_runtime_siblings(&mut self, rel: &Path) { + if self.offline || !should_prefetch_bytes_during_stat(rel) { + return; + } + let dir = 
rel.parent().unwrap_or_else(|| Path::new("")).to_path_buf(); + if !should_prefetch_runtime_sibling_dir(&dir) { + return; + } + if !self.runtime_prefetch_dirs.insert(dir.clone()) { + return; + } + let max_file = remote_stat_prefetch_max_bytes().min(self.manifest.cache_max_file_bytes); + let max_total = runtime_sibling_prefetch_max_bytes(); + let Ok(files) = self.remote.prefetch_dir(&dir, max_file, max_total) else { + return; + }; + for stat in files { + let child = dir.join(&stat.entry.name); + if let Some(bytes) = stat.data { + let _ = self + .overlay + .put_cached_file_bytes(&child, &stat.entry, &bytes); + } else { + let _ = self.overlay.put_cached_entry(&child, &stat.entry); + } + self.remote_stat_cache.insert( + child, + Timed { + value: stat.entry, + expires_at: Instant::now() + self.remote_cache_ttl, + }, + ); + } + } + fn remote_readdir(&mut self, rel: &Path) -> io::Result> { if self.offline { return self @@ -885,6 +933,28 @@ fn env_u64(name: &str, default: u64) -> u64 { .unwrap_or(default) } +fn remote_stat_prefetch_max_bytes() -> u64 { + env_u64("WPCOW_REMOTE_STAT_PREFETCH_MAX_KB", 0).saturating_mul(1024) +} + +fn runtime_sibling_prefetch_max_bytes() -> u64 { + env_u64("WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB", 0).saturating_mul(1024 * 1024) +} + +fn should_prefetch_bytes_during_stat(rel: &Path) -> bool { + matches!( + rel.extension().and_then(|ext| ext.to_str()), + Some("php" | "json" | "mo") + ) +} + +fn should_prefetch_runtime_sibling_dir(dir: &Path) -> bool { + !(dir.as_os_str().is_empty() + || dir == Path::new("wp-includes") + || dir == Path::new("wp-admin") + || dir == Path::new("wp-admin/includes")) +} + fn io_errno(err: &io::Error) -> i32 { match err.kind() { io::ErrorKind::NotFound => ENOENT, @@ -1248,4 +1318,24 @@ exec bash -lc "$cmd" None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), } } + + #[test] + fn stat_prefetch_is_limited_to_runtime_read_files() { + assert!(should_prefetch_bytes_during_stat(Path::new("wp-load.php"))); + 
assert!(should_prefetch_bytes_during_stat(Path::new( + "wp-includes/theme.json" + ))); + assert!(!should_prefetch_bytes_during_stat(Path::new( + "wp-content/themes/neve/style-main-new.min.css" + ))); + assert!(!should_prefetch_bytes_during_stat(Path::new( + "wp-content/uploads/2026/05/hero.jpg" + ))); + assert!(!should_prefetch_runtime_sibling_dir(Path::new( + "wp-includes" + ))); + assert!(should_prefetch_runtime_sibling_dir(Path::new( + "wp-includes/rest-api/endpoints" + ))); + } } diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index fc9adde1..fb5b97f6 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -196,6 +196,43 @@ impl OverlayStore { self.append_metadata_journal(&journal_entries) } + pub fn put_cached_file_bytes( + &self, + rel: &Path, + entry: &RemoteEntry, + bytes: &[u8], + ) -> Result<()> { + if entry.kind != "file" { + return self.put_cached_entry(rel, entry); + } + let rel = Self::clean_rel(rel)?; + let rel_string = Self::rel_string(&rel); + let actual_size = bytes.len() as u64; + if actual_size != entry.size { + return Err(anyhow!( + "remote file changed while prefetching {}: stat size {}, read size {}", + rel_string, + entry.size, + actual_size + )); + } + + let cache_path = self.cache_path(&rel); + if !cache_path.exists() { + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent)?; + } + let tmp = self.cache_tmp_path(&cache_path); + let mut out = File::create(&tmp)?; + out.write_all(bytes)?; + drop(out); + fs::rename(tmp, &cache_path)?; + let _ = self.finish_cache_progress(&rel_string, entry.size); + } + + self.put_cached_entry(&rel, entry) + } + pub fn remove_cached(&self, rel: &Path) -> Result<()> { let path = self.cache_path(rel); if path.exists() { @@ -1077,4 +1114,59 @@ exec bash -lc "$cmd" None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), } } + + #[test] + fn stat_prefetched_bytes_are_reused_without_remote_read() { + 
let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path().join("state").as_path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("wp-content/themes/example/style.css"); + let entry = RemoteEntry { + name: "style.css".to_string(), + kind: "file".to_string(), + size: 17, + mode: 0o100644, + mtime: 42, + }; + store + .put_cached_file_bytes(rel, &entry, b"body{color:black}") + .unwrap(); + + std::env::set_var("PATH", temp.path().join("missing-bin")); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "0"); + let remote = RemoteClient::new( + Manifest::new( + "example".to_string(), + "unreachable-host".to_string(), + "/remote/wp".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ), + None, + ); + let bytes = store + .read_cached_or_remote_with_entry(&remote, rel, 0, 1024, 1024, Some(entry.clone())) + .unwrap(); + assert_eq!(bytes, b"body{color:black}"); + assert_eq!(store.cached_entry(rel).unwrap().unwrap().size, 17); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + } } diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index 75d1e62a..1d343110 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -22,11 +22,18 @@ pub struct RemoteEntry { pub mtime: u64, } +#[derive(Debug, Clone)] +pub struct RemoteStat { + pub entry: RemoteEntry, + pub data: Option>, +} + #[derive(Debug, 
Clone)] pub struct RemoteClient { manifest: Manifest, control_path: Option, file_helper: Arc>>, + db_helper: Arc>>, } impl RemoteClient { @@ -35,6 +42,7 @@ impl RemoteClient { manifest, control_path, file_helper: Arc::new(Mutex::new(None)), + db_helper: Arc::new(Mutex::new(None)), } } @@ -234,16 +242,46 @@ impl RemoteClient { } fn stat_inner(&self, rel: &Path) -> io::Result { + self.stat_prefetch_inner(rel, 0).map(|stat| stat.entry) + } + + pub fn stat_prefetch(&self, rel: &Path, max_file_bytes: u64) -> io::Result { + let started = Instant::now(); + let result = self.stat_prefetch_inner(rel, max_file_bytes); + trace_remote_result( + "stat_prefetch", + &format!("{}<= {}", OverlayStore::rel_string(rel), max_file_bytes), + started, + &result, + ); + result + } + + fn stat_prefetch_inner(&self, rel: &Path, max_file_bytes: u64) -> io::Result { let full = self.remote_full_path(rel)?; if remote_file_helper_enabled() { let request = serde_json::json!({ "op": "stat", "path": full, + "max_file_bytes": max_file_bytes, }); if let Ok(response) = self.file_helper_request(request) { if let Some(entry) = response.get("entry") { - return serde_json::from_value(entry.clone()) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)); + let entry: RemoteEntry = serde_json::from_value(entry.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + let data = if response.get("data").is_some() { + match decode_helper_data(response) { + Ok(bytes) + if entry.kind == "file" && bytes.len() as u64 == entry.size => + { + Some(bytes) + } + _ => None, + } + } else { + None + }; + return Ok(RemoteStat { entry, data }); } } } @@ -263,8 +301,9 @@ echo json_encode(array( )); "#; let bytes = self.php_eval(code, &[full])?; - serde_json::from_slice(&bytes) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) + let entry = serde_json::from_slice(&bytes) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + Ok(RemoteStat { entry, data: None }) } 
pub fn readdir(&self, rel: &Path) -> io::Result> { @@ -308,6 +347,70 @@ echo json_encode($out); .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) } + pub fn prefetch_dir( + &self, + rel: &Path, + max_file_bytes: u64, + max_total_bytes: u64, + ) -> io::Result> { + let started = Instant::now(); + let result = self.prefetch_dir_inner(rel, max_file_bytes, max_total_bytes); + trace_remote_result( + "prefetch_dir", + &format!( + "{}<= file:{} total:{}", + OverlayStore::rel_string(rel), + max_file_bytes, + max_total_bytes + ), + started, + &result, + ); + result + } + + fn prefetch_dir_inner( + &self, + rel: &Path, + max_file_bytes: u64, + max_total_bytes: u64, + ) -> io::Result> { + if max_file_bytes == 0 || max_total_bytes == 0 || !remote_file_helper_enabled() { + return Ok(Vec::new()); + } + let full = self.remote_full_path(rel)?; + let request = serde_json::json!({ + "op": "prefetch_dir", + "path": full, + "max_file_bytes": max_file_bytes, + "max_total_bytes": max_total_bytes, + }); + let response = self.file_helper_request(request)?; + let mut out = Vec::new(); + let Some(files) = response.get("files").and_then(|value| value.as_array()) else { + return Ok(out); + }; + for file in files { + let Some(entry_value) = file.get("entry") else { + continue; + }; + let entry: RemoteEntry = serde_json::from_value(entry_value.clone()) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + let data = if file.get("data").is_some() { + match decode_helper_data(file.clone()) { + Ok(bytes) if entry.kind == "file" && bytes.len() as u64 == entry.size => { + Some(bytes) + } + _ => None, + } + } else { + None + }; + out.push(RemoteStat { entry, data }); + } + Ok(out) + } + pub fn read_range(&self, rel: &Path, offset: u64, length: usize) -> io::Result> { let started = Instant::now(); let result = self.read_range_inner(rel, offset, length); @@ -418,6 +521,21 @@ echo $target; } fn remote_query_readonly_inner(&self, sql: &str) -> Result { + if 
remote_db_helper_enabled() { + if let Ok(result) = self.db_helper_query(sql) { + if result.ok || !is_remote_db_connection_lost(&result.error) { + return Ok(result); + } + if let Ok(retry) = self.reset_db_helper_and_retry(sql) { + return Ok(retry); + } + } + } + + self.remote_query_readonly_oneshot(sql) + } + + fn remote_query_readonly_oneshot(&self, sql: &str) -> Result { let probe = &self.manifest.probe; let code = r#" $host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$sql=$argv[5];$timeout=(int)$argv[6]; @@ -473,6 +591,68 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a Ok(result) } + fn db_helper_query(&self, sql: &str) -> Result { + let mut last_error = None; + for _ in 0..2 { + match self.db_helper_query_once(sql) { + Ok(response) => return Ok(response), + Err(err) => { + last_error = Some(err); + let mut helper = self + .db_helper + .lock() + .map_err(|_| anyhow!("remote DB helper lock"))?; + reset_db_helper(&mut helper); + } + } + } + Err(last_error.unwrap_or_else(|| anyhow!("remote DB helper failed"))) + } + + fn reset_db_helper_and_retry(&self, sql: &str) -> Result { + { + let mut helper = self + .db_helper + .lock() + .map_err(|_| anyhow!("remote DB helper lock"))?; + reset_db_helper(&mut helper); + } + self.db_helper_query(sql) + } + + fn db_helper_query_once(&self, sql: &str) -> Result { + let mut helper = self + .db_helper + .lock() + .map_err(|_| anyhow!("remote DB helper lock"))?; + if helper.is_none() { + *helper = Some(self.start_db_helper()?); + } + let helper = helper + .as_mut() + .ok_or_else(|| anyhow!("remote DB helper missing"))?; + let request = serde_json::to_vec(&serde_json::json!({ "sql": sql }))?; + helper.stdin.write_all(&request)?; + helper.stdin.write_all(b"\n")?; + helper.stdin.flush()?; + + let timeout = Duration::from_secs(remote_db_query_timeout_secs().saturating_add(2)); + let line = read_helper_line(&mut helper.stdout, timeout, "remote DB helper")?; + let response: serde_json::Value = 
serde_json::from_str(&line)?; + if response + .get("ok") + .and_then(|value| value.as_bool()) + .is_none() + { + let error = response + .get("error") + .and_then(|value| value.as_str()) + .unwrap_or("remote DB helper response missing ok"); + return Err(anyhow!(error.to_string())); + } + Ok(serde_json::from_value(response)?) + } + fn file_helper_request(&self, request: serde_json::Value) -> io::Result { let mut last_error = None; for _ in 0..2 { @@ -512,7 +692,11 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a helper.stdin.write_all(b"\n")?; helper.stdin.flush()?; - let line = read_helper_line(&mut helper.stdout)?; + let line = read_helper_line( + &mut helper.stdout, + Duration::from_secs(remote_file_helper_timeout_secs()), + "remote file helper", + )?; let response: serde_json::Value = serde_json::from_str(&line) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; if response.get("ok").and_then(|value| value.as_bool()) == Some(true) { @@ -554,6 +738,40 @@ echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"a }) } + fn start_db_helper(&self) -> io::Result { + let probe = &self.manifest.probe; + let mut remote_command = format!("php -r {} --", shell_quote(remote_db_helper_php())); + for arg in [ + probe.db_host.clone(), + probe.db_user.clone(), + probe.db_password.clone(), + probe.db_name.clone(), + remote_db_query_timeout_secs().to_string(), + ] { + remote_command.push(' '); + remote_command.push_str(&shell_quote(arg)); + } + let mut command = self.ssh_command(&remote_command, 0); + let mut child = command + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn()?; + let stdin = child + .stdin + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "DB helper stdin"))?; + let stdout = child + .stdout + .take() + .ok_or_else(|| io::Error::new(io::ErrorKind::BrokenPipe, "DB helper stdout"))?; + Ok(RemoteDbHelper { + child, + stdin, + stdout, + }) + } 
+ fn php_eval(&self, code: &str, args: &[String]) -> io::Result> { let mut command = format!("php -r {} --", shell_quote(code)); for arg in args { @@ -596,6 +814,13 @@ struct RemoteFileHelper { stdout: ChildStdout, } +#[derive(Debug)] +struct RemoteDbHelper { + child: Child, + stdin: ChildStdin, + stdout: ChildStdout, +} + impl Drop for RemoteFileHelper { fn drop(&mut self) { let _ = self.child.kill(); @@ -603,6 +828,13 @@ impl Drop for RemoteFileHelper { } } +impl Drop for RemoteDbHelper { + fn drop(&mut self) { + let _ = self.child.kill(); + let _ = self.child.wait(); + } +} + fn reset_file_helper(helper: &mut Option) { if let Some(mut helper) = helper.take() { let _ = helper.child.kill(); @@ -610,6 +842,13 @@ fn reset_file_helper(helper: &mut Option) { } } +fn reset_db_helper(helper: &mut Option) { + if let Some(mut helper) = helper.take() { + let _ = helper.child.kill(); + let _ = helper.child.wait(); + } +} + fn decode_helper_data(response: serde_json::Value) -> io::Result> { let data = response .get("data") @@ -622,8 +861,11 @@ fn decode_helper_data(response: serde_json::Value) -> io::Result> { .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) } -fn read_helper_line(stdout: &mut ChildStdout) -> io::Result { - let timeout = Duration::from_secs(remote_file_helper_timeout_secs()); +fn read_helper_line( + stdout: &mut ChildStdout, + timeout: Duration, + label: &str, +) -> io::Result { let deadline = Instant::now() + timeout; let fd = stdout.as_raw_fd(); let mut out = Vec::new(); @@ -634,7 +876,8 @@ fn read_helper_line(stdout: &mut ChildStdout) -> io::Result { return Err(io::Error::new( io::ErrorKind::TimedOut, format!( - "remote file helper did not respond within {} seconds", + "{} did not respond within {} seconds", + label, timeout.as_secs() ), )); @@ -657,7 +900,7 @@ fn read_helper_line(stdout: &mut ChildStdout) -> io::Result { if fd_set.revents & libc::POLLIN == 0 { return Err(io::Error::new( io::ErrorKind::BrokenPipe, - "remote file helper pipe 
closed", + format!("{label} pipe closed"), )); } @@ -666,7 +909,7 @@ fn read_helper_line(stdout: &mut ChildStdout) -> io::Result { if read == 0 { return Err(io::Error::new( io::ErrorKind::UnexpectedEof, - "remote file helper closed", + format!("{label} closed"), )); } out.extend_from_slice(&chunk[..read]); @@ -681,6 +924,18 @@ fn remote_file_helper_enabled() -> bool { env_bool("WPCOW_REMOTE_FILE_HELPER", true).unwrap_or(true) } +fn remote_db_helper_enabled() -> bool { + env_bool("WPCOW_REMOTE_DB_HELPER", true).unwrap_or(true) +} + +fn is_remote_db_connection_lost(error: &str) -> bool { + let error = error.to_ascii_lowercase(); + error.contains("server has gone away") + || error.contains("lost connection") + || error.contains("error while sending") + || error.contains("connection was killed") +} + fn trace_remote_result( op: &str, target: &str, @@ -716,17 +971,27 @@ while (($line = fgets(STDIN)) !== false) { $op = isset($request["op"]) ? $request["op"] : ""; $path = isset($request["path"]) ? $request["path"] : ""; if ($op === "stat") { + $max_file_bytes = isset($request["max_file_bytes"]) ? max(0, (int)$request["max_file_bytes"]) : 0; clearstatcache(true, $path); $s = @lstat($path); if ($s === false) { wpcow_not_found(); continue; } $kind = is_link($path) ? "symlink" : (is_dir($path) ? "dir" : (is_file($path) ? 
"file" : "other")); - wpcow_send(array("ok"=>true,"entry"=>array( + $entry = array( "name"=>basename($path), "kind"=>$kind, "size"=>(int)$s["size"], "mode"=>(int)$s["mode"], "mtime"=>(int)$s["mtime"] - ))); + ); + $payload = array("ok"=>true,"entry"=>$entry); + if ($max_file_bytes > 0 && $kind === "file" && (int)$s["size"] <= $max_file_bytes) { + $data = @file_get_contents($path); + if ($data !== false && strlen($data) === (int)$s["size"]) { + $payload["data"] = base64_encode($data); + $payload["size"] = strlen($data); + } + } + wpcow_send($payload); continue; } if ($op === "readdir") { @@ -743,6 +1008,35 @@ while (($line = fgets(STDIN)) !== false) { wpcow_send(array("ok"=>true,"entries"=>$out)); continue; } + if ($op === "prefetch_dir") { + if (!is_dir($path)) { wpcow_not_found(); continue; } + $max_file_bytes = isset($request["max_file_bytes"]) ? max(0, (int)$request["max_file_bytes"]) : 0; + $max_total_bytes = isset($request["max_total_bytes"]) ? max(0, (int)$request["max_total_bytes"]) : 0; + $total = 0; + $out = array(); + foreach (scandir($path) as $name) { + if ($name === "." || $name === "..") { continue; } + $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION)); + if ($ext !== "php" && $ext !== "json" && $ext !== "mo") { continue; } + $child = $path . DIRECTORY_SEPARATOR . 
$name; + $s = @lstat($child); + if ($s === false || !is_file($child)) { continue; } + $size = (int)$s["size"]; + if ($size > $max_file_bytes || $size + $total > $max_total_bytes) { continue; } + $data = @file_get_contents($child); + if ($data === false || strlen($data) !== $size) { continue; } + $total += $size; + $out[] = array("entry"=>array( + "name"=>$name, + "kind"=>"file", + "size"=>$size, + "mode"=>(int)$s["mode"], + "mtime"=>(int)$s["mtime"] + ),"data"=>base64_encode($data)); + } + wpcow_send(array("ok"=>true,"files"=>$out,"bytes"=>$total)); + continue; + } if ($op === "read_file") { if (!is_file($path)) { wpcow_not_found(); continue; } $data = @file_get_contents($path); @@ -772,6 +1066,63 @@ while (($line = fgets(STDIN)) !== false) { "# } +fn remote_db_helper_php() -> &'static str { + r#" +error_reporting(0); +$host=$argv[1];$user=$argv[2];$pass=$argv[3];$db=$argv[4];$timeout=(int)$argv[5]; +if($timeout<1){$timeout=10;} +@set_time_limit(0); +if(function_exists("mysqli_report")){mysqli_report(MYSQLI_REPORT_OFF);} +$port=null;$socket=null; +if(preg_match('/^(.+):([0-9]+)$/',$host,$m)){ + $host=$m[1];$port=(int)$m[2]; +} elseif(preg_match('/^([^:]+):(\/.*)$/',$host,$m)){ + $host=$m[1];$socket=$m[2]; +} +$mysqli=mysqli_init(); +@$mysqli->options(MYSQLI_OPT_CONNECT_TIMEOUT, min(5,$timeout)); +if(!@$mysqli->real_connect($host,$user,$pass,$db,$port,$socket)){ + echo json_encode(array("ok"=>false,"error"=>mysqli_connect_error(),"rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + exit(0); +} +@$mysqli->set_charset("utf8mb4"); +@$mysqli->query("SET SESSION max_execution_time=".max(1,$timeout * 1000)); +@$mysqli->query("SET SESSION max_statement_time=".max(1,$timeout)); +while (($line = fgets(STDIN)) !== false) { + $request = json_decode($line, true); + if (!is_array($request) || !isset($request["sql"])) { + echo json_encode(array("ok"=>false,"error"=>"invalid request","rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + continue; + 
} + $sql = $request["sql"]; + if(!preg_match('/^\s*(SELECT|SHOW|DESCRIBE|DESC|EXPLAIN)\b/i',$sql)){ + echo json_encode(array("ok"=>false,"error"=>"WPCOW_REFUSED_WRITE","rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + continue; + } + $res=$mysqli->query($sql, MYSQLI_STORE_RESULT); + if($res===false){ + echo json_encode(array("ok"=>false,"error"=>$mysqli->error,"rows"=>array(),"fields"=>array(),"affected"=>0)), "\n"; + flush(); + continue; + } + if($res===true){ + echo json_encode(array("ok"=>true,"error"=>"","rows"=>array(),"fields"=>array(),"affected"=>$mysqli->affected_rows)), "\n"; + flush(); + continue; + } + $fields=array(); + foreach($res->fetch_fields() as $field){$fields[]=$field->name;} + $rows=array(); + while($row=$res->fetch_assoc()){$rows[]=$row;} + echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"affected"=>count($rows))), "\n"; + flush(); +} +"# +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RemoteQueryResult { pub ok: bool, @@ -919,6 +1270,12 @@ fn remote_db_tcp_target(db_host: &str) -> Option<(String, u16)> { #[cfg(test)] mod tests { use super::*; + use std::fs; + use std::os::unix::fs::PermissionsExt; + use std::path::Path; + use std::sync::{Mutex, OnceLock}; + + static ENV_LOCK: OnceLock> = OnceLock::new(); #[test] fn quotes_shell_strings() { @@ -939,4 +1296,175 @@ mod tests { ); assert_eq!(remote_db_tcp_target("localhost:/tmp/mysql.sock"), None); } + + #[test] + fn classifies_remote_db_connection_loss_errors() { + assert!(is_remote_db_connection_lost("MySQL server has gone away")); + assert!(is_remote_db_connection_lost( + "Lost connection to MySQL server during query" + )); + assert!(!is_remote_db_connection_lost("Unknown column 'x'")); + } + + #[test] + #[ignore = "strict harness only: mutates process SSH helper env"] + fn stat_prefetch_returns_small_file_bytes_from_helper() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = 
std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + let old_timeout = std::env::var_os("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + fs::create_dir_all(&remote_root).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b" format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "1"); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", "5"); + + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + + let prefetched = remote + .stat_prefetch(Path::new("index.php"), 1024) + .expect("stat prefetch"); + assert_eq!(prefetched.entry.size, 20); + assert_eq!( + prefetched.data.as_deref(), + Some(&b" std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + match old_timeout { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"), + } + } + + #[test] + #[ignore = "strict harness only: mutates process SSH helper env"] + fn prefetch_dir_batches_only_runtime_file_types() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_helper = std::env::var_os("WPCOW_REMOTE_FILE_HELPER"); + let old_timeout = 
std::env::var_os("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let runtime_dir = remote_root.join("wp-content/plugins/example/includes"); + let bin = temp.path().join("bin"); + fs::create_dir_all(&runtime_dir).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(runtime_dir.join("a.php"), b" format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER", "1"); + std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", "5"); + + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + ..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + let files = remote + .prefetch_dir(Path::new("wp-content/plugins/example/includes"), 1024, 4096) + .expect("prefetch dir"); + let names = files + .iter() + .map(|stat| stat.entry.name.as_str()) + .collect::>(); + assert!(names.contains(&"a.php")); + assert!(names.contains(&"b.json")); + assert!(!names.contains(&"style.css")); + assert_eq!(files.iter().filter(|stat| stat.data.is_some()).count(), 2); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_helper { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER"), + } + match old_timeout { + Some(value) => std::env::set_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS", value), + None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"), + } + } } From 64a6704579302258b0a266e0ab09bd79be7e97c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 4 May 2026 19:14:33 +0200 Subject: 
[PATCH 33/39] Add bounded runtime cache for COW cold starts --- experiments/remote-wp-cow/.env.example | 10 +- experiments/remote-wp-cow/compose.yaml | 9 +- .../scripts/live-site-acceptance.sh | 40 +- .../remote-wp-cow/scripts/strict-harness.sh | 25 ++ experiments/remote-wp-cow/src/cli.rs | 55 ++- experiments/remote-wp-cow/src/config.rs | 8 + experiments/remote-wp-cow/src/generate.rs | 30 +- experiments/remote-wp-cow/src/main.rs | 1 + experiments/remote-wp-cow/src/overlay.rs | 105 ++++- experiments/remote-wp-cow/src/remote.rs | 383 +++++++++++++++++- experiments/remote-wp-cow/src/run.rs | 32 +- .../remote-wp-cow/src/runtime_cache.rs | 326 +++++++++++++++ 12 files changed, 1009 insertions(+), 15 deletions(-) create mode 100644 experiments/remote-wp-cow/src/runtime_cache.rs diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index 195234ba..d556919f 100644 --- a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -26,9 +26,17 @@ WPCOW_WEB_SERVER=frankenphp WPCOW_SPLASH=1 WPCOW_REMOTE_DB_TUNNEL=0 WPCOW_REMOTE_DB_HELPER=1 +WPCOW_RUNTIME_CODE_PACK=1 +WPCOW_RUNTIME_CODE_PACK_MAX_MB=256 +WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB=8 +WPCOW_RUNTIME_CODE_PACK_MAX_FILES=20000 +WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS=180 +WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN=0 +WPCOW_MATERIALIZE_OPTIONS_TABLE=1 WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 -WPCOW_ENABLE_PLUGINS=1 +# Keep arbitrary production plugins disabled unless explicitly debugging them. 
+WPCOW_ENABLE_PLUGINS=0 WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0 WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_REMOTE_STAT_PREFETCH_MAX_KB=0 diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 414a62c5..7590a064 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -31,12 +31,19 @@ services: WPCOW_HTTP_PORT: "${WPCOW_HTTP_PORT:-8080}" WPCOW_REMOTE_DB_TUNNEL: "${WPCOW_REMOTE_DB_TUNNEL:-0}" WPCOW_REMOTE_DB_HELPER: "${WPCOW_REMOTE_DB_HELPER:-1}" + WPCOW_RUNTIME_CODE_PACK: "${WPCOW_RUNTIME_CODE_PACK:-1}" + WPCOW_RUNTIME_CODE_PACK_MAX_MB: "${WPCOW_RUNTIME_CODE_PACK_MAX_MB:-256}" + WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB: "${WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB:-8}" + WPCOW_RUNTIME_CODE_PACK_MAX_FILES: "${WPCOW_RUNTIME_CODE_PACK_MAX_FILES:-20000}" + WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS: "${WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS:-180}" + WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN: "${WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0}" + WPCOW_MATERIALIZE_OPTIONS_TABLE: "${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" WPCOW_CONTROL_REQUEST_TIMEOUT_SECS: "${WPCOW_CONTROL_REQUEST_TIMEOUT_SECS:-60}" WPCOW_REMOTE_COMMAND_TIMEOUT_SECS: "${WPCOW_REMOTE_COMMAND_TIMEOUT_SECS:-20}" WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS: "${WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS:-10}" WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" - WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-1}" + WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-0}" WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS: "${WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS:-0}" WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" diff --git a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh index 24f4a133..aeb72a35 100755 --- a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh 
+++ b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh @@ -286,6 +286,13 @@ WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ WPCOW_PROXY_FRONTEND=0 \ WPCOW_REMOTE_DB_HELPER="${WPCOW_REMOTE_DB_HELPER:-1}" \ +WPCOW_RUNTIME_CODE_PACK="${WPCOW_RUNTIME_CODE_PACK:-1}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_MB:-256}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB:-8}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILES="${WPCOW_RUNTIME_CODE_PACK_MAX_FILES:-20000}" \ +WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS="${WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS:-180}" \ +WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN="${WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0}" \ +WPCOW_MATERIALIZE_OPTIONS_TABLE="${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" \ WPCOW_REMOTE_QUERY_CACHE=1 \ WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS="${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" \ WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ @@ -321,6 +328,30 @@ if rg -qi 'WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error| sed -n '1,80p' "$first_splash" >&2 fail "first splash request returned installer or wp-cow runtime error" fi +pack_wait="${WPCOW_RUNTIME_CODE_PACK_WAIT_SECS:-120}" +pack_started="$(date +%s)" +progress_path="$STATE_DIR/clones/$NAME/file-cache/progress.json" +while true; do + if [ -f "$progress_path" ]; then + progress_json="$(cat "$progress_path")" + else + progress_json="$(curl -sS --max-time 5 --connect-timeout 2 "$LOCAL_URL/__wp-cow/progress" || true)" + fi + phase="$(php -r '$j=json_decode(stream_get_contents(STDIN), true); echo is_array($j) && isset($j["phase"]) ? 
$j["phase"] : "";' <<<"$progress_json")" + if [ "${WPCOW_RUNTIME_CODE_PACK:-1}" = "0" ] && [ -z "$phase" ]; then + break + fi + case "$phase" in + runtime-code-pack-starting|runtime-code-pack|"") + if [ $(( $(date +%s) - pack_started )) -ge "$pack_wait" ]; then + echo "$progress_json" >&2 + fail "runtime code pack did not finish within ${pack_wait}s" + fi + sleep 0.5 + ;; + *) break ;; + esac +done first_body="$WORK_DIR/first.html" actual_timeout="${WPCOW_ACTUAL_TIMEOUT_SECS:-180}" @@ -412,6 +443,13 @@ php -r '$p=$argv[1]; $j=json_decode(file_get_contents($p), true); $j["ssh"]="wp- WPCOW_WEB_SERVER="${WPCOW_WEB_SERVER:-php}" \ WPCOW_SPLASH="${WPCOW_SPLASH:-1}" \ WPCOW_REMOTE_DB_HELPER="${WPCOW_REMOTE_DB_HELPER:-1}" \ +WPCOW_RUNTIME_CODE_PACK="${WPCOW_RUNTIME_CODE_PACK:-1}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_MB:-256}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB="${WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB:-8}" \ +WPCOW_RUNTIME_CODE_PACK_MAX_FILES="${WPCOW_RUNTIME_CODE_PACK_MAX_FILES:-20000}" \ +WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS="${WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS:-180}" \ +WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN="${WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0}" \ +WPCOW_MATERIALIZE_OPTIONS_TABLE="${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" \ WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ @@ -460,7 +498,7 @@ case "$http_status" in 2*|3*) ;; *) fail "wp-admin returned HTTP $http_status after login" ;; esac -if rg -qi ']+id="loginform"|name="loginform"|wp-login.php' "$admin_body"; then +if rg -qi ']+id="loginform"|name="loginform"' "$admin_body"; then fail "wp-admin still shows login form after local admin login" fi diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index 
a1405a2f..67ec78d2 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -41,9 +41,13 @@ cargo test --locked echo "== targeted behavior proofs ==" run_exact_test overlay::tests::lazy_remote_file_is_cached_and_survives_remote_loss run_exact_test overlay::tests::stat_prefetched_bytes_are_reused_without_remote_read +run_exact_test overlay::tests::cached_metadata_refreshes_when_another_overlay_appends_journal run_exact_ignored_test remote::tests::stat_prefetch_returns_small_file_bytes_from_helper run_exact_ignored_test remote::tests::prefetch_dir_batches_only_runtime_file_types +run_exact_ignored_test remote::tests::runtime_code_pack_streams_bounded_runtime_files run_exact_test cli::tests::offline_core_runtime_cache_is_bounded_to_wordpress_core +run_exact_test runtime_cache::tests::runtime_code_roots_are_bounded_to_core_theme_and_active_plugins +run_exact_test runtime_cache::tests::runtime_code_roots_respect_disabled_plugins run_exact_test overlay::tests::cached_only_copy_up_uses_materialized_files_without_remote run_exact_test fusefs::tests::offline_readdir_uses_cached_remote_metadata_without_remote run_exact_test fusefs::tests::remote_stat_metadata_survives_severed_mode_without_remote @@ -67,6 +71,7 @@ run_exact_ignored_test generate::tests::production_run_harness_proves_fuse_rust_ echo "== implementation invariants ==" need_pattern src/cli.rs 'Command::Serve' "one-command serve subcommand" +need_pattern src/cli.rs 'WPCOW_MATERIALIZE_OPTIONS_TABLE' "serve materializes bounded WordPress options table for plugin bootstrap" need_pattern src/cli.rs 'Command::Sever' "sever/offline subcommand" need_pattern src/cli.rs 'cache_offline_core_runtime' "offline login/admin core runtime cache" need_pattern src/cli.rs 'wp-content/uploads' "offline core runtime cache excludes uploads" @@ -96,16 +101,27 @@ need_pattern src/remote.rs 'WPCOW_REMOTE_DB_HELPER", true' "remote DB lower read need_pattern src/remote.rs 
'remote_db_helper_php' "remote DB helper keeps one read-only mysqli session open" need_pattern src/remote.rs 'is_remote_db_connection_lost' "remote DB helper reconnects after idle connection loss" need_pattern src/remote.rs 'WPCOW_REFUSED_WRITE' "remote DB helper refuses write-shaped SQL" +need_pattern src/remote.rs 'runtime_code_pack_php' "remote runtime code pack streams bounded executable files" +need_pattern src/runtime_cache.rs 'warm_runtime_code_cache' "one-command runtime code warmup" +need_pattern src/runtime_cache.rs 'WPCOW_RUNTIME_CODE_PACK_MAX_MB' "runtime code cache has a byte cap" +need_pattern src/runtime_cache.rs 'warm_runtime_code_cache_with_admin' "sever path explicitly warms admin runtime" +need_pattern src/runtime_cache.rs 'wp-content/uploads' "runtime code cache excludes uploads" +need_pattern src/runtime_cache.rs 'active_plugins' "runtime code cache includes active plugin roots" +need_pattern src/cli.rs 'uploads/media remain lazy' "serve explains media remains lazy after runtime code pack" need_pattern src/fusefs.rs 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB' "FUSE stat path can prefetch small file bytes" need_pattern src/fusefs.rs 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB' "FUSE batches same-directory runtime files with a byte cap" need_pattern src/remote.rs '\$op === "prefetch_dir"' "remote file helper supports bounded directory batch reads" need_pattern src/overlay.rs 'put_cached_file_bytes' "stat-prefetched file bytes are stored in the normal file cache" +need_pattern src/overlay.rs 'metadata_journal_len_on_disk' "mounted FUSE metadata view refreshes runtime cache journal writes" need_pattern scripts/live-site-acceptance.sh 'WPCOW_REMOTE_DB_HELPER="\$\{WPCOW_REMOTE_DB_HELPER:-1\}"' "live acceptance runs through persistent remote DB helper" +need_pattern scripts/live-site-acceptance.sh 'WPCOW_RUNTIME_CODE_PACK="\$\{WPCOW_RUNTIME_CODE_PACK:-1\}"' "live acceptance runs through bounded runtime code cache" +need_pattern scripts/live-site-acceptance.sh 
'WPCOW_MATERIALIZE_OPTIONS_TABLE="\$\{WPCOW_MATERIALIZE_OPTIONS_TABLE:-1\}"' "live acceptance materializes options table" need_pattern scripts/live-site-acceptance.sh 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "live acceptance keeps experimental stat prefetch off by default" need_pattern scripts/live-site-acceptance.sh 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "live acceptance keeps experimental sibling prefetch off by default" need_pattern src/run.rs 'WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS' "plugin side-effect escape hatch is explicit" need_pattern src/run.rs 'disable_functions' "PHP side-effect functions are disabled by default" need_pattern src/run.rs 'stream_socket_client' "raw plugin socket egress is disabled by default" +need_pattern src/run.rs 'curl_exec' "direct plugin cURL egress is disabled by default" need_pattern src/run.rs 'WPCOW_OPCACHE_VALIDATE_TIMESTAMPS' "OPcache timestamp validation is configurable" need_pattern src/generate.rs 'function cow_offline' "PHP DB offline mode" need_pattern src/db.rs 'set_local_admin_password' "local-only admin password override" @@ -117,6 +133,9 @@ need_pattern src/generate.rs 'read_line_count\(&fake_ssh_log\)' "strict harness need_pattern compose.yaml '\$\{WPCOW_HTTP_PORT:-8080\}:8080' "Docker host HTTP port exposure" need_pattern compose.yaml 'WPCOW_HTTP: 0\.0\.0\.0:8080' "Docker in-container HTTP listener" need_pattern compose.yaml 'WPCOW_REMOTE_DB_HELPER: "\$\{WPCOW_REMOTE_DB_HELPER:-1\}"' "Docker compose defaults persistent DB helper on" +need_pattern compose.yaml 'WPCOW_RUNTIME_CODE_PACK: "\$\{WPCOW_RUNTIME_CODE_PACK:-1\}"' "Docker compose defaults bounded runtime code cache on" +need_pattern compose.yaml 'WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN: "\$\{WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0\}"' "Docker compose keeps admin pack out of frontend warmup" +need_pattern compose.yaml 'WPCOW_MATERIALIZE_OPTIONS_TABLE: 
"\$\{WPCOW_MATERIALIZE_OPTIONS_TABLE:-1\}"' "Docker compose defaults options table materialization on" need_pattern compose.yaml 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "Docker compose defaults experimental stat prefetch off" need_pattern compose.yaml 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "Docker compose defaults experimental sibling prefetch off" need_pattern .dockerignore '^/target/$' "Docker build context target exclusion" @@ -128,6 +147,12 @@ need_pattern .env.example '^WPCOW_HTTP_PORT=9481$' "Docker lab example host HTTP need_pattern .env.example '^WPCOW_WEB_SERVER=frankenphp$' "Docker lab example FrankenPHP preference" need_pattern .env.example '^WPCOW_REMOTE_DB_TUNNEL=0$' "Docker lab example disables remote DB tunnel by default" need_pattern .env.example '^WPCOW_REMOTE_DB_HELPER=1$' "Docker lab example uses persistent remote DB helper" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK=1$' "Docker lab example uses bounded runtime code cache" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_MAX_MB=256$' "Docker lab example caps runtime code cache" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_MAX_FILES=20000$' "Docker lab example allows large active plugin sets within cap" +need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN=0$' "Docker lab example keeps admin pack out of frontend warmup" +need_pattern .env.example '^WPCOW_MATERIALIZE_OPTIONS_TABLE=1$' "Docker lab example materializes options table" +need_pattern .env.example '^WPCOW_ENABLE_PLUGINS=0$' "Docker lab example keeps arbitrary production plugins disabled by default" need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" need_pattern .env.example '^WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0$' "Docker lab example keeps PHP side-effect guards enabled" need_pattern .env.example '^WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0$' "Docker lab example keeps warm render 
OPcache fast path enabled" diff --git a/experiments/remote-wp-cow/src/cli.rs b/experiments/remote-wp-cow/src/cli.rs index 88076616..11f4c003 100644 --- a/experiments/remote-wp-cow/src/cli.rs +++ b/experiments/remote-wp-cow/src/cli.rs @@ -14,6 +14,7 @@ use crate::generate; use crate::overlay::OverlayStore; use crate::remote::{probe_wordpress, RemoteClient}; use crate::run::{self, RunOptions}; +use crate::runtime_cache; #[derive(Debug, Parser)] #[command(name = "wp-cow")] @@ -306,7 +307,8 @@ fn serve_site(args: ServeArgs) -> Result<()> { && (should_probe || manifest.probe.db_name.is_empty() || manifest.probe.db_host.is_empty() - || manifest.probe.db_user.is_empty()) + || manifest.probe.db_user.is_empty() + || (manifest.probe.template.is_empty() && manifest.probe.stylesheet.is_empty())) { manifest.probe = probe_wordpress(&manifest.ssh, &manifest.remote_path)?; changed = true; @@ -342,7 +344,7 @@ fn serve_site(args: ServeArgs) -> Result<()> { ); } println!( - "runtime/plugin/theme/upload trees stay lazy for '{}'; requested files will be cached on demand", + "runtime code is cached in a bounded pack for '{}'; uploads/media remain lazy and are cached on demand", manifest.name ); @@ -383,6 +385,28 @@ fn serve_site(args: ServeArgs) -> Result<()> { ); } + if materialize_options_table_enabled() { + let phase_started = Instant::now(); + let remote = RemoteClient::new( + manifest.clone(), + Some(crate::config::ssh_control_path(&paths)), + ); + remote.ensure_master()?; + let options_table = format!("{}options", manifest.probe.table_prefix); + let materialized = db::materialize_tables( + &remote, + &manifest, + &paths, + std::slice::from_ref(&options_table), + ) + .context("materialize WordPress options table")?; + println!( + "materialized {} WordPress options table(s) for local plugin/runtime reads in {:.2}s", + materialized.len(), + phase_started.elapsed().as_secs_f64() + ); + } + println!( "starting lazy COW server after {:.2}s; files and database rows are fetched on demand, 
not copied up front", serve_started.elapsed().as_secs_f64() @@ -501,9 +525,18 @@ fn sever(args: SeverArgs) -> Result<()> { }; if admin.is_some() { - let cached = cache_offline_core_runtime(&remote, &manifest, &paths) - .context("cache WordPress core/admin runtime for offline login")?; - println!("cached {cached} WordPress core/admin runtime files for offline login"); + let warmed = runtime_cache::warm_runtime_code_cache_with_admin(&remote, &manifest, &paths) + .context("cache WordPress runtime code for offline login")?; + if warmed.files > 0 { + println!( + "cached {} bounded runtime code files for offline login", + warmed.files + ); + } else { + let cached = cache_offline_core_runtime(&remote, &manifest, &paths) + .context("cache WordPress core/admin runtime for offline login")?; + println!("cached {cached} WordPress core/admin runtime files for offline login"); + } } let marker = OfflineMarker { @@ -639,6 +672,18 @@ fn run_clone(args: RunArgs) -> Result<()> { run::run_site(manifest, paths, options) } +fn materialize_options_table_enabled() -> bool { + std::env::var("WPCOW_MATERIALIZE_OPTIONS_TABLE") + .ok() + .map(|raw| { + matches!( + raw.to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) + }) + .unwrap_or(true) +} + #[cfg(test)] mod tests { use super::*; diff --git a/experiments/remote-wp-cow/src/config.rs b/experiments/remote-wp-cow/src/config.rs index 73f9d224..6894584b 100644 --- a/experiments/remote-wp-cow/src/config.rs +++ b/experiments/remote-wp-cow/src/config.rs @@ -49,6 +49,14 @@ pub struct Probe { pub db_password: String, pub siteurl: String, pub home: String, + #[serde(default)] + pub template: String, + #[serde(default)] + pub stylesheet: String, + #[serde(default)] + pub active_plugins: Vec, + #[serde(default)] + pub active_sitewide_plugins: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index cbbf8bf7..f9e91de7 100644 --- 
a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -901,8 +901,32 @@ if ( $should_show_splash ) { } } + function runtimePackActive(progress) { + return progress && ( + progress.phase === 'runtime-code-pack-starting' || + progress.phase === 'runtime-code-pack' + ); + } + + async function waitForRuntimePack() { + for (;;) { + try { + const response = await fetch('/__wp-cow/progress', { cache: 'no-store' }); + if (response.ok) { + const progress = await response.json(); + render(progress); + if (!runtimePackActive(progress)) return; + } + } catch (error) { + return; + } + await new Promise(resolve => setTimeout(resolve, 500)); + } + } + async function warm() { try { + await waitForRuntimePack(); const response = await fetch(target.toString(), { cache: 'no-store' }); const html = await response.text(); warmDone = true; @@ -1758,8 +1782,12 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { "uploads must not be mirrored or prefetched" ); let ssh_log = fs::read_to_string(&fake_ssh_log).unwrap_or_default(); + let remote_uploads_path = format!( + "{}/wp-content/uploads", + remote_docroot.to_string_lossy().trim_end_matches('/') + ); assert!( - !ssh_log.contains("wp-content/uploads"), + !ssh_log.contains(&remote_uploads_path), "production run should not touch uploads unless requested:\n{}", ssh_log ); diff --git a/experiments/remote-wp-cow/src/main.rs b/experiments/remote-wp-cow/src/main.rs index 6d0b9f99..300e3b2c 100644 --- a/experiments/remote-wp-cow/src/main.rs +++ b/experiments/remote-wp-cow/src/main.rs @@ -9,6 +9,7 @@ mod overlay; mod remote; mod row_cow; mod run; +mod runtime_cache; mod sql; fn main() -> anyhow::Result<()> { diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index fb5b97f6..b5afa068 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -47,6 +47,7 @@ pub struct OverlayStore { 
whiteouts_path: PathBuf, whiteouts: RefCell>, metadata: RefCell>, + metadata_journal_len: RefCell, missing: RefCell>, } @@ -58,6 +59,7 @@ impl OverlayStore { whiteouts_path: paths.whiteouts.clone(), whiteouts: RefCell::new(None), metadata: RefCell::new(None), + metadata_journal_len: RefCell::new(0), missing: RefCell::new(None), } } @@ -201,6 +203,25 @@ impl OverlayStore { rel: &Path, entry: &RemoteEntry, bytes: &[u8], + ) -> Result<()> { + self.put_cached_file_bytes_inner(rel, entry, bytes, true) + } + + pub fn put_cached_file_bytes_without_progress( + &self, + rel: &Path, + entry: &RemoteEntry, + bytes: &[u8], + ) -> Result<()> { + self.put_cached_file_bytes_inner(rel, entry, bytes, false) + } + + fn put_cached_file_bytes_inner( + &self, + rel: &Path, + entry: &RemoteEntry, + bytes: &[u8], + update_progress: bool, ) -> Result<()> { if entry.kind != "file" { return self.put_cached_entry(rel, entry); @@ -227,12 +248,29 @@ impl OverlayStore { out.write_all(bytes)?; drop(out); fs::rename(tmp, &cache_path)?; - let _ = self.finish_cache_progress(&rel_string, entry.size); + if update_progress { + let _ = self.finish_cache_progress(&rel_string, entry.size); + } } self.put_cached_entry(&rel, entry) } + pub fn note_cache_fetch( + &self, + rel: &Path, + phase: &str, + active_bytes: u64, + active_total: u64, + ) -> Result<()> { + self.write_cache_progress( + &Self::rel_string(&Self::clean_rel(rel)?), + phase, + active_bytes, + active_total, + ) + } + pub fn remove_cached(&self, rel: &Path) -> Result<()> { let path = self.cache_path(rel); if path.exists() { @@ -534,7 +572,16 @@ impl OverlayStore { } fn load_metadata(&self) -> Result { - if let Some(metadata) = self.metadata.borrow().as_ref() { + let journal_len = self.metadata_journal_len_on_disk(); + let cached_metadata = { self.metadata.borrow().clone() }; + if let Some(metadata) = cached_metadata { + if *self.metadata_journal_len.borrow() == journal_len { + return Ok(metadata); + } + let mut metadata = metadata; + 
self.apply_metadata_journal(&mut metadata)?; + *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = journal_len; return Ok(metadata.clone()); } let path = self.metadata_path(); @@ -542,6 +589,7 @@ impl OverlayStore { let mut metadata = MetadataFile::default(); self.apply_metadata_journal(&mut metadata)?; *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = journal_len; return Ok(metadata); } let mut json = String::new(); @@ -549,6 +597,7 @@ impl OverlayStore { let mut metadata: MetadataFile = serde_json::from_str(&json)?; self.apply_metadata_journal(&mut metadata)?; *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = journal_len; Ok(metadata) } @@ -602,9 +651,16 @@ impl OverlayStore { fs::rename(tmp, self.metadata_path())?; let _ = fs::remove_file(self.metadata_journal_path()); *self.metadata.borrow_mut() = Some(metadata.clone()); + *self.metadata_journal_len.borrow_mut() = 0; Ok(()) } + fn metadata_journal_len_on_disk(&self) -> u64 { + fs::metadata(self.metadata_journal_path()) + .map(|metadata| metadata.len()) + .unwrap_or(0) + } + fn apply_metadata_journal(&self, metadata: &mut MetadataFile) -> Result<()> { let path = self.metadata_journal_path(); if !path.exists() { @@ -656,6 +712,8 @@ impl OverlayStore { serde_json::to_writer(&mut file, &value)?; file.write_all(b"\n")?; } + drop(file); + *self.metadata_journal_len.borrow_mut() = self.metadata_journal_len_on_disk(); Ok(()) } @@ -865,6 +923,49 @@ mod tests { assert!(reloaded.cached_entry(rel).unwrap().is_none()); } + #[test] + fn cached_metadata_refreshes_when_another_overlay_appends_journal() { + let temp = tempfile::tempdir().unwrap(); + let paths = ClonePaths { + root: temp.path().to_path_buf(), + manifest: temp.path().join("manifest.json"), + upper: temp.path().join("upper"), + file_cache: temp.path().join("file-cache"), + db: temp.path().join("db"), + generated: 
temp.path().join("generated"), + run: temp.path().join("run"), + whiteouts: temp.path().join("whiteouts.json"), + }; + let mounted_view = OverlayStore::new(&paths); + assert!(mounted_view + .cached_entry(Path::new("wp-includes/load.php")) + .unwrap() + .is_none()); + + let pack_writer = OverlayStore::new(&paths); + let entry = RemoteEntry { + name: "load.php".to_string(), + kind: "file".to_string(), + size: 12, + mode: 0o100644, + mtime: 123, + }; + pack_writer + .put_cached_file_bytes_without_progress( + Path::new("wp-includes/load.php"), + &entry, + b">, } +#[derive(Debug, Clone)] +pub struct RuntimeCodePackLimits { + pub max_file_bytes: u64, + pub max_total_bytes: u64, + pub max_files: u64, +} + +#[derive(Debug, Clone)] +pub struct RuntimeCodePackFile { + pub rel: PathBuf, + pub entry: RemoteEntry, + pub bytes: Vec, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RuntimeCodePackSummary { + pub files: u64, + pub bytes: u64, + pub skipped: u64, + pub capped: bool, +} + #[derive(Debug, Clone)] pub struct RemoteClient { manifest: Manifest, @@ -513,6 +535,130 @@ echo $target; Ok(String::from_utf8_lossy(&bytes).to_string()) } + pub fn runtime_code_pack( + &self, + roots: &[PathBuf], + limits: RuntimeCodePackLimits, + mut on_file: F, + ) -> Result + where + F: FnMut(RuntimeCodePackFile) -> Result<()>, + { + let started = Instant::now(); + let result = self.runtime_code_pack_inner(roots, limits, &mut on_file); + trace_remote_result( + "runtime_code_pack", + &format!("{} roots", roots.len()), + started, + &result, + ); + result + } + + fn runtime_code_pack_inner( + &self, + roots: &[PathBuf], + limits: RuntimeCodePackLimits, + on_file: &mut F, + ) -> Result + where + F: FnMut(RuntimeCodePackFile) -> Result<()>, + { + if limits.max_file_bytes == 0 || limits.max_total_bytes == 0 || limits.max_files == 0 { + return Ok(RuntimeCodePackSummary::default()); + } + + let roots = roots + .iter() + .map(|root| { + OverlayStore::clean_rel(root) + 
.map(|clean| OverlayStore::rel_string(&clean)) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string())) + }) + .collect::>>()?; + if roots.is_empty() { + return Ok(RuntimeCodePackSummary::default()); + } + + let mut remote_command = format!("php -r {} --", shell_quote(runtime_code_pack_php())); + for arg in [ + self.manifest.remote_path.clone(), + serde_json::to_string(&roots)?, + limits.max_file_bytes.to_string(), + limits.max_total_bytes.to_string(), + limits.max_files.to_string(), + ] { + remote_command.push(' '); + remote_command.push_str(&shell_quote(arg)); + } + + let mut command = self.ssh_command(&remote_command, runtime_code_pack_timeout_secs()); + let mut child = command + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("start remote runtime code pack")?; + let stdout = child + .stdout + .take() + .ok_or_else(|| anyhow!("runtime code pack stdout"))?; + + let mut summary = RuntimeCodePackSummary::default(); + for line in BufReader::new(stdout).lines() { + let line = line.context("read remote runtime code pack")?; + if line.trim().is_empty() { + continue; + } + let value: serde_json::Value = serde_json::from_str(&line) + .with_context(|| format!("decode remote runtime code pack line: {line}"))?; + match value.get("type").and_then(|value| value.as_str()) { + Some("file") => { + let rel = value + .get("path") + .and_then(|value| value.as_str()) + .ok_or_else(|| anyhow!("runtime code pack file missing path"))?; + let entry: RemoteEntry = serde_json::from_value( + value + .get("entry") + .cloned() + .ok_or_else(|| anyhow!("runtime code pack file missing entry"))?, + )?; + let bytes = decode_helper_data(value.clone())?; + if entry.kind == "file" && bytes.len() as u64 == entry.size { + on_file(RuntimeCodePackFile { + rel: PathBuf::from(rel), + entry, + bytes, + })?; + } + } + Some("summary") => { + summary = serde_json::from_value(value.clone())?; + } + Some("error") => { + let error = value + 
.get("error") + .and_then(|value| value.as_str()) + .unwrap_or("remote runtime code pack failed"); + return Err(anyhow!(error.to_string())); + } + _ => {} + } + } + + let output = child.wait_with_output()?; + if !output.status.success() { + return Err(anyhow!( + "remote runtime code pack exited with status {}: {}", + output.status, + String::from_utf8_lossy(&output.stderr) + )); + } + + Ok(summary) + } + pub fn remote_query_readonly(&self, sql: &str) -> Result { let started = Instant::now(); let result = self.remote_query_readonly_inner(sql); @@ -1118,8 +1264,116 @@ while (($line = fgets(STDIN)) !== false) { $rows=array(); while($row=$res->fetch_assoc()){$rows[]=$row;} echo json_encode(array("ok"=>true,"error"=>"","rows"=>$rows,"fields"=>$fields,"affected"=>count($rows))), "\n"; +flush(); +} +"# +} + +fn runtime_code_pack_php() -> &'static str { + r#" +error_reporting(0); +$base = rtrim($argv[1], "/"); +$roots = json_decode($argv[2], true); +$max_file_bytes = max(0, (int)$argv[3]); +$max_total_bytes = max(0, (int)$argv[4]); +$max_files = max(0, (int)$argv[5]); +$total = 0; +$files = 0; +$skipped = 0; +$capped = false; +if (!is_array($roots)) { $roots = array(); } +function wpcow_pack_send($payload) { + echo json_encode($payload), "\n"; flush(); } +function wpcow_pack_clean($rel) { + $rel = str_replace("\\", "/", (string)$rel); + $rel = trim($rel, "/"); + if ($rel === "") { return false; } + $parts = array(); + foreach (explode("/", $rel) as $part) { + if ($part === "" || $part === ".") { continue; } + if ($part === "..") { return false; } + $parts[] = $part; + } + return implode("/", $parts); +} +function wpcow_pack_allowed_ext($rel) { + $ext = strtolower(pathinfo($rel, PATHINFO_EXTENSION)); + return in_array($ext, array("php", "inc", "phtml", "json", "mo"), true); +} +function wpcow_pack_excluded($rel) { + return $rel === "wp-config.php" || strpos($rel . 
"/", "wp-content/uploads/") === 0; +} +function wpcow_pack_entry($path, $name, $size, $mtime) { + $mode = 0100644; + $stat = @lstat($path); + if (is_array($stat)) { $mode = (int)$stat["mode"]; } + return array("name"=>$name,"kind"=>"file","size"=>$size,"mode"=>$mode,"mtime"=>$mtime); +} +function wpcow_pack_file($rel, $path) { + global $max_file_bytes, $max_total_bytes, $max_files, $total, $files, $skipped, $capped; + if ($capped) { return; } + if (wpcow_pack_excluded($rel) || !wpcow_pack_allowed_ext($rel)) { $skipped++; return; } + clearstatcache(true, $path); + if (!is_file($path)) { $skipped++; return; } + $size = filesize($path); + if ($size === false) { $skipped++; return; } + $size = (int)$size; + if ($size > $max_file_bytes) { $skipped++; return; } + if ($files >= $max_files || $total + $size > $max_total_bytes) { $capped = true; return; } + $data = @file_get_contents($path); + if ($data === false || strlen($data) !== $size) { $skipped++; return; } + $mtime = @filemtime($path); + if ($mtime === false) { $mtime = 0; } + $files++; + $total += $size; + wpcow_pack_send(array( + "type"=>"file", + "path"=>$rel, + "entry"=>wpcow_pack_entry($path, basename($path), $size, (int)$mtime), + "data"=>base64_encode($data) + )); +} +function wpcow_pack_dir($rel, $path) { + global $capped; + $stack = array(array($rel, $path)); + while (!$capped && !empty($stack)) { + $item = array_pop($stack); + $dir_rel = $item[0]; + $dir_path = $item[1]; + if (wpcow_pack_excluded($dir_rel) || !is_dir($dir_path)) { continue; } + $names = @scandir($dir_path); + if (!is_array($names)) { continue; } + rsort($names, SORT_STRING); + foreach ($names as $name) { + if ($name === "." || $name === "..") { continue; } + $child_rel = $dir_rel === "" ? $name : $dir_rel . "/" . $name; + $child_path = $dir_path . DIRECTORY_SEPARATOR . 
$name; + if (wpcow_pack_excluded($child_rel)) { continue; } + if (is_dir($child_path) && !is_link($child_path)) { + $stack[] = array($child_rel, $child_path); + } elseif (is_file($child_path)) { + wpcow_pack_file($child_rel, $child_path); + if ($capped) { break; } + } + } + } +} +foreach ($roots as $root) { + if ($capped) { break; } + $rel = wpcow_pack_clean($root); + if ($rel === false) { $skipped++; continue; } + $path = $base . "/" . $rel; + if (is_file($path)) { + wpcow_pack_file($rel, $path); + } elseif (is_dir($path)) { + wpcow_pack_dir($rel, $path); + } else { + $skipped++; + } +} +wpcow_pack_send(array("type"=>"summary","files"=>$files,"bytes"=>$total,"skipped"=>$skipped,"capped"=>$capped)); "# } @@ -1154,7 +1408,11 @@ $out = array( 'db_user' => defined('DB_USER') ? DB_USER : '', 'db_password' => defined('DB_PASSWORD') ? DB_PASSWORD : '', 'siteurl' => function_exists('get_option') ? get_option('siteurl') : '', - 'home' => function_exists('get_option') ? get_option('home') : '' + 'home' => function_exists('get_option') ? get_option('home') : '', + 'template' => function_exists('get_template') ? get_template() : (function_exists('get_option') ? get_option('template') : ''), + 'stylesheet' => function_exists('get_stylesheet') ? get_stylesheet() : (function_exists('get_option') ? get_option('stylesheet') : ''), + 'active_plugins' => function_exists('get_option') && is_array(get_option('active_plugins')) ? array_values(get_option('active_plugins')) : array(), + 'active_sitewide_plugins' => function_exists('get_site_option') && is_array(get_site_option('active_sitewide_plugins')) ? 
array_keys(get_site_option('active_sitewide_plugins')) : array() ); echo json_encode($out); "#; @@ -1224,6 +1482,10 @@ fn remote_db_query_timeout_secs() -> u64 { env_u64("WPCOW_REMOTE_DB_QUERY_TIMEOUT_SECS", 10) } +fn runtime_code_pack_timeout_secs() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS", 180) +} + fn ssh_connect_timeout_secs() -> u64 { env_u64("WPCOW_SSH_CONNECT_TIMEOUT_SECS", 8) } @@ -1272,7 +1534,7 @@ mod tests { use super::*; use std::fs; use std::os::unix::fs::PermissionsExt; - use std::path::Path; + use std::path::{Path, PathBuf}; use std::sync::{Mutex, OnceLock}; static ENV_LOCK: OnceLock> = OnceLock::new(); @@ -1467,4 +1729,119 @@ exec bash -lc "$cmd" None => std::env::remove_var("WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS"), } } + + #[test] + #[ignore = "strict harness only: mutates process SSH helper env"] + fn runtime_code_pack_streams_bounded_runtime_files() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + + let old_path = std::env::var_os("PATH"); + let old_timeout = std::env::var_os("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS"); + + let temp = tempfile::tempdir().unwrap(); + let remote_root = temp.path().join("remote"); + let bin = temp.path().join("bin"); + fs::create_dir_all(remote_root.join("wp-includes")).unwrap(); + fs::create_dir_all(remote_root.join("wp-content/uploads/2026/05")).unwrap(); + fs::create_dir_all(remote_root.join("wp-content/plugins/example/assets")).unwrap(); + fs::create_dir_all(&bin).unwrap(); + fs::write(remote_root.join("index.php"), b" format!("{}:{}", bin.display(), old.to_string_lossy()), + None => bin.display().to_string(), + }; + std::env::set_var("PATH", path); + std::env::set_var("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS", "10"); + + let manifest = Manifest::new( + "example".to_string(), + "fake-host".to_string(), + remote_root.to_string_lossy().to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + 
..Probe::default() + }, + ); + let remote = RemoteClient::new(manifest, None); + let mut files = Vec::new(); + let summary = remote + .runtime_code_pack( + &[ + PathBuf::from("index.php"), + PathBuf::from("wp-config.php"), + PathBuf::from("wp-includes"), + PathBuf::from("wp-content/plugins/example"), + PathBuf::from("wp-content/uploads"), + ], + RuntimeCodePackLimits { + max_file_bytes: 1024, + max_total_bytes: 8192, + max_files: 100, + }, + |file| { + files.push((file.rel, file.entry.name, file.bytes)); + Ok(()) + }, + ) + .expect("runtime code pack"); + + let paths = files + .iter() + .map(|(rel, _, _)| rel.to_string_lossy().to_string()) + .collect::>(); + assert!(paths.contains(&"index.php".to_string())); + assert!(paths.contains(&"wp-includes/load.php".to_string())); + assert!(paths.contains(&"wp-includes/blocks.json".to_string())); + assert!(paths.contains(&"wp-content/plugins/example/example.php".to_string())); + assert!(!paths.contains(&"wp-config.php".to_string())); + assert!(!paths + .iter() + .any(|path| path.starts_with("wp-content/uploads/"))); + assert!(!paths.iter().any(|path| path.ends_with(".css"))); + assert_eq!(summary.files as usize, files.len()); + + match old_path { + Some(value) => std::env::set_var("PATH", value), + None => std::env::remove_var("PATH"), + } + match old_timeout { + Some(value) => std::env::set_var("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS", value), + None => std::env::remove_var("WPCOW_RUNTIME_CODE_PACK_TIMEOUT_SECS"), + } + } } diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index 90ef2737..e76e833c 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -14,6 +14,7 @@ use crate::fusefs; use crate::generate::ROUTER_BASENAME; use crate::mysql_proxy; use crate::remote::RemoteClient; +use crate::runtime_cache; pub struct RunOptions { pub mountpoint: PathBuf, @@ -75,6 +76,34 @@ fn run_site_until_shutdown( } } }; + if !offline && 
runtime_cache::runtime_code_pack_enabled() { + runtime_cache::mark_runtime_code_cache_starting(&paths); + let warm_manifest = manifest.clone(); + let warm_paths = paths.clone(); + let warm_remote = remote.clone(); + thread::spawn(move || { + match runtime_cache::warm_runtime_code_cache(&warm_remote, &warm_manifest, &warm_paths) + { + Ok(summary) => { + eprintln!( + "wp-cow cached {} bounded runtime code files ({:.1} MB); uploads/media remain lazy", + summary.files, + summary.bytes as f64 / (1024.0 * 1024.0) + ); + if summary.capped { + eprintln!( + "wp-cow runtime code cache hit its configured cap; remaining runtime files stay lazy" + ); + } + } + Err(err) => { + runtime_cache::mark_runtime_code_cache_failed(&warm_paths); + eprintln!("wp-cow runtime code cache failed: {err:#}"); + eprintln!("wp-cow continuing with lazy per-file remote reads"); + } + } + }); + } let control_shutdown = shutdown.clone(); let control_manifest = manifest.clone(); @@ -249,7 +278,7 @@ fn php_side_effect_guards_enabled() -> bool { } fn php_disabled_functions() -> &'static str { - "exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client" + "exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec" } fn php_safety_ini_entries() -> Vec<(&'static str, String)> { @@ -514,6 +543,7 @@ mod tests { #[test] fn web_runtime_disables_common_plugin_side_effect_primitives() { assert!(php_disabled_functions().contains("stream_socket_client")); + assert!(php_disabled_functions().contains("curl_exec")); assert!(php_disabled_functions().contains("proc_open")); assert!(php_disabled_functions().contains("mail")); diff --git a/experiments/remote-wp-cow/src/runtime_cache.rs b/experiments/remote-wp-cow/src/runtime_cache.rs new file mode 100644 index 00000000..0d55a415 --- /dev/null +++ b/experiments/remote-wp-cow/src/runtime_cache.rs @@ -0,0 +1,326 @@ +use anyhow::{Context, Result}; +use 
std::collections::BTreeSet; +use std::path::{Path, PathBuf}; + +use crate::config::{ClonePaths, Manifest}; +use crate::overlay::OverlayStore; +use crate::remote::{RemoteClient, RuntimeCodePackLimits, RuntimeCodePackSummary}; + +const ROOT_RUNTIME_FILES: &[&str] = &[ + "index.php", + "wp-activate.php", + "wp-blog-header.php", + "wp-comments-post.php", + "wp-cron.php", + "wp-links-opml.php", + "wp-load.php", + "wp-login.php", + "wp-mail.php", + "wp-settings.php", + "wp-signup.php", + "wp-trackback.php", + "xmlrpc.php", +]; + +pub fn warm_runtime_code_cache( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, +) -> Result { + warm_runtime_code_cache_inner(remote, manifest, paths, runtime_code_pack_include_admin()) +} + +pub fn warm_runtime_code_cache_with_admin( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, +) -> Result { + warm_runtime_code_cache_inner(remote, manifest, paths, true) +} + +fn warm_runtime_code_cache_inner( + remote: &RemoteClient, + manifest: &Manifest, + paths: &ClonePaths, + include_admin: bool, +) -> Result { + if !runtime_code_pack_enabled() { + return Ok(RuntimeCodePackSummary::default()); + } + + let roots = runtime_code_pack_roots_with_admin(manifest, include_admin); + if roots.is_empty() { + return Ok(RuntimeCodePackSummary::default()); + } + + let overlay = OverlayStore::new(paths); + let limits = RuntimeCodePackLimits { + max_file_bytes: runtime_code_pack_max_file_bytes().min(manifest.cache_max_file_bytes), + max_total_bytes: runtime_code_pack_max_bytes(), + max_files: runtime_code_pack_max_files(), + }; + + let summary = remote + .runtime_code_pack(&roots, limits, |file| { + overlay + .put_cached_file_bytes_without_progress(&file.rel, &file.entry, &file.bytes) + .with_context(|| { + format!( + "cache remote runtime code file {}", + OverlayStore::rel_string(&file.rel) + ) + })?; + let _ = overlay.note_cache_fetch( + &file.rel, + "runtime-code-pack", + file.entry.size, + file.entry.size, + ); + Ok(()) 
+ }) + .context("cache remote runtime code pack")?; + let _ = overlay.note_cache_fetch( + Path::new(".wp-cow-runtime-code-pack"), + "runtime-code-pack-done", + summary.bytes, + summary.bytes, + ); + Ok(summary) +} + +pub fn mark_runtime_code_cache_starting(paths: &ClonePaths) { + let overlay = OverlayStore::new(paths); + let _ = overlay.note_cache_fetch( + Path::new(".wp-cow-runtime-code-pack"), + "runtime-code-pack-starting", + 0, + 0, + ); +} + +pub fn mark_runtime_code_cache_failed(paths: &ClonePaths) { + let overlay = OverlayStore::new(paths); + let _ = overlay.note_cache_fetch( + Path::new(".wp-cow-runtime-code-pack"), + "runtime-code-pack-error", + 0, + 0, + ); +} + +#[cfg(test)] +pub fn runtime_code_pack_roots(manifest: &Manifest) -> Vec { + runtime_code_pack_roots_with_admin(manifest, runtime_code_pack_include_admin()) +} + +fn runtime_code_pack_roots_with_admin(manifest: &Manifest, include_admin: bool) -> Vec { + let mut roots = BTreeSet::new(); + for file in ROOT_RUNTIME_FILES { + roots.insert(PathBuf::from(file)); + } + + roots.insert(PathBuf::from("wp-includes")); + if include_admin { + roots.insert(PathBuf::from("wp-admin")); + } + roots.insert(PathBuf::from("wp-content/mu-plugins")); + roots.insert(PathBuf::from("wp-content/languages")); + + for theme in [&manifest.probe.template, &manifest.probe.stylesheet] { + if let Some(root) = theme_runtime_root(theme) { + roots.insert(root); + } + } + + if plugins_enabled_for_runtime() { + for plugin in manifest + .probe + .active_plugins + .iter() + .chain(manifest.probe.active_sitewide_plugins.iter()) + { + if let Some(root) = plugin_runtime_root(plugin) { + roots.insert(root); + } + } + } + + roots + .into_iter() + .filter(|root| !is_upload_path(root) && root != Path::new("wp-config.php")) + .collect() +} + +fn theme_runtime_root(theme: &str) -> Option { + let theme = clean_segment(theme)?; + Some(PathBuf::from("wp-content/themes").join(theme)) +} + +fn plugin_runtime_root(plugin: &str) -> Option { + let 
clean = clean_rel(plugin)?; + if clean.as_os_str().is_empty() || is_upload_path(&clean) { + return None; + } + + let mut components = clean.components(); + let first = components.next()?; + if components.next().is_some() { + Some(PathBuf::from("wp-content/plugins").join(first.as_os_str())) + } else { + Some(PathBuf::from("wp-content/plugins").join(clean)) + } +} + +fn clean_segment(value: &str) -> Option { + if value.is_empty() + || value.contains('/') + || value.contains('\\') + || value == "." + || value == ".." + { + return None; + } + Some(value.to_string()) +} + +fn clean_rel(value: &str) -> Option { + OverlayStore::clean_rel(value).ok() +} + +fn is_upload_path(path: &Path) -> bool { + path.starts_with(Path::new("wp-content/uploads")) +} + +pub fn runtime_code_pack_enabled() -> bool { + env_bool("WPCOW_RUNTIME_CODE_PACK", true) +} + +fn runtime_code_pack_include_admin() -> bool { + env_bool("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", false) +} + +fn plugins_enabled_for_runtime() -> bool { + env_bool("WPCOW_ENABLE_PLUGINS", true) +} + +fn runtime_code_pack_max_bytes() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_MAX_MB", 256).saturating_mul(1024 * 1024) +} + +fn runtime_code_pack_max_file_bytes() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_MAX_FILE_MB", 8).saturating_mul(1024 * 1024) +} + +fn runtime_code_pack_max_files() -> u64 { + env_u64("WPCOW_RUNTIME_CODE_PACK_MAX_FILES", 20_000) +} + +fn env_u64(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|raw| raw.parse::().ok()) + .unwrap_or(default) +} + +fn env_bool(name: &str, default: bool) -> bool { + std::env::var(name) + .ok() + .map(|raw| match raw.to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => true, + "0" | "false" | "no" | "off" => false, + _ => default, + }) + .unwrap_or(default) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::Probe; + use std::sync::{Mutex, OnceLock}; + + static ENV_LOCK: OnceLock> = OnceLock::new(); + + #[test] + fn 
runtime_code_roots_are_bounded_to_core_theme_and_active_plugins() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_plugins = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + let old_admin = std::env::var_os("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN"); + std::env::set_var("WPCOW_ENABLE_PLUGINS", "1"); + std::env::set_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", "1"); + + let manifest = Manifest::new( + "example".to_string(), + "example".to_string(), + "/remote".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + template: "parent".to_string(), + stylesheet: "child".to_string(), + active_plugins: vec![ + "woocommerce/woocommerce.php".to_string(), + "hello.php".to_string(), + "../escape/escape.php".to_string(), + ], + active_sitewide_plugins: vec!["network/network.php".to_string()], + ..Probe::default() + }, + ); + + let roots = runtime_code_pack_roots(&manifest); + assert!(roots.contains(&PathBuf::from("wp-includes"))); + assert!(roots.contains(&PathBuf::from("wp-admin"))); + assert!(roots.contains(&PathBuf::from("wp-content/themes/parent"))); + assert!(roots.contains(&PathBuf::from("wp-content/themes/child"))); + assert!(roots.contains(&PathBuf::from("wp-content/plugins/woocommerce"))); + assert!(roots.contains(&PathBuf::from("wp-content/plugins/hello.php"))); + assert!(roots.contains(&PathBuf::from("wp-content/plugins/network"))); + assert!(!roots + .iter() + .any(|root| root.starts_with("wp-content/uploads"))); + assert!(!roots + .iter() + .any(|root| root.to_string_lossy().contains(".."))); + assert!(!roots.contains(&PathBuf::from("wp-config.php"))); + + match old_plugins { + Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), + None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), + } + match old_admin { + Some(value) => std::env::set_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", value), + None => 
std::env::remove_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN"), + } + } + + #[test] + fn runtime_code_roots_respect_disabled_plugins() { + let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); + let old_plugins = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + std::env::set_var("WPCOW_ENABLE_PLUGINS", "0"); + + let manifest = Manifest::new( + "example".to_string(), + "example".to_string(), + "/remote".to_string(), + "https://example.com".to_string(), + "http://example.test".to_string(), + Probe { + table_prefix: "wp_".to_string(), + active_plugins: vec!["woocommerce/woocommerce.php".to_string()], + ..Probe::default() + }, + ); + + let roots = runtime_code_pack_roots(&manifest); + assert!(!roots.contains(&PathBuf::from("wp-content/plugins/woocommerce"))); + assert!(roots.contains(&PathBuf::from("wp-content/mu-plugins"))); + + match old_plugins { + Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), + None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), + } + } +} From e681a3e159bd6e227cfa67a7576ca6d5de7e7d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 5 May 2026 14:09:50 +0200 Subject: [PATCH 34/39] Add automatic plugin admission for remote COW clones --- experiments/remote-wp-cow/.env.example | 5 + experiments/remote-wp-cow/README.md | 22 +- experiments/remote-wp-cow/compose.yaml | 5 + .../scripts/live-site-acceptance.sh | 36 +- .../remote-wp-cow/scripts/strict-harness.sh | 10 + experiments/remote-wp-cow/src/generate.rs | 211 ++++++++++- experiments/remote-wp-cow/src/main.rs | 1 + .../remote-wp-cow/src/plugin_policy.rs | 236 ++++++++++++ experiments/remote-wp-cow/src/run.rs | 356 +++++++++++++++++- .../remote-wp-cow/src/runtime_cache.rs | 26 +- 10 files changed, 877 insertions(+), 31 deletions(-) create mode 100644 experiments/remote-wp-cow/src/plugin_policy.rs diff --git a/experiments/remote-wp-cow/.env.example b/experiments/remote-wp-cow/.env.example index d556919f..30c45414 100644 --- 
a/experiments/remote-wp-cow/.env.example +++ b/experiments/remote-wp-cow/.env.example @@ -37,7 +37,12 @@ WPCOW_REMOTE_QUERY_CACHE=1 WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS=5000 # Keep arbitrary production plugins disabled unless explicitly debugging them. WPCOW_ENABLE_PLUGINS=0 +WPCOW_PLUGIN_MODE=auto +WPCOW_PLUGIN_ADMISSION=1 +WPCOW_PLUGIN_ADMISSION_DELAY_SECS=20 +WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS=15 WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0 +WPCOW_PHP_DISABLE_FUNCTIONS=exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec WPCOW_CACHE_MAX_FILE_MB=64 WPCOW_REMOTE_STAT_PREFETCH_MAX_KB=0 WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB=0 diff --git a/experiments/remote-wp-cow/README.md b/experiments/remote-wp-cow/README.md index 320dddbb..72251aef 100644 --- a/experiments/remote-wp-cow/README.md +++ b/experiments/remote-wp-cow/README.md @@ -211,18 +211,26 @@ can make cold start worse when WordPress stats assets it will not read during render. These knobs never recurse into uploads and never fetch CSS, JS, or media unless the browser asks for them. Remote plugin and language directories stay visible through the lazy lower -layer by default so the local site can render the same active code as the -remote site. Set `WPCOW_ENABLE_PLUGINS=0` only when you need to suppress active -plugins during testing; files still remain lazy and are not copied up front. +layer, but plugin execution defaults to policy mode. With +`WPCOW_PLUGIN_MODE=auto`, the generated safety mu-plugin starts with no +production plugins enabled. After the first successful local render, the daemon +tries active plugins one at a time using a bounded PHP smoke boot +(`WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS`) and records the result in +`run/plugin-policy.json`. Admitted plugins are enabled on later requests; +failing plugins are quarantined locally. 
Set `WPCOW_PLUGIN_MODE=off` to suppress +all plugins, or `WPCOW_PLUGIN_MODE=full` / `WPCOW_ENABLE_PLUGINS=1` only when +you intentionally want every active production plugin to run in the clone. Because active plugins are production code, the launched PHP runtime also disables common side-effect escape hatches by default: process spawning, `mail()`, raw socket clients, and URL-based includes. That is in addition to the -mu-plugin guards for WordPress mail and HTTP APIs. The generated DB drop-in -still needs local HTTP for daemon control calls, so direct plugin cURL or URL -file-wrapper calls are not fully sandboxed yet. Set +mu-plugin guards for WordPress mail and HTTP APIs. The disabled PHP function +list is configurable with `WPCOW_PHP_DISABLE_FUNCTIONS`; set it to `0` only for +debugging. The generated DB drop-in still needs local HTTP for daemon control +calls, so this is not a kernel sandbox. Set `WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=1` only when you intentionally want to -let plugin code spawn local processes or use raw sockets. +let plugin code spawn local processes, send raw socket traffic, or bypass these +PHP-level guards. 
The lab uses bounded request timeouts so a bad remote DB query, unreachable SSH host, or slow remote file read should fail visibly instead of leaving the diff --git a/experiments/remote-wp-cow/compose.yaml b/experiments/remote-wp-cow/compose.yaml index 7590a064..891b56a9 100644 --- a/experiments/remote-wp-cow/compose.yaml +++ b/experiments/remote-wp-cow/compose.yaml @@ -44,7 +44,12 @@ services: WPCOW_REMOTE_QUERY_CACHE: "${WPCOW_REMOTE_QUERY_CACHE:-1}" WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS: "${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" WPCOW_ENABLE_PLUGINS: "${WPCOW_ENABLE_PLUGINS:-0}" + WPCOW_PLUGIN_MODE: "${WPCOW_PLUGIN_MODE:-auto}" + WPCOW_PLUGIN_ADMISSION: "${WPCOW_PLUGIN_ADMISSION:-1}" + WPCOW_PLUGIN_ADMISSION_DELAY_SECS: "${WPCOW_PLUGIN_ADMISSION_DELAY_SECS:-20}" + WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS: "${WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS:-15}" WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS: "${WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS:-0}" + WPCOW_PHP_DISABLE_FUNCTIONS: "${WPCOW_PHP_DISABLE_FUNCTIONS:-exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec}" WPCOW_FUSE_TTL_SECS: "${WPCOW_FUSE_TTL_SECS:-60}" WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" diff --git a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh index aeb72a35..4b70e76a 100755 --- a/experiments/remote-wp-cow/scripts/live-site-acceptance.sh +++ b/experiments/remote-wp-cow/scripts/live-site-acceptance.sh @@ -96,6 +96,15 @@ http_body() { esac } +deny_runtime_error_body() { + local file="$1" + local label="$2" + if rg -qi 'Fatal error|There has been a critical error|WordPress › Error|WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error|wp-cow did not load the remote site' "$file"; then + sed -n '1,100p' "$file" >&2 + fail "$label 
returned installer, fatal error, or wp-cow runtime error" + fi +} + mysql_exec() { mysql --protocol=TCP -h127.0.0.1 -P33071 -uroot "$@" } @@ -298,6 +307,10 @@ WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS="${WPCOW_REMOTE_QUERY_CACHE_MAX_ROWS:-5000}" \ WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ +WPCOW_PLUGIN_MODE="${WPCOW_PLUGIN_MODE:-auto}" \ +WPCOW_PLUGIN_ADMISSION="${WPCOW_PLUGIN_ADMISSION:-1}" \ +WPCOW_PLUGIN_ADMISSION_DELAY_SECS="${WPCOW_PLUGIN_ADMISSION_DELAY_SECS:-20}" \ +WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS="${WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS:-15}" \ WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ HOME="$SSH_HOME" \ "$WP_COW_BIN" serve \ @@ -324,10 +337,7 @@ http_body "$LOCAL_URL/" "$first_splash" 10 || { tail -n 200 "$SERVE_LOG" >&2 || true fail "first splash/progress request failed" } -if rg -qi 'WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error|wp-cow did not load the remote site' "$first_splash"; then - sed -n '1,80p' "$first_splash" >&2 - fail "first splash request returned installer or wp-cow runtime error" -fi +deny_runtime_error_body "$first_splash" "first splash request" pack_wait="${WPCOW_RUNTIME_CODE_PACK_WAIT_SECS:-120}" pack_started="$(date +%s)" progress_path="$STATE_DIR/clones/$NAME/file-cache/progress.json" @@ -359,10 +369,7 @@ http_body "$LOCAL_URL/?__wp_cow_bypass_splash=1" "$first_body" "$actual_timeout" tail -n 200 "$SERVE_LOG" >&2 || true fail "first WordPress request failed" } -if rg -qi 'WordPress.*Installation|wp-admin/install.php|wp-cow DB/runtime error|wp-cow did not load the remote site' "$first_body"; then - sed -n '1,80p' "$first_body" >&2 - fail "first request returned installer or wp-cow runtime error" -fi +deny_runtime_error_body "$first_body" "first request" if [ -n "$EXPECT_TEXT" ]; then rg -q "$EXPECT_TEXT" 
"$first_body" || fail "first response did not contain WPCOW_EXPECT_TEXT=$EXPECT_TEXT" fi @@ -370,6 +377,7 @@ fi second_body="$WORK_DIR/second.html" http_body "$LOCAL_URL/?__wp_cow_bypass_splash=1" "$second_body" "${WPCOW_SECOND_TIMEOUT_SECS:-60}" || fail "second cached WordPress request failed" +deny_runtime_error_body "$second_body" "second cached WordPress request" php_create="$WORK_DIR/create-local-page.php" cat > "$php_create" <<'PHP' @@ -407,6 +415,7 @@ post_id="$(sed -n 's/^WPCOW_POST_ID=//p' "$WORK_DIR/create-local-page.out" | tai local_body="$WORK_DIR/local-page.html" http_body "$LOCAL_URL/?p=$post_id&__wp_cow_bypass_splash=1" "$local_body" 30 || fail "local-only page did not render" +deny_runtime_error_body "$local_body" "local-only page request" rg -q "$TITLE" "$local_body" || fail "local-only page response did not contain its title" after_remote="$(remote_post_count "$TITLE" | tr -d '[:space:]')" @@ -453,6 +462,10 @@ WPCOW_MATERIALIZE_OPTIONS_TABLE="${WPCOW_MATERIALIZE_OPTIONS_TABLE:-1}" \ WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS="${WPCOW_REMOTE_FILE_HELPER_TIMEOUT_SECS:-2}" \ WPCOW_REMOTE_STAT_PREFETCH_MAX_KB="${WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0}" \ WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB="${WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0}" \ +WPCOW_PLUGIN_MODE="${WPCOW_PLUGIN_MODE:-auto}" \ +WPCOW_PLUGIN_ADMISSION="${WPCOW_PLUGIN_ADMISSION:-1}" \ +WPCOW_PLUGIN_ADMISSION_DELAY_SECS="${WPCOW_PLUGIN_ADMISSION_DELAY_SECS:-20}" \ +WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS="${WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS:-15}" \ WPCOW_PHP_WORKERS="${WPCOW_PHP_WORKERS:-1}" \ HOME="$SSH_HOME" \ "$WP_COW_BIN" run "$NAME" \ @@ -469,6 +482,7 @@ wait_for_tcp "$host" "$port" 30 || { offline_body="$WORK_DIR/offline-local-page.html" http_body "$LOCAL_URL/?p=$post_id&__wp_cow_bypass_splash=1" "$offline_body" 30 || fail "offline local-only page did not render" +deny_runtime_error_body "$offline_body" "offline local-only page request" rg -q "$TITLE" "$offline_body" || fail "offline refresh did 
not use local materialized post" login_body="$WORK_DIR/login.html" @@ -486,6 +500,7 @@ case "$login_status" in 2*|3*) ;; *) fail "local admin login returned HTTP $login_status" ;; esac +deny_runtime_error_body "$login_body" "local admin login" rg -q 'wordpress_logged_in' "$COOKIE_JAR" || fail "local admin login did not set wordpress_logged_in cookie" admin_body="$WORK_DIR/admin.html" @@ -501,6 +516,11 @@ esac if rg -qi ']+id="loginform"|name="loginform"' "$admin_body"; then fail "wp-admin still shows login form after local admin login" fi +deny_runtime_error_body "$admin_body" "wp-admin" +if ! rg -q 'id="wpbody-content"|id="dashboard-widgets"|wp-admin-bar' "$admin_body"; then + sed -n '1,120p' "$admin_body" >&2 + fail "wp-admin response did not look like an authenticated dashboard" +fi cache_files="$(find "$STATE_DIR/clones/$NAME/file-cache" -type f | wc -l | tr -d ' ')" cache_bytes="$(du -sb "$STATE_DIR/clones/$NAME/file-cache" | awk '{print $1}')" diff --git a/experiments/remote-wp-cow/scripts/strict-harness.sh b/experiments/remote-wp-cow/scripts/strict-harness.sh index 67ec78d2..9b5ea52d 100755 --- a/experiments/remote-wp-cow/scripts/strict-harness.sh +++ b/experiments/remote-wp-cow/scripts/strict-harness.sh @@ -46,6 +46,8 @@ run_exact_ignored_test remote::tests::stat_prefetch_returns_small_file_bytes_fro run_exact_ignored_test remote::tests::prefetch_dir_batches_only_runtime_file_types run_exact_ignored_test remote::tests::runtime_code_pack_streams_bounded_runtime_files run_exact_test cli::tests::offline_core_runtime_cache_is_bounded_to_wordpress_core +run_exact_test plugin_policy::tests::policy_starts_auto_with_no_allowed_plugins +run_exact_test plugin_policy::tests::candidate_policy_allows_one_extra_plugin run_exact_test runtime_cache::tests::runtime_code_roots_are_bounded_to_core_theme_and_active_plugins run_exact_test runtime_cache::tests::runtime_code_roots_respect_disabled_plugins run_exact_test 
overlay::tests::cached_only_copy_up_uses_materialized_files_without_remote @@ -107,6 +109,10 @@ need_pattern src/runtime_cache.rs 'WPCOW_RUNTIME_CODE_PACK_MAX_MB' "runtime code need_pattern src/runtime_cache.rs 'warm_runtime_code_cache_with_admin' "sever path explicitly warms admin runtime" need_pattern src/runtime_cache.rs 'wp-content/uploads' "runtime code cache excludes uploads" need_pattern src/runtime_cache.rs 'active_plugins' "runtime code cache includes active plugin roots" +need_pattern src/plugin_policy.rs 'PluginPolicy' "plugin admission policy state" +need_pattern src/run.rs 'WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS' "bounded plugin admission timeout" +need_pattern src/run.rs 'WPCOW_PLUGIN_POLICY_FILE' "candidate plugin policy smoke override" +need_pattern src/generate.rs 'WPCOW_PLUGIN_POLICY_FILE' "generated plugin policy path" need_pattern src/cli.rs 'uploads/media remain lazy' "serve explains media remains lazy after runtime code pack" need_pattern src/fusefs.rs 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB' "FUSE stat path can prefetch small file bytes" need_pattern src/fusefs.rs 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB' "FUSE batches same-directory runtime files with a byte cap" @@ -136,6 +142,8 @@ need_pattern compose.yaml 'WPCOW_REMOTE_DB_HELPER: "\$\{WPCOW_REMOTE_DB_HELPER:- need_pattern compose.yaml 'WPCOW_RUNTIME_CODE_PACK: "\$\{WPCOW_RUNTIME_CODE_PACK:-1\}"' "Docker compose defaults bounded runtime code cache on" need_pattern compose.yaml 'WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN: "\$\{WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN:-0\}"' "Docker compose keeps admin pack out of frontend warmup" need_pattern compose.yaml 'WPCOW_MATERIALIZE_OPTIONS_TABLE: "\$\{WPCOW_MATERIALIZE_OPTIONS_TABLE:-1\}"' "Docker compose defaults options table materialization on" +need_pattern compose.yaml 'WPCOW_PLUGIN_MODE: "\$\{WPCOW_PLUGIN_MODE:-auto\}"' "Docker compose defaults plugin admission mode" +need_pattern compose.yaml 'WPCOW_PLUGIN_ADMISSION: "\$\{WPCOW_PLUGIN_ADMISSION:-1\}"' "Docker 
compose defaults plugin admission on" need_pattern compose.yaml 'WPCOW_REMOTE_STAT_PREFETCH_MAX_KB: "\$\{WPCOW_REMOTE_STAT_PREFETCH_MAX_KB:-0\}"' "Docker compose defaults experimental stat prefetch off" need_pattern compose.yaml 'WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB: "\$\{WPCOW_RUNTIME_SIBLING_PREFETCH_MAX_MB:-0\}"' "Docker compose defaults experimental sibling prefetch off" need_pattern .dockerignore '^/target/$' "Docker build context target exclusion" @@ -153,6 +161,8 @@ need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_MAX_FILES=20000$' "Docker la need_pattern .env.example '^WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN=0$' "Docker lab example keeps admin pack out of frontend warmup" need_pattern .env.example '^WPCOW_MATERIALIZE_OPTIONS_TABLE=1$' "Docker lab example materializes options table" need_pattern .env.example '^WPCOW_ENABLE_PLUGINS=0$' "Docker lab example keeps arbitrary production plugins disabled by default" +need_pattern .env.example '^WPCOW_PLUGIN_MODE=auto$' "Docker lab example plugin admission mode" +need_pattern .env.example '^WPCOW_PLUGIN_ADMISSION=1$' "Docker lab example plugin admission enabled" need_pattern .env.example '^WPCOW_SPLASH=1$' "Docker lab example splash default" need_pattern .env.example '^WPCOW_ALLOW_UNSAFE_PLUGIN_SIDE_EFFECTS=0$' "Docker lab example keeps PHP side-effect guards enabled" need_pattern .env.example '^WPCOW_OPCACHE_VALIDATE_TIMESTAMPS=0$' "Docker lab example keeps warm render OPcache fast path enabled" diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index f9e91de7..22fda73b 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -4,6 +4,7 @@ use std::path::Path; use crate::config::{ClonePaths, Manifest}; use crate::overlay::OPAQUE_MARKER; +use crate::plugin_policy; pub const ROUTER_BASENAME: &str = ".wp-cow-router.php"; @@ -23,6 +24,7 @@ pub fn write_wordpress_overrides(paths: &ClonePaths, manifest: &Manifest) -> Res 
safety_mu_plugin_php(), )?; fs::write(paths.generated.join("router.php"), router)?; + plugin_policy::write_initial_policy(paths, manifest)?; Ok(()) } @@ -56,6 +58,7 @@ define( 'WPCOW_CLONE', {clone_name} ); define( 'WPCOW_CONTROL_URL', {control_url} ); define( 'WPCOW_QUERY_CACHE_DIR', {query_cache_dir} ); define( 'WPCOW_DB_STATE_FILE', {db_state_file} ); +define( 'WPCOW_PLUGIN_POLICY_FILE', {plugin_policy_file} ); define( 'FS_METHOD', 'direct' ); define( 'DISABLE_WP_CRON', true ); @@ -93,6 +96,8 @@ require_once ABSPATH . 'wp-settings.php'; control_url = php_string(&manifest.control_url), query_cache_dir = php_string(paths.db.join("query-cache").to_string_lossy().as_ref()), db_state_file = php_string(paths.db.join("state.json").to_string_lossy().as_ref()), + plugin_policy_file = + php_string(plugin_policy::policy_path(paths).to_string_lossy().as_ref()), ) } @@ -610,11 +615,124 @@ if ( ! defined( 'DISABLE_WP_CRON' ) ) { define( 'DISABLE_WP_CRON', true ); } -if ( '0' === getenv( 'WPCOW_ENABLE_PLUGINS' ) ) { - add_filter( 'option_active_plugins', '__return_empty_array', PHP_INT_MAX ); - add_filter( 'site_option_active_sitewide_plugins', '__return_empty_array', PHP_INT_MAX ); +function wp_cow_plugin_mode() { + $mode = strtolower( trim( (string) getenv( 'WPCOW_PLUGIN_MODE' ) ) ); + if ( '' !== $mode ) { + return $mode; + } + + $legacy = strtolower( trim( (string) getenv( 'WPCOW_ENABLE_PLUGINS' ) ) ); + if ( in_array( $legacy, array( '1', 'true', 'yes', 'on', 'full', 'enabled' ), true ) ) { + return 'full'; + } + if ( in_array( $legacy, array( '0', 'false', 'no', 'off', 'none', 'disabled', 'disable' ), true ) ) { + return 'off'; + } + + return 'auto'; +} + +function wp_cow_plugin_policy_file() { + $env = (string) getenv( 'WPCOW_PLUGIN_POLICY_FILE' ); + if ( '' !== $env ) { + return $env; + } + if ( defined( 'WPCOW_PLUGIN_POLICY_FILE' ) ) { + return WPCOW_PLUGIN_POLICY_FILE; + } + return ''; +} + +function wp_cow_plugin_policy() { + static $policy = null; + if ( null !== 
$policy ) { + return $policy; + } + + $policy = array( 'allow' => array() ); + $file = wp_cow_plugin_policy_file(); + if ( '' === $file || ! is_readable( $file ) ) { + return $policy; + } + + $decoded = json_decode( (string) file_get_contents( $file ), true ); + if ( is_array( $decoded ) ) { + $policy = array_merge( $policy, $decoded ); + } + + return $policy; +} + +function wp_cow_allowed_plugins() { + $policy = wp_cow_plugin_policy(); + $allowed = isset( $policy['allow'] ) && is_array( $policy['allow'] ) ? $policy['allow'] : array(); + $out = array(); + foreach ( $allowed as $plugin ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( '' !== $plugin ) { + $out[ $plugin ] = true; + } + } + return $out; +} + +function wp_cow_filter_active_plugins( $plugins ) { + $mode = wp_cow_plugin_mode(); + if ( in_array( $mode, array( 'full', 'on', 'enabled', '1', 'true', 'yes' ), true ) ) { + return $plugins; + } + if ( in_array( $mode, array( 'off', 'none', 'disabled', 'disable', '0', 'false', 'no' ), true ) ) { + return array(); + } + if ( ! is_array( $plugins ) ) { + return array(); + } + + $allowed = wp_cow_allowed_plugins(); + if ( empty( $allowed ) ) { + return array(); + } + + $filtered = array(); + foreach ( $plugins as $plugin ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( isset( $allowed[ $plugin ] ) ) { + $filtered[] = $plugin; + } + } + return $filtered; +} + +function wp_cow_filter_sitewide_plugins( $plugins ) { + $mode = wp_cow_plugin_mode(); + if ( in_array( $mode, array( 'full', 'on', 'enabled', '1', 'true', 'yes' ), true ) ) { + return $plugins; + } + if ( in_array( $mode, array( 'off', 'none', 'disabled', 'disable', '0', 'false', 'no' ), true ) ) { + return array(); + } + if ( ! 
is_array( $plugins ) ) { + return array(); + } + + $allowed = wp_cow_allowed_plugins(); + if ( empty( $allowed ) ) { + return array(); + } + + $filtered = array(); + foreach ( $plugins as $plugin => $value ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( isset( $allowed[ $plugin ] ) ) { + $filtered[ $plugin ] = $value; + } + } + return $filtered; } +add_filter( 'option_active_plugins', 'wp_cow_filter_active_plugins', PHP_INT_MAX ); +add_filter( 'site_option_active_sitewide_plugins', 'wp_cow_filter_sitewide_plugins', PHP_INT_MAX ); + add_filter( 'validate_current_theme', '__return_false', PHP_INT_MAX ); add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { @@ -1063,6 +1181,7 @@ mod tests { assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); assert!(php.contains("WPCOW_DB_STATE_FILE")); + assert!(php.contains("WPCOW_PLUGIN_POLICY_FILE")); assert!(!php.contains("WPCOW_REMOTE_DB_NAME")); assert!(!php.contains("WPCOW_REMOTE_DB_USER")); assert!(!php.contains("WPCOW_REMOTE_DB_PASSWORD")); @@ -1196,10 +1315,90 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { assert!(php.contains("pre_http_request")); assert!(php.contains("validate_current_theme")); assert!(php.contains("WPCOW_ENABLE_PLUGINS")); - assert!(php.contains("'0' === getenv( 'WPCOW_ENABLE_PLUGINS' )")); + assert!(php.contains("WPCOW_PLUGIN_POLICY_FILE")); + assert!(php.contains("wp_cow_filter_active_plugins")); + assert!(php.contains("wp_cow_allowed_plugins")); assert!(php.contains("option_active_plugins")); } + #[test] + fn safety_plugin_auto_mode_allows_only_policy_admitted_plugins() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP plugin policy test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let safety = temp.path().join("wp-cow-safety.php"); + let policy = temp.path().join("plugin-policy.json"); + let check = 
temp.path().join("check.php"); + fs::write(&safety, safety_mu_plugin_php()).unwrap(); + fs::write( + &policy, + r#"{"version":1,"mode":"auto","active":["akismet/akismet.php","woocommerce/woocommerce.php"],"allow":["woocommerce/woocommerce.php"],"quarantine":{"akismet/akismet.php":"timeout"}}"#, + ) + .unwrap(); + fs::write( + &check, + format!( + r#" 1, 'woocommerce/woocommerce.php' => 2 ) +); +if ( $sitewide !== array( 'woocommerce/woocommerce.php' => 2 ) ) {{ + fwrite( STDERR, 'unexpected sitewide plugin filter: ' . json_encode( $sitewide ) . PHP_EOL ); + exit( 1 ); +}} +putenv( 'WPCOW_PLUGIN_MODE=full' ); +$full = call_user_func( + $filters['option_active_plugins'], + array( 'akismet/akismet.php', 'woocommerce/woocommerce.php' ) +); +if ( array_values( $full ) !== array( 'akismet/akismet.php', 'woocommerce/woocommerce.php' ) ) {{ + fwrite( STDERR, 'full mode did not preserve plugins: ' . json_encode( $full ) . PHP_EOL ); + exit( 1 ); +}} +"#, + policy = php_single_quoted_path(&policy), + safety = php_single_quoted_path(&safety) + ), + ) + .unwrap(); + + let output = Command::new("php").arg(&check).output().unwrap(); + assert!( + output.status.success(), + "PHP plugin policy check failed: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + #[test] fn generated_overrides_keep_remote_runtime_dirs_visible_by_default() { let temp = tempfile::tempdir().unwrap(); @@ -1222,6 +1421,10 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { write_wordpress_overrides(&paths, &manifest()).unwrap(); + assert!( + crate::plugin_policy::policy_path(&paths).is_file(), + "generated overrides should create the initial plugin admission policy" + ); assert!( !plugins.join(crate::overlay::OPAQUE_MARKER).exists(), "plugin files should remain backed by the lazy remote lower layer by default" diff --git a/experiments/remote-wp-cow/src/main.rs b/experiments/remote-wp-cow/src/main.rs index 300e3b2c..0be6ffb1 
100644 --- a/experiments/remote-wp-cow/src/main.rs +++ b/experiments/remote-wp-cow/src/main.rs @@ -6,6 +6,7 @@ mod fusefs; mod generate; mod mysql_proxy; mod overlay; +mod plugin_policy; mod remote; mod row_cow; mod run; diff --git a/experiments/remote-wp-cow/src/plugin_policy.rs b/experiments/remote-wp-cow/src/plugin_policy.rs new file mode 100644 index 00000000..48cc940e --- /dev/null +++ b/experiments/remote-wp-cow/src/plugin_policy.rs @@ -0,0 +1,236 @@ +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::config::{ClonePaths, Manifest}; + +pub const POLICY_VERSION: u32 = 1; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PluginPolicy { + pub version: u32, + pub mode: String, + #[serde(default)] + pub active: Vec<String>, + #[serde(default)] + pub allow: Vec<String>, + #[serde(default)] + pub quarantine: BTreeMap<String, String>, +} + +impl PluginPolicy { + pub fn new(active: &[String]) -> Self { + Self { + version: POLICY_VERSION, + mode: "auto".to_string(), + active: normalized_plugins(active.iter().cloned()), + allow: Vec::new(), + quarantine: BTreeMap::new(), + } + } + + pub fn normalize(mut self, active: &[String]) -> Self { + self.version = POLICY_VERSION; + if self.mode.trim().is_empty() { + self.mode = "auto".to_string(); + } + self.active = normalized_plugins(active.iter().cloned()); + let active_set: BTreeSet<_> = self.active.iter().cloned().collect(); + self.allow = normalized_plugins( + self.allow + .into_iter() + .filter(|plugin| active_set.contains(plugin)), + ); + self.quarantine + .retain(|plugin, _| active_set.contains(plugin)); + self + } + + pub fn allows(&self, plugin: &str) -> bool { + self.allow.iter().any(|allowed| allowed == plugin) + } + + pub fn allow_plugin(&mut self, plugin: &str) { + if !self.allows(plugin) { + self.allow.push(plugin.to_string()); + self.allow.sort(); + } + self.quarantine.remove(plugin); + } + +
pub fn quarantine_plugin(&mut self, plugin: &str, reason: impl Into<String>) { + self.quarantine.insert(plugin.to_string(), reason.into()); + self.allow.retain(|allowed| allowed != plugin); + } +} + +pub fn policy_path(paths: &ClonePaths) -> PathBuf { + paths.run.join("plugin-policy.json") +} + +pub fn candidate_policy_path(paths: &ClonePaths, plugin: &str) -> PathBuf { + paths.run.join(format!( + "plugin-policy-candidate-{}.json", + sanitize_plugin_name(plugin) + )) +} + +pub fn write_initial_policy(paths: &ClonePaths, manifest: &Manifest) -> Result<()> { + let path = policy_path(paths); + let active = active_plugins_for_policy(manifest); + let policy = load_policy_or_new(&path, &active)?; + write_policy_atomic(&path, &policy) +} + +pub fn active_plugins_for_policy(manifest: &Manifest) -> Vec<String> { + manifest + .probe + .active_plugins + .iter() + .chain(manifest.probe.active_sitewide_plugins.iter()) + .cloned() + .collect() +} + +pub fn load_policy_or_new(path: &Path, active: &[String]) -> Result<PluginPolicy> { + if !path.is_file() { + return Ok(PluginPolicy::new(active)); + } + + let bytes = fs::read(path).with_context(|| format!("read {}", path.display()))?; + let policy = serde_json::from_slice::<PluginPolicy>(&bytes) + .with_context(|| format!("parse {}", path.display()))?; + Ok(policy.normalize(active)) +} + +pub fn write_policy_atomic(path: &Path, policy: &PluginPolicy) -> Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + let tmp = path.with_extension(format!( + "{}.tmp", + path.extension() + .and_then(|extension| extension.to_str()) + .unwrap_or("json") + )); + let json = serde_json::to_vec_pretty(policy)?; + fs::write(&tmp, [json, b"\n".to_vec()].concat()) + .with_context(|| format!("write {}", tmp.display()))?; + fs::rename(&tmp, path).with_context(|| { + format!( + "replace {} with {}", + path.display(), + tmp.file_name() + .map(|name| name.to_string_lossy()) + .unwrap_or_default() + ) + })?; + Ok(()) +} + +pub fn policy_with_candidate(base:
&PluginPolicy, plugin: &str) -> PluginPolicy { + let mut policy = base.clone(); + policy.allow_plugin(plugin); + policy +} + +fn normalized_plugins(plugins: impl IntoIterator<Item = String>) -> Vec<String> { + plugins + .into_iter() + .map(|plugin| plugin.trim().trim_start_matches('/').to_string()) + .filter(|plugin| { + !plugin.is_empty() + && !plugin.contains("..") + && !plugin.starts_with('/') + && plugin.ends_with(".php") + }) + .collect::<BTreeSet<_>>() + .into_iter() + .collect() +} + +fn sanitize_plugin_name(plugin: &str) -> String { + let mut out = String::new(); + for ch in plugin.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if matches!(ch, '/' | '-' | '_' | '.') { + out.push('-'); + } + } + let out = out.trim_matches('-').to_string(); + if out.is_empty() { + "plugin".to_string() + } else { + out + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn policy_starts_auto_with_no_allowed_plugins() { + let active = vec![ + "woocommerce/woocommerce.php".to_string(), + "/bad-prefix/plugin.php".to_string(), + "../escape.php".to_string(), + ]; + + let policy = PluginPolicy::new(&active); + + assert_eq!(policy.mode, "auto"); + assert_eq!( + policy.active, + vec!["bad-prefix/plugin.php", "woocommerce/woocommerce.php"] + ); + assert!(policy.allow.is_empty()); + } + + #[test] + fn existing_policy_preserves_allowed_active_plugins_only() { + let temp = tempfile::tempdir().unwrap(); + let path = temp.path().join("plugin-policy.json"); + let mut policy = PluginPolicy::new(&[ + "akismet/akismet.php".to_string(), + "woocommerce/woocommerce.php".to_string(), + ]); + policy.allow_plugin("woocommerce/woocommerce.php"); + policy.allow_plugin("missing/missing.php"); + policy.quarantine_plugin("akismet/akismet.php", "timeout"); + write_policy_atomic(&path, &policy).unwrap(); + + let loaded = load_policy_or_new( + &path, + &[ + "akismet/akismet.php".to_string(), + "hello/hello.php".to_string(), + ], + ) + .unwrap(); + + assert_eq!( + loaded.active, +
vec!["akismet/akismet.php", "hello/hello.php"] + ); + assert!(loaded.allow.is_empty()); + assert_eq!( + loaded + .quarantine + .get("akismet/akismet.php") + .map(String::as_str), + Some("timeout") + ); + } + + #[test] + fn candidate_policy_allows_one_extra_plugin() { + let base = PluginPolicy::new(&["woocommerce/woocommerce.php".to_string()]); + let candidate = policy_with_candidate(&base, "woocommerce/woocommerce.php"); + + assert_eq!(candidate.allow, vec!["woocommerce/woocommerce.php"]); + } +} diff --git a/experiments/remote-wp-cow/src/run.rs b/experiments/remote-wp-cow/src/run.rs index e76e833c..6f0bcaef 100644 --- a/experiments/remote-wp-cow/src/run.rs +++ b/experiments/remote-wp-cow/src/run.rs @@ -2,17 +2,18 @@ use anyhow::{anyhow, Context, Result}; use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; -use std::process::{Child, Command, Stdio}; +use std::process::{Child, Command, Output, Stdio}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread::{self, JoinHandle}; -use std::time::Duration; +use std::time::{Duration, Instant}; use crate::config::{self, ClonePaths, Manifest}; use crate::control; use crate::fusefs; use crate::generate::ROUTER_BASENAME; use crate::mysql_proxy; +use crate::plugin_policy; use crate::remote::RemoteClient; use crate::runtime_cache; @@ -162,6 +163,12 @@ fn run_site_until_shutdown( &options.http_addr, )?) 
}; + let plugin_admission_thread = spawn_plugin_admission_if_enabled( + manifest.clone(), + paths.clone(), + options.mountpoint.clone(), + shutdown.clone(), + ); eprintln!( "wp-cow running clone '{}' at {} from {}", @@ -184,6 +191,13 @@ fn run_site_until_shutdown( let _ = child.kill(); let _ = child.wait(); } + if let Some(handle) = plugin_admission_thread { + match handle.join() { + Ok(Ok(())) => {} + Ok(Err(err)) => eprintln!("wp-cow plugin admission stopped: {err:#}"), + Err(_) => eprintln!("wp-cow plugin admission thread panicked"), + } + } if let Some(child) = db_tunnel.as_mut() { let _ = child.kill(); let _ = child.wait(); @@ -277,19 +291,40 @@ fn php_side_effect_guards_enabled() -> bool { ) } -fn php_disabled_functions() -> &'static str { +fn default_php_disabled_functions() -> &'static str { "exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec,mail,fsockopen,pfsockopen,stream_socket_client,curl_exec,curl_multi_exec" } +fn php_disabled_functions() -> String { + match std::env::var("WPCOW_PHP_DISABLE_FUNCTIONS") { + Ok(raw) => { + let raw = raw.trim(); + if raw.is_empty() + || matches!( + raw.to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" | "none" + ) + { + String::new() + } else { + raw.to_string() + } + } + Err(_) => default_php_disabled_functions().to_string(), + } +} + fn php_safety_ini_entries() -> Vec<(&'static str, String)> { if !php_side_effect_guards_enabled() { return Vec::new(); } - vec![ - ("disable_functions", php_disabled_functions().to_string()), - ("allow_url_include", "0".to_string()), - ] + let disabled_functions = php_disabled_functions(); + let mut entries = vec![("allow_url_include", "0".to_string())]; + if !disabled_functions.is_empty() { + entries.insert(0, ("disable_functions", disabled_functions)); + } + entries } fn opcache_validate_timestamps() -> u64 { @@ -355,9 +390,7 @@ fn start_php_dev_server(paths: &ClonePaths, mountpoint: &Path, http_addr: &str) opcache_validate_timestamps() )) .stdin(Stdio::null()); - 
for (name, value) in php_safety_ini_entries() { - command.arg("-d").arg(format!("{name}={value}")); - } + apply_php_safety_ini_args(&mut command); command .arg("-S") .arg(http_addr) @@ -502,6 +535,309 @@ fn env_u64(name: &str, default: u64) -> u64 { .unwrap_or(default) } +fn spawn_plugin_admission_if_enabled( + manifest: Manifest, + paths: ClonePaths, + mountpoint: PathBuf, + shutdown: Arc, +) -> Option>> { + if !plugin_admission_enabled(&manifest) { + return None; + } + + Some(thread::spawn(move || { + run_plugin_admission(manifest, paths, mountpoint, shutdown) + })) +} + +fn plugin_admission_enabled(manifest: &Manifest) -> bool { + if plugin_policy::active_plugins_for_policy(manifest).is_empty() { + return false; + } + if env_is_false("WPCOW_PLUGIN_ADMISSION") { + return false; + } + + let mode = plugin_mode_from_env(); + !matches!( + mode.as_str(), + "full" + | "on" + | "enabled" + | "1" + | "true" + | "yes" + | "off" + | "none" + | "disabled" + | "disable" + | "0" + | "false" + | "no" + ) +} + +fn plugin_mode_from_env() -> String { + let mode = std::env::var("WPCOW_PLUGIN_MODE") + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + if !mode.is_empty() { + return mode; + } + + let legacy = std::env::var("WPCOW_ENABLE_PLUGINS") + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + if matches!( + legacy.as_str(), + "1" | "true" | "yes" | "on" | "full" | "enabled" + ) { + "full".to_string() + } else if matches!( + legacy.as_str(), + "0" | "false" | "no" | "off" | "none" | "disabled" | "disable" + ) { + "off".to_string() + } else { + "auto".to_string() + } +} + +fn env_is_false(name: &str) -> bool { + std::env::var(name) + .ok() + .map(|raw| { + matches!( + raw.trim().to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" | "disabled" + ) + }) + .unwrap_or(false) +} + +fn run_plugin_admission( + manifest: Manifest, + paths: ClonePaths, + mountpoint: PathBuf, + shutdown: Arc, +) -> Result<()> { + if config::is_offline(&paths) { + return Ok(()); + } 
+ if !wait_for_first_request_ready(&paths, &shutdown) { + return Ok(()); + } + sleep_shutdown_aware( + Duration::from_secs(env_u64("WPCOW_PLUGIN_ADMISSION_DELAY_SECS", 20)), + &shutdown, + ); + if shutdown.load(Ordering::SeqCst) || config::is_offline(&paths) { + return Ok(()); + } + + let policy_path = plugin_policy::policy_path(&paths); + let timeout = Duration::from_secs(env_u64("WPCOW_PLUGIN_ADMISSION_TIMEOUT_SECS", 15).max(1)); + let active_plugins = plugin_policy::active_plugins_for_policy(&manifest); + let mut policy = plugin_policy::load_policy_or_new(&policy_path, &active_plugins)?; + + for plugin in &policy.active.clone() { + if shutdown.load(Ordering::SeqCst) || config::is_offline(&paths) { + break; + } + policy = plugin_policy::load_policy_or_new(&policy_path, &active_plugins)?; + if policy.allows(plugin) || policy.quarantine.contains_key(plugin) { + continue; + } + + eprintln!("wp-cow admitting plugin candidate '{plugin}'"); + let candidate = plugin_policy::policy_with_candidate(&policy, plugin); + let candidate_path = plugin_policy::candidate_policy_path(&paths, plugin); + plugin_policy::write_policy_atomic(&candidate_path, &candidate)?; + + let admission = run_plugin_smoke(&mountpoint, &candidate_path, timeout); + policy = plugin_policy::load_policy_or_new(&policy_path, &active_plugins)?; + match admission { + Ok(()) => { + policy.allow_plugin(plugin); + eprintln!("wp-cow admitted plugin '{plugin}'"); + } + Err(err) => { + let reason = trim_reason(&format!("{err:#}")); + policy.quarantine_plugin(plugin, reason.clone()); + eprintln!("wp-cow quarantined plugin '{plugin}': {reason}"); + } + } + plugin_policy::write_policy_atomic(&policy_path, &policy)?; + let _ = fs::remove_file(candidate_path); + } + + Ok(()) +} + +fn wait_for_first_request_ready(paths: &ClonePaths, shutdown: &AtomicBool) -> bool { + let ready_file = paths.run.join("first-request-ready.json"); + let deadline = Instant::now() + + 
Duration::from_secs(env_u64("WPCOW_PLUGIN_ADMISSION_READY_TIMEOUT_SECS", 600)); + while Instant::now() < deadline { + if shutdown.load(Ordering::SeqCst) { + return false; + } + if ready_file.is_file() { + return true; + } + thread::sleep(Duration::from_millis(250)); + } + false +} + +fn sleep_shutdown_aware(duration: Duration, shutdown: &AtomicBool) { + let deadline = Instant::now() + duration; + while Instant::now() < deadline { + if shutdown.load(Ordering::SeqCst) { + return; + } + thread::sleep(Duration::from_millis(250)); + } +} + +fn run_plugin_smoke( + mountpoint: &Path, + candidate_policy_path: &Path, + timeout: Duration, +) -> Result<()> { + let mut command = Command::new("php"); + command + .current_dir(mountpoint) + .env("WPCOW_PLUGIN_MODE", "auto") + .env("WPCOW_PLUGIN_POLICY_FILE", candidate_policy_path) + .env("WPCOW_SPLASH", "0") + .env("WPCOW_PROXY_FRONTEND", "0") + .env("WPCOW_ACTIVE_WARM_WAIT", "0") + .env("WPCOW_PLUGIN_ADMISSION_SMOKE", "1") + .arg("-d") + .arg(format!( + "max_execution_time={}", + timeout.as_secs().saturating_add(2) + )) + .arg("-d") + .arg(format!( + "default_socket_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15).min(timeout.as_secs().max(1)) + )) + .arg("-d") + .arg(format!( + "mysqlnd.net_read_timeout={}", + env_u64("WPCOW_PHP_SOCKET_TIMEOUT_SECS", 15).min(timeout.as_secs().max(1)) + )); + apply_php_safety_ini_args(&mut command); + command + .arg("-r") + .arg(plugin_smoke_php()) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let output = run_command_with_timeout(command, timeout) + .with_context(|| format!("plugin smoke timed out after {}s", timeout.as_secs()))?; + if !output.status.success() { + return Err(anyhow!( + "plugin smoke exited with status {}{}", + output.status, + output_tail(&output) + )); + } + if !String::from_utf8_lossy(&output.stdout).contains("WPCOW_PLUGIN_SMOKE_OK") { + return Err(anyhow!( + "plugin smoke did not finish cleanly{}", + output_tail(&output) + )); + 
} + Ok(()) +} + +fn plugin_smoke_php() -> &'static str { + r#" +$_SERVER['HTTP_HOST'] = '127.0.0.1'; +$_SERVER['REQUEST_METHOD'] = 'GET'; +$_SERVER['REQUEST_URI'] = '/?__wp_cow_bypass_splash=1&__wp_cow_plugin_smoke=1'; +$_SERVER['SERVER_PROTOCOL'] = 'HTTP/1.1'; +$_SERVER['DOCUMENT_ROOT'] = getcwd(); +$_SERVER['SCRIPT_NAME'] = '/index.php'; +$_SERVER['SCRIPT_FILENAME'] = getcwd() . '/index.php'; +$_GET['__wp_cow_bypass_splash'] = '1'; +ob_start(); +require getcwd() . '/index.php'; +$html = ob_get_clean(); +if ( + false !== stripos( $html, 'wp-cow DB/runtime error' ) || + false !== stripos( $html, 'wp-cow did not load the remote site' ) || + false !== stripos( $html, 'wp-admin/install.php' ) || + false !== stripos( $html, 'WordPress › Installation' ) +) { + fwrite( STDERR, $html ); + exit( 3 ); +} +echo "\nWPCOW_PLUGIN_SMOKE_OK\n"; +"# +} + +fn run_command_with_timeout(mut command: Command, timeout: Duration) -> Result { + let mut child = command.spawn().context("spawn plugin smoke PHP")?; + let started = Instant::now(); + loop { + if child.try_wait()?.is_some() { + return child + .wait_with_output() + .context("collect plugin smoke PHP output"); + } + if started.elapsed() >= timeout { + let _ = child.kill(); + let output = child + .wait_with_output() + .context("collect timed-out plugin smoke PHP output")?; + return Err(anyhow!("timed out{}", output_tail(&output))); + } + thread::sleep(Duration::from_millis(100)); + } +} + +fn output_tail(output: &Output) -> String { + let mut text = String::new(); + text.push_str(&String::from_utf8_lossy(&output.stdout)); + text.push_str(&String::from_utf8_lossy(&output.stderr)); + let text = text.trim(); + if text.is_empty() { + return String::new(); + } + let tail = text + .lines() + .rev() + .take(12) + .collect::>() + .into_iter() + .rev() + .collect::>() + .join("\n"); + format!(": {tail}") +} + +fn trim_reason(reason: &str) -> String { + let reason = reason.replace('\n', " "); + if reason.len() <= 500 { + reason + } else { + 
format!("{}...", reason.chars().take(500).collect::()) + } +} + +fn apply_php_safety_ini_args(command: &mut Command) { + for (name, value) in php_safety_ini_entries() { + command.arg("-d").arg(format!("{name}={value}")); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/experiments/remote-wp-cow/src/runtime_cache.rs b/experiments/remote-wp-cow/src/runtime_cache.rs index 0d55a415..6df595b7 100644 --- a/experiments/remote-wp-cow/src/runtime_cache.rs +++ b/experiments/remote-wp-cow/src/runtime_cache.rs @@ -200,7 +200,18 @@ fn runtime_code_pack_include_admin() -> bool { } fn plugins_enabled_for_runtime() -> bool { - env_bool("WPCOW_ENABLE_PLUGINS", true) + let mode = std::env::var("WPCOW_PLUGIN_MODE") + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + if !mode.is_empty() { + return matches!( + mode.as_str(), + "full" | "on" | "enabled" | "1" | "true" | "yes" + ); + } + + env_bool("WPCOW_ENABLE_PLUGINS", false) } fn runtime_code_pack_max_bytes() -> u64 { @@ -245,8 +256,9 @@ mod tests { fn runtime_code_roots_are_bounded_to_core_theme_and_active_plugins() { let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); let old_plugins = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + let old_mode = std::env::var_os("WPCOW_PLUGIN_MODE"); let old_admin = std::env::var_os("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN"); - std::env::set_var("WPCOW_ENABLE_PLUGINS", "1"); + std::env::set_var("WPCOW_PLUGIN_MODE", "full"); std::env::set_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", "1"); let manifest = Manifest::new( @@ -289,6 +301,10 @@ mod tests { Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), } + match old_mode { + Some(value) => std::env::set_var("WPCOW_PLUGIN_MODE", value), + None => std::env::remove_var("WPCOW_PLUGIN_MODE"), + } match old_admin { Some(value) => std::env::set_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN", value), None => 
std::env::remove_var("WPCOW_RUNTIME_CODE_PACK_INCLUDE_ADMIN"), @@ -299,6 +315,8 @@ mod tests { fn runtime_code_roots_respect_disabled_plugins() { let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap(); let old_plugins = std::env::var_os("WPCOW_ENABLE_PLUGINS"); + let old_mode = std::env::var_os("WPCOW_PLUGIN_MODE"); + std::env::remove_var("WPCOW_PLUGIN_MODE"); std::env::set_var("WPCOW_ENABLE_PLUGINS", "0"); let manifest = Manifest::new( @@ -322,5 +340,9 @@ mod tests { Some(value) => std::env::set_var("WPCOW_ENABLE_PLUGINS", value), None => std::env::remove_var("WPCOW_ENABLE_PLUGINS"), } + match old_mode { + Some(value) => std::env::set_var("WPCOW_PLUGIN_MODE", value), + None => std::env::remove_var("WPCOW_PLUGIN_MODE"), + } } } From 165b5c5320fb0766d9175bf0a845d3ab1abd323e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 6 May 2026 00:13:18 +0200 Subject: [PATCH 35/39] Fix proxied HTTPS COW clone redirects --- experiments/remote-wp-cow/src/generate.rs | 34 +++++++++++++++-- experiments/remote-wp-cow/src/overlay.rs | 46 ++++++++++++++++++++++- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 22fda73b..25453e88 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -49,11 +49,32 @@ define( 'DB_PASSWORD', {local_db_password} ); define( 'DB_HOST', {proxy_db_host} ); define( 'WPCOW_LOCAL_DB_HOST', {local_db_host} ); -define( 'WP_HOME', {local_url} ); -define( 'WP_SITEURL', {local_url} ); - $table_prefix = {table_prefix}; +$wp_cow_local_url = {local_url}; +$wp_cow_local_scheme = strtolower( (string) parse_url( $wp_cow_local_url, PHP_URL_SCHEME ) ); +$wp_cow_local_host = strtolower( (string) parse_url( $wp_cow_local_url, PHP_URL_HOST ) ); + +if ( ! 
empty( $_SERVER['HTTP_X_FORWARDED_HOST'] ) ) {{ + $wp_cow_forwarded_host = trim( explode( ',', $_SERVER['HTTP_X_FORWARDED_HOST'] )[0] ); + if ( '' !== $wp_cow_forwarded_host ) {{ + $_SERVER['HTTP_HOST'] = $wp_cow_forwarded_host; + }} +}} + +$wp_cow_request_host = strtolower( preg_replace( '/:\d+$/', '', (string) ( $_SERVER['HTTP_HOST'] ?? '' ) ) ); +if ( + ( ! empty( $_SERVER['HTTP_X_FORWARDED_PROTO'] ) && in_array( 'https', array_map( 'trim', explode( ',', strtolower( $_SERVER['HTTP_X_FORWARDED_PROTO'] ) ) ), true ) ) || + ( ! empty( $_SERVER['HTTP_X_FORWARDED_SSL'] ) && 'on' === strtolower( $_SERVER['HTTP_X_FORWARDED_SSL'] ) ) || + ( 'https' === $wp_cow_local_scheme && '' !== $wp_cow_local_host && $wp_cow_local_host === $wp_cow_request_host ) +) {{ + $_SERVER['HTTPS'] = 'on'; + $_SERVER['SERVER_PORT'] = '443'; +}} + +define( 'WP_HOME', $wp_cow_local_url ); +define( 'WP_SITEURL', $wp_cow_local_url ); + define( 'WPCOW_CLONE', {clone_name} ); define( 'WPCOW_CONTROL_URL', {control_url} ); define( 'WPCOW_QUERY_CACHE_DIR', {query_cache_dir} ); @@ -1176,7 +1197,12 @@ mod tests { assert!(php.contains("define( 'DB_NAME', 'cow_example' );")); assert!(php.contains("define( 'DB_HOST', '127.0.0.1:33070' );")); assert!(php.contains("define( 'WPCOW_LOCAL_DB_HOST', '127.0.0.1:33071' );")); - assert!(php.contains("define( 'WP_HOME', 'http://example.test' );")); + assert!(php.contains("$wp_cow_local_url = 'http://example.test';")); + assert!(php.contains("define( 'WP_HOME', $wp_cow_local_url );")); + assert!(php.contains("HTTP_X_FORWARDED_PROTO")); + assert!(php.contains("$_SERVER['HTTPS'] = 'on';")); + assert!(php.contains("$wp_cow_local_host === $wp_cow_request_host")); + assert!(php.contains("HTTP_X_FORWARDED_HOST")); assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index b5afa068..1919a8a8 
100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -668,9 +668,18 @@ impl OverlayStore { } let mut jsonl = String::new(); - File::open(path)?.read_to_string(&mut jsonl)?; + File::open(&path)?.read_to_string(&mut jsonl)?; for line in jsonl.lines().filter(|line| !line.trim().is_empty()) { - let value: serde_json::Value = serde_json::from_str(line)?; + let value: serde_json::Value = match serde_json::from_str(line) { + Ok(value) => value, + Err(err) => { + eprintln!( + "wp-cow ignoring truncated cache metadata journal entry at {}: {err}", + path.display() + ); + continue; + } + }; let Some(path) = value.get("path").and_then(|value| value.as_str()) else { continue; }; @@ -923,6 +932,39 @@ mod tests { assert!(reloaded.cached_entry(rel).unwrap().is_none()); } + #[test] + fn ignores_truncated_final_metadata_journal_line() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + let rel = Path::new("index.php"); + let entry = RemoteEntry { + name: "index.php".to_string(), + kind: "file".to_string(), + size: 405, + mode: 0o100644, + mtime: 42, + }; + + store.put_cached_entry(rel, &entry).unwrap(); + let mut journal = OpenOptions::new() + .append(true) + .open(store.metadata_journal_path()) + .unwrap(); + journal + .write_all(b"{\"entry\":{\"kind\":\"file\",\"mode\":33188") + .unwrap(); + drop(journal); + + let reloaded = OverlayStore::new(&paths); + assert_eq!( + reloaded.cached_entry(rel).unwrap().unwrap().size, + 405, + "a crash during cache metadata append must not poison the whole lazy filesystem" + ); + } + #[test] fn cached_metadata_refreshes_when_another_overlay_appends_journal() { let temp = tempfile::tempdir().unwrap(); From b7c63433691809a23aab90ede308cce5008a6921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 6 May 2026 19:18:59 +0200 Subject: [PATCH 
36/39] Fix remote WordPress COW replica rendering --- experiments/remote-wp-cow/src/db.rs | 94 ++++++---- experiments/remote-wp-cow/src/generate.rs | 165 ++++++++++++++++++ experiments/remote-wp-cow/src/overlay.rs | 41 ++++- experiments/remote-wp-cow/src/remote.rs | 25 ++- .../remote-wp-cow/src/runtime_cache.rs | 2 + 5 files changed, 292 insertions(+), 35 deletions(-) diff --git a/experiments/remote-wp-cow/src/db.rs b/experiments/remote-wp-cow/src/db.rs index 736a494f..e0299534 100644 --- a/experiments/remote-wp-cow/src/db.rs +++ b/experiments/remote-wp-cow/src/db.rs @@ -1,4 +1,5 @@ use anyhow::{anyhow, Context, Result}; +use mysql::prelude::Queryable; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::collections::BTreeSet; @@ -1533,40 +1534,24 @@ pub(crate) fn run_mysql_exec(manifest: &Manifest, sql_text: &str) -> Result<()> } pub(crate) fn local_query_result(manifest: &Manifest, sql_text: &str) -> Result { - let output = local_mysql_command(manifest) - .arg("--batch") - .arg("--raw") - .arg("--execute") - .arg(sql_text) - .output() - .context("run local mysql query")?; - if !output.status.success() { - return Err(anyhow!( - "local mysql query failed: {}", - String::from_utf8_lossy(&output.stderr) - )); - } - - let stdout = String::from_utf8_lossy(&output.stdout); - let mut lines = stdout.lines(); - let Some(header) = lines.next() else { - return Ok(CowQueryResult::ok(Vec::new(), Vec::new())); - }; - let fields = header - .split('\t') - .map(|field| field.to_string()) + let mut conn = local_mysql_conn(manifest)?; + let result = conn + .query_iter(sql_text) + .with_context(|| format!("run local mysql query: {sql_text}"))?; + let fields = result + .columns() + .as_ref() + .iter() + .map(|column| column.name_str().to_string()) .collect::>(); let mut rows = Vec::new(); - for line in lines { - let values = line.split('\t').collect::>(); + for row in result { + let row = row.context("read local mysql row")?; + let values = row.unwrap(); let mut 
row = Row::new(); for (idx, field) in fields.iter().enumerate() { - let value = values.get(idx).copied().unwrap_or_default(); - if value == "NULL" { - row.insert(field.clone(), serde_json::Value::Null); - } else { - row.insert(field.clone(), serde_json::Value::String(value.to_string())); - } + let value = values.get(idx).cloned().unwrap_or(mysql::Value::NULL); + row.insert(field.clone(), mysql_value_to_json(value)); } rows.push(row); } @@ -1574,6 +1559,44 @@ pub(crate) fn local_query_result(manifest: &Manifest, sql_text: &str) -> Result< Ok(CowQueryResult::ok(rows, fields)) } +fn local_mysql_conn(manifest: &Manifest) -> Result { + let mut builder = mysql::OptsBuilder::new() + .ip_or_hostname(Some(manifest.local_db.host.clone())) + .tcp_port(manifest.local_db.port) + .user(Some(manifest.local_db.user.clone())) + .db_name(Some(manifest.local_db.name.clone())); + if !manifest.local_db.password.is_empty() { + builder = builder.pass(Some(manifest.local_db.password.clone())); + } + mysql::Conn::new(builder).context("connect to local mysql") +} + +fn mysql_value_to_json(value: mysql::Value) -> serde_json::Value { + match value { + mysql::Value::NULL => serde_json::Value::Null, + mysql::Value::Bytes(bytes) => { + serde_json::Value::String(String::from_utf8_lossy(&bytes).into()) + } + mysql::Value::Int(value) => serde_json::Value::String(value.to_string()), + mysql::Value::UInt(value) => serde_json::Value::String(value.to_string()), + mysql::Value::Float(value) => serde_json::Value::String(value.to_string()), + mysql::Value::Double(value) => serde_json::Value::String(value.to_string()), + mysql::Value::Date(year, month, day, hour, minute, second, micros) => { + serde_json::Value::String(format!( + "{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}.{:06}", + micros + )) + } + mysql::Value::Time(negative, days, hours, minutes, seconds, micros) => { + let sign = if negative { "-" } else { "" }; + serde_json::Value::String(format!( + "{sign}{days} 
{hours:02}:{minutes:02}:{seconds:02}.{:06}", + micros + )) + } + } +} + fn mysql_string_literal(value: &str) -> String { value.replace('\\', "\\\\").replace('\'', "\\'") } @@ -1845,6 +1868,17 @@ mod tests { assert_eq!(max_pk_from_rows(&[], "max_pk").unwrap(), 0); } + #[test] + fn mysql_value_conversion_preserves_multiline_wordpress_content() { + let value = mysql_value_to_json(mysql::Value::Bytes( + b"before\n

About

\nafter\tTabbed".to_vec(), + )); + assert_eq!( + value, + serde_json::Value::String("before\n

About

\nafter\tTabbed".to_string()) + ); + } + #[test] fn row_cow_safe_read_fallbacks_do_not_promote_tables() { let tables = vec!["ady_options".to_string()]; diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 25453e88..db93dfee 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -755,12 +755,92 @@ add_filter( 'option_active_plugins', 'wp_cow_filter_active_plugins', PHP_INT_MAX add_filter( 'site_option_active_sitewide_plugins', 'wp_cow_filter_sitewide_plugins', PHP_INT_MAX ); add_filter( 'validate_current_theme', '__return_false', PHP_INT_MAX ); +add_filter( 'should_load_block_assets_on_demand', '__return_false', PHP_INT_MAX ); +add_filter( 'should_load_separate_core_block_assets', '__return_false', PHP_INT_MAX ); + +function wp_cow_disable_local_cache_generation( $value = false ) { + return 0; +} + +foreach ( + array( + 'siteground_optimizer_combine_css', + 'siteground_optimizer_combine_javascript', + 'siteground_optimizer_dns_prefetch', + 'siteground_optimizer_file_caching', + 'siteground_optimizer_fix_insecure_content', + 'siteground_optimizer_optimize_css', + 'siteground_optimizer_optimize_html', + 'siteground_optimizer_optimize_javascript', + 'siteground_optimizer_optimize_javascript_async', + 'siteground_optimizer_optimize_web_fonts', + 'siteground_optimizer_preload_combined_css', + ) as $wp_cow_cache_option +) { + add_filter( 'pre_option_' . $wp_cow_cache_option, 'wp_cow_disable_local_cache_generation', PHP_INT_MAX ); +} +unset( $wp_cow_cache_option ); + +function wp_cow_local_asset_http_response( $url ) { + $parts = parse_url( (string) $url ); + if ( ! is_array( $parts ) || empty( $parts['path'] ) ) { + return false; + } + + $path = rawurldecode( (string) $parts['path'] ); + if ( false !== strpos( $path, "\0" ) || false !== strpos( $path, '..' 
) ) { + return false; + } + if ( 0 !== strpos( $path, '/wp-content/' ) && 0 !== strpos( $path, '/wp-includes/' ) ) { + return false; + } + + $file = ABSPATH . ltrim( $path, '/' ); + $real_base = realpath( ABSPATH ); + $real_file = realpath( $file ); + if ( false === $real_base || false === $real_file || 0 !== strpos( $real_file, rtrim( $real_base, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR ) ) { + return false; + } + if ( ! is_file( $real_file ) || ! is_readable( $real_file ) ) { + return false; + } + + $max_mb = (int) getenv( 'WPCOW_LOCAL_HTTP_ASSET_MAX_MB' ); + if ( $max_mb < 1 ) { + $max_mb = 8; + } + $size = filesize( $real_file ); + if ( false === $size || $size > $max_mb * 1024 * 1024 ) { + return false; + } + + $body = file_get_contents( $real_file ); + if ( false === $body ) { + return false; + } + + return array( + 'headers' => array(), + 'body' => $body, + 'response' => array( + 'code' => 200, + 'message' => 'OK', + ), + 'cookies' => array(), + 'filename' => null, + ); +} add_filter( 'pre_http_request', static function ( $preempt, $args, $url ) { if ( defined( 'WPCOW_ALLOW_OUTBOUND_HTTP' ) && WPCOW_ALLOW_OUTBOUND_HTTP ) { return $preempt; } + $local_asset = wp_cow_local_asset_http_response( $url ); + if ( false !== $local_asset ) { + return $local_asset; + } + return new WP_Error( 'wp_cow_blocked_http', 'Outbound HTTP is blocked in this wp-cow clone.' 
); }, 10, 3 ); "# @@ -1339,12 +1419,97 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { assert!(php.contains("pre_wp_mail")); assert!(php.contains("X-Robots-Tag")); assert!(php.contains("pre_http_request")); + assert!(php.contains("wp_cow_local_asset_http_response")); assert!(php.contains("validate_current_theme")); assert!(php.contains("WPCOW_ENABLE_PLUGINS")); assert!(php.contains("WPCOW_PLUGIN_POLICY_FILE")); assert!(php.contains("wp_cow_filter_active_plugins")); assert!(php.contains("wp_cow_allowed_plugins")); assert!(php.contains("option_active_plugins")); + assert!(php.contains("siteground_optimizer_combine_css")); + assert!(php.contains("siteground_optimizer_file_caching")); + assert!(php.contains("siteground_optimizer_optimize_css")); + assert!(php.contains("should_load_block_assets_on_demand")); + assert!(php.contains("should_load_separate_core_block_assets")); + } + + #[test] + fn safety_plugin_serves_local_assets_to_wp_http_without_network() { + if Command::new("php").arg("-v").output().is_err() { + eprintln!("skipping generated PHP local asset HTTP test because php is not on PATH"); + return; + } + + let temp = tempfile::tempdir().unwrap(); + let safety = temp.path().join("wp-cow-safety.php"); + let docroot = temp.path().join("site"); + let asset = docroot.join("wp-content/themes/neve/style.css"); + let check = temp.path().join("check.php"); + fs::create_dir_all(asset.parent().unwrap()).unwrap(); + fs::write(&asset, b"body{color:#123}").unwrap(); + fs::write(&safety, safety_mu_plugin_php()).unwrap(); + fs::write( + &check, + format!( + r#"code = $code; + $this->message = $message; + }} +}} +define( 'ABSPATH', '{docroot}' . '/' ); +require '{safety}'; +$cache_option = call_user_func( $filters['pre_option_siteground_optimizer_combine_css'], 1 ); +if ( 0 !== $cache_option ) {{ + fwrite( STDERR, 'local SG cache generation was not disabled' . 
PHP_EOL ); + exit( 1 ); +}} +$response = call_user_func( + $filters['pre_http_request'], + false, + array( 'method' => 'GET' ), + 'https://example.test/wp-content/themes/neve/style.css?ver=1' +); +if ( ! is_array( $response ) || 'body{{color:#123}}' !== $response['body'] || 200 !== $response['response']['code'] ) {{ + fwrite( STDERR, 'local asset response failed: ' . json_encode( $response ) . PHP_EOL ); + exit( 1 ); +}} +$blocked = call_user_func( + $filters['pre_http_request'], + false, + array( 'method' => 'GET' ), + 'https://api.example.test/side-effect' +); +if ( ! $blocked instanceof WP_Error ) {{ + fwrite( STDERR, 'external request was not blocked' . PHP_EOL ); + exit( 1 ); +}} +"#, + docroot = php_single_quoted_path(&docroot), + safety = php_single_quoted_path(&safety) + ), + ) + .unwrap(); + + let output = Command::new("php").arg(&check).output().unwrap(); + assert!( + output.status.success(), + "local asset HTTP shim failed: {}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); } #[test] diff --git a/experiments/remote-wp-cow/src/overlay.rs b/experiments/remote-wp-cow/src/overlay.rs index 1919a8a8..531518d6 100644 --- a/experiments/remote-wp-cow/src/overlay.rs +++ b/experiments/remote-wp-cow/src/overlay.rs @@ -271,6 +271,14 @@ impl OverlayStore { ) } + pub fn note_cache_file_finished(&self, rel: &Path, phase: &str, size: u64) -> Result<()> { + self.finish_cache_progress_with_phase( + &Self::rel_string(&Self::clean_rel(rel)?), + phase, + size, + ) + } + pub fn remove_cached(&self, rel: &Path) -> Result<()> { let path = self.cache_path(rel); if path.exists() { @@ -737,7 +745,13 @@ impl OverlayStore { } let mut json = String::new(); File::open(path)?.read_to_string(&mut json)?; - Ok(serde_json::from_str(&json)?) 
+ Ok( + serde_json::from_str(&json).unwrap_or_else(|_| CacheProgress { + phase: "idle".to_string(), + updated_at_unix_ms: now_unix_ms(), + ..CacheProgress::default() + }), + ) } fn write_progress(&self, progress: &CacheProgress) -> Result<()> { @@ -794,8 +808,12 @@ impl OverlayStore { } fn finish_cache_progress(&self, rel: &str, size: u64) -> Result<()> { + self.finish_cache_progress_with_phase(rel, "cached", size) + } + + fn finish_cache_progress_with_phase(&self, rel: &str, phase: &str, size: u64) -> Result<()> { let mut progress = self.load_progress()?; - progress.phase = "cached".to_string(); + progress.phase = phase.to_string(); progress.active_path.clear(); progress.active_bytes = 0; progress.active_total = 0; @@ -965,6 +983,25 @@ mod tests { ); } + #[test] + fn ignores_malformed_cache_progress_file() { + let temp = tempfile::tempdir().unwrap(); + let paths = crate::config::clone_paths(temp.path(), "example"); + ensure_clone_dirs(&paths).unwrap(); + let store = OverlayStore::new(&paths); + fs::write(store.progress_path(), b"{\"phase\":\"fetching\"\ntrailing").unwrap(); + + store + .note_cache_file_finished(Path::new("wp-settings.php"), "runtime-code-pack", 123) + .unwrap(); + + let progress = store.load_progress().unwrap(); + assert_eq!(progress.phase, "runtime-code-pack"); + assert_eq!(progress.files_cached, 1); + assert_eq!(progress.bytes_cached, 123); + assert_eq!(progress.last_cached_path, "wp-settings.php"); + } + #[test] fn cached_metadata_refreshes_when_another_overlay_appends_journal() { let temp = tempfile::tempdir().unwrap(); diff --git a/experiments/remote-wp-cow/src/remote.rs b/experiments/remote-wp-cow/src/remote.rs index b9a482c9..972eac55 100644 --- a/experiments/remote-wp-cow/src/remote.rs +++ b/experiments/remote-wp-cow/src/remote.rs @@ -605,13 +605,30 @@ echo $target; .ok_or_else(|| anyhow!("runtime code pack stdout"))?; let mut summary = RuntimeCodePackSummary::default(); + let mut partial_stream = false; for line in 
BufReader::new(stdout).lines() { let line = line.context("read remote runtime code pack")?; if line.trim().is_empty() { continue; } - let value: serde_json::Value = serde_json::from_str(&line) - .with_context(|| format!("decode remote runtime code pack line: {line}"))?; + let value: serde_json::Value = match serde_json::from_str(&line) { + Ok(value) => value, + Err(err) => { + if summary.files > 0 { + summary.capped = true; + partial_stream = true; + trace_remote_result::<(), _>( + "runtime_code_pack_partial_line", + &format!("{} cached files", summary.files), + Instant::now(), + &Err(err), + ); + break; + } + return Err(err) + .with_context(|| format!("decode remote runtime code pack line: {line}")); + } + }; match value.get("type").and_then(|value| value.as_str()) { Some("file") => { let rel = value @@ -626,6 +643,8 @@ echo $target; )?; let bytes = decode_helper_data(value.clone())?; if entry.kind == "file" && bytes.len() as u64 == entry.size { + summary.files = summary.files.saturating_add(1); + summary.bytes = summary.bytes.saturating_add(entry.size); on_file(RuntimeCodePackFile { rel: PathBuf::from(rel), entry, @@ -648,7 +667,7 @@ echo $target; } let output = child.wait_with_output()?; - if !output.status.success() { + if !output.status.success() && !(partial_stream && summary.files > 0) { return Err(anyhow!( "remote runtime code pack exited with status {}: {}", output.status, diff --git a/experiments/remote-wp-cow/src/runtime_cache.rs b/experiments/remote-wp-cow/src/runtime_cache.rs index 6df595b7..68040df6 100644 --- a/experiments/remote-wp-cow/src/runtime_cache.rs +++ b/experiments/remote-wp-cow/src/runtime_cache.rs @@ -76,6 +76,8 @@ fn warm_runtime_code_cache_inner( file.entry.size, file.entry.size, ); + let _ = + overlay.note_cache_file_finished(&file.rel, "runtime-code-pack", file.entry.size); Ok(()) }) .context("cache remote runtime code pack")?; From 064764662bdde64b074be338797ad9b4c57aeae8 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 6 May 2026 22:37:01 +0200 Subject: [PATCH 37/39] Preserve SG CSS and quarantine stale plugin allows --- experiments/remote-wp-cow/src/generate.rs | 188 +++++++++++++++++- .../remote-wp-cow/src/plugin_policy.rs | 28 +++ 2 files changed, 211 insertions(+), 5 deletions(-) diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index db93dfee..9ce52ab4 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -685,12 +685,21 @@ function wp_cow_plugin_policy() { } function wp_cow_allowed_plugins() { - $policy = wp_cow_plugin_policy(); - $allowed = isset( $policy['allow'] ) && is_array( $policy['allow'] ) ? $policy['allow'] : array(); - $out = array(); - foreach ( $allowed as $plugin ) { + $policy = wp_cow_plugin_policy(); + $allowed = isset( $policy['allow'] ) && is_array( $policy['allow'] ) ? $policy['allow'] : array(); + $quarantine = isset( $policy['quarantine'] ) && is_array( $policy['quarantine'] ) ? $policy['quarantine'] : array(); + $quarantined = array(); + foreach ( $quarantine as $plugin => $reason ) { $plugin = ltrim( (string) $plugin, '/' ); if ( '' !== $plugin ) { + $quarantined[ $plugin ] = true; + } + } + + $out = array(); + foreach ( $allowed as $plugin ) { + $plugin = ltrim( (string) $plugin, '/' ); + if ( '' !== $plugin && ! isset( $quarantined[ $plugin ] ) ) { $out[ $plugin ] = true; } } @@ -781,6 +790,137 @@ foreach ( } unset( $wp_cow_cache_option ); +function wp_cow_siteground_combined_css_markers() { + $post_id = function_exists( 'get_queried_object_id' ) ? (int) get_queried_object_id() : 0; + if ( $post_id <= 0 || ! function_exists( 'get_post' ) ) { + return array(); + } + + $post = get_post( $post_id ); + if ( ! 
is_object( $post ) || empty( $post->post_content ) ) { + return array(); + } + + preg_match_all( '/wp-block-themeisle-blocks-[a-z0-9-]+-[a-f0-9]{8}/i', (string) $post->post_content, $matches ); + if ( empty( $matches[0] ) ) { + return array(); + } + + return array_values( array_unique( array_slice( $matches[0], 0, 20 ) ) ); +} + +function wp_cow_configured_siteground_combined_css() { + $basename = basename( (string) getenv( 'WPCOW_SITEGROUND_COMBINED_CSS' ) ); + if ( '' === $basename || ! preg_match( '/^siteground-optimizer-combined-css-[a-f0-9]+\.css$/', $basename ) ) { + return false; + } + + $file = ABSPATH . 'wp-content/uploads/siteground-optimizer-assets/' . $basename; + if ( ! is_file( $file ) || ! is_readable( $file ) ) { + return false; + } + + $size = filesize( $file ); + if ( false === $size || $size < 1024 || $size > 2 * 1024 * 1024 ) { + return false; + } + + return $file; +} + +function wp_cow_find_siteground_combined_css() { + static $asset = null; + if ( null !== $asset ) { + return $asset; + } + + $asset = false; + $configured = wp_cow_configured_siteground_combined_css(); + if ( false !== $configured ) { + $asset = $configured; + return $asset; + } + + $scan = strtolower( trim( (string) getenv( 'WPCOW_SITEGROUND_COMBINED_CSS_SCAN' ) ) ); + if ( ! in_array( $scan, array( '1', 'true', 'yes', 'on' ), true ) ) { + return false; + } + + $dir = ABSPATH . 'wp-content/uploads/siteground-optimizer-assets'; + if ( ! is_dir( $dir ) || ! is_readable( $dir ) ) { + return false; + } + + $files = glob( $dir . '/siteground-optimizer-combined-css-*.css' ); + if ( ! is_array( $files ) || empty( $files ) ) { + return false; + } + + usort( + $files, + static function ( $a, $b ) { + return (int) @filemtime( $b ) <=> (int) @filemtime( $a ); + } + ); + + $markers = wp_cow_siteground_combined_css_markers(); + $fallback = false; + + foreach ( $files as $file ) { + if ( ! is_file( $file ) || ! 
is_readable( $file ) ) { + continue; + } + + $size = filesize( $file ); + if ( false === $size || $size < 1024 || $size > 2 * 1024 * 1024 ) { + continue; + } + + if ( false === $fallback ) { + $fallback = $file; + } + + if ( empty( $markers ) ) { + continue; + } + + $css = file_get_contents( $file ); + if ( false === $css ) { + continue; + } + + foreach ( $markers as $marker ) { + if ( false !== strpos( $css, $marker ) ) { + $asset = $file; + return $asset; + } + } + } + + $asset = $fallback; + return $asset; +} + +add_action( + 'wp_enqueue_scripts', + static function () { + if ( ! function_exists( 'wp_enqueue_style' ) || ! function_exists( 'content_url' ) ) { + return; + } + + $file = wp_cow_find_siteground_combined_css(); + if ( false === $file ) { + return; + } + + $basename = basename( $file ); + $handle = preg_replace( '/\.css$/', '', $basename ); + $url = content_url( 'uploads/siteground-optimizer-assets/' . $basename ); + wp_enqueue_style( $handle, $url, array(), null ); + }, + 0 +); + function wp_cow_local_asset_http_response( $url ) { $parts = parse_url( (string) $url ); if ( ! 
is_array( $parts ) || empty( $parts['path'] ) ) { @@ -1425,10 +1565,13 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { assert!(php.contains("WPCOW_PLUGIN_POLICY_FILE")); assert!(php.contains("wp_cow_filter_active_plugins")); assert!(php.contains("wp_cow_allowed_plugins")); + assert!(php.contains("$quarantined")); assert!(php.contains("option_active_plugins")); assert!(php.contains("siteground_optimizer_combine_css")); assert!(php.contains("siteground_optimizer_file_caching")); assert!(php.contains("siteground_optimizer_optimize_css")); + assert!(php.contains("wp_cow_find_siteground_combined_css")); + assert!(php.contains("WPCOW_SITEGROUND_COMBINED_CSS")); assert!(php.contains("should_load_block_assets_on_demand")); assert!(php.contains("should_load_separate_core_block_assets")); } @@ -1444,9 +1587,23 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { let safety = temp.path().join("wp-cow-safety.php"); let docroot = temp.path().join("site"); let asset = docroot.join("wp-content/themes/neve/style.css"); + let sg_asset = docroot + .join("wp-content/uploads/siteground-optimizer-assets") + .join("siteground-optimizer-combined-css-abc123.css"); let check = temp.path().join("check.php"); fs::create_dir_all(asset.parent().unwrap()).unwrap(); fs::write(&asset, b"body{color:#123}").unwrap(); + fs::create_dir_all(sg_asset.parent().unwrap()).unwrap(); + fs::write( + &sg_asset, + [ + b"#wp-block-themeisle-blocks-advanced-columns-a241f2a5{min-height:800px}" + .as_slice(), + vec![b' '; 2048].as_slice(), + ] + .concat(), + ) + .unwrap(); fs::write(&safety, safety_mu_plugin_php()).unwrap(); fs::write( &check, @@ -1461,6 +1618,17 @@ function add_action( $tag, $callback, $priority = 10, $accepted_args = 1 ) {{ add_filter( $tag, $callback, $priority, $accepted_args ); }} function __return_false() {{ return false; }} +function content_url( $path = '' ) {{ return 'https://example.test/wp-content/' . 
ltrim( $path, '/' ); }} +function get_queried_object_id() {{ return 84; }} +function get_post( $id ) {{ + return (object) array( + 'post_content' => '', + ); +}} +function wp_enqueue_style( $handle, $src, $deps = array(), $ver = false ) {{ + global $enqueued; + $enqueued[ $handle ] = $src; +}} class WP_Error {{ public $code; public $message; @@ -1470,7 +1638,17 @@ class WP_Error {{ }} }} define( 'ABSPATH', '{docroot}' . '/' ); +putenv( 'WPCOW_SITEGROUND_COMBINED_CSS=siteground-optimizer-combined-css-abc123.css' ); require '{safety}'; +$enqueued = array(); +call_user_func( $filters['wp_enqueue_scripts'] ); +if ( + empty( $enqueued['siteground-optimizer-combined-css-abc123'] ) || + false === strpos( $enqueued['siteground-optimizer-combined-css-abc123'], '/wp-content/uploads/siteground-optimizer-assets/siteground-optimizer-combined-css-abc123.css' ) +) {{ + fwrite( STDERR, 'existing SG combined CSS was not preserved: ' . json_encode( $enqueued ) . PHP_EOL ); + exit( 1 ); +}} $cache_option = call_user_func( $filters['pre_option_siteground_optimizer_combine_css'], 1 ); if ( 0 !== $cache_option ) {{ fwrite( STDERR, 'local SG cache generation was not disabled' . PHP_EOL ); @@ -1526,7 +1704,7 @@ if ( ! 
$blocked instanceof WP_Error ) {{ fs::write(&safety, safety_mu_plugin_php()).unwrap(); fs::write( &policy, - r#"{"version":1,"mode":"auto","active":["akismet/akismet.php","woocommerce/woocommerce.php"],"allow":["woocommerce/woocommerce.php"],"quarantine":{"akismet/akismet.php":"timeout"}}"#, + r#"{"version":1,"mode":"auto","active":["akismet/akismet.php","woocommerce/woocommerce.php"],"allow":["akismet/akismet.php","woocommerce/woocommerce.php"],"quarantine":{"akismet/akismet.php":"timeout"}}"#, ) .unwrap(); fs::write( diff --git a/experiments/remote-wp-cow/src/plugin_policy.rs b/experiments/remote-wp-cow/src/plugin_policy.rs index 48cc940e..639d91e0 100644 --- a/experiments/remote-wp-cow/src/plugin_policy.rs +++ b/experiments/remote-wp-cow/src/plugin_policy.rs @@ -45,6 +45,8 @@ impl PluginPolicy { ); self.quarantine .retain(|plugin, _| active_set.contains(plugin)); + let quarantined: BTreeSet<_> = self.quarantine.keys().cloned().collect(); + self.allow.retain(|allowed| !quarantined.contains(allowed)); self } @@ -226,6 +228,32 @@ mod tests { ); } + #[test] + fn existing_policy_never_allows_quarantined_plugins() { + let temp = tempfile::tempdir().unwrap(); + let path = temp.path().join("plugin-policy.json"); + let mut policy = + PluginPolicy::new(&["seo/seo.php".to_string(), "visual/visual.php".to_string()]); + policy.allow_plugin("seo/seo.php"); + policy.allow_plugin("visual/visual.php"); + policy + .quarantine + .insert("seo/seo.php".to_string(), "timed out".to_string()); + write_policy_atomic(&path, &policy).unwrap(); + + let loaded = load_policy_or_new( + &path, + &["seo/seo.php".to_string(), "visual/visual.php".to_string()], + ) + .unwrap(); + + assert_eq!(loaded.allow, vec!["visual/visual.php"]); + assert_eq!( + loaded.quarantine.get("seo/seo.php").map(String::as_str), + Some("timed out") + ); + } + #[test] fn candidate_policy_allows_one_extra_plugin() { let base = PluginPolicy::new(&["woocommerce/woocommerce.php".to_string()]); From 
a79cddb2a2b6882a6dbf1538c697d5e7bbffc1ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 6 May 2026 23:41:55 +0200 Subject: [PATCH 38/39] Tighten remote COW visual parity --- experiments/remote-wp-cow/src/generate.rs | 438 +++++++++++++++++++--- 1 file changed, 395 insertions(+), 43 deletions(-) diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 9ce52ab4..6a5bc21a 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -76,6 +76,7 @@ define( 'WP_HOME', $wp_cow_local_url ); define( 'WP_SITEURL', $wp_cow_local_url ); define( 'WPCOW_CLONE', {clone_name} ); +define( 'WPCOW_REMOTE_URL', {remote_url} ); define( 'WPCOW_CONTROL_URL', {control_url} ); define( 'WPCOW_QUERY_CACHE_DIR', {query_cache_dir} ); define( 'WPCOW_DB_STATE_FILE', {db_state_file} ); @@ -112,6 +113,7 @@ require_once ABSPATH . 'wp-settings.php'; manifest.db_proxy.host, manifest.db_proxy.port )), local_url = php_string(&manifest.local_url), + remote_url = php_string(&manifest.remote_url), table_prefix = php_string(&manifest.probe.table_prefix), clone_name = php_string(&manifest.name), control_url = php_string(&manifest.control_url), @@ -706,6 +708,66 @@ function wp_cow_allowed_plugins() { return $out; } +function wp_cow_local_home_url() { + if ( defined( 'WP_HOME' ) ) { + return rtrim( (string) WP_HOME, '/' ); + } + if ( function_exists( 'home_url' ) ) { + return rtrim( (string) home_url( '/' ), '/' ); + } + return ''; +} + +function wp_cow_remote_home_url() { + if ( defined( 'WPCOW_REMOTE_URL' ) ) { + return rtrim( (string) WPCOW_REMOTE_URL, '/' ); + } + return ''; +} + +function wp_cow_url_variants( $url ) { + $url = rtrim( (string) $url, '/' ); + if ( '' === $url ) { + return array(); + } + $variants = array( $url ); + if ( 0 === strpos( $url, 'https://' ) ) { + $variants[] = 'http://' . 
substr( $url, 8 ); + } elseif ( 0 === strpos( $url, 'http://' ) ) { + $variants[] = 'https://' . substr( $url, 7 ); + } + return array_values( array_unique( $variants ) ); +} + +function wp_cow_rewrite_remote_url_to_local( $url ) { + $url = (string) $url; + $remote = wp_cow_remote_home_url(); + $local = wp_cow_local_home_url(); + if ( '' === $url || '' === $remote || '' === $local ) { + return $url; + } + + foreach ( wp_cow_url_variants( $remote ) as $variant ) { + if ( 0 === strpos( rtrim( $url, '/' ), $variant ) ) { + return $local . substr( $url, strlen( $variant ) ); + } + } + return $url; +} + +function wp_cow_is_remote_or_local_home_url( $url ) { + $url = rtrim( (string) $url, '/' ); + if ( '' === $url ) { + return false; + } + foreach ( array_merge( wp_cow_url_variants( wp_cow_remote_home_url() ), wp_cow_url_variants( wp_cow_local_home_url() ) ) as $variant ) { + if ( $url === $variant ) { + return true; + } + } + return false; +} + function wp_cow_filter_active_plugins( $plugins ) { $mode = wp_cow_plugin_mode(); if ( in_array( $mode, array( 'full', 'on', 'enabled', '1', 'true', 'yes' ), true ) ) { @@ -763,6 +825,63 @@ function wp_cow_filter_sitewide_plugins( $plugins ) { add_filter( 'option_active_plugins', 'wp_cow_filter_active_plugins', PHP_INT_MAX ); add_filter( 'site_option_active_sitewide_plugins', 'wp_cow_filter_sitewide_plugins', PHP_INT_MAX ); +add_action( + 'init', + static function () { + remove_action( 'wp_head', 'print_emoji_detection_script', 7 ); + remove_action( 'wp_enqueue_scripts', 'wp_enqueue_emoji_styles' ); + remove_action( 'wp_print_styles', 'print_emoji_styles' ); + remove_action( 'admin_print_scripts', 'print_emoji_detection_script' ); + remove_action( 'admin_print_styles', 'print_emoji_styles' ); + remove_filter( 'the_content_feed', 'wp_staticize_emoji' ); + remove_filter( 'comment_text_rss', 'wp_staticize_emoji' ); + remove_filter( 'wp_mail', 'wp_staticize_emoji_for_email' ); + }, + 0 +); +add_filter( 'emoji_svg_url', 
'__return_false', PHP_INT_MAX ); + +add_filter( + 'nav_menu_link_attributes', + static function ( $atts, $item = null ) { + if ( isset( $atts['href'] ) ) { + $atts['href'] = wp_cow_rewrite_remote_url_to_local( $atts['href'] ); + } + if ( + is_object( $item ) && + isset( $item->url ) && + function_exists( 'is_front_page' ) && + is_front_page() && + wp_cow_is_remote_or_local_home_url( $item->url ) + ) { + $atts['aria-current'] = 'page'; + } + return $atts; + }, + PHP_INT_MAX, + 2 +); + +add_filter( + 'nav_menu_css_class', + static function ( $classes, $item = null ) { + if ( + is_object( $item ) && + isset( $item->url ) && + function_exists( 'is_front_page' ) && + is_front_page() && + wp_cow_is_remote_or_local_home_url( $item->url ) + ) { + $classes = is_array( $classes ) ? $classes : array(); + $classes = array_merge( $classes, array( 'current-menu-item', 'current_page_item', 'menu-item-home', 'nv-active' ) ); + $classes = array_values( array_unique( $classes ) ); + } + return $classes; + }, + PHP_INT_MAX, + 2 +); + add_filter( 'validate_current_theme', '__return_false', PHP_INT_MAX ); add_filter( 'should_load_block_assets_on_demand', '__return_false', PHP_INT_MAX ); add_filter( 'should_load_separate_core_block_assets', '__return_false', PHP_INT_MAX ); @@ -901,6 +1020,44 @@ function wp_cow_find_siteground_combined_css() { return $asset; } +function wp_cow_siteground_localized_css_file( $file ) { + $file = (string) $file; + $remote = wp_cow_remote_home_url(); + $local = wp_cow_local_home_url(); + if ( '' === $remote || '' === $local || ! is_file( $file ) || ! is_readable( $file ) ) { + return $file; + } + + $dir = dirname( $file ); + $target = $dir . '/wp-cow-localized-' . 
basename( $file ); + $source_mtime = (int) @filemtime( $file ); + if ( is_file( $target ) && is_readable( $target ) && (int) @filemtime( $target ) >= $source_mtime ) { + return $target; + } + + $css = file_get_contents( $file ); + if ( false === $css ) { + return $file; + } + + $localized = $css; + foreach ( wp_cow_url_variants( $remote ) as $variant ) { + $localized = str_replace( $variant, $local, $localized ); + } + if ( $localized === $css ) { + return $file; + } + + $tmp = $target . '.tmp.' . getmypid(); + if ( false === @file_put_contents( $tmp, $localized ) ) { + return $file; + } + @rename( $tmp, $target ); + @unlink( $tmp ); + + return is_file( $target ) && is_readable( $target ) ? $target : $file; +} + add_action( 'wp_enqueue_scripts', static function () { @@ -915,12 +1072,117 @@ add_action( $basename = basename( $file ); $handle = preg_replace( '/\.css$/', '', $basename ); - $url = content_url( 'uploads/siteground-optimizer-assets/' . $basename ); + $asset = wp_cow_siteground_localized_css_file( $file ); + $url = content_url( 'uploads/siteground-optimizer-assets/' . basename( $asset ) ); + if ( function_exists( 'wp_dequeue_style' ) ) { + wp_dequeue_style( $handle ); + } + if ( function_exists( 'wp_deregister_style' ) ) { + wp_deregister_style( $handle ); + } wp_enqueue_style( $handle, $url, array(), null ); }, 0 ); +function wp_cow_siteground_lazyload_images_enabled() { + $enabled = strtolower( trim( (string) getenv( 'WPCOW_SITEGROUND_LAZYLOAD_IMAGES' ) ) ); + if ( in_array( $enabled, array( '0', 'false', 'no', 'off', 'disabled' ), true ) ) { + return false; + } + if ( in_array( $enabled, array( '1', 'true', 'yes', 'on', 'enabled' ), true ) ) { + return true; + } + return false !== wp_cow_find_siteground_combined_css() && false !== wp_cow_siteground_lazysizes_path(); +} + +function wp_cow_siteground_lazysizes_path() { + static $path = null; + if ( null !== $path ) { + return $path; + } + + $content_dir = defined( 'WP_CONTENT_DIR' ) ? 
WP_CONTENT_DIR : ABSPATH . 'wp-content'; + $candidate = rtrim( $content_dir, '/' ) . '/plugins/sg-cachepress/assets/js/lazysizes.min.js'; + if ( is_file( $candidate ) && is_readable( $candidate ) ) { + $path = $candidate; + return $path; + } + + $path = false; + return $path; +} + +function wp_cow_enqueue_siteground_lazysizes() { + if ( ! wp_cow_siteground_lazyload_images_enabled() || ! function_exists( 'wp_enqueue_script' ) || ! function_exists( 'content_url' ) ) { + return; + } + + $handle = 'siteground-optimizer-lazy-sizes-js'; + $url = content_url( 'plugins/sg-cachepress/assets/js/lazysizes.min.js' ); + wp_enqueue_script( $handle, $url, array(), null, true ); + + if ( function_exists( 'wp_script_add_data' ) ) { + wp_script_add_data( $handle, 'strategy', 'defer' ); + } +} + +function wp_cow_siteground_img_placeholder() { + return 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7'; +} + +function wp_cow_siteground_lazyload_img_tag( $tag ) { + $tag = (string) $tag; + if ( false === stripos( $tag, 'wp-content/uploads/' ) || false !== stripos( $tag, 'data-src=' ) ) { + return $tag; + } + if ( ! preg_match( '/\ssrc=([\'"])(.*?)\1/i', $tag, $matches ) ) { + return $tag; + } + + $src = html_entity_decode( $matches[2], ENT_QUOTES, 'UTF-8' ); + if ( false === stripos( $src, '/wp-content/uploads/' ) ) { + return $tag; + } + + $tag = preg_replace( '/\s(?:width|height|srcset|sizes|fetchpriority)=([\'"]).*?\1/i', '', $tag ); + $local_src = wp_cow_rewrite_remote_url_to_local( $src ); + $tag = preg_replace( + '/\ssrc=([\'"]).*?\1/i', + ' src="' . wp_cow_siteground_img_placeholder() . '" data-src="' . htmlspecialchars( $local_src, ENT_QUOTES, 'UTF-8' ) . '"', + $tag, + 1 + ); + + if ( preg_match( '/\sclass=([\'"])(.*?)\1/i', $tag, $class_matches ) ) { + if ( false === strpos( ' ' . $class_matches[2] . ' ', ' lazyload ' ) ) { + $classes = trim( $class_matches[2] . ' lazyload' ); + $tag = preg_replace( '/\sclass=([\'"]).*?\1/i', ' class="' . 
htmlspecialchars( $classes, ENT_QUOTES, 'UTF-8' ) . '"', $tag, 1 ); + } + } else { + $tag = preg_replace( '/\/?>$/', ' class="lazyload"$0', $tag, 1 ); + } + + return $tag; +} + +function wp_cow_siteground_lazyload_content_images( $html ) { + if ( ! wp_cow_siteground_lazyload_images_enabled() || false === stripos( (string) $html, ']*\bwp-image-\d+[^>]*>/i', + static function ( $matches ) { + return wp_cow_siteground_lazyload_img_tag( $matches[0] ); + }, + (string) $html + ); +} + +add_filter( 'the_content', 'wp_cow_siteground_lazyload_content_images', PHP_INT_MAX ); +add_action( 'wp_enqueue_scripts', 'wp_cow_enqueue_siteground_lazysizes', 1 ); + function wp_cow_local_asset_http_response( $url ) { $parts = parse_url( (string) $url ); if ( ! is_array( $parts ) || empty( $parts['path'] ) ) { @@ -1423,6 +1685,7 @@ mod tests { assert!(php.contains("$_SERVER['HTTPS'] = 'on';")); assert!(php.contains("$wp_cow_local_host === $wp_cow_request_host")); assert!(php.contains("HTTP_X_FORWARDED_HOST")); + assert!(php.contains("define( 'WPCOW_REMOTE_URL', 'https://example.com' );")); assert!(php.contains("$table_prefix = 'wp_';")); assert!(php.contains("WPCOW_CONTROL_URL")); assert!(php.contains("WPCOW_QUERY_CACHE_DIR")); @@ -1566,12 +1829,21 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { assert!(php.contains("wp_cow_filter_active_plugins")); assert!(php.contains("wp_cow_allowed_plugins")); assert!(php.contains("$quarantined")); + assert!(php.contains("wp_cow_rewrite_remote_url_to_local")); + assert!(php.contains("nav_menu_link_attributes")); + assert!(php.contains("nav_menu_css_class")); + assert!(php.contains("wp_enqueue_emoji_styles")); + assert!(php.contains("print_emoji_detection_script")); assert!(php.contains("option_active_plugins")); assert!(php.contains("siteground_optimizer_combine_css")); assert!(php.contains("siteground_optimizer_file_caching")); assert!(php.contains("siteground_optimizer_optimize_css")); 
assert!(php.contains("wp_cow_find_siteground_combined_css")); assert!(php.contains("WPCOW_SITEGROUND_COMBINED_CSS")); + assert!(php.contains("wp_cow_siteground_lazyload_content_images")); + assert!(php.contains("wp_cow_enqueue_siteground_lazysizes")); + assert!(php.contains("lazysizes.min.js")); + assert!(php.contains("WPCOW_SITEGROUND_LAZYLOAD_IMAGES")); assert!(php.contains("should_load_block_assets_on_demand")); assert!(php.contains("should_load_separate_core_block_assets")); } @@ -1590,6 +1862,9 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { let sg_asset = docroot .join("wp-content/uploads/siteground-optimizer-assets") .join("siteground-optimizer-combined-css-abc123.css"); + let lazy_asset = docroot + .join("wp-content/plugins/sg-cachepress/assets/js") + .join("lazysizes.min.js"); let check = temp.path().join("check.php"); fs::create_dir_all(asset.parent().unwrap()).unwrap(); fs::write(&asset, b"body{color:#123}").unwrap(); @@ -1597,38 +1872,72 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { fs::write( &sg_asset, [ - b"#wp-block-themeisle-blocks-advanced-columns-a241f2a5{min-height:800px}" + b"#wp-block-themeisle-blocks-advanced-columns-a241f2a5{min-height:800px;--background:url(https://example.test/wp-content/uploads/hero.jpg)}" .as_slice(), vec![b' '; 2048].as_slice(), ] .concat(), ) .unwrap(); + fs::create_dir_all(lazy_asset.parent().unwrap()).unwrap(); + fs::write(&lazy_asset, b"/*! 
lazysizes */").unwrap(); fs::write(&safety, safety_mu_plugin_php()).unwrap(); fs::write( &check, format!( r#" '', ); }} -function wp_enqueue_style( $handle, $src, $deps = array(), $ver = false ) {{ - global $enqueued; - $enqueued[ $handle ] = $src; -}} + function wp_enqueue_style( $handle, $src, $deps = array(), $ver = false ) {{ + global $enqueued; + $enqueued[ $handle ] = $src; + }} + function wp_enqueue_script( $handle, $src, $deps = array(), $ver = false, $in_footer = false ) {{ + global $enqueued_scripts; + $enqueued_scripts[ $handle ] = $src; + }} + function wp_script_add_data( $handle, $key, $value ) {{ + global $script_data; + $script_data[ $handle ][ $key ] = $value; + }} class WP_Error {{ public $code; public $message; @@ -1638,38 +1947,65 @@ class WP_Error {{ }} }} define( 'ABSPATH', '{docroot}' . '/' ); -putenv( 'WPCOW_SITEGROUND_COMBINED_CSS=siteground-optimizer-combined-css-abc123.css' ); -require '{safety}'; -$enqueued = array(); -call_user_func( $filters['wp_enqueue_scripts'] ); -if ( - empty( $enqueued['siteground-optimizer-combined-css-abc123'] ) || - false === strpos( $enqueued['siteground-optimizer-combined-css-abc123'], '/wp-content/uploads/siteground-optimizer-assets/siteground-optimizer-combined-css-abc123.css' ) + putenv( 'WPCOW_SITEGROUND_COMBINED_CSS=siteground-optimizer-combined-css-abc123.css' ); + require '{safety}'; + $enqueued = array(); + $enqueued_scripts = array(); + $script_data = array(); + do_test_action( 'wp_enqueue_scripts' ); + if ( + empty( $enqueued['siteground-optimizer-combined-css-abc123'] ) || + false === strpos( $enqueued['siteground-optimizer-combined-css-abc123'], '/wp-content/uploads/siteground-optimizer-assets/wp-cow-localized-siteground-optimizer-combined-css-abc123.css' ) + ) {{ + fwrite( STDERR, 'existing SG combined CSS was not preserved: ' . json_encode( $enqueued ) . PHP_EOL ); + exit( 1 ); + }} + $localized_css = '{docroot}' . 
'/wp-content/uploads/siteground-optimizer-assets/wp-cow-localized-siteground-optimizer-combined-css-abc123.css'; + if ( ! is_file( $localized_css ) || false === strpos( file_get_contents( $localized_css ), 'https://local.test/wp-content/uploads/hero.jpg' ) ) {{ + fwrite( STDERR, 'SG combined CSS URLs were not localized' . PHP_EOL ); + exit( 1 ); + }} + if ( + empty( $enqueued_scripts['siteground-optimizer-lazy-sizes-js'] ) || + false === strpos( $enqueued_scripts['siteground-optimizer-lazy-sizes-js'], '/wp-content/plugins/sg-cachepress/assets/js/lazysizes.min.js' ) || + 'defer' !== $script_data['siteground-optimizer-lazy-sizes-js']['strategy'] + ) {{ + fwrite( STDERR, 'SG lazysizes runtime was not enqueued: ' . json_encode( array( $enqueued_scripts, $script_data ) ) . PHP_EOL ); + exit( 1 ); + }} + $cache_option = apply_test_filter( 'pre_option_siteground_optimizer_combine_css', 1 ); + if ( 0 !== $cache_option ) {{ + fwrite( STDERR, 'local SG cache generation was not disabled' . PHP_EOL ); + exit( 1 ); + }} + $content = '
'; + $lazy_content = apply_test_filter( 'the_content', $content ); + if ( + false === strpos( $lazy_content, 'src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"' ) || + false === strpos( $lazy_content, 'data-src="https://local.test/wp-content/uploads/2019/12/photo.jpg"' ) || + false === strpos( $lazy_content, 'class="wp-image-45 lazyload"' ) || + false !== strpos( $lazy_content, 'width="600"' ) || + false !== strpos( $lazy_content, 'srcset=' ) ) {{ - fwrite( STDERR, 'existing SG combined CSS was not preserved: ' . json_encode( $enqueued ) . PHP_EOL ); - exit( 1 ); -}} -$cache_option = call_user_func( $filters['pre_option_siteground_optimizer_combine_css'], 1 ); -if ( 0 !== $cache_option ) {{ - fwrite( STDERR, 'local SG cache generation was not disabled' . PHP_EOL ); + fwrite( STDERR, 'SG lazyload placeholder shape was not preserved: ' . $lazy_content . PHP_EOL ); exit( 1 ); }} -$response = call_user_func( - $filters['pre_http_request'], - false, - array( 'method' => 'GET' ), - 'https://example.test/wp-content/themes/neve/style.css?ver=1' -); + $response = apply_test_filter( + 'pre_http_request', + false, + array( 'method' => 'GET' ), + 'https://local.test/wp-content/themes/neve/style.css?ver=1' + ); if ( ! is_array( $response ) || 'body{{color:#123}}' !== $response['body'] || 200 !== $response['response']['code'] ) {{ fwrite( STDERR, 'local asset response failed: ' . json_encode( $response ) . PHP_EOL ); exit( 1 ); }} -$blocked = call_user_func( - $filters['pre_http_request'], - false, - array( 'method' => 'GET' ), - 'https://api.example.test/side-effect' -); + $blocked = apply_test_filter( + 'pre_http_request', + false, + array( 'method' => 'GET' ), + 'https://api.example.test/side-effect' + ); if ( ! $blocked instanceof WP_Error ) {{ fwrite( STDERR, 'external request was not blocked' . 
PHP_EOL ); exit( 1 ); @@ -1720,9 +2056,12 @@ function add_action( $tag, $callback, $priority = 10, $accepted_args = 1 ) {{ add_filter( $tag, $callback, $priority, $accepted_args ); }} function __return_false() {{ return false; }} +function is_front_page() {{ return true; }} class WP_Error {{ public function __construct( $code, $message ) {{}} }} +define( 'WP_HOME', 'http://local.test' ); +define( 'WPCOW_REMOTE_URL', 'https://remote.test' ); putenv( 'WPCOW_PLUGIN_MODE=auto' ); putenv( 'WPCOW_ENABLE_PLUGINS=0' ); putenv( 'WPCOW_PLUGIN_POLICY_FILE={policy}' ); @@ -1743,6 +2082,19 @@ if ( $sitewide !== array( 'woocommerce/woocommerce.php' => 2 ) ) {{ fwrite( STDERR, 'unexpected sitewide plugin filter: ' . json_encode( $sitewide ) . PHP_EOL ); exit( 1 ); }} +$item = (object) array( 'url' => 'http://remote.test/' ); +$classes = call_user_func( $filters['nav_menu_css_class'], array( 'menu-item' ), $item ); +foreach ( array( 'current-menu-item', 'current_page_item', 'menu-item-home', 'nv-active' ) as $expected_class ) {{ + if ( ! in_array( $expected_class, $classes, true ) ) {{ + fwrite( STDERR, 'missing active nav class: ' . $expected_class . ' from ' . json_encode( $classes ) . PHP_EOL ); + exit( 1 ); + }} +}} +$atts = call_user_func( $filters['nav_menu_link_attributes'], array( 'href' => 'http://remote.test/' ), $item ); +if ( 'http://local.test/' !== $atts['href'] || 'page' !== $atts['aria-current'] ) {{ + fwrite( STDERR, 'remote home nav link was not localized/current: ' . json_encode( $atts ) . 
PHP_EOL ); + exit( 1 ); +}} putenv( 'WPCOW_PLUGIN_MODE=full' ); $full = call_user_func( $filters['option_active_plugins'], From b2b690e30411463280ad64649bd4da8005fe4272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 6 May 2026 23:46:40 +0200 Subject: [PATCH 39/39] Localize frontend content links --- experiments/remote-wp-cow/src/generate.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/experiments/remote-wp-cow/src/generate.rs b/experiments/remote-wp-cow/src/generate.rs index 6a5bc21a..86ab50fc 100644 --- a/experiments/remote-wp-cow/src/generate.rs +++ b/experiments/remote-wp-cow/src/generate.rs @@ -768,6 +768,18 @@ function wp_cow_is_remote_or_local_home_url( $url ) { return false; } +function wp_cow_localize_remote_urls_in_text( $text ) { + $remote = wp_cow_remote_home_url(); + $local = wp_cow_local_home_url(); + if ( '' === $remote || '' === $local || false === strpos( (string) $text, '://' ) ) { + return $text; + } + foreach ( wp_cow_url_variants( $remote ) as $variant ) { + $text = str_replace( $variant, $local, (string) $text ); + } + return $text; +} + function wp_cow_filter_active_plugins( $plugins ) { $mode = wp_cow_plugin_mode(); if ( in_array( $mode, array( 'full', 'on', 'enabled', '1', 'true', 'yes' ), true ) ) { @@ -1181,6 +1193,7 @@ function wp_cow_siteground_lazyload_content_images( $html ) { } add_filter( 'the_content', 'wp_cow_siteground_lazyload_content_images', PHP_INT_MAX ); +add_filter( 'the_content', 'wp_cow_localize_remote_urls_in_text', PHP_INT_MAX - 1 ); add_action( 'wp_enqueue_scripts', 'wp_cow_enqueue_siteground_lazysizes', 1 ); function wp_cow_local_asset_http_response( $url ) { @@ -1830,6 +1843,7 @@ if ( cow_cached_remote_read_is_safe_without_control( array( 'wp_options' ) ) ) { assert!(php.contains("wp_cow_allowed_plugins")); assert!(php.contains("$quarantined")); assert!(php.contains("wp_cow_rewrite_remote_url_to_local")); + 
assert!(php.contains("wp_cow_localize_remote_urls_in_text")); assert!(php.contains("nav_menu_link_attributes")); assert!(php.contains("nav_menu_css_class")); assert!(php.contains("wp_enqueue_emoji_styles")); @@ -1978,11 +1992,12 @@ define( 'ABSPATH', '{docroot}' . '/' ); fwrite( STDERR, 'local SG cache generation was not disabled' . PHP_EOL ); exit( 1 ); }} - $content = '
'; + $content = '

Browse

'; $lazy_content = apply_test_filter( 'the_content', $content ); if ( false === strpos( $lazy_content, 'src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"' ) || false === strpos( $lazy_content, 'data-src="https://local.test/wp-content/uploads/2019/12/photo.jpg"' ) || + false === strpos( $lazy_content, 'href="https://local.test/adventures/"' ) || false === strpos( $lazy_content, 'class="wp-image-45 lazyload"' ) || false !== strpos( $lazy_content, 'width="600"' ) || false !== strpos( $lazy_content, 'srcset=' )