diff --git a/Cargo.lock b/Cargo.lock index e9ee88f3..b5eef97a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,7 +40,7 @@ dependencies = [ "flate2", "foldhash 0.1.5", "futures-core", - "h2", + "h2 0.3.27", "http 0.2.12", "httparse", "httpdate", @@ -629,7 +629,7 @@ dependencies = [ "derive_more 2.1.1", "futures-core", "futures-util", - "h2", + "h2 0.3.27", "http 0.2.12", "itoa", "log", @@ -1994,6 +1994,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "handlebars" version = "6.4.0" @@ -2119,6 +2138,29 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + [[package]] name = "httparse" version = "1.10.1" @@ -2142,7 +2184,7 @@ dependencies = [ "futures-core", "futures-util", "http 0.2.12", - "http-body", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -2154,6 +2196,64 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + 
"futures-channel", + "futures-core", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 1.8.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.3", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -2340,6 +2440,22 @@ dependencies = [ "serde_core", ] +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -2355,6 +2471,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -2465,13 +2590,13 @@ dependencies = [ "bytes", "futures", "http 0.2.12", - 
"hyper", + "hyper 0.14.32", "lambda_runtime_api_client", "serde", "serde_json", "tokio", "tokio-stream", - "tower", + "tower 0.4.13", "tracing", ] @@ -2482,7 +2607,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7210012be904051520f0dc502140ba599bae3042b65b3737b87727f1aa88a7d6" dependencies = [ "http 0.2.12", - "hyper", + "hyper 0.14.32", "tokio", "tower-service", ] @@ -2637,6 +2762,15 @@ dependencies = [ "unicode-id", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "md-5" version = "0.10.6" @@ -2697,6 +2831,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "mutually_exclusive_features" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94e1e6445d314f972ff7395df2de295fe51b71821694f0b0e1e79c4f12c8577" + [[package]] name = "ndk" version = "0.9.0" @@ -2737,6 +2877,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -3120,7 +3269,7 @@ dependencies = [ "ed25519-dalek", "hmac", "http 1.4.0", - "itertools", + "itertools 0.10.5", "log", "oauth2", "p256", @@ -3145,6 +3294,82 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + 
"futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "opentelemetry-http" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +dependencies = [ + "http 1.4.0", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.18", + "tokio", + "tonic", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.2", + "thiserror 2.0.18", + "tokio", + "tokio-stream", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -3356,6 +3581,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "piper" version = 
"0.2.5" @@ -3490,6 +3721,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quote" version = "1.0.45" @@ -3699,6 +3953,40 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower 0.5.3", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "rfc6979" version = "0.4.0" @@ -4163,6 +4451,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -4290,6 +4587,9 @@ dependencies = [ "mime_guess", "odbc-sys 0.29.0", "openidconnect", + "opentelemetry", + 
"opentelemetry-otlp", + "opentelemetry_sdk", "password-hash", "percent-encoding", "rand 0.10.0", @@ -4304,6 +4604,11 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", + "tracing", + "tracing-actix-web", + "tracing-log", + "tracing-opentelemetry", + "tracing-subscriber", ] [[package]] @@ -4473,6 +4778,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -4537,6 +4851,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.47" @@ -4717,6 +5040,43 @@ dependencies = [ "winnow", ] +[[package]] +name = "tonic" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "sync_wrapper", + "tokio", + "tokio-stream", + "tower 0.5.3", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-prost" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.4.13" @@ -4732,6 +5092,43 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 2.13.0", + "pin-project-lite", + "slab", + "sync_wrapper", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags 2.11.0", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower 0.5.3", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.3" @@ -4756,6 +5153,21 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-actix-web" +version = "0.7.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ca6b15407f9bfcb35f82d0e79e603e1629ece4e91cc6d9e58f890c184dd20af" +dependencies = [ + "actix-web", + "mutually_exclusive_features", + "opentelemetry", + "pin-project", + "tracing", + "tracing-opentelemetry", + "uuid", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -4774,6 +5186,52 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-opentelemetry" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" +dependencies = [ + "js-sys", + 
"opentelemetry", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -4897,10 +5355,17 @@ version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 64d3b1a2..83d26619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,16 @@ openidconnect = { version = "4.0.0", default-features = false, features = ["acce encoding_rs = "0.8.35" odbc-sys = { version = "0.29.0", optional = true } +# OpenTelemetry / tracing +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["registry", "env-filter"] } +tracing-opentelemetry = "0.32" +tracing-actix-web = { version = "0.7", default-features = false, features = ["opentelemetry_0_31"] } +tracing-log = "0.2" +opentelemetry = "0.31" +opentelemetry_sdk = { version = "0.31", features = ["rt-tokio-current-thread"] } +opentelemetry-otlp = { version = "0.31", features = ["http-proto", "grpc-tonic"] } + [features] default = [] diff --git a/Dockerfile b/Dockerfile index a6ceb323..e4a6481b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,8 @@ FROM --platform=$BUILDPLATFORM rust:1.91-slim AS builder WORKDIR /usr/src/sqlpage ARG 
TARGETARCH ARG BUILDARCH +ARG CARGO_PROFILE=superoptimized +ENV CARGO_PROFILE=$CARGO_PROFILE COPY scripts/ /usr/local/bin/ RUN cargo init . diff --git a/examples/official-site/sqlpage/migrations/73_blog_tracing.sql b/examples/official-site/sqlpage/migrations/73_blog_tracing.sql new file mode 100644 index 00000000..233c65ed --- /dev/null +++ b/examples/official-site/sqlpage/migrations/73_blog_tracing.sql @@ -0,0 +1,174 @@ +INSERT INTO blog_posts (title, description, icon, created_at, content) +VALUES + ( + 'Tracing SQLPage with OpenTelemetry and Grafana', + 'How to inspect requests, SQL queries, and database wait time with distributed tracing', + 'route-2', + '2026-03-09', + ' +# Tracing SQLPage with OpenTelemetry and Grafana + +When a page is slow, a log line telling you that the request took 1.8 seconds is only the start of the investigation. What you usually want to know next is where that time went: + +- Did the request wait for a database connection? +- Which SQL file was executed? +- Which query took the longest? +- Did the delay start in SQLPage, in the reverse proxy, or in the database? + +SQLPage now supports [OpenTelemetry](https://opentelemetry.io/), the standard way to emit distributed traces. Combined with Grafana, Tempo, Loki, and an OpenTelemetry collector, this gives you a detailed timeline of each request and lets you jump directly from logs to traces. + +If you want a ready-to-run demo, see the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/telemetry), which this article is based on. + +## What tracing gives you + +With tracing enabled, one HTTP request becomes a tree of timed operations called spans. 
In a typical SQLPage app, you will see something like: + +```text +[nginx] GET /todos + └─ [sqlpage] GET /todos + └─ [sqlpage] SQL website/todos.sql + ├─ db.pool.acquire + └─ db.query +``` + +This is immediately useful for: + +- Debugging slow pages by seeing exactly which query consumed the time +- Detecting connection pool pressure by measuring time spent in `db.pool.acquire` +- Following one request end-to-end from the reverse proxy to SQLPage to PostgreSQL + +Tracing is especially helpful in SQLPage because one request often maps cleanly to one SQL file. That makes traces easy to interpret even when you are not used to application performance tooling. + +## The easiest way to try it + +The simplest way to explore tracing is to run the example shipped with SQLPage: + +```bash +cd examples/telemetry +docker compose up --build +``` + +That stack starts: + +- nginx as the reverse proxy +- SQLPage +- PostgreSQL +- an OpenTelemetry collector +- Grafana Tempo for traces +- Grafana Loki for logs +- Prometheus for PostgreSQL metrics +- Grafana for visualization + +Then: + +1. Open [http://localhost](http://localhost) and use the sample todo application. +2. Open [http://localhost:3000](http://localhost:3000) to access Grafana. +3. Inspect recent traces and logs from the default dashboard. +4. Open a trace to see the full span waterfall for a single request. + +This setup is useful both as a demo and as a reference architecture for production deployments. + +## Enabling tracing in SQLPage + +Tracing is built into SQLPage. There is no plugin to install and no SQLPage-specific tracing configuration file to write. + +You only need to set standard OpenTelemetry environment variables before starting SQLPage: + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" +export OTEL_SERVICE_NAME="sqlpage" +sqlpage +``` + +The `OTEL_EXPORTER_OTLP_ENDPOINT` variable tells SQLPage where to send traces. 
+The `OTEL_SERVICE_NAME` variable controls how the service appears in your tracing backend. + +If `OTEL_EXPORTER_OTLP_ENDPOINT` is not set, SQLPage falls back to normal logging and tracing stays disabled. + +## What you will see in the trace + +The most useful spans emitted by SQLPage are: + +- The HTTP request span, with attributes such as method, path, status code, and user agent +- The SQL file execution span, showing which `.sql` file handled the request +- The `db.pool.acquire` span, showing time spent waiting for a database connection +- The `db.query` span, containing the SQL statement and database system + +In practice, that means you can answer questions like: + +- Is the page slow because the SQL itself is slow? +- Is the request queued because the connection pool is exhausted? +- Is the delay happening before SQLPage even receives the request? + +This is much more actionable than a single request duration number. + +## Logs and traces together + +Tracing is even more useful when logs and traces are connected. + +In the example stack, SQLPage writes structured logs to stdout, the OpenTelemetry collector receives them through Docker''s syslog logging driver and forwards them to Loki, and Grafana lets you move from a log line to the matching trace using the trace id. This makes it possible to start from an error log and immediately inspect the full request timeline. + +That workflow is often the difference between guessing and knowing. + +## PostgreSQL correlation + +SQLPage also propagates trace context to PostgreSQL through the connection `application_name`. +This makes it possible to correlate live PostgreSQL activity or database logs with the trace that triggered it. + +For example, inspecting `pg_stat_activity` can show which trace is attached to a running query: + +```sql +SELECT application_name, query, state +FROM pg_stat_activity; +``` + +If you also include `%a` in PostgreSQL''s `log_line_prefix`, your database logs can contain the same trace context. 
+ +## A practical debugging example + +Suppose users report that a page occasionally becomes slow under load. + +With tracing enabled, you might see that: + +- the HTTP span is long +- the SQL file execution span is also long +- the `db.query` span is short +- but `db.pool.acquire` takes several hundred milliseconds + +That immediately tells you the database query itself is not the problem. The real issue is contention on the connection pool. You can then increase `max_database_pool_connections`, reduce concurrent load, or review long-running requests that keep connections busy. + +Without tracing, this kind of diagnosis usually requires guesswork. + +## Deployment options + +The example uses Grafana Tempo and Loki, but SQLPage is not tied to a single backend. Because it emits standard OTLP traces, you can also send data to: + +- Jaeger +- Grafana Cloud +- Datadog +- Honeycomb +- New Relic +- Axiom + +In small setups, SQLPage can often send traces directly to the backend. In larger deployments, placing an OpenTelemetry collector in the middle is usually better because it centralizes routing, batching, and authentication. + +## When to enable tracing + +Tracing is particularly valuable when: + +- you are running SQLPage behind a reverse proxy +- several SQL files participate in user-facing workflows +- you want to understand production latency, not just local development behavior +- you need a shared debugging tool for developers and operators + +If your application is already important enough to monitor, it is important enough to trace. + +## Conclusion + +SQLPage already makes the application logic easy to inspect because it lives in SQL files. Tracing extends that visibility to runtime behavior. + +By enabling OpenTelemetry and connecting SQLPage to Grafana, you can see not just that a request was slow, but why it was slow, where the time was spent, and which query or resource caused the delay. 
+ +For a complete working setup, start with the [OpenTelemetry + Grafana example](https://github.com/sqlpage/SQLPage/tree/main/examples/telemetry) and adapt it to your own deployment. +' + ); diff --git a/examples/telemetry/README.md b/examples/telemetry/README.md new file mode 100644 index 00000000..792e12eb --- /dev/null +++ b/examples/telemetry/README.md @@ -0,0 +1,413 @@ +# Distributed Tracing and Logs for SQLPage with OpenTelemetry and Grafana + +SQLPage has built-in support for [OpenTelemetry](https://opentelemetry.io/) (OTel), +an open standard for collecting traces, metrics, and logs from your applications. +When enabled, every HTTP request to SQLPage produces a **trace** — a timeline of +everything that happened to serve that request, from receiving it to querying the +database and rendering the response. SQLPage also emits structured request-aware +logs, which this example forwards to Grafana Loki so you can inspect logs and traces +side by side. + +This is useful for: + +- **Debugging slow pages**: see exactly which SQL query is taking the longest. +- **Diagnosing connection pool exhaustion**: see how long requests wait for a database connection. +- **End-to-end visibility**: follow a single user request from your reverse proxy (nginx, Caddy, etc.) + through SQLPage and into PostgreSQL. + +## Quick start (this example) + +This directory contains a ready-to-run Docker Compose stack that demonstrates +the full tracing, logging, and PostgreSQL metrics pipeline. No prior +OpenTelemetry experience is needed. + +### Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) and + [Docker Compose](https://docs.docker.com/compose/install/) installed on your machine. 
+ +### Run + +```bash +cd examples/telemetry +docker compose up --build +``` + +This starts eight services: + +| Service | Role | Port | +|------------------|-----------------------------------------------------------|---------------| +| **nginx** | Reverse proxy, creates the root trace span | `localhost:80` | +| **SQLPage** | Your application, sends traces to the collector | (internal 8080) | +| **PostgreSQL** | Database | (internal 5432) | +| **Prometheus** | Stores PostgreSQL metrics scraped from the OTel Collector | (internal 9090) | +| **Tempo** | Trace storage backend | (internal 3200) | +| **Loki** | Log storage backend | (internal 3100) | +| **OTel Collector** | Receives traces, PostgreSQL metrics, and SQLPage logs | `localhost:4318`, `localhost:1514` | +| **Grafana** | Web UI to explore traces and logs | `localhost:3000` | + +### Explore traces and logs + +1. Open the todo app at [http://localhost](http://localhost) — add a few items, click to toggle them. +2. Open Grafana at [http://localhost:3000](http://localhost:3000). +3. The default home dashboard now shows recent traces, recent SQLPage logs, and PostgreSQL metrics. +4. Click any trace ID in the trace table to see the full span waterfall. +5. In the logs panel, click a `trace_id` derived field to jump straight to the matching trace. +6. The PostgreSQL metrics panels are populated by the collector's `postgresqlreceiver`. +7. In the left sidebar, click **Explore** (compass icon) if you want to search manually. +8. Select **Tempo** to search traces, **Loki** to search logs, or **Prometheus** to query metrics. + +### What you will see in a trace + +Each HTTP request produces a tree of **spans** (timed operations): + +``` +[nginx] GET /todos ← root span (created by nginx) + └─ [sqlpage] GET /todos ← HTTP request span + └─ [sqlpage] SQL website/todos.sql ← SQL file execution + ├─ db.pool.acquire ← time waiting for a DB connection + └─ db.query ← the actual SQL query + db.statement = "SELECT title, ..." 
+ db.system = "PostgreSQL" +``` + +Key attributes on each span: + +| Span | Key attributes | +|---------------------|--------------------------------------------------------------| +| HTTP request | `http.method`, `http.target`, `http.status_code`, `http.user_agent` | +| SQL file execution | `sqlpage.file` — which `.sql` file was executed | +| `db.pool.acquire` | `db.pool.size` — current pool size when acquiring | +| `db.query` | `db.statement` — the full SQL text; `db.system` — database type | + +### What you will see in the logs + +SQLPage writes one structured log line per event, for example: + +```text +ts=2026-03-08T20:56:15.000Z level=info target=sqlpage::webserver::http msg="request completed" method=GET path=/ trace_id=4f2d... +``` + +The OpenTelemetry Collector receives these SQLPage container logs through Docker's syslog +logging driver and forwards them to Loki. +The homepage dashboard filters to the `sqlpage` service so you can see request logs update +live while you use the sample app. + +### PostgreSQL correlation and explain plans + +SQLPage automatically sets the +[`application_name`](https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-APPLICATION-NAME) +on each database connection to include the W3C +[traceparent](https://www.w3.org/TR/trace-context/#traceparent-header). +This means you can: + +- See trace IDs in `pg_stat_activity` when monitoring live queries: + ```sql + SELECT application_name, query, state FROM pg_stat_activity; + -- application_name: sqlpage 00-abc123...-def456...-01 + ``` +- Include trace IDs in PostgreSQL logs by adding `%a` to + [`log_line_prefix`](https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-LOG-LINE-PREFIX). + +This example also enables PostgreSQL's +[`auto_explain`](https://www.postgresql.org/docs/current/auto-explain.html) +extension for queries slower than 25 ms. 
The plans are logged in JSON and keep +the SQLPage trace context in the `app=[...]` prefix, so Grafana's Loki +`trace_id` derived field links each slow-query plan back to the originating +SQLPage trace. + +### Testing pool pressure + +To simulate database connection pool exhaustion (a common production issue), +reduce the pool size to 1 in `sqlpage/sqlpage.json`: + +```json +{ + "listen_on": "0.0.0.0:8080", + "max_database_pool_connections": 1 +} +``` + +Restart (`docker compose restart sqlpage`), then open several browser tabs +to `http://localhost` simultaneously. In Grafana, you will see `db.pool.acquire` +spans with longer durations as requests queue up waiting for the single connection. + +--- + +## How it works + +### Enabling tracing in SQLPage + +Tracing is **built into SQLPage** — there is nothing to install or compile. +It activates automatically when you set the `OTEL_EXPORTER_OTLP_ENDPOINT` +environment variable. When this variable is not set, SQLPage behaves exactly +as before (plain text logs, no tracing overhead). + +**Minimal setup — just two environment variables:** + +```bash +# Where to send traces (an OTLP-compatible endpoint) +export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" + +# A name to identify this service in traces +export OTEL_SERVICE_NAME="sqlpage" + +# Now start SQLPage as usual +sqlpage +``` + +These are [standard OpenTelemetry environment variables](https://opentelemetry.io/docs/specs/otel/protocol/exporter/) +understood by all OTel-compatible tools. SQLPage reads them directly — no +`sqlpage.json` configuration is needed for tracing. + +### The role of each component + +**OpenTelemetry** is a standard, not a product. It defines a protocol (OTLP) for +sending trace data. 
Here is how the pieces fit together: + +``` + Traces: SQLPage -> OTel Collector -> Tempo -> Grafana + Logs: SQLPage -> Docker syslog logging driver -> OTel Collector -> Loki -> Grafana + Metrics: PostgreSQL -> OTel Collector postgresqlreceiver -> Prometheus -> Grafana +``` + +- **SQLPage** generates trace data and sends it via the OTLP HTTP protocol. +- A **collector** (optional) receives traces and forwards them to one or more backends. + Useful for buffering, sampling, or fanning out to multiple destinations. + You can skip the collector and send directly from SQLPage to most backends. +- The **OTel Collector** also receives SQLPage container logs and forwards them to Loki. +- **Tempo** stores traces, **Loki** stores logs, and **Grafana** lets you search both. + +### Trace context propagation + +When a reverse proxy (like nginx) sits in front of SQLPage, you want the trace +to start at nginx and continue into SQLPage as a single, connected trace. +This works via the +[W3C Trace Context](https://www.w3.org/TR/trace-context/) standard: +nginx adds a `traceparent` HTTP header to the request it forwards to SQLPage, +and SQLPage reads it to continue the same trace. + +Most modern reverse proxies and load balancers support this. +For nginx specifically, use the [`ngx_otel_module`](https://nginx.org/en/docs/ngx_otel_module.html) +(included in the `nginx:otel` Docker image). + +--- + +## Setup guides by deployment scenario + +### Self-hosted with Grafana Tempo and Loki + +This is what the Docker Compose example in this directory uses. +[Grafana Tempo](https://grafana.com/oss/tempo/) is a free, open-source trace backend, and +[Grafana Loki](https://grafana.com/oss/loki/) is the corresponding log backend. + +**Components:** +- [Grafana Tempo](https://grafana.com/docs/tempo/latest/) stores the traces. +- [Grafana Loki](https://grafana.com/docs/loki/latest/) stores the logs. +- [Grafana](https://grafana.com/docs/grafana/latest/) provides the web UI. 
+- An [OTel Collector](https://opentelemetry.io/docs/collector/) receives SQLPage traces, + SQLPage logs, and PostgreSQL metrics in this example. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://<collector-host>:4318 +OTEL_SERVICE_NAME=sqlpage +``` + +**Links:** +- [Tempo installation guide](https://grafana.com/docs/tempo/latest/setup/) +- [OTel Collector installation](https://opentelemetry.io/docs/collector/installation/) + +### Self-hosted with Jaeger + +[Jaeger](https://www.jaegertracing.io/) is another popular open-source tracing +backend. Version 2+ natively accepts OTLP — no collector needed. + +**Start Jaeger with one command:** + +```bash +docker run -d --name jaeger \ + -p 16686:16686 \ + -p 4317:4317 \ + -p 4318:4318 \ + jaegertracing/jaeger:latest +``` + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +OTEL_SERVICE_NAME=sqlpage +``` + +Open the Jaeger UI at [http://localhost:16686](http://localhost:16686) to explore traces. + +**Links:** +- [Jaeger getting started](https://www.jaegertracing.io/docs/latest/getting-started/) + +### Grafana Cloud + +[Grafana Cloud](https://grafana.com/products/cloud/) has a free tier that +includes trace storage. SQLPage can send traces directly — no collector needed. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-<region>.grafana.net/otlp +OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic <base64-token>" +OTEL_SERVICE_NAME=sqlpage +``` + +Replace: +- `<region>` with your Grafana Cloud region (e.g., `us-east-0`, `eu-west-2`). + Find it in your Grafana Cloud portal under **My Account** > **Tempo**. +- `<base64-token>` with the Base64 encoding of + `<instance-id>:<api-token>`. Generate a token in your Grafana Cloud + portal under **My Account** > **API Keys**. 
 + + On macOS/Linux, generate the Base64 value with: + ```bash + echo -n "123456:glc_your_token_here" | base64 + ``` + +**Links:** +- [Send data via OTLP to Grafana Cloud](https://grafana.com/docs/grafana-cloud/send-data/otlp/send-data-otlp/) + +### Datadog + +[Datadog](https://www.datadoghq.com/) supports OTLP ingestion through the +Datadog Agent. + +**1. Run the Datadog Agent** with OTLP ingest enabled: + +```bash +docker run -d --name datadog-agent \ + -e DD_API_KEY=<your-api-key> \ + -e DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT=0.0.0.0:4318 \ + -e DD_SITE=datadoghq.com \ + -p 4318:4318 \ + gcr.io/datadoghq/agent:latest +``` + +**2. Point SQLPage to the Agent:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +OTEL_SERVICE_NAME=sqlpage +``` + +Traces appear in the Datadog **APM > Traces** section. + +**Links:** +- [OTLP ingestion in the Datadog Agent](https://docs.datadoghq.com/opentelemetry/setup/otlp_ingest_in_the_agent/) + +### Honeycomb + +[Honeycomb](https://www.honeycomb.io/) accepts OTLP directly — no collector needed. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io +OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=<your-api-key>" +OTEL_SERVICE_NAME=sqlpage +``` + +For the EU region, use `https://api.eu1.honeycomb.io` instead. + +**Links:** +- [Send data with OpenTelemetry — Honeycomb docs](https://docs.honeycomb.io/send-data/opentelemetry/) + +### New Relic + +[New Relic](https://newrelic.com/) accepts OTLP directly. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net +OTEL_EXPORTER_OTLP_HEADERS="api-key=<your-license-key>" +OTEL_SERVICE_NAME=sqlpage +``` + +For the EU region, use `https://otlp.eu01.nr-data.net` instead. + +Find your Ingest License Key in the New Relic UI under +**API Keys** (type: `INGEST - LICENSE`). 
 + +**Links:** +- [New Relic OTLP endpoint configuration](https://docs.newrelic.com/docs/opentelemetry/best-practices/opentelemetry-otlp/) + +### Axiom + +[Axiom](https://axiom.co/) accepts OTLP directly. + +**SQLPage environment variables:** + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=https://api.axiom.co +OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer <api-token>,X-Axiom-Dataset=<dataset-name>" +OTEL_SERVICE_NAME=sqlpage +``` + +**Links:** +- [Send OpenTelemetry data to Axiom](https://axiom.co/docs/send-data/opentelemetry) + +--- + +## Environment variable reference + +These are [standard OpenTelemetry variables](https://opentelemetry.io/docs/specs/otel/protocol/exporter/), +not specific to SQLPage. + +| Variable | Required? | Description | Example | +|-----------------------------------|-----------|------------------------------------------------|--------------------------------------| +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Yes | Base URL of the OTLP receiver | `http://localhost:4318` | +| `OTEL_SERVICE_NAME` | No | Service name shown in traces (default: `unknown_service`) | `sqlpage` | +| `OTEL_EXPORTER_OTLP_HEADERS` | No | Comma-separated `key=value` pairs for auth headers | `api-key=abc123` | +| `OTEL_EXPORTER_OTLP_PROTOCOL` | No | Protocol (default: `http/protobuf`) | `http/protobuf` | +| `RUST_LOG` | No | Filter which spans/logs are emitted | `sqlpage=debug,tracing_actix_web=info` | + +When `OTEL_EXPORTER_OTLP_ENDPOINT` is **not set**, SQLPage uses plain text +logging only (same behavior as versions before tracing support was added). + +--- + +## Troubleshooting + +### No traces appear + +1. **Check that SQLPage sees the endpoint.** Look for this line in the startup logs: + ``` + OpenTelemetry tracing enabled (OTEL_EXPORTER_OTLP_ENDPOINT is set) + ``` + If you don't see it, the environment variable is not reaching SQLPage. + +2. 
**Check that the collector/backend is reachable.** From the SQLPage host, try: + ```bash + curl -v http://<collector-host>:4318/v1/traces + ``` + You should get a response (even if it's an error like "no data"), not a connection refused. + +3. **Check the collector logs** for export errors (e.g., authentication failures). + +### Traces are disconnected (nginx and SQLPage show as separate traces) + +This means the `traceparent` header is not being propagated. Check that: + +- Your reverse proxy is configured to inject/propagate the `traceparent` header. +- For nginx, you need the `ngx_otel_module` with `otel_trace_context propagate` + in the location block. Setting `otel_span_name "$request_method $uri"` also keeps + the nginx span name aligned with the actual request path. See the `nginx/nginx.conf` + in this example. + +### Spans are missing (e.g., no `db.query` spans) + +The `RUST_LOG` / `OTEL_LOG_LEVEL` filter might be too restrictive. +SQLPage emits spans at the `INFO` level by default. Make sure your filter +includes `sqlpage=info`: + +```bash +RUST_LOG="sqlpage=info,actix_web=info,tracing_actix_web=info" +``` diff --git a/examples/telemetry/docker-compose.yml b/examples/telemetry/docker-compose.yml new file mode 100644 index 00000000..e5883deb --- /dev/null +++ b/examples/telemetry/docker-compose.yml @@ -0,0 +1,157 @@ +services: + nginx: + image: nginx:otel + ports: + - "80:80" + volumes: + - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro + depends_on: + - sqlpage + - otel-collector + logging: + driver: syslog + options: + syslog-address: "udp://localhost:1516" + syslog-format: "rfc5424micro" + tag: "nginx" + + sqlpage: + build: + context: ../.. 
+ args: + CARGO_PROFILE: release + environment: + - DATABASE_URL=postgres://sqlpage:sqlpage@postgres:5432/sqlpage + - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 + - OTEL_SERVICE_NAME=sqlpage + volumes: + - ./website:/var/www + - ./sqlpage:/etc/sqlpage + depends_on: + otel-collector: + condition: service_started + postgres: + condition: service_healthy + logging: + driver: syslog + options: + syslog-address: "udp://localhost:1514" + syslog-format: "rfc5424micro" + tag: "sqlpage" + + postgres: + image: postgres:16 + command: + - postgres + - -c + - shared_preload_libraries=pg_stat_statements,auto_explain + - -c + - pg_stat_statements.track_utility=off + - -c + - logging_collector=on + - -c + - log_destination=jsonlog + - -c + - log_directory=/var/log/postgresql + - -c + - log_filename=postgresql + - -c + - log_file_mode=0644 + - -c + - log_rotation_age=0 + - -c + - log_rotation_size=0 + - -c + - log_line_prefix=ts=%m pid=%p db=%d user=%u app=[%a] + - -c + - log_min_messages=info + - -c + - log_error_verbosity=verbose + - -c + - log_connections=on + - -c + - log_disconnections=on + - -c + - log_duration=on + - -c + - log_statement=all + - -c + - log_min_duration_statement=25 + - -c + - auto_explain.log_min_duration=25ms + - -c + - auto_explain.log_analyze=on + - -c + - auto_explain.log_buffers=on + - -c + - auto_explain.log_timing=on + - -c + - auto_explain.log_verbose=on + - -c + - auto_explain.log_format=json + environment: + POSTGRES_USER: sqlpage + POSTGRES_PASSWORD: sqlpage + POSTGRES_DB: sqlpage + volumes: + - ./postgres-init:/docker-entrypoint-initdb.d:ro + - postgres-logs:/var/log/postgresql + healthcheck: + test: ["CMD-SHELL", "pg_isready -U sqlpage"] + interval: 2s + timeout: 5s + retries: 5 + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + user: "0:0" + volumes: + - ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml:ro + - postgres-logs:/var/log/postgresql:ro + ports: + - "4317:4317" + - "4318:4318" + - "1514:1514/udp" 
+ - "1516:1516/udp" + depends_on: + - tempo + - postgres + + prometheus: + image: prom/prometheus:v3.2.1 + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-otlp-receiver + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + + tempo: + image: grafana/tempo:2.6.1 + volumes: + - ./tempo.yaml:/etc/tempo/config.yaml:ro + command: ["-config.file=/etc/tempo/config.yaml"] + + loki: + image: grafana/loki:3.4.1 + command: ["-config.file=/etc/loki/local-config.yaml"] + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/etc/grafana/provisioning/dashboards/sqlpage/sqlpage-home.json + - GF_NEWS_NEWS_FEED_ENABLED=false + volumes: + - ./grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro + - ./grafana/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro + - ./grafana:/etc/grafana/provisioning/dashboards/sqlpage:ro + depends_on: + - tempo + - loki + - prometheus + +volumes: + postgres-logs: diff --git a/examples/telemetry/grafana/dashboards.yaml b/examples/telemetry/grafana/dashboards.yaml new file mode 100644 index 00000000..9c6d001e --- /dev/null +++ b/examples/telemetry/grafana/dashboards.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: SQLPage Tracing + orgId: 1 + folder: "" + type: file + disableDeletion: true + updateIntervalSeconds: 30 + allowUiUpdates: false + options: + path: /etc/grafana/provisioning/dashboards/sqlpage diff --git a/examples/telemetry/grafana/datasources.yaml b/examples/telemetry/grafana/datasources.yaml new file mode 100644 index 00000000..6b7fcba7 --- /dev/null +++ b/examples/telemetry/grafana/datasources.yaml @@ -0,0 +1,34 @@ +apiVersion: 1 + +datasources: + - name: Tempo + type: tempo + uid: tempo + access: proxy + url: http://tempo:3200 + isDefault: true + jsonData: + 
nodeGraph: + enabled: true + tracesToLogsV2: + datasourceUid: loki + spanStartTimeShift: "-5m" + spanEndTimeShift: "5m" + customQuery: true + query: '{service_name=~"nginx|sqlpage|postgresql"} | trace_id="$${__span.traceId}"' + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki:3100 + jsonData: + derivedFields: + - name: trace_id + matcherRegex: '(?:trace_id=|00-)([0-9a-f]{32})' + datasourceUid: tempo + url: '$${__value.raw}' + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://prometheus:9090 diff --git a/examples/telemetry/grafana/sqlpage-home.json b/examples/telemetry/grafana/sqlpage-home.json new file mode 100644 index 00000000..6c19ad36 --- /dev/null +++ b/examples/telemetry/grafana/sqlpage-home.json @@ -0,0 +1,444 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Recent SQLPage traces, logs, and PostgreSQL metrics

Open http://localhost and interact with the app. New requests will appear here automatically.

The trace table shows recent requests. Click any trace ID to open the full span waterfall in Grafana. PostgreSQL slow-query explain plans appear in the PostgreSQL Logs panel and link back to the same trace via the extracted trace ID. The metrics panels come from the OpenTelemetry PostgreSQL receiver via Prometheus.

", + "mode": "html" + }, + "pluginVersion": "12.4.0", + "title": "Overview", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(postgresql_backends)", + "refId": "A" + } + ], + "title": "PostgreSQL Backends", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(postgresql_db_size) by (postgresql_database_name)", + "legendFormat": "{{postgresql_database_name}}", + "refId": "A" + } + ], + "title": "PostgreSQL Database Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "traceID" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "startTime" + }, + "properties": [ + { + "id": "custom.width", + "value": 190 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "traceService" + }, + "properties": [ + { + "id": "custom.width", + "value": 140 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "traceDuration" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "nested" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 2, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Start time" + } + ] + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "tempo" + }, + "limit": 50, + "query": 
"{resource.service.name != nil}", + "queryType": "traceqlSearch", + "refId": "A", + "tableType": "traces" + } + ], + "timeFrom": "1h", + "title": "Trace List", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "nested": true + }, + "indexByName": { + "startTime": 1, + "traceDuration": 4, + "traceID": 0, + "traceName": 3, + "traceService": 2 + }, + "renameByName": { + "startTime": "Start time", + "traceDuration": "Duration", + "traceID": "Trace ID", + "traceName": "Route", + "traceService": "Service" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 3, + "options": { + "dedupStrategy": "none", + "enableInfiniteScrolling": false, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": true, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "direction": "backward", + "editorMode": "builder", + "expr": "{service_name=\"sqlpage\"}", + "queryType": "range", + "refId": "A" + } + ], + "timeFrom": "1h", + "title": "SQLPage Logs", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 6, + "options": { + "dedupStrategy": "none", + "enableInfiniteScrolling": false, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": true, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "direction": "backward", + "editorMode": "builder", + "expr": "{service_name=\"postgresql\"}", + "queryType": "range", + "refId": "A" + } + ], + "timeFrom": "1h", + "title": "PostgreSQL Logs", + "type": "logs" + } + ], + "refresh": "5s", + 
"schemaVersion": 41, + "style": "dark", + "tags": ["sqlpage", "tracing", "logs"], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "SQLPage Observability Home", + "uid": "sqlpage-tracing-home", + "version": 5 +} diff --git a/examples/telemetry/nginx/nginx.conf b/examples/telemetry/nginx/nginx.conf new file mode 100644 index 00000000..bbedcb09 --- /dev/null +++ b/examples/telemetry/nginx/nginx.conf @@ -0,0 +1,50 @@ +load_module modules/ngx_otel_module.so; + +events {} + +http { + otel_exporter { + endpoint otel-collector:4317; + } + + otel_service_name nginx; + + map $status $log_level { + ~^[45] error; + default info; + } + + map $status $log_target { + ~^[45] nginx.request_error; + default nginx.access; + } + + log_format otel_request 'ts=$time_iso8601 level=$log_level target=$log_target ' + 'client_ip=$remote_addr method=$request_method path="$uri" ' + 'status=$status request_time=$request_time ' + 'connection=$connection ' + 'upstream_addr="$upstream_addr" upstream_status="$upstream_status" ' + 'upstream_response_time="$upstream_response_time" ' + 'referer="$http_referer" user_agent="$http_user_agent" ' + 'trace_id=$otel_trace_id span_id=$otel_span_id parent_id=$otel_parent_id'; + + access_log /dev/stdout otel_request; + error_log /dev/stderr warn; + + upstream sqlpage { + server sqlpage:8080; + } + + server { + listen 80; + + location / { + otel_trace on; + otel_span_name "$request_method $uri"; + otel_trace_context propagate; + proxy_pass http://sqlpage; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + } +} diff --git a/examples/telemetry/otel-collector.yaml b/examples/telemetry/otel-collector.yaml new file mode 100644 index 00000000..032d2919 --- /dev/null +++ b/examples/telemetry/otel-collector.yaml @@ -0,0 +1,139 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + syslog/sqlpage: + 
protocol: rfc5424 + udp: + listen_address: 0.0.0.0:1514 + syslog/nginx: + protocol: rfc5424 + udp: + listen_address: 0.0.0.0:1516 + filelog/postgresql: + include: + - /var/log/postgresql/postgresql.json + start_at: end + include_file_path: true + operators: + - type: json_parser + parse_from: body + parse_to: attributes + postgresql: + endpoint: postgres:5432 + transport: tcp + username: sqlpage + password: sqlpage + tls: + insecure: true + databases: + - sqlpage + collection_interval: 1s + +processors: + transform/sqlpage_logs: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(resource.attributes["service.name"], "sqlpage") + - context: log + statements: + - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.body, "level=(?P<level>[^ ]+)"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "trace_id=(?P<trace_id>[0-9a-f]+)"), "upsert") where IsString(log.body) + - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.severity_text, "TRACE") where log.cache["level"] == "trace" + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.cache["level"] == "trace" + - set(log.severity_text, "DEBUG") where log.cache["level"] == "debug" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.cache["level"] == "debug" + - set(log.severity_text, "INFO") where log.cache["level"] == "info" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.cache["level"] == "info" + - set(log.severity_text, "WARN") where log.cache["level"] == "warn" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.cache["level"] == "warn" + - set(log.severity_text, "ERROR") where log.cache["level"] == "error" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.cache["level"] == "error" + 
transform/postgresql_logs: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(resource.attributes["service.name"], "postgresql") + - context: log + statements: + - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.attributes["application_name"], "00-(?P<trace_id>[0-9a-f]{32})-[0-9a-f]{16}-[0-9a-f]{2}"), "upsert") where IsString(log.attributes["application_name"]) + - set(log.attributes["level"], log.attributes["error_severity"]) where log.attributes["error_severity"] != nil + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.severity_text, "DEBUG") where log.attributes["level"] == "DEBUG1" or log.attributes["level"] == "DEBUG2" or log.attributes["level"] == "DEBUG3" or log.attributes["level"] == "DEBUG4" or log.attributes["level"] == "DEBUG5" or log.attributes["level"] == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG" + - set(log.severity_text, "INFO") where log.attributes["level"] == "LOG" or log.attributes["level"] == "INFO" or log.attributes["level"] == "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO" + - set(log.severity_text, "WARN") where log.attributes["level"] == "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARN" + - set(log.severity_text, "ERROR") where log.attributes["level"] == "ERROR" or log.attributes["level"] == "FATAL" or log.attributes["level"] == "PANIC" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR" + transform/nginx_logs: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(resource.attributes["service.name"], "nginx") + - context: log + statements: + - set(log.body, log.attributes["message"]) where log.attributes["message"] != nil + - merge_maps(log.cache, ExtractPatterns(log.body, 
"level=(?P<level>[^ ]+)"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "\\[(?P<nginx_level>info|error|warn|notice|debug)\\]"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "trace_id=(?P<trace_id>[0-9a-f]{32})"), "upsert") where IsString(log.body) + - merge_maps(log.cache, ExtractPatterns(log.body, "span_id=(?P<span_id>[0-9a-f]{16})"), "upsert") where IsString(log.body) + - set(log.attributes["trace_id"], log.cache["trace_id"]) where log.cache["trace_id"] != nil + - set(log.attributes["span_id"], log.cache["span_id"]) where log.cache["span_id"] != nil + - set(log.attributes["level"], log.cache["level"]) where log.cache["level"] != nil + - set(log.attributes["level"], log.cache["nginx_level"]) where log.cache["level"] == nil and log.cache["nginx_level"] != nil + - set(log.severity_text, "DEBUG") where log.attributes["level"] == "debug" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.attributes["level"] == "debug" + - set(log.severity_text, "INFO") where log.attributes["level"] == "info" or log.attributes["level"] == "notice" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.attributes["level"] == "info" or log.attributes["level"] == "notice" + - set(log.severity_text, "WARN") where log.attributes["level"] == "warn" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.attributes["level"] == "warn" + - set(log.severity_text, "ERROR") where log.attributes["level"] == "error" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.attributes["level"] == "error" + batch: + timeout: 1s + +exporters: + otlp_grpc/tempo: + endpoint: tempo:4317 + tls: + insecure: true + otlp_http/loki: + endpoint: http://loki:3100/otlp + prometheus: + endpoint: 0.0.0.0:9464 + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [otlp_grpc/tempo] + metrics: + receivers: [postgresql] + processors: [batch] + exporters: [prometheus] + logs/sqlpage: + receivers: [syslog/sqlpage] + 
processors: [transform/sqlpage_logs, batch] + exporters: [otlp_http/loki] + logs/postgresql: + receivers: [filelog/postgresql] + processors: [transform/postgresql_logs, batch] + exporters: [otlp_http/loki] + logs/nginx: + receivers: [syslog/nginx] + processors: [transform/nginx_logs, batch] + exporters: [otlp_http/loki] diff --git a/examples/telemetry/postgres-init/001_pg_stat_statements.sql b/examples/telemetry/postgres-init/001_pg_stat_statements.sql new file mode 100644 index 00000000..c237a502 --- /dev/null +++ b/examples/telemetry/postgres-init/001_pg_stat_statements.sql @@ -0,0 +1,5 @@ +\connect postgres +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + +\connect sqlpage +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; diff --git a/examples/telemetry/prometheus.yml b/examples/telemetry/prometheus.yml new file mode 100644 index 00000000..29b26bf8 --- /dev/null +++ b/examples/telemetry/prometheus.yml @@ -0,0 +1,9 @@ +global: + scrape_interval: 10s + evaluation_interval: 10s + +scrape_configs: + - job_name: otel-collector + static_configs: + - targets: ["otel-collector:9464"] + diff --git a/examples/telemetry/sqlpage/migrations/001_init.sql b/examples/telemetry/sqlpage/migrations/001_init.sql new file mode 100644 index 00000000..0ff86847 --- /dev/null +++ b/examples/telemetry/sqlpage/migrations/001_init.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS todos ( + id SERIAL PRIMARY KEY, + title TEXT NOT NULL, + done BOOLEAN NOT NULL DEFAULT false, + created_at TIMESTAMP NOT NULL DEFAULT NOW() +); diff --git a/examples/telemetry/sqlpage/sqlpage.json b/examples/telemetry/sqlpage/sqlpage.json new file mode 100644 index 00000000..5322302d --- /dev/null +++ b/examples/telemetry/sqlpage/sqlpage.json @@ -0,0 +1,3 @@ +{ + "listen_on": "0.0.0.0:8080" +} diff --git a/examples/telemetry/tempo.yaml b/examples/telemetry/tempo.yaml new file mode 100644 index 00000000..a8d79406 --- /dev/null +++ b/examples/telemetry/tempo.yaml @@ -0,0 +1,52 @@ +stream_over_http_enabled: 
true + +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +ingester: + max_block_duration: 5m + +compactor: + compaction: + block_retention: 1h + +metrics_generator: + ring: + kvstore: + store: inmemory + registry: + external_labels: + source: tempo + cluster: docker-compose + processor: + service_graphs: {} + span_metrics: {} + local_blocks: + flush_to_storage: true + storage: + path: /tmp/tempo/generator/wal + traces_storage: + path: /tmp/tempo/generator/traces + +overrides: + defaults: + metrics_generator: + processors: + - service-graphs + - span-metrics + - local-blocks + +storage: + trace: + backend: local + wal: + path: /tmp/tempo/wal + local: + path: /tmp/tempo/blocks diff --git a/examples/telemetry/website/add_todo.sql b/examples/telemetry/website/add_todo.sql new file mode 100644 index 00000000..32c4aa7f --- /dev/null +++ b/examples/telemetry/website/add_todo.sql @@ -0,0 +1,8 @@ +-- Toggle an existing todo +UPDATE todos SET done = $done::boolean WHERE id = $id::int; + +-- Insert a new todo if title is provided via the form (POST) +INSERT INTO todos (title) +SELECT :title WHERE :title IS NOT NULL AND length(:title) > 0; + +SELECT 'redirect' AS component, '/' AS link; diff --git a/examples/telemetry/website/index.sql b/examples/telemetry/website/index.sql new file mode 100644 index 00000000..151ea3af --- /dev/null +++ b/examples/telemetry/website/index.sql @@ -0,0 +1,11 @@ +SELECT 'list' AS component, + 'Todo List' AS title; + +SELECT title, + CASE WHEN done THEN 'complete' ELSE 'pending' END AS description, + 'add_todo.sql?id=' || id || '&done=' || (NOT done)::text AS link +FROM todos +ORDER BY created_at DESC; + +SELECT 'form' AS component, 'Add a todo' AS title, 'add_todo.sql' AS action; +SELECT 'title' AS name, 'What do you need to do?' 
AS placeholder; diff --git a/examples/telemetry/website/slow.sql b/examples/telemetry/website/slow.sql new file mode 100644 index 00000000..fead830c --- /dev/null +++ b/examples/telemetry/website/slow.sql @@ -0,0 +1,7 @@ +SELECT pg_sleep(15); + +SELECT 'list' AS component, + 'Slow Query Complete' AS title; + +SELECT 'The slow query finished successfully.' AS title, + 'This page exists to make PostgreSQL query-sample events easy to capture.' AS description; diff --git a/scripts/build-dependencies.sh b/scripts/build-dependencies.sh index 5a55754a..af845432 100755 --- a/scripts/build-dependencies.sh +++ b/scripts/build-dependencies.sh @@ -3,10 +3,11 @@ set -euo pipefail source /tmp/build-env.sh -echo "Building dependencies for target: $TARGET" +PROFILE="${CARGO_PROFILE:-superoptimized}" +echo "Building dependencies for target: $TARGET (profile: $PROFILE)" cargo build \ --target "$TARGET" \ --config "target.$TARGET.linker=\"$LINKER\"" \ --features odbc-static \ - --profile superoptimized + --profile "$PROFILE" diff --git a/scripts/build-project.sh b/scripts/build-project.sh index ac595b61..f9a8781a 100755 --- a/scripts/build-project.sh +++ b/scripts/build-project.sh @@ -3,12 +3,13 @@ set -euo pipefail source /tmp/build-env.sh -echo "Building project for target: $TARGET" +PROFILE="${CARGO_PROFILE:-superoptimized}" +echo "Building project for target: $TARGET (profile: $PROFILE)" cargo build \ --target "$TARGET" \ --config "target.$TARGET.linker=\"$LINKER\"" \ --features odbc-static \ - --profile superoptimized + --profile "$PROFILE" -mv "target/$TARGET/superoptimized/sqlpage" sqlpage.bin +mv "target/$TARGET/$PROFILE/sqlpage" sqlpage.bin diff --git a/src/lib.rs b/src/lib.rs index df2eb9a5..a4babf3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,7 @@ pub mod dynamic_component; pub mod file_cache; pub mod filesystem; pub mod render; +pub mod telemetry; pub mod template_helpers; pub mod templates; pub mod utils; diff --git a/src/main.rs b/src/main.rs index 
9074ddea..ed683025 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use sqlpage::{ app_config::AppConfig, - cli, + cli, telemetry, webserver::{self, Database}, AppState, }; @@ -29,17 +29,14 @@ async fn start() -> anyhow::Result<()> { log::debug!("Starting server..."); webserver::http::run_server(&app_config, state).await?; log::info!("Server stopped gracefully. Goodbye!"); + telemetry::shutdown_telemetry(); Ok(()) } fn init_logging() { let load_env = dotenvy::dotenv(); - let env = - env_logger::Env::new().default_filter_or("sqlpage=info,actix_web::middleware::logger=info"); - let mut logging = env_logger::Builder::from_env(env); - logging.format_timestamp_millis(); - logging.init(); + let otel_active = telemetry::init_telemetry(); match load_env { Ok(path) => log::info!("Loaded environment variables from {path:?}"), @@ -48,4 +45,8 @@ fn init_logging() { ), Err(e) => log::error!("Error loading .env file: {e}"), } + + if otel_active { + log::info!("OpenTelemetry tracing enabled (OTEL_EXPORTER_OTLP_ENDPOINT is set)"); + } } diff --git a/src/telemetry.rs b/src/telemetry.rs new file mode 100644 index 00000000..ef86d8dd --- /dev/null +++ b/src/telemetry.rs @@ -0,0 +1,455 @@ +//! OpenTelemetry initialization and shutdown. +//! +//! When `OTEL_EXPORTER_OTLP_ENDPOINT` is set, sets up a full tracing pipeline +//! with OTLP export. Otherwise, sets up tracing with logfmt output only. +//! +//! In both cases, the same logfmt log format is used, with carefully chosen +//! fields for human readability and machine parseability. + +use std::env; +use std::sync::OnceLock; + +use opentelemetry_sdk::trace::SdkTracerProvider; + +static TRACER_PROVIDER: OnceLock = OnceLock::new(); + +/// Initializes logging / tracing. Returns `true` if `OTel` was activated. 
+#[must_use] +pub fn init_telemetry() -> bool { + let otel_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok(); + let otel_active = otel_endpoint.as_deref().is_some_and(|v| !v.is_empty()); + + if otel_active { + init_otel_tracing(); + } else { + init_tracing(); + } + + otel_active +} + +/// Shuts down the `OTel` tracer provider, flushing pending spans. +pub fn shutdown_telemetry() { + if let Some(provider) = TRACER_PROVIDER.get() { + if let Err(e) = provider.shutdown() { + eprintln!("Error shutting down tracer provider: {e}"); + } + } +} + +/// Tracing subscriber without `OTel` export — logfmt output only. +fn init_tracing() { + use tracing_subscriber::layer::SubscriberExt; + + let subscriber = tracing_subscriber::registry() + .with(default_env_filter()) + .with(logfmt::LogfmtLayer::new()); + + set_global_subscriber(subscriber); +} + +fn init_otel_tracing() { + use opentelemetry::global; + use opentelemetry::trace::TracerProvider as _; + use opentelemetry_sdk::propagation::TraceContextPropagator; + use tracing_subscriber::layer::SubscriberExt; + + // W3C TraceContext propagation (traceparent header) + global::set_text_map_propagator(TraceContextPropagator::new()); + + // OTLP exporter — reads OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_SERVICE_NAME, etc. 
+ let exporter = opentelemetry_otlp::SpanExporter::builder() + .with_http() + .build() + .expect("Failed to build OTLP span exporter"); + + let provider = SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .build(); + + let tracer = provider.tracer("sqlpage"); + global::set_tracer_provider(provider.clone()); + let _ = TRACER_PROVIDER.set(provider); + + let otel_layer = tracing_opentelemetry::layer() + .with_tracer(tracer) + .with_location(false); + + let subscriber = tracing_subscriber::registry() + .with(default_env_filter()) + .with(logfmt::LogfmtLayer::new()) + .with(otel_layer); + + set_global_subscriber(subscriber); +} + +fn default_env_filter() -> tracing_subscriber::EnvFilter { + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "sqlpage=info,actix_web=info,tracing_actix_web=info".into()) +} + +fn set_global_subscriber(subscriber: impl tracing::Subscriber + Send + Sync) { + tracing::subscriber::set_global_default(subscriber) + .expect("Failed to set global tracing subscriber"); + tracing_log::LogTracer::init().expect("Failed to set log→tracing bridge"); +} + +/// Custom logfmt logging layer. +/// +/// Outputs one line per event in logfmt format with carefully chosen fields: +/// ```text +/// ts=2026-03-08T20:56:15Z level=error target=sqlpage::webserver::error msg="..." method=GET path=/foo client_ip=1.2.3.4 trace_id=abc123 +/// ``` +/// +/// With terminal colors when stderr is a TTY. +mod logfmt { + use std::collections::BTreeMap; + use std::collections::HashMap; + use std::fmt::Write; + use std::io::{self, IsTerminal}; + + use tracing::field::{Field, Visit}; + use tracing::Subscriber; + use tracing_subscriber::layer::Context; + use tracing_subscriber::registry::LookupSpan; + use tracing_subscriber::Layer; + + /// Stores span fields so we can access them when formatting events. + #[derive(Default)] + struct SpanFields(HashMap<&'static str, String>); + + /// Visitor that collects fields into a `HashMap`. 
+ struct FieldCollector<'a>(&'a mut HashMap<&'static str, String>); + + impl Visit for FieldCollector<'_> { + fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { + self.0.insert(field.name(), format!("{value:?}")); + } + fn record_str(&mut self, field: &Field, value: &str) { + self.0.insert(field.name(), value.to_owned()); + } + fn record_i64(&mut self, field: &Field, value: i64) { + self.0.insert(field.name(), value.to_string()); + } + fn record_u64(&mut self, field: &Field, value: u64) { + self.0.insert(field.name(), value.to_string()); + } + fn record_bool(&mut self, field: &Field, value: bool) { + self.0.insert(field.name(), value.to_string()); + } + } + + /// Fields we pick from spans, in display order. + /// (`span_field_name`, `logfmt_key`) + const SPAN_FIELDS: &[(&str, &str)] = &[ + ("http.method", "method"), + ("http.target", "path"), + ("http.status_code", "status"), + ("sqlpage.file", "file"), + ("http.client_ip", "client_ip"), + ]; + + /// All-zeros trace ID means no real trace context. 
+ const INVALID_TRACE_ID: &str = "00000000000000000000000000000000"; + + // ANSI color codes + const RED: &str = "\x1b[31m"; + const YELLOW: &str = "\x1b[33m"; + const GREEN: &str = "\x1b[32m"; + const BLUE: &str = "\x1b[34m"; + const DIM: &str = "\x1b[2m"; + const BOLD: &str = "\x1b[1m"; + const RESET: &str = "\x1b[0m"; + + pub(super) struct LogfmtLayer { + use_colors: bool, + } + + impl LogfmtLayer { + pub fn new() -> Self { + Self { + use_colors: io::stderr().is_terminal(), + } + } + } + + impl Layer for LogfmtLayer + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + fn on_new_span( + &self, + attrs: &tracing::span::Attributes<'_>, + id: &tracing::span::Id, + ctx: Context<'_, S>, + ) { + let mut fields = SpanFields::default(); + attrs.record(&mut FieldCollector(&mut fields.0)); + if let Some(span) = ctx.span(id) { + span.extensions_mut().insert(fields); + } + } + + fn on_record( + &self, + id: &tracing::span::Id, + values: &tracing::span::Record<'_>, + ctx: Context<'_, S>, + ) { + if let Some(span) = ctx.span(id) { + let mut ext = span.extensions_mut(); + if let Some(fields) = ext.get_mut::() { + values.record(&mut FieldCollector(&mut fields.0)); + } + } + } + + fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) { + let mut buf = String::with_capacity(256); + let colors = self.use_colors; + let level = *event.metadata().level(); + let include_all_span_fields = includes_all_span_fields(); + let mut event_fields = HashMap::new(); + event.record(&mut FieldCollector(&mut event_fields)); + let target = event_target(event, &event_fields); + let msg = event_fields.get("message"); + let multiline_msg = is_multiline_terminal_message(colors, msg); + + write_timestamp(&mut buf, colors); + write_level(&mut buf, level, colors); + write_message(&mut buf, msg, multiline_msg); + write_dimmed_field(&mut buf, "target", target, colors); + write_span_fields(&mut buf, ctx.event_scope(event), include_all_span_fields); + write_trace_id(&mut buf, 
ctx.event_scope(event), colors); + + buf.push('\n'); + write_multiline_message(&mut buf, msg, multiline_msg); + let _ = io::Write::write_all(&mut io::stderr().lock(), buf.as_bytes()); + } + } + + fn event_target<'a>( + event: &'a tracing::Event<'_>, + event_fields: &'a HashMap<&'static str, String>, + ) -> &'a str { + event_fields + .get("log.target") + .map_or_else(|| event.metadata().target(), String::as_str) + } + + fn is_multiline_terminal_message(colors: bool, msg: Option<&String>) -> bool { + colors && msg.is_some_and(|message| message.contains('\n')) + } + + fn write_timestamp(buf: &mut String, colors: bool) { + let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); + if colors { + let _ = write!(buf, "{DIM}ts={now}{RESET}"); + } else { + let _ = write!(buf, "ts={now}"); + } + } + + fn write_level(buf: &mut String, level: tracing::Level, colors: bool) { + if colors { + let (color, label) = level_style(level); + let _ = write!(buf, " {DIM}level={RESET}{BOLD}{color}{label}{RESET}"); + } else { + let _ = write!(buf, " level={}", level.as_str().to_ascii_lowercase()); + } + } + + fn level_style(level: tracing::Level) -> (&'static str, &'static str) { + match level { + tracing::Level::ERROR => (RED, "error"), + tracing::Level::WARN => (YELLOW, "warn"), + tracing::Level::INFO => (GREEN, "info"), + tracing::Level::DEBUG => (BLUE, "debug"), + tracing::Level::TRACE => (DIM, "trace"), + } + } + + fn write_dimmed_field(buf: &mut String, key: &str, value: &str, colors: bool) { + if colors { + let _ = write!(buf, " {DIM}{key}={value}{RESET}"); + } else { + let _ = write!(buf, " {key}={value}"); + } + } + + fn write_message(buf: &mut String, msg: Option<&String>, multiline_msg: bool) { + if !multiline_msg { + if let Some(msg) = msg { + write_logfmt_value(buf, "msg", msg); + } + } + } + + fn write_span_fields( + buf: &mut String, + scope: Option>, + include_all_span_fields: bool, + ) where + S: Subscriber + for<'a> LookupSpan<'a>, + { + if let Some(scope) = scope { 
+ let mut seen_mapped_fields = [false; SPAN_FIELDS.len()]; + let mut extra_fields = BTreeMap::new(); + + for span in scope { + let ext = span.extensions(); + if let Some(fields) = ext.get::() { + for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { + if seen_mapped_fields[i] { + continue; + } + if let Some(val) = fields.0.get(span_key) { + write_logfmt_value(buf, logfmt_key, val); + seen_mapped_fields[i] = true; + } + } + if include_all_span_fields { + for (&key, val) in &fields.0 { + if SPAN_FIELDS.iter().any(|(span_key, _)| key == *span_key) { + continue; + } + extra_fields.entry(key).or_insert_with(|| val.clone()); + } + } + } + } + + if include_all_span_fields { + for (key, val) in extra_fields { + write_logfmt_value(buf, key, &val); + } + } + } + } + + fn includes_all_span_fields() -> bool { + tracing::level_filters::LevelFilter::current() >= tracing::level_filters::LevelFilter::DEBUG + } + + #[cfg(test)] + fn write_span_field_maps<'a>( + buf: &mut String, + span_fields: impl IntoIterator>, + include_all_span_fields: bool, + ) { + let mut seen_mapped_fields = [false; SPAN_FIELDS.len()]; + let mut extra_fields = BTreeMap::new(); + + for fields in span_fields { + for (i, &(span_key, logfmt_key)) in SPAN_FIELDS.iter().enumerate() { + if seen_mapped_fields[i] { + continue; + } + if let Some(val) = fields.get(span_key) { + write_logfmt_value(buf, logfmt_key, val); + seen_mapped_fields[i] = true; + } + } + if include_all_span_fields { + for (&key, val) in fields { + if SPAN_FIELDS.iter().any(|(span_key, _)| key == *span_key) { + continue; + } + extra_fields.entry(key).or_insert_with(|| val.clone()); + } + } + } + + if include_all_span_fields { + for (key, val) in extra_fields { + write_logfmt_value(buf, key, &val); + } + } + } + + fn write_trace_id( + buf: &mut String, + scope: Option>, + colors: bool, + ) where + S: Subscriber + for<'a> LookupSpan<'a>, + { + if let Some(trace_id) = first_valid_trace_id(scope) { + write_dimmed_field(buf, "trace_id", 
&trace_id, colors); + } + } + + fn first_valid_trace_id( + scope: Option>, + ) -> Option + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + for span in scope? { + let ext = span.extensions(); + if let Some(otel_data) = ext.get::() { + if let Some(trace_id) = otel_data.trace_id() { + let trace_id = trace_id.to_string(); + if trace_id != INVALID_TRACE_ID { + return Some(trace_id); + } + } + } + } + None + } + + fn write_multiline_message(buf: &mut String, msg: Option<&String>, multiline_msg: bool) { + if multiline_msg { + if let Some(msg) = msg { + buf.push_str(msg); + buf.push('\n'); + } + } + } + + /// Write a logfmt key=value pair, quoting the value if it contains spaces or special chars. + fn write_logfmt_value(buf: &mut String, key: &str, value: &str) { + let needs_quoting = value.contains([' ', '"', '=', '\n', '\t']) || value.is_empty(); + + if needs_quoting { + let escaped = value.replace('\n', " ").replace('"', "\\\""); + let _ = write!(buf, " {key}=\"{escaped}\""); + } else { + let _ = write!(buf, " {key}={value}"); + } + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn debug_logs_include_unmapped_span_fields() { + let mut buf = String::new(); + let span_fields = HashMap::from([ + ("http.method", "GET".to_string()), + ("http.route", "/users/:id".to_string()), + ("otel.kind", "server".to_string()), + ]); + + write_span_field_maps(&mut buf, [&span_fields], true); + + assert_eq!(buf, " method=GET http.route=/users/:id otel.kind=server"); + } + + #[test] + fn info_logs_keep_only_mapped_span_fields_when_not_in_debug_mode() { + let mut buf = String::new(); + let span_fields = HashMap::from([ + ("http.method", "GET".to_string()), + ("http.route", "/users/:id".to_string()), + ("otel.kind", "server".to_string()), + ]); + + write_span_field_maps(&mut buf, [&span_fields], false); + + assert_eq!(buf, " method=GET"); + } + } +} diff --git a/src/webserver/database/connect.rs b/src/webserver/database/connect.rs index d96f2b5a..573df2fa 100644 --- 
a/src/webserver/database/connect.rs +++ b/src/webserver/database/connect.rs @@ -58,7 +58,6 @@ impl Database { }; let dbms_name: String = pool.acquire().await?.dbms_name().await?; let database_type = SupportedDatabase::from_dbms_name(&dbms_name); - log::debug!("Initialized {dbms_name:?} database pool: {pool:#?}"); Ok(Database { connection: pool, diff --git a/src/webserver/database/execute_queries.rs b/src/webserver/database/execute_queries.rs index 3edeecfc..8235f0b4 100644 --- a/src/webserver/database/execute_queries.rs +++ b/src/webserver/database/execute_queries.rs @@ -5,6 +5,7 @@ use serde_json::Value; use std::borrow::Cow; use std::path::Path; use std::pin::Pin; +use tracing::Instrument; use super::csv_import::run_csv_import; use super::error_highlighting::{display_stmt_db_error, display_stmt_error}; @@ -29,6 +30,22 @@ use sqlx::{ pub type DbConn = Option>; +fn record_query_params(span: &tracing::Span, params: &[Option]) { + use tracing_opentelemetry::OpenTelemetrySpanExt; + for (idx, value) in params.iter().enumerate() { + let key = opentelemetry::Key::new(format!("db.query.parameter.{idx}")); + let otel_value = match value { + Some(v) => opentelemetry::Value::String(v.clone().into()), + None => opentelemetry::Value::String("NULL".into()), + }; + span.set_attribute(key, otel_value); + } +} + +fn source_line_number(line: usize) -> i64 { + i64::try_from(line).unwrap_or(i64::MAX) +} + impl Database { pub(crate) async fn prepare_with( &self, @@ -64,21 +81,35 @@ pub fn stream_query_results_with_conn<'a>( request.server_timing.record("bind_params"); let connection = take_connection(&request.app_state.db, db_connection, request).await?; log::trace!("Executing query {:?}", query.sql); + let query_span = tracing::info_span!( + "db.query", + db.query.text = query.sql, + db.system.name = request.app_state.db.info.database_type.otel_name(), + code.file.path = %source_file.display(), + code.line.number = source_line_number(stmt.query_position.start.line), + 
db.response.returned_rows = tracing::field::Empty, + ); + record_query_params(&query_span, &query.param_values); let mut stream = connection.fetch_many(query); let mut error = None; - while let Some(elem) = stream.next().await { + let mut returned_rows: i64 = 0; + while let Some(elem) = stream.next().instrument(query_span.clone()).await { let mut query_result = parse_single_sql_result(source_file, stmt, elem); if let DbItem::Error(e) = query_result { error = Some(e); break; } + if matches!(query_result, DbItem::Row(_)) { + returned_rows += 1; + } apply_json_columns(&mut query_result, &stmt.json_columns); - apply_delayed_functions(request, &stmt.delayed_functions, &mut query_result).await?; + apply_delayed_functions(request, &stmt.delayed_functions, &mut query_result).instrument(query_span.clone()).await?; for db_item in parse_dynamic_rows(query_result) { yield db_item; } } drop(stream); + query_span.record("db.response.returned_rows", returned_rows); if let Some(error) = error { try_rollback_transaction(connection).await; yield DbItem::Error(error); @@ -217,9 +248,28 @@ async fn execute_set_variable_query<'a>( query.sql ); - let value = match connection.fetch_optional(query).await { - Ok(Some(row)) => row_to_string(&row), - Ok(None) => None, + let query_span = tracing::info_span!( + "db.query", + db.query.text = query.sql, + db.system.name = request.app_state.db.info.database_type.otel_name(), + code.file.path = %source_file.display(), + code.line.number = source_line_number(statement.query_position.start.line), + db.response.returned_rows = tracing::field::Empty, + ); + record_query_params(&query_span, &query.param_values); + let value = match connection + .fetch_optional(query) + .instrument(query_span.clone()) + .await + { + Ok(Some(row)) => { + query_span.record("db.response.returned_rows", 1_i64); + row_to_string(&row) + } + Ok(None) => { + query_span.record("db.response.returned_rows", 0_i64); + None + } Err(e) => { try_rollback_transaction(connection).await; 
let err = display_stmt_db_error(source_file, statement, e); @@ -288,12 +338,16 @@ async fn take_connection<'a>( if let Some(c) = conn { return Ok(c); } - match db.connection.acquire().await { + let pool_size = db.connection.size(); + let acquire_span = tracing::info_span!("db.pool.acquire", db.pool.size = pool_size,); + match db.connection.acquire().instrument(acquire_span).await { Ok(c) => { log::debug!("Acquired a database connection"); request.server_timing.record("db_conn"); *conn = Some(c); - Ok(conn.as_mut().unwrap()) + let connection = conn.as_mut().unwrap(); + set_trace_context(connection, db).await; + Ok(connection) } Err(e) => { let db_name = db.connection.any_kind(); @@ -304,6 +358,41 @@ async fn take_connection<'a>( } } +/// Sets the current `OTel` trace context on the database connection so it is visible +/// in `pg_stat_activity.application_name` (`PostgreSQL`) or as a session variable (`MySQL`). +/// This allows correlating `SQLPage` traces with database-side monitoring. +async fn set_trace_context(connection: &mut AnyConnection, db: &Database) { + use opentelemetry::trace::TraceContextExt; + use tracing_opentelemetry::OpenTelemetrySpanExt; + + let span = tracing::Span::current(); + let context = span.context(); + let otel_span = context.span(); + let span_context = otel_span.span_context(); + if !span_context.is_valid() { + return; + } + let traceparent = format!( + "00-{}-{}-{:02x}", + span_context.trace_id(), + span_context.span_id(), + span_context.trace_flags() + ); + let sql = match db.info.kind { + sqlx::any::AnyKind::Postgres => { + // postgresqlreceiver expects application_name to be a raw W3C traceparent value. 
+ format!("SET application_name = '{traceparent}'") + } + sqlx::any::AnyKind::MySql => { + format!("SET @traceparent = '{traceparent}'") + } + _ => return, + }; + if let Err(e) = connection.execute(sql.as_str()).await { + log::debug!("Failed to set trace context on connection: {e}"); + } +} + #[inline] fn parse_single_sql_result( source_file: &Path, @@ -374,6 +463,7 @@ async fn bind_parameters<'a>( let sql = stmt.query.as_str(); log::debug!("Preparing statement: {sql}"); let mut arguments = AnyArguments::default(); + let mut param_values = Vec::with_capacity(stmt.params.len()); for (param_idx, param) in stmt.params.iter().enumerate() { log::trace!("\tevaluating parameter {}: {}", param_idx + 1, param); let argument = extract_req_param(param, request, db_connection).await?; @@ -382,6 +472,7 @@ async fn bind_parameters<'a>( param_idx + 1, argument.as_ref().unwrap_or(&Cow::Borrowed("NULL")) ); + param_values.push(argument.as_deref().map(str::to_owned)); match argument { None => arguments.add(None::), Some(Cow::Owned(s)) => arguments.add(s), @@ -393,6 +484,7 @@ async fn bind_parameters<'a>( sql, arguments, has_arguments, + param_values, }) } @@ -478,6 +570,7 @@ pub struct StatementWithParams<'a> { sql: &'a str, arguments: AnyArguments<'a>, has_arguments: bool, + param_values: Vec>, } impl<'q> sqlx::Execute<'q, Any> for StatementWithParams<'q> { diff --git a/src/webserver/database/mod.rs b/src/webserver/database/mod.rs index b354c215..950096db 100644 --- a/src/webserver/database/mod.rs +++ b/src/webserver/database/mod.rs @@ -58,6 +58,22 @@ impl SupportedDatabase { Self::Generic => "Generic", } } + + /// Returns the `OTel` `db.system.name` well-known value. 
+ /// See + #[must_use] + pub fn otel_name(self) -> &'static str { + match self { + Self::Sqlite => "sqlite", + Self::Duckdb => "duckdb", + Self::Oracle => "oracle.db", + Self::Postgres => "postgresql", + Self::MySql => "mysql", + Self::Mssql => "microsoft.sql_server", + Self::Snowflake => "snowflake", + Self::Generic => "other_sql", + } + } } pub struct Database { diff --git a/src/webserver/database/sqlpage_functions/functions.rs b/src/webserver/database/sqlpage_functions/functions.rs index a41b18a1..32011f48 100644 --- a/src/webserver/database/sqlpage_functions/functions.rs +++ b/src/webserver/database/sqlpage_functions/functions.rs @@ -15,6 +15,7 @@ use futures_util::StreamExt; use mime_guess::mime; use std::fmt::Write; use std::{borrow::Cow, ffi::OsStr, str::FromStr}; +use tracing::Instrument; super::function_definition_macro::sqlpage_functions! { basic_auth_password((&RequestInfo)); @@ -136,9 +137,16 @@ async fn exec<'a>( Make sure you understand the security implications before enabling it, and never allow user input to be passed as the first argument to this function. 
You can enable it by setting the allow_exec option to true in the sqlpage.json configuration file.") } + let exec_span = tracing::info_span!( + "subprocess", + otel.name = format!("EXEC {program_name}"), + process.command = %program_name, + process.args_count = args.len(), + ); let res = tokio::process::Command::new(&*program_name) .args(args.iter().map(|x| &**x)) .output() + .instrument(exec_span) .await .with_context(|| { let mut s = format!("Unable to execute command: {program_name}"); @@ -205,39 +213,62 @@ async fn fetch( let Some(http_request) = http_request else { return Ok(None); }; - let client = make_http_client(&request.app_state.config) - .with_context(|| "Unable to create an HTTP client")?; - let req = build_request(&client, &http_request)?; - - log::info!("Fetching {}", http_request.url); - let mut response = if let Some(body) = &http_request.body { - let (body, req) = prepare_request_body(body, req)?; - req.send_body(body) - } else { - req.send() - } - .await - .map_err(|e| anyhow!("Unable to fetch {}: {e}", http_request.url))?; - - log::debug!( - "Finished fetching {}. 
Status: {}", - http_request.url, - response.status() + let method = http_request.method.as_deref().unwrap_or("GET"); + let fetch_span = tracing::info_span!( + "http.client", + otel.name = format!("{method}"), + http.request.method = method, + url.full = %http_request.url, + http.request.body.size = tracing::field::Empty, + http.response.status_code = tracing::field::Empty, ); - let body = response - .body() + async { + let client = make_http_client(&request.app_state.config) + .with_context(|| "Unable to create an HTTP client")?; + let req = build_request(&client, &http_request)?; + + log::info!("Fetching {}", http_request.url); + let mut response = if let Some(body) = &http_request.body { + let (body, req) = prepare_request_body(body, req)?; + tracing::Span::current().record( + "http.request.body.size", + i64::try_from(body.len()).unwrap_or(i64::MAX), + ); + req.send_body(body) + } else { + req.send() + } .await - .with_context(|| { - format!( - "Unable to read the body of the response from {}", - http_request.url - ) - })? - .to_vec(); - let response_str = decode_response(body, http_request.response_encoding.as_deref())?; - log::debug!("Fetch response: {response_str}"); - Ok(Some(response_str)) + .map_err(|e| anyhow!("Unable to fetch {}: {e}", http_request.url))?; + + tracing::Span::current().record( + "http.response.status_code", + i64::from(response.status().as_u16()), + ); + + log::debug!( + "Finished fetching {}. Status: {}", + http_request.url, + response.status() + ); + + let body = response + .body() + .await + .with_context(|| { + format!( + "Unable to read the body of the response from {}", + http_request.url + ) + })? 
+ .to_vec(); + let response_str = decode_response(body, http_request.response_encoding.as_deref())?; + log::debug!("Fetch response: {response_str}"); + Ok(Some(response_str)) + } + .instrument(fetch_span) + .await } fn decode_response(response: Vec, encoding: Option<&str>) -> anyhow::Result { @@ -285,81 +316,101 @@ async fn fetch_with_meta( return Ok(None); }; - let client = make_http_client(&request.app_state.config) - .with_context(|| "Unable to create an HTTP client")?; - let req = build_request(&client, &http_request)?; - - log::info!("Fetching {} with metadata", http_request.url); - let response_result = if let Some(body) = &http_request.body { - let (body, req) = prepare_request_body(body, req)?; - req.send_body(body).await - } else { - req.send().await - }; + let method = http_request.method.as_deref().unwrap_or("GET"); + let fetch_span = tracing::info_span!( + "http.client", + otel.name = format!("{method}"), + http.request.method = method, + url.full = %http_request.url, + http.request.body.size = tracing::field::Empty, + http.response.status_code = tracing::field::Empty, + ); - let mut resp_str = Vec::new(); - let mut encoder = serde_json::Serializer::new(&mut resp_str); - let mut obj = encoder.serialize_map(Some(3))?; - match response_result { - Ok(mut response) => { - let status = response.status(); - obj.serialize_entry("status", &status.as_u16())?; - let mut has_error = false; - if status.is_server_error() { - has_error = true; - obj.serialize_entry("error", &format!("Server error: {status}"))?; - } + async { + let client = make_http_client(&request.app_state.config) + .with_context(|| "Unable to create an HTTP client")?; + let req = build_request(&client, &http_request)?; + + log::info!("Fetching {} with metadata", http_request.url); + let response_result = if let Some(body) = &http_request.body { + let (body, req) = prepare_request_body(body, req)?; + tracing::Span::current().record( + "http.request.body.size", + 
i64::try_from(body.len()).unwrap_or(i64::MAX), + ); + req.send_body(body).await + } else { + req.send().await + }; + + let mut resp_str = Vec::new(); + let mut encoder = serde_json::Serializer::new(&mut resp_str); + let mut obj = encoder.serialize_map(Some(3))?; + match response_result { + Ok(mut response) => { + let status = response.status(); + tracing::Span::current() + .record("http.response.status_code", i64::from(status.as_u16())); + obj.serialize_entry("status", &status.as_u16())?; + let mut has_error = false; + if status.is_server_error() { + has_error = true; + obj.serialize_entry("error", &format!("Server error: {status}"))?; + } - let headers = response.headers(); - - let is_json = headers - .get("content-type") - .and_then(|v| v.to_str().ok()) - .unwrap_or_default() - .starts_with("application/json"); - - obj.serialize_entry( - "headers", - &headers - .iter() - .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or_default())) - .collect::>(), - )?; - - match response.body().await { - Ok(body) => { - let body_bytes = body.to_vec(); - let body_str = - decode_response(body_bytes, http_request.response_encoding.as_deref())?; - if is_json { - obj.serialize_entry( - "json_body", - &serde_json::value::RawValue::from_string(body_str)?, - )?; - } else { - obj.serialize_entry("body", &body_str)?; + let headers = response.headers(); + + let is_json = headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .starts_with("application/json"); + + obj.serialize_entry( + "headers", + &headers + .iter() + .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or_default())) + .collect::>(), + )?; + + match response.body().await { + Ok(body) => { + let body_bytes = body.to_vec(); + let body_str = + decode_response(body_bytes, http_request.response_encoding.as_deref())?; + if is_json { + obj.serialize_entry( + "json_body", + &serde_json::value::RawValue::from_string(body_str)?, + )?; + } else { + obj.serialize_entry("body", &body_str)?; + } } - } - 
Err(e) => { - log::warn!("Failed to read response body: {e}"); - if !has_error { - obj.serialize_entry( - "error", - &format!("Failed to read response body: {e}"), - )?; + Err(e) => { + log::warn!("Failed to read response body: {e}"); + if !has_error { + obj.serialize_entry( + "error", + &format!("Failed to read response body: {e}"), + )?; + } } } } + Err(e) => { + log::warn!("Request failed: {e}"); + obj.serialize_entry("error", &format!("Request failed: {e}"))?; + } } - Err(e) => { - log::warn!("Request failed: {e}"); - obj.serialize_entry("error", &format!("Request failed: {e}"))?; - } - } - obj.end()?; - let return_value = String::from_utf8(resp_str)?; - Ok(Some(return_value)) + obj.end()?; + let return_value = String::from_utf8(resp_str)?; + Ok(Some(return_value)) + } + .instrument(fetch_span) + .await } pub(crate) async fn hash_password(password: Option) -> anyhow::Result> { @@ -577,6 +628,11 @@ async fn run_sql<'a>( log::debug!("run_sql: first argument is NULL, returning NULL"); return Ok(None); }; + let run_sql_span = tracing::info_span!( + "sqlpage.file", + otel.name = format!("SQL {sql_file_path}"), + code.file.path = %sql_file_path, + ); let app_state = &request.app_state; let sql_file = app_state .sql_file_cache @@ -585,6 +641,7 @@ async fn run_sql<'a>( std::path::Path::new(sql_file_path.as_ref()), true, ) + .instrument(run_sql_span.clone()) .await .with_context(|| format!("run_sql: invalid path {sql_file_path:?}"))?; let tmp_req = if let Some(variables) = variables { @@ -613,7 +670,7 @@ async fn run_sql<'a>( let mut json_results_bytes = Vec::new(); let mut json_encoder = serde_json::Serializer::new(&mut json_results_bytes); let mut seq = json_encoder.serialize_seq(None)?; - while let Some(db_item) = results_stream.next().await { + while let Some(db_item) = results_stream.next().instrument(run_sql_span.clone()).await { use crate::webserver::database::DbItem::{Error, FinishedQuery, Row}; match db_item { Row(row) => { diff --git a/src/webserver/http.rs 
b/src/webserver/http.rs index 68464e2b..a95b7ea2 100644 --- a/src/webserver/http.rs +++ b/src/webserver/http.rs @@ -16,9 +16,9 @@ use actix_web::http::header::Accept; use actix_web::http::header::{ContentType, Header, HttpDate, IfModifiedSince, LastModified}; use actix_web::http::{header, StatusCode}; use actix_web::web::PayloadConfig; -use actix_web::{ - dev::ServiceResponse, middleware, middleware::Logger, web, App, HttpResponse, HttpServer, -}; +use actix_web::{dev::ServiceResponse, middleware, web, App, Error, HttpResponse, HttpServer}; +use tracing::{Instrument, Span}; +use tracing_actix_web::{DefaultRootSpanBuilder, RootSpanBuilder, TracingLogger}; use super::error::{anyhow_err_to_actix, bind_error, send_anyhow_error}; use super::http_client::make_http_client; @@ -35,6 +35,7 @@ use anyhow::{bail, Context}; use chrono::{DateTime, Utc}; use futures_util::stream::Stream; use futures_util::StreamExt; +use std::borrow::Cow; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; @@ -219,56 +220,167 @@ async fn render_sql( .map(|accept| ResponseFormat::from_accept_header(&accept)) .unwrap_or_default(); - let exec_ctx = extract_request_info(srv_req, Arc::clone(&app_state), server_timing) - .await - .map_err(|e| anyhow_err_to_actix(e, &app_state))?; + let exec_ctx = { + let content_type = srv_req + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let content_length = srv_req + .headers() + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()); + let url_query = srv_req.query_string(); + let url_query = if url_query.is_empty() { + None + } else { + Some(url_query) + }; + let parse_span = tracing::info_span!( + "http.parse_request", + http.request.method = %srv_req.method(), + http.request.header.content_type = content_type, + http.request.body.size = content_length, + url.query = url_query, + ); + extract_request_info(srv_req, Arc::clone(&app_state), server_timing) + 
.instrument(parse_span) + .await + .map_err(|e| anyhow_err_to_actix(e, &app_state))? + }; log::debug!("Received a request with the following parameters: {exec_ctx:?}"); exec_ctx.request().server_timing.record("parse_req"); let (resp_send, resp_recv) = tokio::sync::oneshot::channel::(); let source_path: PathBuf = sql_file.source_path.clone(); - actix_web::rt::spawn(async move { - let request_info = exec_ctx.request(); - let request_context = RequestContext { - is_embedded: request_info.url_params.contains_key("_sqlpage_embed"), - source_path, - content_security_policy: ContentSecurityPolicy::with_random_nonce(), - server_timing: Arc::clone(&request_info.server_timing), - response_format, - }; - let mut conn = None; - let database_entries_stream = - stream_query_results_with_conn(&sql_file, &exec_ctx, &mut conn); - let database_entries_stream = stop_at_first_error(database_entries_stream); - let response_with_writer = build_response_header_and_stream( - Arc::clone(&app_state), - database_entries_stream, - request_context, - ) - .await; - match response_with_writer { - Ok(ResponseWithWriter::RenderStream { - http_response, - renderer, + let exec_span = tracing::info_span!( + "sqlpage.file", + otel.name = %sql_execution_span_name(&source_path), + code.file.path = %source_path.display(), + ); + actix_web::rt::spawn(tracing::Instrument::instrument( + async move { + let request_info = exec_ctx.request(); + let request_context = RequestContext { + is_embedded: request_info.url_params.contains_key("_sqlpage_embed"), + source_path, + content_security_policy: ContentSecurityPolicy::with_random_nonce(), + server_timing: Arc::clone(&request_info.server_timing), + response_format, + }; + let mut conn = None; + let database_entries_stream = + stream_query_results_with_conn(&sql_file, &exec_ctx, &mut conn); + let database_entries_stream = stop_at_first_error(database_entries_stream); + let response_with_writer = build_response_header_and_stream( + Arc::clone(&app_state), 
database_entries_stream, - }) => { - resp_send - .send(http_response) - .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); - stream_response(database_entries_stream, renderer).await; - } - Ok(ResponseWithWriter::FinishedResponse { http_response }) => { - resp_send - .send(http_response) - .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); + request_context, + ) + .await; + match response_with_writer { + Ok(ResponseWithWriter::RenderStream { + http_response, + renderer, + database_entries_stream, + }) => { + resp_send + .send(http_response) + .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); + tracing::Instrument::instrument( + stream_response(database_entries_stream, renderer), + tracing::info_span!("render"), + ) + .await; + } + Ok(ResponseWithWriter::FinishedResponse { http_response }) => { + resp_send + .send(http_response) + .unwrap_or_else(|e| log::error!("could not send headers {e:?}")); + } + Err(err) => { + send_anyhow_error(&err, resp_send, &app_state); + } } - Err(err) => { - send_anyhow_error(&err, resp_send, &app_state); + }, + exec_span, + )); + resp_recv.await.map_err(ErrorInternalServerError) +} + +fn request_span_route(request: &ServiceRequest) -> Cow<'_, str> { + request + .match_pattern() + .map_or_else(|| request.path().to_owned().into(), Cow::from) +} + +fn request_span_name(request: &ServiceRequest) -> String { + format!("{} {}", request.method(), request_span_route(request)) +} + +fn sql_execution_span_name(source_path: &std::path::Path) -> String { + format!("SQL {}", source_path.display()) +} + +struct SqlPageRootSpanBuilder; + +impl RootSpanBuilder for SqlPageRootSpanBuilder { + fn on_request_start(request: &ServiceRequest) -> Span { + let user_agent = request + .headers() + .get("User-Agent") + .map_or("", |h| h.to_str().unwrap_or("")); + let http_route = request_span_route(request); + let http_method = + tracing_actix_web::root_span_macro::private::http_method_str(request.method()); + let 
otel_name = request_span_name(request); + let connection_info = request.connection_info(); + let request_id = tracing_actix_web::root_span_macro::private::get_request_id(request); + + let span = tracing::span!( + tracing::Level::INFO, + "HTTP request", + http.method = %http_method, + http.route = %http_route, + http.flavor = %tracing_actix_web::root_span_macro::private::http_flavor(request.version()), + http.scheme = %tracing_actix_web::root_span_macro::private::http_scheme(connection_info.scheme()), + http.host = %connection_info.host(), + http.client_ip = %request.connection_info().realip_remote_addr().unwrap_or(""), + http.user_agent = %user_agent, + http.target = %request + .uri() + .path_and_query() + .map_or("", actix_web::http::uri::PathAndQuery::as_str), + http.status_code = tracing::field::Empty, + otel.name = %otel_name, + otel.kind = "server", + otel.status_code = tracing::field::Empty, + trace_id = tracing::field::Empty, + request_id = %request_id, + exception.message = tracing::field::Empty, + exception.details = tracing::field::Empty, + ); + std::mem::drop(connection_info); + tracing_actix_web::root_span_macro::private::set_otel_parent(request, &span); + span + } + + fn on_request_end(span: Span, outcome: &Result, Error>) { + let span_ref = span.clone(); + DefaultRootSpanBuilder::on_request_end(span, outcome); + + // Emit a single log event per completed request so it appears in logs. + // Error responses (4xx/5xx) are already logged by our error handlers. 
+ let _enter = span_ref.enter(); + if let Ok(response) = outcome { + let status = response.response().status(); + if status.is_success() || status.is_redirection() { + log::info!("{}", status.canonical_reason().unwrap_or("ok")); } } - }); - resp_recv.await.map_err(ErrorInternalServerError) + } } async fn process_sql_request( @@ -278,12 +390,19 @@ async fn process_sql_request( let app_state: &web::Data = req.app_data().expect("app_state"); let server_timing = ServerTiming::for_env(app_state.config.environment); - let sql_file = app_state - .sql_file_cache - .get_with_privilege(app_state, &sql_path, false) - .await - .with_context(|| format!("Unable to read SQL file \"{}\"", sql_path.display())) - .map_err(|e| anyhow_err_to_actix(e, app_state))?; + let sql_file = { + let span = tracing::info_span!( + "sqlpage.file.load", + code.file.path = %sql_path.display(), + ); + app_state + .sql_file_cache + .get_with_privilege(app_state, &sql_path, false) + .instrument(span) + .await + .with_context(|| format!("Unable to read SQL file \"{}\"", sql_path.display())) + .map_err(|e| anyhow_err_to_actix(e, app_state))? + }; server_timing.record("sql_file"); render_sql(req, sql_file, server_timing).await @@ -435,7 +554,7 @@ pub fn create_app( // when receiving a request outside of the prefix, redirect to the prefix .default_service(fn_service(default_prefix_redirect)) .wrap(OidcMiddleware::new(&app_state)) - .wrap(Logger::default()) + .wrap(TracingLogger::::new()) .wrap(default_headers()) .wrap(middleware::Condition::new( app_state.config.compress_responses, @@ -561,8 +680,8 @@ fn log_welcome_message(config: &AppConfig) { let version = env!("CARGO_PKG_VERSION"); let web_root = config.web_root.display(); - eprintln!( - "{sparkle} SQLPage v{version} started successfully! {sparkle}\n\n\ + log::info!( + "\n{sparkle} SQLPage v{version} started successfully! 
{sparkle}\n\n\ View your website at:\n{link} {address_message}\n\n\ Create your pages with SQL files in:\n{computer} {web_root}\n\n\ Happy coding! {rocket}" @@ -585,3 +704,24 @@ fn bind_unix_socket_err(e: std::io::Error, unix_socket: &std::path::Path) -> any }; anyhow::anyhow!(e).context(ctx) } + +#[cfg(test)] +mod tests { + use super::{request_span_name, sql_execution_span_name}; + use actix_web::test::TestRequest; + use std::path::Path; + + #[test] + fn request_span_name_uses_request_path_when_no_matched_route_exists() { + let request = TestRequest::with_uri("/todos/42?filter=open").to_srv_request(); + assert_eq!(request_span_name(&request), "GET /todos/42"); + } + + #[test] + fn sql_execution_span_name_uses_sql_file_path() { + assert_eq!( + sql_execution_span_name(Path::new("website/todos.sql")), + "SQL website/todos.sql" + ); + } +} diff --git a/src/webserver/oidc.rs b/src/webserver/oidc.rs index 21a40e24..7c61039c 100644 --- a/src/webserver/oidc.rs +++ b/src/webserver/oidc.rs @@ -35,6 +35,7 @@ use openidconnect::{ StandardTokenResponse, }; use serde::{Deserialize, Serialize}; +use tracing::Instrument; use super::error::anyhow_err_to_actix_resp; use super::http_client::make_http_client; @@ -265,12 +266,33 @@ impl OidcState { id_token: OidcToken, expected_nonce: &Nonce, ) -> anyhow::Result { + let span = tracing::info_span!( + "oidc.jwt.verify", + enduser.id = tracing::field::Empty, + user.id = tracing::field::Empty, + user.name = tracing::field::Empty, + user.full_name = tracing::field::Empty, + user.email = tracing::field::Empty, + ); + let _guard = span.enter(); let snapshot = self.snapshot(); let verifier = self.config.create_id_token_verifier(&snapshot.client); let nonce_verifier = |nonce: Option<&Nonce>| check_nonce(nonce, expected_nonce); let claims: OidcClaims = id_token .into_claims(&verifier, nonce_verifier) .map_err(|e| anyhow::anyhow!("Could not verify the ID token: {e}"))?; + let sub = claims.subject().as_str(); + span.record("enduser.id", sub); + 
span.record("user.id", sub); + if let Some(name) = claims.preferred_username() { + span.record("user.name", name.as_str()); + } + if let Some(name) = claims.name().and_then(|n| n.get(None)) { + span.record("user.full_name", name.as_str()); + } + if let Some(email) = claims.email() { + span.record("user.email", email.as_str()); + } Ok(claims) } @@ -463,7 +485,11 @@ async fn handle_oidc_callback( oidc_state: &Arc, request: ServiceRequest, ) -> ServiceResponse { - match process_oidc_callback(oidc_state, &request).await { + let span = tracing::info_span!("oidc.callback"); + match process_oidc_callback(oidc_state, &request) + .instrument(span) + .await + { Ok(mut response) => { clear_redirect_count_cookie(&mut response); request.into_response(response) @@ -698,11 +724,17 @@ async fn exchange_code_for_token( http_client: &awc::Client, oidc_callback_params: OidcCallbackParams, ) -> anyhow::Result { + let span = tracing::info_span!( + "http.client", + otel.name = "POST token_endpoint", + http.request.method = "POST", + ); let token_response = oidc_client .exchange_code(openidconnect::AuthorizationCode::new( oidc_callback_params.code, ))? .request_async(&AwcHttpClient::from_client(http_client)) + .instrument(span) .await .context("Failed to exchange code for token")?; let access_token = token_response.access_token();