diff --git a/public/images/docs/observe/1.png b/public/images/docs/observe/1.png deleted file mode 100644 index 4cf8d841..00000000 Binary files a/public/images/docs/observe/1.png and /dev/null differ diff --git a/public/images/docs/observe/2.png b/public/images/docs/observe/2.png deleted file mode 100644 index e5b66386..00000000 Binary files a/public/images/docs/observe/2.png and /dev/null differ diff --git a/public/images/docs/observe/3.png b/public/images/docs/observe/3.png deleted file mode 100644 index 94a4ada7..00000000 Binary files a/public/images/docs/observe/3.png and /dev/null differ diff --git a/public/images/docs/observe/4.png b/public/images/docs/observe/4.png deleted file mode 100644 index 25ba68de..00000000 Binary files a/public/images/docs/observe/4.png and /dev/null differ diff --git a/public/images/docs/observe/5.png b/public/images/docs/observe/5.png deleted file mode 100644 index 598c228e..00000000 Binary files a/public/images/docs/observe/5.png and /dev/null differ diff --git a/public/images/docs/observe/5.webp b/public/images/docs/observe/5.webp deleted file mode 100644 index d4577521..00000000 Binary files a/public/images/docs/observe/5.webp and /dev/null differ diff --git a/public/images/docs/observe/alerts-create.png b/public/images/docs/observe/alerts-create.png new file mode 100644 index 00000000..92ed57d4 Binary files /dev/null and b/public/images/docs/observe/alerts-create.png differ diff --git a/public/images/docs/observe/alerts-overview.png b/public/images/docs/observe/alerts-overview.png new file mode 100644 index 00000000..8732d1b5 Binary files /dev/null and b/public/images/docs/observe/alerts-overview.png differ diff --git a/public/images/docs/observe/dashboard-add-widget.png b/public/images/docs/observe/dashboard-add-widget.png new file mode 100644 index 00000000..816fa177 Binary files /dev/null and b/public/images/docs/observe/dashboard-add-widget.png differ diff --git a/public/images/docs/observe/dashboard-overview.png 
b/public/images/docs/observe/dashboard-overview.png new file mode 100644 index 00000000..0ef53489 Binary files /dev/null and b/public/images/docs/observe/dashboard-overview.png differ diff --git a/public/images/docs/observe/dashboard-populated.png b/public/images/docs/observe/dashboard-populated.png new file mode 100644 index 00000000..1c990723 Binary files /dev/null and b/public/images/docs/observe/dashboard-populated.png differ diff --git a/public/images/docs/observe/evals-create.png b/public/images/docs/observe/evals-create.png new file mode 100644 index 00000000..c616928d Binary files /dev/null and b/public/images/docs/observe/evals-create.png differ diff --git a/public/images/docs/observe/evals-overview.png b/public/images/docs/observe/evals-overview.png new file mode 100644 index 00000000..f45b9633 Binary files /dev/null and b/public/images/docs/observe/evals-overview.png differ diff --git a/public/images/docs/observe/llm-tracing-agent-graph.png b/public/images/docs/observe/llm-tracing-agent-graph.png new file mode 100644 index 00000000..777d7631 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-agent-graph.png differ diff --git a/public/images/docs/observe/llm-tracing-agent-path.png b/public/images/docs/observe/llm-tracing-agent-path.png new file mode 100644 index 00000000..7e7ffbba Binary files /dev/null and b/public/images/docs/observe/llm-tracing-agent-path.png differ diff --git a/public/images/docs/observe/llm-tracing-bulk-actions.png b/public/images/docs/observe/llm-tracing-bulk-actions.png new file mode 100644 index 00000000..ba19cae6 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-bulk-actions.png differ diff --git a/public/images/docs/observe/llm-tracing-date-range.png b/public/images/docs/observe/llm-tracing-date-range.png new file mode 100644 index 00000000..84b9c2ed Binary files /dev/null and b/public/images/docs/observe/llm-tracing-date-range.png differ diff --git 
a/public/images/docs/observe/llm-tracing-detail-drawer.png b/public/images/docs/observe/llm-tracing-detail-drawer.png new file mode 100644 index 00000000..2378c742 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-detail-drawer.png differ diff --git a/public/images/docs/observe/llm-tracing-display.png b/public/images/docs/observe/llm-tracing-display.png new file mode 100644 index 00000000..d2928481 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-display.png differ diff --git a/public/images/docs/observe/llm-tracing-filter.png b/public/images/docs/observe/llm-tracing-filter.png new file mode 100644 index 00000000..137cd287 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-filter.png differ diff --git a/public/images/docs/observe/llm-tracing-overview.png b/public/images/docs/observe/llm-tracing-overview.png new file mode 100644 index 00000000..aa1aaacb Binary files /dev/null and b/public/images/docs/observe/llm-tracing-overview.png differ diff --git a/public/images/docs/observe/llm-tracing-sessions-tab.png b/public/images/docs/observe/llm-tracing-sessions-tab.png new file mode 100644 index 00000000..48ffa061 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-sessions-tab.png differ diff --git a/public/images/docs/observe/llm-tracing-users-tab.png b/public/images/docs/observe/llm-tracing-users-tab.png new file mode 100644 index 00000000..e5ea255e Binary files /dev/null and b/public/images/docs/observe/llm-tracing-users-tab.png differ diff --git a/public/images/docs/observe/llm-tracing-voice-detail.png b/public/images/docs/observe/llm-tracing-voice-detail.png new file mode 100644 index 00000000..de0f1079 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-voice-detail.png differ diff --git a/public/images/docs/observe/llm-tracing-voice-overview.png b/public/images/docs/observe/llm-tracing-voice-overview.png new file mode 100644 index 00000000..cf935b6a Binary files /dev/null and 
b/public/images/docs/observe/llm-tracing-voice-overview.png differ diff --git a/public/images/docs/observe/sessions-bulk-actions.png b/public/images/docs/observe/sessions-bulk-actions.png new file mode 100644 index 00000000..b301ecc5 Binary files /dev/null and b/public/images/docs/observe/sessions-bulk-actions.png differ diff --git a/public/images/docs/observe/sessions-date-range.png b/public/images/docs/observe/sessions-date-range.png new file mode 100644 index 00000000..9cc71673 Binary files /dev/null and b/public/images/docs/observe/sessions-date-range.png differ diff --git a/public/images/docs/observe/sessions-detail.png b/public/images/docs/observe/sessions-detail.png new file mode 100644 index 00000000..a5e08cbd Binary files /dev/null and b/public/images/docs/observe/sessions-detail.png differ diff --git a/public/images/docs/observe/sessions-display.png b/public/images/docs/observe/sessions-display.png new file mode 100644 index 00000000..2c3750f5 Binary files /dev/null and b/public/images/docs/observe/sessions-display.png differ diff --git a/public/images/docs/observe/sessions-filter.png b/public/images/docs/observe/sessions-filter.png new file mode 100644 index 00000000..a90eda0d Binary files /dev/null and b/public/images/docs/observe/sessions-filter.png differ diff --git a/public/images/docs/observe/sessions-overview.png b/public/images/docs/observe/sessions-overview.png new file mode 100644 index 00000000..08d597b2 Binary files /dev/null and b/public/images/docs/observe/sessions-overview.png differ diff --git a/public/images/docs/observe/sessions-replay-config.png b/public/images/docs/observe/sessions-replay-config.png new file mode 100644 index 00000000..16ce3464 Binary files /dev/null and b/public/images/docs/observe/sessions-replay-config.png differ diff --git a/public/images/docs/observe/users-date-range.png b/public/images/docs/observe/users-date-range.png new file mode 100644 index 00000000..1d5a0e0a Binary files /dev/null and 
b/public/images/docs/observe/users-date-range.png differ diff --git a/public/images/docs/observe/users-detail.png b/public/images/docs/observe/users-detail.png new file mode 100644 index 00000000..c88cf233 Binary files /dev/null and b/public/images/docs/observe/users-detail.png differ diff --git a/public/images/docs/observe/users-display.png b/public/images/docs/observe/users-display.png new file mode 100644 index 00000000..adbffb56 Binary files /dev/null and b/public/images/docs/observe/users-display.png differ diff --git a/public/images/docs/observe/users-filter.png b/public/images/docs/observe/users-filter.png new file mode 100644 index 00000000..f488c9b7 Binary files /dev/null and b/public/images/docs/observe/users-filter.png differ diff --git a/public/images/docs/observe/users-overview.png b/public/images/docs/observe/users-overview.png new file mode 100644 index 00000000..1d5a0e0a Binary files /dev/null and b/public/images/docs/observe/users-overview.png differ diff --git a/public/images/docs/observe/voice-agent-definitions.png b/public/images/docs/observe/voice-agent-definitions.png new file mode 100644 index 00000000..33bb2d4a Binary files /dev/null and b/public/images/docs/observe/voice-agent-definitions.png differ diff --git a/public/images/docs/observe/voice-call-detail.png b/public/images/docs/observe/voice-call-detail.png new file mode 100644 index 00000000..ffc058e0 Binary files /dev/null and b/public/images/docs/observe/voice-call-detail.png differ diff --git a/public/images/docs/observe/voice-create-form.png b/public/images/docs/observe/voice-create-form.png new file mode 100644 index 00000000..6aa525bf Binary files /dev/null and b/public/images/docs/observe/voice-create-form.png differ diff --git a/public/images/docs/observe/voice-projects-list.png b/public/images/docs/observe/voice-projects-list.png new file mode 100644 index 00000000..23d19398 Binary files /dev/null and b/public/images/docs/observe/voice-projects-list.png differ diff --git 
a/public/images/docs/observe/voice-tracing-overview.png b/public/images/docs/observe/voice-tracing-overview.png new file mode 100644 index 00000000..c94d0d73 Binary files /dev/null and b/public/images/docs/observe/voice-tracing-overview.png differ diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index f8c26b39..9d9d4f53 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -366,12 +366,13 @@ export const tabNavigation: NavTab[] = [ title: 'Features', items: [ { title: 'Set Up Observability', href: '/docs/observe/features/quickstart' }, - { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' }, + { title: 'LLM Tracing', href: '/docs/observe/features/llm-tracing' }, { title: 'Sessions', href: '/docs/observe/features/session' }, { title: 'Users', href: '/docs/observe/features/users' }, + { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' }, + { title: 'Dashboards', href: '/docs/observe/features/dashboard' }, { title: 'Alerts & Monitors', href: '/docs/observe/features/alerts' }, { title: 'Voice Observability', href: '/docs/observe/features/voice' }, - { title: 'Dashboards', href: '/docs/observe/features/dashboard' }, { title: 'Manual Tracing', items: [ diff --git a/src/pages/docs/observe/features/llm-tracing.mdx b/src/pages/docs/observe/features/llm-tracing.mdx new file mode 100644 index 00000000..b988ccf5 --- /dev/null +++ b/src/pages/docs/observe/features/llm-tracing.mdx @@ -0,0 +1,236 @@ +--- +title: "Tracing" +description: "See every request your AI app handled — what went in, what came out, how long it took, and where it went wrong." +--- + +## About + +Every time someone uses your AI app, the platform records that entire request as a **trace**. A trace captures everything: the user's input, every AI call made along the way, the final output, how long each step took, and whether anything failed. + +The **Tracing** page is where you come to see all of those recordings in one place. 
Think of it as a searchable history of every conversation or task your AI has handled. + +Tracing overview + +--- + +## When to use + +- **Something went wrong for a user** — Find their exact request and see what your AI said, step by step. +- **Your app feels slow** — Check the Latency column to see which requests are taking the longest. +- **You want to see error patterns** — Filter to show only failed requests and spot what they have in common. +- **You're reviewing how your AI agent thinks** — Open the Agent Graph to see the full decision path it took. +- **You need to train or test your AI on real data** — Select traces in bulk and add them to a dataset. + +--- + +## Getting around the page + + + + Click **Tracing** in the left sidebar under **Observe**, then click the project you want to look at. + + Tracing page with project open + + + + The date picker in the top-right corner controls how far back you're looking. It defaults to the past 7 days. Click it to change the range. + + Date range picker showing Today, Yesterday, Past 7D, Past 30D, Past 3M, Past 6M, Past 12M, Custom range + + Options: Today · Yesterday · Past 7D · Past 30D · Past 3M · Past 6M · Past 12M · or pick a custom date range. + + + + Use **Filter** to search by model, user, error status, or any other property. Use **Display** to quickly show only failed requests or requests that haven't been reviewed yet. + + + + Click any row to open it. A side panel slides in showing the full breakdown — every step the AI took, the input and output at each step, and the timing. Use the ↑ ↓ arrow buttons to move between traces without closing the panel. + + + +--- + +## The trace list + +The **Trace** tab (the default view) shows one row per request. Here's what each column means: + +| Column | What it tells you | +|---|---| +| **Trace Name** | The name of the top-level task (e.g. `support_agent.run`). | +| **Input** | A preview of what the user sent. | +| **Output** | A preview of what your AI replied. 
| +| **Timestamp** | When this request happened. | +| **Status** | **OK** (green — it worked) or **ERROR** (red — something failed). | +| **Latency** | Total time from request to response. | +| **Tokens** | Total number of AI tokens used across this entire request. | + +Trace list showing Trace Name, Input, Output, Timestamp, Status, Latency, Tokens columns + + + If you're using a **voice project**, the columns are different: Call Details, Status, Duration, Avg Latency, Turn Count, and Tokens — because voice calls are measured differently than text exchanges. + + +--- + +## Opening a trace (the detail panel) + +Click any row to open the detail panel. It splits into two sides: + +Trace detail panel showing span tree on the left and span details on the right + +### Left side — the span tree + +This shows every step your AI took to answer the request, in order. Each step (called a **span**) is shown as a row with its name, how long it took, and whether it passed or failed. + +For example, a support agent might show: +- `llm.intent_classification` — the AI figures out what the user wants +- `tool.check_order_status` — it looks up an order +- `llm.response_generation` — it writes the reply + +Click any step to see its full details on the right side. + +### Right side — the step details + +When you click a step on the left, the right side shows you everything about it: + +- **At the top**: Type, Status, when it started, how long it took, total tokens, prompt tokens, completion tokens, and Cost. For LLM spans, you'll also see the Model name (e.g. `gpt-4o`). +- **Preview tab**: The exact text that went into this step and the exact text that came out. Below that, a full list of technical attributes like model name, provider, and token counts. +- **Log View tab**: Raw logs for this step. +- **Evals tab**: Any quality scores attached to this step. +- **Annotations tab**: Any notes a human reviewer has added. +- **Events tab**: Any events that fired during this step. 
+ +--- + +## Filtering + +Click **Filter** to narrow down which traces you're looking at. + +Filter panel with AI search bar, Basic and Query tabs, and property list + +There are three ways to filter: + +- **AI search** — Just describe what you want in plain English, e.g. *"show traces with errors on gpt-4"*, and the filter is built for you automatically. +- **Basic mode** — Pick a property (like Model or Status), pick a condition (like "is" or "contains"), and enter a value. Add as many as you need — they all apply together. +- **Query mode** — For technical users who want to write a filter expression directly. + +Properties you can filter on include: Trace ID, Trace Name, Span Name, Status, Model, Node Type, User ID, Service / Trace Name, Provider, and Span Kind. You can also filter on eval scores and annotation values. + +--- + +## Display settings + +Click **Display** to control how the page looks and what the graph at the top shows. + +Display panel open with Graph View tab active and sections for Rows, Columns, Metrics, Group, Graph, Settings + +**The three graph views at the top:** + +| View | What you see | +|---|---| +| **Graph View** | A standard chart showing latency and request volume over time. Good for spotting spikes. | +| **Agent Graph** | A diagram showing how the AI's steps connect to each other — useful for understanding complex agent flows. | +| **Agent Path** | A different layout of the same agent flow, showing paths rather than a graph. | + +Agent Graph view — nodes and edges showing span relationships + +Agent Path view — path-based visualization of agent flow + +**Other settings in this panel:** + +- **Rows** — Make each row taller or shorter. +- **Columns** — Choose which columns to show or hide, and add custom ones. +- **Metrics** — One-click filters: show only traces that have eval scores, show only errors, or show only traces that haven't been annotated yet. +- **Group** — Group your traces using the Group by dropdown. 
+- **Compare graph** — Overlay a second time period on the graph to compare before and after a change. +- **Set default for everyone** — Save your current layout as the default view for the whole team. + +--- + +## Bulk actions + +To take action on multiple traces at once, tick the checkboxes on the left of each row. A toolbar appears at the top showing how many you've selected. + +Bulk action bar showing 5 rows selected and the Actions dropdown with Move to dataset, Add tags, Add to annotation queue + +Click **Actions** to see what you can do: + +| Action | What it does | +|---|---| +| **Move to dataset** | Saves these traces to a dataset — useful for testing or fine-tuning your AI. | +| **Add tags** | Labels all the selected traces at once. Good for organizing by topic, issue type, etc. | +| **Add to annotation queue** | Sends them to a queue for a human to review and score. | + +--- + +## Saving a view + +If you've set up filters or display settings you want to come back to, click the **+** button in the top-right corner to save them as a named view. + +Once a view is saved, a **Save view** button appears whenever you change it, so you can update the saved version. Saved views are shared across your whole team — anyone on the project can see and use them. + +--- + +## Sessions tab + +Click the **Sessions** tab to group traces by conversation. Instead of seeing individual requests, you see complete multi-turn conversations with their total stats. + +Sessions tab + +| Column | What it shows | +|---|---| +| **Session Id** | A unique ID for this conversation. | +| **First Message** | The opening message of the conversation. | +| **Last Message** | The most recent message. | +| **Duration** | How long the conversation lasted. | +| **Total Cost** | Combined cost of all AI calls in this conversation. | +| **Total Traces** | How many individual requests were part of this session. | + +For more on sessions, see [Sessions](/docs/observe/features/session). 
+ +--- + +## Users tab + +Click the **Users** tab to see activity grouped by individual end users — useful for understanding how different users interact with your AI. + +Users tab + +| Column | What it shows | +|---|---| +| **User ID** | The user's identifier (set via `user.id` in your code). | +| **First Active** | When this user's first request arrived. | +| **Last Active** | When their most recent request arrived. | +| **No. of Traces** | Total number of requests from this user. | +| **No. of Sessions** | How many conversations this user has had. | +| **Actions** | Options to view or manage this user's data. | + +For more on user tracking, see [Users](/docs/observe/features/users). + +--- + +## Auto-refresh and export + +- **Auto refresh** — Toggle this in the header to automatically check for new traces every 10 seconds. The timestamp next to it shows when it last updated. +- **Manual refresh** — Click the refresh icon anytime to pull the latest data immediately. +- **Export** — Click the download icon to save the current view as a file. + +--- + +## Next Steps + + + + Analyze multi-turn conversations grouped by session. + + + View activity and metrics broken down per end user. + + + Run automated quality checks on your production traces. + + + Get notified when metrics cross a threshold. + + diff --git a/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx b/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx index 2aa5b933..e17088cf 100644 --- a/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx +++ b/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx @@ -3,6 +3,8 @@ title: "In-line Evaluations: Attach Evals to Spans in Future AGI" description: "Run evaluations directly inside a traced span so results are automatically attached to that span in the Future AGI dashboard." 
--- +{/* MANUAL REVIEW NEEDED: verify the inline eval API (trace_eval=True, evaluator.evaluate()) against the current SDK version before publishing — the SDK interface may have changed */} + ## About Evaluation results are most useful when they sit next to the data that produced them. Running evals as a separate step means matching results back to specific spans after the fact. In-line evaluations remove that gap by running `evaluator.evaluate()` with `trace_eval=True` inside an active span. The evaluation result is automatically attached to that span as attributes, so both the trace data and the eval score appear together in the dashboard. diff --git a/src/pages/index.astro b/src/pages/index.astro index 8e92d422..228f3adf 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -36,7 +36,7 @@ const sections = [ color: "blue", href: "/docs/observe", links: [ - { title: "Quickstart", href: "/docs/observe/quickstart" }, + { title: "Quickstart", href: "/docs/observe/features/quickstart" }, { title: "Tracing", href: "/docs/tracing" }, ] },