diff --git a/services/ws-modules/face-detection/src/lib.rs b/services/ws-modules/face-detection/src/lib.rs
index aec6cbf..82fa4e1 100644
--- a/services/ws-modules/face-detection/src/lib.rs
+++ b/services/ws-modules/face-detection/src/lib.rs
@@ -823,10 +823,10 @@ fn face_video_element() -> Result<HtmlVideoElement, JsValue> {
         .and_then(|window| window.document())
         .ok_or_else(|| JsValue::from_str("No document available"))?;
     document
-        .get_element_by_id("face-video-preview")
-        .ok_or_else(|| JsValue::from_str("Missing #face-video-preview element"))?
+        .get_element_by_id("video-preview")
+        .ok_or_else(|| JsValue::from_str("Missing #video-preview element"))?
         .dyn_into::<HtmlVideoElement>()
-        .map_err(|_| JsValue::from_str("#face-video-preview was not a video element"))
+        .map_err(|_| JsValue::from_str("#video-preview was not a video element"))
 }
 
 fn face_output_canvas_element() -> Result<HtmlCanvasElement, JsValue> {
@@ -834,10 +834,10 @@ fn face_output_canvas_element() -> Result<HtmlCanvasElement, JsValue> {
         .and_then(|window| window.document())
         .ok_or_else(|| JsValue::from_str("No document available"))?;
     document
-        .get_element_by_id("face-video-output-canvas")
-        .ok_or_else(|| JsValue::from_str("Missing #face-video-output-canvas element"))?
+        .get_element_by_id("video-output-canvas")
+        .ok_or_else(|| JsValue::from_str("Missing #video-output-canvas element"))?
         .dyn_into::<HtmlCanvasElement>()
-        .map_err(|_| JsValue::from_str("#face-video-output-canvas was not a canvas element"))
+        .map_err(|_| JsValue::from_str("#video-output-canvas was not a canvas element"))
 }
 
 fn face_preprocess_canvas() -> Result<HtmlCanvasElement, JsValue> {
diff --git a/services/ws-server/static/app.js b/services/ws-server/static/app.js
index 7108695..2f9e258 100644
--- a/services/ws-server/static/app.js
+++ b/services/ws-server/static/app.js
@@ -28,13 +28,10 @@ const gpuInfoButton = document.getElementById("gpu-info-button");
 const speechButton = document.getElementById("speech-button");
 const nfcButton = document.getElementById("nfc-button");
 const sensorsButton = document.getElementById("sensors-button");
-const videoOutputButton = document.getElementById("video-output-button");
 const agentStatusEl = document.getElementById("agent-status");
 const agentIdEl = document.getElementById("agent-id");
 const sensorOutputEl = document.getElementById("sensor-output");
-const videoOutputEl = document.getElementById("ml-debug-output");
 const videoPreview = document.getElementById("video-preview");
-const videoOutputCanvas = document.getElementById("video-output-canvas");
 
 let microphone = null;
 let videoCapture = null;
@@ -44,25 +41,7 @@ let speechListening = false;
 let sensorsActive = false;
 let orientationState = null;
 let motionState = null;
-let videoCvSession = null;
-let videoCvInputName = null;
-let videoCvOutputName = null;
-let videoCvLoopId = null;
-let videoCvInferencePending = false;
-let lastVideoInferenceAt = 0;
-let lastVideoCvLabel = null;
-let videoCvCanvas = null;
-let videoCvContext = null;
-let videoOverlayContext = videoOutputCanvas.getContext("2d");
-let videoOutputVisible = false;
-let videoRenderFrameId = null;
-let lastVideoInferenceSummary = null;
-const loadedWorkflowModules = new Map();
 let sendClientEvent = () => {};
-const VIDEO_INFERENCE_INTERVAL_MS = 750;
-const VIDEO_RENDER_SCORE_THRESHOLD = 0.35;
-const VIDEO_MODEL_PATH = "/static/models/video_cv.onnx";
-const VIDEO_FALLBACK_INPUT_SIZE = 224;
 const STORED_AGENT_ID_KEY = "ws_wasm_agent.agent_id";
 let currentAgentId = null;
 
@@ -178,11 +157,22 @@ const runSelectedWorkflowModule = async () => {
   }
 
   const loadedModule = await loadWorkflowModule(moduleKey);
+  if (
+    typeof loadedModule.is_running === "function"
+    && loadedModule.is_running()
+    && typeof loadedModule.stop === "function"
+  ) {
+    append(`${moduleConfig.label} module: calling stop()`);
+    loadedModule.stop();
+    append(`${moduleConfig.label} module stopped`);
+    return;
+  }
+
   append(`${moduleConfig.label} module: calling run()`);
   const runPromise = loadedModule.run();
   append(`${moduleConfig.label} module: run() started`);
   await runPromise;
-  append(`${moduleConfig.label} module completed`);
+  append(`${moduleConfig.label} module run() returned`);
 };
 
 const handleProtocolMessage = (message) => {
@@ -257,31 +247,6 @@ const renderSensorOutput = () => {
   sensorOutputEl.value = lines.join("\n");
 };
 
-const setVideoOutput = (lines) => {
-  videoOutputEl.value = Array.isArray(lines) ? lines.join("\n") : String(lines);
-};
-
-const updateVideoStatus = (extraLines = []) => {
-  const inputMetadata = videoCvInputName
-    ? videoCvSession?.inputMetadata?.[videoCvInputName]
-    : null;
-  const outputMetadata = videoCvOutputName
-    ? videoCvSession?.outputMetadata?.[videoCvOutputName]
-    : null;
-  const lines = [
-    `model: ${videoCvSession ? "loaded" : "not loaded"}`,
-    `video: ${videoCapture ? "active" : "inactive"}`,
-    `input: ${videoCvInputName ?? "n/a"}`,
-    `output: ${videoCvOutputName ?? "n/a"}`,
-    `input dims: ${JSON.stringify(inputMetadata?.dimensions ?? [])}`,
-    `output dims: ${JSON.stringify(outputMetadata?.dimensions ?? [])}`,
-    `loop: ${videoCvLoopId === null ? "idle" : "running"}`,
-    `display: ${videoOutputVisible ? "visible" : "hidden"}`,
-    `mode: ${lastVideoInferenceSummary?.mode ?? "unknown"}`,
-  ];
-  setVideoOutput(lines.concat("", extraLines));
-};
-
 const handleOrientation = (event) => {
   orientationState = {
     alpha: event.alpha,
@@ -370,765 +335,7 @@ const startSensorsFlow = async () => {
   append("device sensors started; streaming locally to textbox");
 };
 
-const getTopK = (values, limit = 3) => {
-  return values
-    .map((value, index) => ({ value, index }))
-    .sort((left, right) => right.value - left.value)
-    .slice(0, limit);
-};
-
-const ensureVideoCvCanvas = () => {
-  if (!videoCvCanvas) {
-    videoCvCanvas = document.createElement("canvas");
-    videoCvContext = videoCvCanvas.getContext("2d", { willReadFrequently: true });
-  }
-
-  if (!videoCvContext) {
-    throw new Error("Unable to create 2D canvas context for video preprocessing.");
-  }
-
-  return videoCvContext;
-};
-
-const ensureVideoOverlayContext = () => {
-  if (!videoOverlayContext) {
-    videoOverlayContext = videoOutputCanvas.getContext("2d");
-  }
-
-  if (!videoOverlayContext) {
-    throw new Error("Unable to create video output canvas context.");
-  }
-
-  return videoOverlayContext;
-};
-
-const selectVideoModelInputName = (session) => {
-  const inputNames = Array.isArray(session?.inputNames) ? session.inputNames : [];
-  if (!inputNames.length) {
-    return null;
-  }
-
-  const ranked = inputNames
-    .map((name) => {
-      const metadata = session?.inputMetadata?.[name];
-      const dimensions = Array.isArray(metadata?.dimensions) ? metadata.dimensions : [];
-      const normalizedName = String(name).toLowerCase();
-      let score = 0;
-
-      if (dimensions.length === 4) {
-        score += 100;
-      } else if (dimensions.length === 3) {
-        score += 40;
-      }
-
-      if (
-        normalizedName.includes("pixel")
-        || normalizedName.includes("image")
-        || normalizedName.includes("images")
-        || normalizedName.includes("input")
-      ) {
-        score += 25;
-      }
-
-      if (normalizedName.includes("mask") || normalizedName.includes("token")) {
-        score -= 50;
-      }
-
-      return { name, score };
-    })
-    .sort((left, right) => right.score - left.score);
-
-  return ranked[0]?.name ?? inputNames[0];
-};
-
-const selectVideoModelOutputName = (session) => {
-  const outputNames = Array.isArray(session?.outputNames) ? session.outputNames : [];
-  if (!outputNames.length) {
-    return null;
-  }
-
-  const ranked = outputNames
-    .map((name) => {
-      const normalizedName = String(name).toLowerCase();
-      let score = 0;
-      if (normalizedName.includes("box")) {
-        score += 100;
-      }
-      if (normalizedName.includes("logit") || normalizedName.includes("score")) {
-        score += 40;
-      }
-      return { name, score };
-    })
-    .sort((left, right) => right.score - left.score);
-
-  return ranked[0]?.name ?? outputNames[0];
-};
-
-const resolveVideoModelLayout = () => {
-  if (!videoCvSession || !videoCvInputName) {
-    throw new Error("Video CV model is not loaded.");
-  }
-
-  const metadata = videoCvSession.inputMetadata?.[videoCvInputName];
-  const dataType = metadata?.type ?? "float32";
-  if (dataType !== "float32" && dataType !== "uint8") {
-    throw new Error(`Unsupported video model input type: ${dataType}`);
-  }
-
-  const rawDimensions = Array.isArray(metadata?.dimensions)
-    ? metadata.dimensions
-    : [];
-  const dimensions = rawDimensions.length === 4
-    ? rawDimensions
-    : rawDimensions.length === 3
-    ? [1, ...rawDimensions]
-    : [1, 3, VIDEO_FALLBACK_INPUT_SIZE, VIDEO_FALLBACK_INPUT_SIZE];
-
-  const resolved = dimensions.map((dimension, index) => {
-    if (typeof dimension === "number" && Number.isFinite(dimension) && dimension > 0) {
-      return dimension;
-    }
-
-    if (index === 0) {
-      return 1;
-    }
-
-    if (index === 1 && dimensions.length === 4) {
-      const inputName = String(videoCvInputName).toLowerCase();
-      if (!inputName.includes("nhwc")) {
-        return 3;
-      }
-    }
-
-    return VIDEO_FALLBACK_INPUT_SIZE;
-  });
-
-  const secondDimension = resolved[1];
-  const lastDimension = resolved[3];
-  const inputName = String(videoCvInputName).toLowerCase();
-  const channelsFirst = inputName.includes("nhwc")
-    ? false
-    : secondDimension === 1
-      || secondDimension === 3
-      || ((lastDimension !== 1 && lastDimension !== 3) && !inputName.includes("image_embeddings"));
-  if (channelsFirst) {
-    const [, channels, height, width] = resolved;
-    if (channels !== 1 && channels !== 3) {
-      throw new Error(`Unsupported channel count for NCHW image input: ${channels}`);
-    }
-
-    return {
-      dataType,
-      channels,
-      width,
-      height,
-      tensorDimensions: [1, channels, height, width],
-      layout: "nchw",
-    };
-  }
-
-  const [, height, width, channels] = resolved;
-  if (channels !== 1 && channels !== 3) {
-    throw new Error(`Unsupported channel count for NHWC image input: ${channels}`);
-  }
-
-  return {
-    dataType,
-    channels,
-    width,
-    height,
-    tensorDimensions: [1, height, width, channels],
-    layout: "nhwc",
-  };
-};
-
-const buildVideoInputTensor = () => {
-  if (!videoCapture || !videoCvSession || !videoCvInputName) {
-    throw new Error("Video capture or model session is unavailable.");
-  }
-
-  if (!videoPreview.videoWidth || !videoPreview.videoHeight) {
-    throw new Error("Video stream is not ready yet.");
-  }
-
-  const {
-    dataType,
-    channels,
-    width,
-    height,
-    tensorDimensions,
-    layout,
-  } = resolveVideoModelLayout();
-  const context = ensureVideoCvCanvas();
-  videoCvCanvas.width = width;
-  videoCvCanvas.height = height;
-  context.drawImage(videoPreview, 0, 0, width, height);
-
-  const rgba = context.getImageData(0, 0, width, height).data;
-  const elementCount = width * height * channels;
-  const tensorData = dataType === "uint8"
-    ? new Uint8Array(elementCount)
-    : new Float32Array(elementCount);
-
-  for (let pixelIndex = 0; pixelIndex < width * height; pixelIndex += 1) {
-    const rgbaIndex = pixelIndex * 4;
-    const red = rgba[rgbaIndex];
-    const green = rgba[rgbaIndex + 1];
-    const blue = rgba[rgbaIndex + 2];
-
-    if (channels === 1) {
-      const grayscale = Math.round(0.299 * red + 0.587 * green + 0.114 * blue);
-      tensorData[pixelIndex] = dataType === "uint8" ? grayscale : grayscale / 255;
-      continue;
-    }
-
-    if (layout === "nchw") {
-      const planeSize = width * height;
-      if (dataType === "uint8") {
-        tensorData[pixelIndex] = red;
-        tensorData[pixelIndex + planeSize] = green;
-        tensorData[pixelIndex + 2 * planeSize] = blue;
-      } else {
-        tensorData[pixelIndex] = red / 255;
-        tensorData[pixelIndex + planeSize] = green / 255;
-        tensorData[pixelIndex + 2 * planeSize] = blue / 255;
-      }
-      continue;
-    }
-
-    const tensorIndex = pixelIndex * channels;
-    if (dataType === "uint8") {
-      tensorData[tensorIndex] = red;
-      tensorData[tensorIndex + 1] = green;
-      tensorData[tensorIndex + 2] = blue;
-    } else {
-      tensorData[tensorIndex] = red / 255;
-      tensorData[tensorIndex + 1] = green / 255;
-      tensorData[tensorIndex + 2] = blue / 255;
-    }
-  }
-
-  return new window.ort.Tensor(dataType, tensorData, tensorDimensions);
-};
-
-const looksLikeBoxes = (tensor) => {
-  if (!tensor?.dims || !tensor?.data) {
-    return false;
-  }
-
-  const dims = tensor.dims.filter((dimension) => Number.isFinite(dimension));
-  const values = Array.from(tensor.data ?? []);
-  const lastDimension = dims[dims.length - 1];
-  return values.length >= 4 && (lastDimension === 4 || lastDimension === 6 || lastDimension === 7);
-};
-
-const flattenFinite = (tensor) => {
-  return Array.from(tensor?.data ?? []).map(Number).filter((value) => Number.isFinite(value));
-};
-
-const normalizeBox = (boxValues, format = "xyxy") => {
-  if (boxValues.length < 4) {
-    return null;
-  }
-
-  let x1;
-  let y1;
-  let x2;
-  let y2;
-  if (format === "cxcywh") {
-    const [centerX, centerY, width, height] = boxValues;
-    x1 = centerX - width / 2;
-    y1 = centerY - height / 2;
-    x2 = centerX + width / 2;
-    y2 = centerY + height / 2;
-  } else {
-    [x1, y1, x2, y2] = boxValues;
-  }
-
-  if (x2 < x1) {
-    [x1, x2] = [x2, x1];
-  }
-  if (y2 < y1) {
-    [y1, y2] = [y2, y1];
-  }
-
-  const normalized = [x1, y1, x2, y2].map((value) => (
-    value > 1.5 ? value : Math.max(0, Math.min(1, value))
-  ));
-
-  return normalized;
-};
-
-const softmax = (logits) => {
-  const maxLogit = Math.max(...logits);
-  const scores = logits.map((l) => Math.exp(l - maxLogit));
-  const sumScores = scores.reduce((a, b) => a + b, 0);
-  return scores.map((s) => s / sumScores);
-};
-
-const findDetectionTensor = (entries, patterns, predicate = () => true) => {
-  return entries.find(([name, tensor]) => {
-    const normalizedName = String(name).toLowerCase();
-    return patterns.some((pattern) => pattern.test(normalizedName)) && predicate(tensor);
-  }) ?? null;
-};
-
-const decodeHuggingFaceDetectionOutputs = (entries) => {
-  const boxesEntry = findDetectionTensor(
-    entries,
-    [/pred_boxes/, /boxes?/, /bbox/],
-    (tensor) => (Array.isArray(tensor?.dims) ? tensor.dims[tensor.dims.length - 1] : null) === 4,
-  );
-  const logitsEntry = findDetectionTensor(
-    entries,
-    [/logits/, /scores?/, /class/],
-    (tensor) => (Array.isArray(tensor?.dims) ? tensor.dims[tensor.dims.length - 1] : 0) > 1,
-  );
-
-  if (!boxesEntry || !logitsEntry) {
-    return null;
-  }
-
-  const [boxesName, boxesTensor] = boxesEntry;
-  const [, logitsTensor] = logitsEntry;
-  const rawBoxes = flattenFinite(boxesTensor);
-  const rawLogits = flattenFinite(logitsTensor);
-  const boxCount = Math.floor(rawBoxes.length / 4);
-  const classCount = boxCount > 0 ? Math.floor(rawLogits.length / boxCount) : 0;
-  if (boxCount <= 0 || classCount <= 1) {
-    return null;
-  }
-
-  const usesCenterBoxes = /pred_boxes/.test(String(boxesName).toLowerCase());
-  const detections = [];
-  for (let index = 0; index < boxCount; index += 1) {
-    const box = rawBoxes.slice(index * 4, index * 4 + 4);
-    const logits = rawLogits.slice(index * classCount, index * classCount + classCount);
-    const candidateLogits = logits.length > 1 ? logits.slice(0, -1) : logits;
-    const probabilities = softmax(candidateLogits);
-    const best = getTopK(probabilities, 1)[0];
-    if (!best || best.value < VIDEO_RENDER_SCORE_THRESHOLD) {
-      continue;
-    }
-
-    const normalizedBox = normalizeBox(box, usesCenterBoxes ? "cxcywh" : "xyxy");
-    if (!normalizedBox) {
-      continue;
-    }
-
-    detections.push({
-      label: `class_${best.index}`,
-      class_index: best.index,
-      score: best.value,
-      box: normalizedBox,
-    });
-  }
-
-  if (!detections.length) {
-    return {
-      mode: "detection",
-      detections: [],
-      detected_class: "no_detection",
-      class_index: -1,
-      confidence: 0,
-      probabilities: [],
-      top_classes: [],
-    };
-  }
-
-  detections.sort((left, right) => right.score - left.score);
-  const best = detections[0];
-  return {
-    mode: "detection",
-    detections,
-    detected_class: best.label,
-    class_index: best.class_index,
-    confidence: best.score,
-    probabilities: detections.map((entry) => entry.score),
-    top_classes: detections.slice(0, 3).map((entry) => ({
-      label: entry.label,
-      index: entry.class_index,
-      probability: entry.score,
-    })),
-  };
-};
-
-const decodeDetectionOutputs = (outputs) => {
-  const entries = Object.entries(outputs);
-  const huggingFaceSummary = decodeHuggingFaceDetectionOutputs(entries);
-  if (huggingFaceSummary) {
-    return huggingFaceSummary;
-  }
-
-  const boxesEntry = entries.find(([, tensor]) => looksLikeBoxes(tensor));
-
-  if (!boxesEntry) {
-    return null;
-  }
-
-  const [boxesName, boxesTensor] = boxesEntry;
-  const boxDims = Array.isArray(boxesTensor.dims) ? boxesTensor.dims : [];
-  const rawBoxes = flattenFinite(boxesTensor);
-  const boxWidth = boxDims[boxDims.length - 1] ?? 4;
-  const detectionCount = Math.floor(rawBoxes.length / boxWidth);
-  if (detectionCount <= 0) {
-    return null;
-  }
-
-  const scoresEntry = entries.find(([name, tensor]) =>
-    name !== boxesName && flattenFinite(tensor).length >= detectionCount
-  );
-  const classEntry = entries.find(([name, tensor]) =>
-    name !== boxesName && name !== scoresEntry?.[0] && flattenFinite(tensor).length >= detectionCount
-  );
-  const detections = [];
-  const scoreValues = scoresEntry ? flattenFinite(scoresEntry[1]) : [];
-  const classValues = classEntry ? flattenFinite(classEntry[1]) : [];
-
-  for (let index = 0; index < detectionCount; index += 1) {
-    const start = index * boxWidth;
-    const row = rawBoxes.slice(start, start + boxWidth);
-    const normalizedBox = normalizeBox(row);
-    if (!normalizedBox) {
-      continue;
-    }
-
-    let score = Number(scoreValues[index] ?? row[4] ?? row[5] ?? 1);
-    if (!Number.isFinite(score)) {
-      score = 1;
-    }
-
-    let classIndex = classValues[index];
-    if (!Number.isFinite(classIndex)) {
-      classIndex = row.length >= 6 ? row[5] : row.length >= 7 ? row[6] : index;
-    }
-
-    if (score < VIDEO_RENDER_SCORE_THRESHOLD) {
-      continue;
-    }
-
-    detections.push({
-      label: `class_${Math.round(classIndex)}`,
-      class_index: Math.round(classIndex),
-      score,
-      box: normalizedBox,
-    });
-  }
-
-  if (!detections.length) {
-    return {
-      mode: "detection",
-      detections: [],
-      detected_class: "no_detection",
-      class_index: -1,
-      confidence: 0,
-      probabilities: [],
-      top_classes: [],
-    };
-  }
-
-  detections.sort((left, right) => right.score - left.score);
-  const best = detections[0];
-  return {
-    mode: "detection",
-    detections,
-    detected_class: best.label,
-    class_index: best.class_index,
-    confidence: best.score,
-    probabilities: detections.map((entry) => entry.score),
-    top_classes: detections.slice(0, 3).map((entry) => ({
-      label: entry.label,
-      index: entry.class_index,
-      probability: entry.score,
-    })),
-  };
-};
-
-const decodeClassificationOutputs = (output) => {
-  const values = Array.from(output?.data ?? []);
-  if (values.length === 0) {
-    throw new Error("Video model returned an empty output tensor.");
-  }
-
-  if (values.length === 1) {
-    return {
-      mode: "classification",
-      detections: [],
-      detected_class: "scalar_output",
-      class_index: 0,
-      confidence: Number(values[0]),
-      probabilities: values,
-      top_classes: [{ label: "scalar_output", index: 0, probability: Number(values[0]) }],
-    };
-  }
-
-  const probabilities = softmax(values);
-  const ranked = getTopK(probabilities, 3);
-  const best = ranked[0];
-
-  return {
-    mode: "classification",
-    detections: [],
-    detected_class: `class_${best.index}`,
-    class_index: best.index,
-    confidence: best.value,
-    probabilities,
-    top_classes: ranked.map(({ index, value }) => ({
-      label: `class_${index}`,
-      index,
-      probability: value,
-      logit: values[index],
-    })),
-  };
-};
-
-const summarizeVideoOutput = (outputMap) => {
-  const detectionSummary = decodeDetectionOutputs(outputMap);
-  if (detectionSummary) {
-    return detectionSummary;
-  }
-
-  const primaryOutput = outputMap[videoCvOutputName];
-  const primaryValues = Array.from(primaryOutput?.data ?? []);
-  if (primaryValues.length > 0 && primaryValues.length <= 4096) {
-    return decodeClassificationOutputs(primaryOutput);
-  }
-
-  return {
-    mode: "passthrough",
-    detections: [],
-    detected_class: "unrecognized_output",
-    class_index: -1,
-    confidence: 0,
-    probabilities: [],
-    top_classes: [],
-  };
-};
-
-const drawOverlayText = (context, lines) => {
-  if (!lines.length) {
-    return;
-  }
-
-  context.font = "18px ui-monospace, monospace";
-  const lineHeight = 24;
-  const width = Math.max(...lines.map((line) => context.measureText(line).width), 0) + 20;
-  const height = lines.length * lineHeight + 12;
-  context.fillStyle = "rgba(24, 32, 40, 0.72)";
-  context.fillRect(12, 12, width, height);
-  context.fillStyle = "#fffdfa";
-  lines.forEach((line, index) => {
-    context.fillText(line, 22, 36 + index * lineHeight);
-  });
-};
-
-const renderVideoOutputFrame = () => {
-  videoRenderFrameId = null;
-
-  if (!videoOutputVisible || !videoCapture || !videoPreview.videoWidth || !videoPreview.videoHeight) {
-    return;
-  }
-
-  const context = ensureVideoOverlayContext();
-  const width = videoPreview.videoWidth;
-  const height = videoPreview.videoHeight;
-  if (videoOutputCanvas.width !== width || videoOutputCanvas.height !== height) {
-    videoOutputCanvas.width = width;
-    videoOutputCanvas.height = height;
-  }
-
-  context.drawImage(videoPreview, 0, 0, width, height);
-
-  if (lastVideoInferenceSummary?.mode === "detection") {
-    context.lineWidth = 3;
-    context.font = "16px ui-monospace, monospace";
-    lastVideoInferenceSummary.detections.forEach((entry) => {
-      const [x1, y1, x2, y2] = entry.box;
-      const left = x1 <= 1 ? x1 * width : x1;
-      const top = y1 <= 1 ? y1 * height : y1;
-      const right = x2 <= 1 ? x2 * width : x2;
-      const bottom = y2 <= 1 ? y2 * height : y2;
-      const boxWidth = Math.max(1, right - left);
-      const boxHeight = Math.max(1, bottom - top);
-
-      context.strokeStyle = "#ef8f35";
-      context.strokeRect(left, top, boxWidth, boxHeight);
-
-      const label = `${entry.label} ${(entry.score * 100).toFixed(1)}%`;
-      const textWidth = context.measureText(label).width + 10;
-      context.fillStyle = "#182028";
-      context.fillRect(left, Math.max(0, top - 24), textWidth, 22);
-      context.fillStyle = "#fffdfa";
-      context.fillText(label, left + 5, Math.max(16, top - 8));
-    });
-  } else if (lastVideoInferenceSummary?.mode === "classification") {
-    drawOverlayText(context, [
-      `classification: ${lastVideoInferenceSummary.detected_class}`,
-      `confidence: ${(lastVideoInferenceSummary.confidence * 100).toFixed(1)}%`,
-    ]);
-  } else if (lastVideoInferenceSummary?.mode === "passthrough") {
-    drawOverlayText(context, [
-      "output mode: passthrough",
-      "model output not recognized as detection or classification",
-    ]);
-  }
-
-  videoRenderFrameId = window.requestAnimationFrame(renderVideoOutputFrame);
-};
-
-const syncVideoOutputView = () => {
-  videoOutputCanvas.hidden = !videoOutputVisible || !videoCapture;
-  videoOutputButton.textContent = videoOutputVisible ? "Hide video output" : "Show video output";
-
-  if (!videoOutputVisible || !videoCapture) {
-    if (videoRenderFrameId !== null) {
-      window.cancelAnimationFrame(videoRenderFrameId);
-      videoRenderFrameId = null;
-    }
-    updateVideoStatus();
-    return;
-  }
-
-  if (videoRenderFrameId === null) {
-    videoRenderFrameId = window.requestAnimationFrame(renderVideoOutputFrame);
-  }
-  updateVideoStatus();
-};
-
-const stopVideoCvLoop = () => {
-  if (videoCvLoopId !== null) {
-    window.clearInterval(videoCvLoopId);
-    videoCvLoopId = null;
-  }
-  lastVideoCvLabel = null;
-  updateVideoStatus();
-};
-
-const inferVideoPrediction = async () => {
-  if (
-    !videoCapture
-    || !videoCvSession
-    || !videoCvInputName
-    || !videoCvOutputName
-    || videoCvInferencePending
-  ) {
-    return;
-  }
-
-  const now = Date.now();
-  if (now - lastVideoInferenceAt < VIDEO_INFERENCE_INTERVAL_MS) {
-    return;
-  }
-
-  videoCvInferencePending = true;
-  lastVideoInferenceAt = now;
-
-  try {
-    const input = buildVideoInputTensor();
-    const outputMap = await videoCvSession.run({ [videoCvInputName]: input });
-    const output = outputMap[videoCvOutputName];
-    const summary = summarizeVideoOutput(outputMap);
-    const labelChanged = summary.detected_class !== lastVideoCvLabel;
-    lastVideoCvLabel = summary.detected_class;
-    lastVideoInferenceSummary = summary;
-
-    updateVideoStatus([
-      `output mode: ${summary.mode}`,
-      `prediction: ${summary.detected_class}`,
-      `confidence: ${summary.confidence.toFixed(4)}`,
-      ...(
-        summary.mode === "detection"
-          ? [
-            `detections: ${summary.detections.length}`,
-            ...summary.detections.slice(0, 3).map(
-              (entry) =>
-                `${entry.label}: score=${entry.score.toFixed(4)} box=${
-                  entry.box.map((value) => value.toFixed(3)).join(",")
-                }`,
-            ),
-          ]
-          : [
-            "top classes:",
-            ...summary.top_classes.map(
-              (entry) =>
-                `${entry.label}: p=${entry.probability.toFixed(4)} logit=${
-                  Number(entry.logit ?? entry.probability).toFixed(4)
-                }`,
-            ),
-          ]
-      ),
-      `frame: ${videoPreview.videoWidth}x${videoPreview.videoHeight}`,
-      `processed at: ${new Date().toLocaleTimeString()}`,
-    ]);
-    syncVideoOutputView();
-
-    sendClientEvent("video_cv", "inference", {
-      mode: summary.mode,
-      detected_class: summary.detected_class,
-      class_index: summary.class_index,
-      confidence: summary.confidence,
-      probabilities: summary.probabilities,
-      top_classes: summary.top_classes,
-      detections: summary.detections,
-      changed: labelChanged,
-      processed_at: new Date().toISOString(),
-      model_path: VIDEO_MODEL_PATH,
-      input_name: videoCvInputName,
-      output_name: videoCvOutputName,
-      input_dimensions: videoCvSession.inputMetadata?.[videoCvInputName]?.dimensions ?? [],
-      output_dimensions: Array.isArray(output?.dims) ? output.dims : [],
-      source_resolution: {
-        width: videoPreview.videoWidth,
-        height: videoPreview.videoHeight,
-      },
-    });
-  } catch (error) {
-    lastVideoInferenceSummary = {
-      mode: "passthrough",
-      detections: [],
-      detected_class: "inference_error",
-      class_index: -1,
-      confidence: 0,
-      probabilities: [],
-      top_classes: [],
-    };
-    updateVideoStatus([
-      `inference error: ${error instanceof Error ? error.message : String(error)}`,
-    ]);
-    console.error(error);
-  } finally {
-    videoCvInferencePending = false;
-  }
-};
-
-const syncVideoCvLoop = () => {
-  if (videoCapture && videoCvSession) {
-    if (videoCvLoopId === null) {
-      videoCvLoopId = window.setInterval(() => {
-        void inferVideoPrediction();
-      }, VIDEO_INFERENCE_INTERVAL_MS);
-    }
-    updateVideoStatus([
-      "browser-side webcam inference active",
-      "results are sent to the backend over the websocket.",
-    ]);
-    return;
-  }
-
-  stopVideoCvLoop();
-  lastVideoInferenceSummary = null;
-  updateVideoStatus([
-    videoCvSession
-      ? "model loaded; start video capture to begin inference."
-      : `model file: ${VIDEO_MODEL_PATH}`,
-  ]);
-};
-
 renderSensorOutput();
-updateVideoStatus([
-  `model file: ${VIDEO_MODEL_PATH}`,
-  "load the model, then start video capture to process frames in-browser.",
-]);
 
 const wsProtocol = window.location.protocol === "https:" ? "wss:" : "ws:";
 const wsUrl = `${wsProtocol}//${window.location.host}/ws`;
@@ -1244,8 +451,6 @@ try {
         videoPreview.hidden = true;
         videoButton.textContent = "Start video";
         delete window.videoCapture;
-        syncVideoCvLoop();
-        syncVideoOutputView();
         append("video stopped");
         sendClientEvent("video", "stopped", { track_count: 0 });
         return;
@@ -1257,8 +462,6 @@ try {
       videoButton.textContent = "Stop video";
       append(`video granted: ${videoCapture.trackCount()} video track(s)`);
       window.videoCapture = videoCapture;
-      syncVideoCvLoop();
-      syncVideoOutputView();
       sendClientEvent("video", "started", {
         track_count: videoCapture.trackCount(),
       });
@@ -1480,11 +683,6 @@ try {
     }
   });
 
-  videoOutputButton.addEventListener("click", () => {
-    videoOutputVisible = !videoOutputVisible;
-    syncVideoOutputView();
-  });
-
   window.client = client;
   window.sendAlive = () => client.send_alive();
   window.runWorkflowModule = (moduleKey) => {
diff --git a/services/ws-server/static/index.html b/services/ws-server/static/index.html
index d2a936d..e100aa8 100644
--- a/services/ws-server/static/index.html
+++ b/services/ws-server/static/index.html
@@ -102,6 +102,23 @@
       margin: 0;
       min-height: 180px;
     }
+
+    video,
+    canvas {
+      box-sizing: border-box;
+      display: block;
+      width: auto;
+      height: auto;
+      max-width: 100%;
+      max-height: min(42vh, 420px);
+      margin: 12px auto 0;
+      background: #182028;
+      object-fit: contain;
+    }
+
+    [hidden] {
+      display: none !important;
+    }
     </style>
   </head>
   <body>
@@ -125,8 +142,6 @@ <h1>WASM web agent</h1>
         <select id="module-select"></select>
         <button id="run-module-button" type="button">Run module</button>
       </p>
-      <video id="face-video-preview" autoplay playsinline muted hidden></video>
-      <canvas id="face-video-output-canvas" hidden></canvas>
       <video id="video-preview" autoplay playsinline muted hidden></video>
       <canvas id="video-output-canvas" hidden></canvas>
       <div class="status-grid">
@@ -156,9 +171,6 @@ <h1>WASM web agent</h1>
         <button id="speech-button" type="button">Recognize speech</button>
         <button id="nfc-button" type="button">Scan NFC</button>
         <button id="sensors-button" type="button">Start sensors</button>
-        <button id="video-output-button" type="button">
-          Show video output
-        </button>
       </p>
     </main>