diff --git a/Dockerfile b/Dockerfile index babbf5f..2528ade 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/* -COPY pyproject.toml . +COPY pyproject.toml README.md ./ COPY protoforge/ protoforge/ COPY --from=frontend-builder /app/web/dist /app/static diff --git a/FAULT_INJECTION.md b/FAULT_INJECTION.md new file mode 100644 index 0000000..951648d --- /dev/null +++ b/FAULT_INJECTION.md @@ -0,0 +1,321 @@ +# 故障注入使用文档 + +本文档描述 ProtoForge 故障注入模块的设计、使用方式及内置故障类型。 + +--- + +## 概述 + +故障注入模块允许你在运行中的模拟设备上注入真实工业场景的异常,用于: + +- 验证监控系统的异常检测能力 +- 训练工业 AI 异常检测模型(提供异常样本) +- 测试报警规则和联动逻辑 + +支持四种异常场景: + +| 场景 | 说明 | +|------|------| +| 异常注入 | 立即将指定测点推入异常区间 | +| 自动恢复 | 故障持续指定时间后自动恢复正常 | +| 多指标联动 | 一次注入同时影响多个相关测点 | +| 渐进式劣化 | 指标随时间线性恶化,模拟真实磨损过程 | + +--- + +## 架构设计 + +``` +FaultInjector(独立模块) + │ + ├── inject(device, request) 注入故障 + ├── apply(device) 每次 tick 后覆盖测点值(通过钩子机制) + ├── clear(device_id) 手动清除 + └── 自动到期恢复 + +DeviceInstance.tick() + └── 执行正常生成器 + └── 执行 post_tick_hooks(FaultInjector.apply 挂载于此) +``` + +故障模块通过 `register_post_tick_hook` 挂载到设备,不修改设备本身的生成逻辑,完全解耦。 + +--- + +## API 接口 + +### 查询故障类型 + +``` +GET /api/v1/faults/types +``` + +返回所有内置故障类型列表。 + +``` +GET /api/v1/faults/types/{fault_type_id} +``` + +返回指定故障类型的详细定义,包含影响的测点和行为参数。 + +### 查询活跃故障 + +``` +GET /api/v1/faults/active +``` + +返回当前所有设备上正在运行的故障实例。 + +### 注入故障 + +``` +POST /api/v1/devices/{device_id}/fault +``` + +请求体: + +```json +{ + "fault_type_id": "tool_wear", + "duration": 300, + "intensity": 0.8 +} +``` + +| 字段 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `fault_type_id` | string | 是 | 故障类型 ID,见下方故障类型列表 | +| `duration` | float | 否 | 持续时间(秒),不填则使用类型默认值 | +| `intensity` | float | 否 | 故障强度 0.0~1.0,默认 1.0,影响劣化幅度 | + +响应示例: + +```json +{ + "fault_id": "a3f2c1d4e5b6", + "device_id": "fanuc-cnc-01", + "fault_type_id": "tool_wear", + "fault_type_name": "刀具磨损", + "status": "active", + "intensity": 0.8, + "duration": 300.0, + "elapsed": 0.0, + "progress": 0.0, + "affected_points": ["spindle_current", "vibration_x", "vibration_y", "vibration_z", "feed_rate"], + "started_at": 1716192000.0 +} +``` + +### 查询设备当前故障 + +``` +GET /api/v1/devices/{device_id}/fault +``` + +无故障时返回 `{"status": "none"}`,有故障时返回故障详情(含实时 `elapsed` 和 `progress`)。 + +### 手动清除故障 + +``` +DELETE /api/v1/devices/{device_id}/fault +``` + +立即清除故障,测点值由生成器在下一个 tick 自然恢复正常。 + +--- + +## 内置故障类型 + +### tool_wear — 刀具磨损 + +- **分类**:mechanical +- **模式**:渐进式 +- **默认持续时间**:300 秒 +- **真实场景**:刀具切削刃逐渐磨损,切削阻力增大,系统自动压低进给速率 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `spindle_current` | 升高 | ×2.2 | +| `vibration_x` | 升高 | ×3.0 | +| `vibration_y` | 升高 | ×3.0 | +| `vibration_z` | 升高 | ×3.5 | +| `feed_rate` | 降低 | ×0.45 | + +--- + +### tool_breakage — 刀具崩刃 + +- **分类**:mechanical +- **模式**:瞬间注入 +- **默认持续时间**:15 秒 +- **真实场景**:刀具突发性崩刃,机床通常会触发报警并停机 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `spindle_current` | 急升 | ×4.5 | +| `vibration_x` | 急升 | ×8.0 | +| `vibration_y` | 急升 | ×8.0 | +| `vibration_z` | 急升 | ×10.0 | +| `feed_rate` | 停止 | →0 | + +--- + +### spindle_overheat — 主轴过热 + +- **分类**:thermal +- **模式**:渐进式 +- **默认持续时间**:240 秒 +- **真实场景**:长时间高负荷或冷却系统故障,热保护机制逐渐降低转速 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `spindle_current` | 升高 | ×1.8 | +| `spindle_speed` | 降低 | ×0.6 | +| `vibration_x` | 升高 | ×1.5 | +| `vibration_z` | 升高 | ×1.5 | + +--- + +### spindle_bearing_fault — 主轴轴承故障 + +- **分类**:mechanical +- **模式**:渐进式 +- **默认持续时间**:360 秒 +- **真实场景**:轴承磨损或润滑不足,振动持续升高 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `vibration_x` | 升高 | ×4.0 | +| `vibration_y` | 升高 | ×4.0 | +| `vibration_z` | 升高 | ×5.0 | +| `spindle_current` | 轻微升高 | ×1.3 | + +--- + +### feed_stall — 进给堵转 + +- **分类**:process +- **模式**:瞬间注入 +- **默认持续时间**:20 秒 +- **真实场景**:工件夹紧松动或切削量过大导致进给轴卡死 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `feed_rate` | 停止 | →0 | +| `spindle_current` | 急升 | ×3.8 | +| `vibration_z` | 急升 | ×5.0 | + +--- + +### vibration_spike — 振动异常 + +- **分类**:mechanical +- **模式**:瞬间注入 +- **默认持续时间**:60 秒 +- **真实场景**:工件装夹松动或切削共振 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `vibration_x` | 急升 | ×6.0 | +| `vibration_y` | 急升 | ×6.0 | +| `vibration_z` | 急升 | ×7.0 | + +--- + +### coolant_failure — 切削液不足 + +- **分类**:process +- **模式**:渐进式 +- **默认持续时间**:480 秒 +- **真实场景**:切削液供给不足,热量积累,劣化速度较慢 + +| 测点 | 变化方向 | 峰值倍率 | +|------|---------|---------| +| `spindle_current` | 升高 | ×1.6 | +| `vibration_x` | 升高 | ×2.0 | +| `vibration_y` | 升高 | ×2.0 | +| `vibration_z` | 升高 | ×2.5 | +| `feed_rate` | 降低 | ×0.75 | + +--- + +### power_fluctuation — 电源波动 + +- **分类**:electrical +- **模式**:瞬间注入(持续期间持续抖动) +- **默认持续时间**:90 秒 +- **真实场景**:供电电压不稳定,各指标出现随机波动 + +| 测点 | 变化方向 | 说明 | +|------|---------|------| +| `spindle_speed` | 随机抖动 | ±300 RPM 噪声 | +| `spindle_current` | 随机抖动 | ±5 A 噪声 | +| `feed_rate` | 随机抖动 | ±150 mm/min 噪声 | + +--- + +## 使用示例 + +### 模拟刀具磨损过程 + +```bash +# 注入刀具磨损,持续 5 分钟,强度 100% +curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \ + -H "Content-Type: application/json" \ + -d '{"fault_type_id": "tool_wear", "duration": 300, "intensity": 1.0}' + +# 每隔 30 秒查看故障进度 +curl http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault + +# 查看 Prometheus 指标变化 +curl http://localhost:8000/api/v1/metrics | grep -E "spindle_current|vibration|feed_rate" +``` + +### 模拟突发崩刃后手动恢复 + +```bash +# 注入崩刃故障 +curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \ + -H "Content-Type: application/json" \ + -d '{"fault_type_id": "tool_breakage", "duration": 60}' + +# 手动提前清除 +curl -X DELETE http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault +``` + +### 低强度渐进劣化(用于 AI 模型训练) + +```bash +# 用 50% 强度注入轴承故障,持续 10 分钟,产生轻微异常样本 +curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \ + -H "Content-Type: application/json" \ + -d '{"fault_type_id": "spindle_bearing_fault", "duration": 600, "intensity": 0.5}' +``` + +--- + +## 与 Prometheus 集成 + +故障注入后,测点值的变化会实时反映在 `/api/v1/metrics` 接口中。可以用 Grafana 观察故障期间各指标的时序变化: + +``` +# 主轴电流(故障期间会升高) +fanuc_cnc_spindle_current + +# 三轴振动 +fanuc_cnc_vibration_x +fanuc_cnc_vibration_y +fanuc_cnc_vibration_z + +# 进给速率(刀具磨损/堵转时会降低) +fanuc_cnc_feed_rate +``` + +--- + +## 注意事项 + +- 同一设备同时只能有一个活跃故障,新注入会覆盖旧故障 +- 故障到期后测点值由生成器在下一个 tick 自然恢复,不会瞬间跳回 +- 设备必须处于 `online` 状态才能注入故障 +- 删除设备时会自动清除其故障 diff --git a/ai/predict.py b/ai/predict.py new file mode 100755 index 0000000..b70f822 --- /dev/null +++ b/ai/predict.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- + +import requests +import numpy as np +from datetime import datetime, timedelta + +VM_URL = "http://localhost:8428" +DEVICE_ID = "fanuc-cnc" +METRIC = f'feed_rate{{device_id="{DEVICE_ID}"}}' + +def fetch_history(minutes=30): + """从VM拉取历史数据""" + end = datetime.now() + start = end - timedelta(minutes=minutes) + resp = requests.get(f"{VM_URL}/api/v1/query_range", params={ + "query": METRIC, + "start": start.timestamp(), + "end": end.timestamp(), + "step": "1s", + }) + result = resp.json()["data"]["result"] + if not result: + return [], [] + values = result[0]["values"] + ts = [float(v[0]) for v in values] + ys = [float(v[1]) for v in values] + return ts, ys + +def predict_next(ts, ys, horizon=60): + """ + 用FFT检测主频,拟合正弦波,外推未来horizon秒 + 适合周期性信号 + """ + if len(ys) < 60: + return [], [] + + ys = np.array(ys) + n = len(ys) + dt = 1.0 # 1秒采样 + + # FFT找主频 + fft = np.fft.rfft(ys - ys.mean()) + freqs = np.fft.rfftfreq(n, d=dt) + dominant_idx = np.argmax(np.abs(fft[1:])) + 1 + dominant_freq = freqs[dominant_idx] + period = 1.0 / dominant_freq if dominant_freq > 0 else 60 + + # 拟合:y = A*sin(2π/T * t + φ) + offset + from scipy.optimize import curve_fit + t_rel = np.arange(n, dtype=float) + offset = ys.mean() + amplitude = (ys.max() - ys.min()) / 2 + + def sine_model(t, A, T, phi, C): + return A * np.sin(2 * np.pi / T * t + phi) + C + + try: + popt, _ = curve_fit( + sine_model, t_rel, ys, + p0=[amplitude, period, 0, offset], + maxfev=5000 + ) + # 外推 + t_future = np.arange(n, n + horizon, dtype=float) + y_pred = sine_model(t_future, *popt) + ts_future = [ts[-1] + i + 1 for i in range(horizon)] + return ts_future, y_pred.tolist() + except Exception: + # 拟合失败降级为线性 + slope = (ys[-1] - ys[-10]) / 10 + ts_future = [ts[-1] + i + 1 for i in range(horizon)] + y_pred = [ys[-1] + slope * (i + 1) for i in range(horizon)] + return ts_future, y_pred + +def write_predictions(ts_future, y_pred, metric_name="protoforge_feed_rate_predicted"): + """写回VictoriaMetrics""" + lines = [] + for t, y in zip(ts_future, y_pred): + ts_ms = int(t * 1000) + lines.append(f'{metric_name}{{device_id="{DEVICE_ID}"}} {y:.2f} {ts_ms}') + payload = "\n".join(lines) + requests.post(f"{VM_URL}/api/v1/import/prometheus", data=payload) + +def run_once(): + ts, ys = fetch_history(minutes=30) + if len(ys) < 60: + print("数据不足") + return + ts_future, y_pred = predict_next(ts, ys, horizon=120) + write_predictions(ts_future, y_pred) + print(f"写入 {len(y_pred)} 个预测点,预测到 +{len(y_pred)}s") + +if __name__ == "__main__": + import time + while True: + run_once() + time.sleep(30) # 每30秒重新预测一次 diff --git a/ai/predict_v2.py b/ai/predict_v2.py new file mode 100755 index 0000000..933a34f --- /dev/null +++ b/ai/predict_v2.py @@ -0,0 +1,571 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge 预测服务 v5 + +修复点: +1. 不再使用“单正弦拟合”作为主预测算法。 +2. 主算法改为:周期模板预测(同相位历史值加权平均)。 +3. 周期估计使用 FFT 粗估 + 自相关细化,比单纯 FFT 更稳。 +4. 若可用完整周期不足,则降级为多谐波回归(而不是单正弦)。 +5. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒,避免预测窗口重叠。 +6. 不删除旧预测历史,避免历史预测消失。 +""" + +import logging +import math +import re +import time +from datetime import datetime, timedelta +from typing import Dict, List, Tuple + +import numpy as np +import requests + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) +logger = logging.getLogger(__name__) + +# ── 配置 ────────────────────────────────────────────────────────────────────── + +VM_URL = "http://localhost:8428" + +PREDICT_TARGETS = [ + ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"), + ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"), + ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"), + ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"), + ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"), + ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"), +] + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) +MIN_POINTS = 120 +QUERY_STEP = "1s" + +# 至少要有多少个完整周期,才使用“周期模板预测” +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 6 + +# 周期范围 +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +# 多谐波回归最高阶数(降级模式) +MAX_HARMONICS = 4 + +EXTRA_PREDICT_LABELS = { + "forecast": "seasonal_v1", + "source": "protoforge", +} + +# 进程内记录每条预测序列上次写到哪里,避免本进程运行时重复写 +LAST_WRITTEN_UNTIL: Dict[str, int] = {} + +# ───────────────────────────────────────────────────────────────────────────── + + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + """从 VictoriaMetrics 拉取历史时序数据。""" + now = datetime.now() + start = now - timedelta(minutes=minutes) + + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": QUERY_STEP, + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + + if not result: + return [], [] + + values = result[0].get("values", []) + if not values: + return [], [] + + ts = [] + ys = [] + for item in values: + if len(item) < 2: + continue + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + if not math.isfinite(t) or not math.isfinite(y): + continue + ts.append(t) + ys.append(y) + + return ts, ys + + +def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + """ + 清洗历史数据: + 1. 时间戳整秒化 + 2. 排序 + 3. 同一秒多个点保留最后一个 + 4. 按 1 秒插值补齐 + """ + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + if not math.isfinite(sec) or not math.isfinite(val): + continue + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in sorted_items], dtype=float) + + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: + """FFT 粗估周期。""" + n = len(ys_arr) + if n < 8: + return 60.0 + + centered = ys_arr - np.mean(ys_arr) + if np.allclose(centered, 0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + power = np.abs(fft_vals[1:]) + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + if dominant_freq <= 0: + return 60.0 + + period = 1.0 / dominant_freq + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + """ + 用自相关在 init_period 附近细化周期估计。 + """ + n = len(ys_arr) + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + if np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + corr = np.correlate(centered, centered, mode="full")[n - 1:] + + p0 = int(round(init_period)) + left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) + + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period(ys_arr: np.ndarray) -> float: + """FFT + 自相关 的组合周期估计。""" + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + return p_refined + + +def seasonal_template_predict( + ys_arr: np.ndarray, + horizon: int, + period: int, + gap: int = 0, + max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, +) -> List[float]: + """ + 同相位历史值加权平均预测。 + 对未来第 k 个点,取过去多个周期同相位点做加权平均: + y[n-1+gap+k] ≈ avg(y[n-1+gap+k-p], y[n-1+gap+k-2p], ...) + """ + n = len(ys_arr) + preds = [] + + for k in range(1, horizon + 1): + target_idx = (n - 1) + gap + k + + values = [] + weights = [] + + # m=1 表示最近一个周期;m 越大越久远 + for m in range(1, max_cycles + 1): + hist_idx = target_idx - m * period + if 0 <= hist_idx < n: + # 越近权重越大 + w = 1.0 / m + values.append(float(ys_arr[hist_idx])) + weights.append(w) + + if not values: + # 万一拿不到,退化为最后一个值 + preds.append(float(ys_arr[-1])) + else: + preds.append(float(np.average(values, weights=weights))) + + return preds + + +def harmonic_regression_predict( + ys_arr: np.ndarray, + horizon: int, + period: int, + gap: int = 0, + max_harmonics: int = MAX_HARMONICS, +) -> List[float]: + """ + 多谐波回归(降级模式): + y = c + Σ [a_k sin(2πkt/P) + b_k cos(2πkt/P)] + 相比单正弦,更能表达非标准正弦波形。 + """ + n = len(ys_arr) + if n < 10 or period <= 1: + return [float(ys_arr[-1])] * horizon + + # 周期太短时,谐波数不能太大 + K = min(max_harmonics, max(1, period // 4)) + + t = np.arange(n, dtype=float) + cols = [np.ones(n, dtype=float)] + + for k in range(1, K + 1): + angle = 2.0 * np.pi * k * t / period + cols.append(np.sin(angle)) + cols.append(np.cos(angle)) + + X = np.column_stack(cols) + + try: + coef, _, _, _ = np.linalg.lstsq(X, ys_arr, rcond=None) + except Exception: + return [float(ys_arr[-1])] * horizon + + t_future = np.arange(n + gap, n + gap + horizon, dtype=float) + cols_future = [np.ones(horizon, dtype=float)] + + for k in range(1, K + 1): + angle = 2.0 * np.pi * k * t_future / period + cols_future.append(np.sin(angle)) + cols_future.append(np.cos(angle)) + + X_future = np.column_stack(cols_future) + y_pred = X_future @ coef + + return y_pred.astype(float).tolist() + + +def predict_next( + ts: List[float], + ys: List[float], + horizon: int, + base_ts: int, +) -> Tuple[List[float], List[float]]: + """ + 主预测函数: + 1. 周期估计 + 2. 优先使用周期模板预测 + 3. 周期不够时降级为多谐波回归 + """ + ts_grid, ys_grid = normalize_history(ts, ys) + if len(ys_grid) < MIN_POINTS: + return [], [] + + y_min = float(np.min(ys_grid)) + y_max = float(np.max(ys_grid)) + y_range = y_max - y_min + + if y_range <= 1e-9: + base_ts = max(int(base_ts), int(ts_grid[-1])) + ts_future = [base_ts + i + 1 for i in range(horizon)] + y_pred = [float(ys_grid[-1])] * horizon + return ts_future, y_pred + + period_est = estimate_period(ys_grid) + period = int(round(period_est)) + period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period)) + + last_real_ts = int(ts_grid[-1]) + base_ts = max(int(base_ts), last_real_ts) + + # 如果当前时间已经超过最后一个真实点,gap 表示中间“空过去”的秒数 + gap = max(0, base_ts - last_real_ts) + + ts_future = [base_ts + i + 1 for i in range(horizon)] + + full_cycles = len(ys_grid) // period if period > 0 else 0 + + if full_cycles >= MIN_FULL_CYCLES_FOR_TEMPLATE: + y_pred = seasonal_template_predict( + ys_arr=ys_grid, + horizon=horizon, + period=period, + gap=gap, + max_cycles=min(MAX_CYCLES_FOR_TEMPLATE, full_cycles), + ) + model_name = "seasonal_template" + else: + y_pred = harmonic_regression_predict( + ys_arr=ys_grid, + horizon=horizon, + period=period, + gap=gap, + max_harmonics=MAX_HARMONICS, + ) + model_name = "harmonic_regression" + + # 合理裁剪,避免偶然外推过大 + margin = y_range * 0.15 + lower = y_min - margin + upper = y_max + margin + y_pred = np.clip(np.array(y_pred, dtype=float), lower, upper).astype(float).tolist() + + logger.debug( + "predict_next model=%s period=%ss full_cycles=%s gap=%s", + model_name, period, full_cycles, gap + ) + + return ts_future, y_pred + + +def prom_escape_label_value(value: str) -> str: + """Prometheus label value 转义。""" + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + parts = [] + for k in sorted(labels.keys()): + v = prom_escape_label_value(labels[k]) + parts.append(f'{k}="{v}"') + return "{" + ",".join(parts) + "}" + + +def write_predictions( + ts_future: List[float], + y_pred: List[float], + metric_name: str, + labels: Dict[str, str], +) -> bool: + """将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。""" + if not ts_future or not y_pred or len(ts_future) != len(y_pred): + logger.warning("预测数据为空或长度不一致 metric=%s", metric_name) + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_future, y_pred): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + ts_ms = ts_sec * 1000 + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}") + + if not lines: + logger.warning("没有可写入的预测点 metric=%s", metric_name) + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + timeout=10, + ) + resp.raise_for_status() + return True + except requests.RequestException as e: + logger.error("写入预测数据失败 metric=%s: %s", metric_name, e) + return False + + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def _parse_labels(query: str) -> Dict[str, str]: + """从查询表达式中解析标签。""" + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 1: query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\") + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + for d in dicts: + if d: + result.update(d) + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +def run_once(): + now_str = datetime.now().strftime("%H:%M:%S") + + for query, pred_metric in PREDICT_TARGETS: + ts, ys = fetch_history(query) + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + base_labels = _parse_labels(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + now_sec = int(time.time()) + last_until = LAST_WRITTEN_UNTIL.get(key, 0) + + # 避免同一进程内写重叠时间段 + base_ts = max(now_sec, last_until) + + ts_future, y_pred = predict_next( + ts=ts, + ys=ys, + horizon=WRITE_HORIZON_SECONDS, + base_ts=base_ts, + ) + + if not ts_future or not y_pred: + logger.warning("[%s] %s 预测结果为空,跳过", now_str, query) + continue + + ok = write_predictions( + ts_future=ts_future, + y_pred=y_pred, + metric_name=pred_metric, + labels=write_labels, + ) + if not ok: + continue + + LAST_WRITTEN_UNTIL[key] = int(max(ts_future)) + + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s 写入 %d 点,预测区间 %s ~ %s,标签=%s", + now_str, + query, + pred_metric, + len(y_pred), + future_start, + future_end, + labels_to_str(write_labels), + ) + + +def main(): + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py new file mode 100755 index 0000000..d212d2d --- /dev/null +++ b/ai/predict_v3_single_scene.py @@ -0,0 +1,1487 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge Predictor v10 + +修复重点: +1. 修复 lag=0 但预测线仍然相位漂移的问题。 +2. 在谷底相位对齐基础上,增加 phase-lock 相位锁定。 +3. 每轮使用最近 1~2 个周期真实数据,搜索最佳 period + phase_origin。 +4. 预测起点仍然锚定最后一个真实点 last_real_ts,避免写入延迟。 +5. 保留健康模板冻结逻辑:异常期间不学习故障数据。 +6. 保留预测上下界和异常指标。 +""" + +import json +import logging +import math +import os +import re +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple + +import numpy as np +import requests + + +# ============================================================================= +# 日志配置 +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# 基础配置 +# ============================================================================= + +VM_URL = "http://localhost:8428" +STATE_FILE = "/tmp/protoforge_predictor_state_v10.json" + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 + +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + +QUERY_STEP = "1s" +MIN_POINTS = 120 + +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 6 + +DETECT_WINDOW_SECONDS = 20 +RECOVERY_MIN_SECONDS = 60 + +HEALTHY_EMA_ALPHA = 0.10 +RECOVERY_EMA_ALPHA = 0.25 + +OUTSIDE_RATIO_THRESHOLD = 0.60 + +VALLEY_QUANTILE = 45 + +# phase-lock 配置 +PHASE_LOCK_MIN_WINDOW_SECONDS = 45 +PHASE_LOCK_MAX_WINDOW_SECONDS = 180 +PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 +PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 +PHASE_LOCK_PERIOD_STEP = 1 +PHASE_LOCK_ORIGIN_STEP = 1 + +# 真实数据延迟超过这个值,就不继续预测 +MAX_DATA_LAG_SECONDS = 180 + +# 预测锚定最后一个真实点 +ALIGN_PREDICTION_TO_LAST_REAL_TS = True + + +# ============================================================================= +# 指标配置 +# ============================================================================= + +PREDICT_TARGETS = [ + { + "query": 'feed_rate{device_id="fanuc-cnc"}', + "pred_metric": "feed_rate_predicted", + "anomaly_metric": "feed_rate_anomaly", + "abs_threshold": 400.0, + "rel_threshold": 0.25, + }, + { + "query": 'spindle_speed{device_id="fanuc-cnc"}', + "pred_metric": "spindle_speed_predicted", + "anomaly_metric": "spindle_speed_anomaly", + "abs_threshold": 500.0, + "rel_threshold": 0.25, + }, + { + "query": 'spindle_current{device_id="fanuc-cnc"}', + "pred_metric": "spindle_current_predicted", + "anomaly_metric": "spindle_current_anomaly", + "abs_threshold": 5.0, + "rel_threshold": 0.25, + }, + { + "query": 'vibration_x{device_id="fanuc-cnc"}', + "pred_metric": "vibration_x_predicted", + "anomaly_metric": "vibration_x_anomaly", + "abs_threshold": 1.0, + "rel_threshold": 0.30, + }, + { + "query": 'vibration_y{device_id="fanuc-cnc"}', + "pred_metric": "vibration_y_predicted", + "anomaly_metric": "vibration_y_anomaly", + "abs_threshold": 1.0, + "rel_threshold": 0.30, + }, + { + "query": 'vibration_z{device_id="fanuc-cnc"}', + "pred_metric": "vibration_z_predicted", + "anomaly_metric": "vibration_z_anomaly", + "abs_threshold": 1.0, + "rel_threshold": 0.30, + }, +] + +EXTRA_PREDICT_LABELS = { + "forecast": "phase_locked_health_v10", + "source": "protoforge", +} + +BASELINE_STATUS_HEALTHY = "healthy" +BASELINE_STATUS_ANOMALY = "anomaly" +BASELINE_STATUS_RECOVERING = "recovering" + + +# ============================================================================= +# 状态结构 +# ============================================================================= + +@dataclass +class BaselineState: + period: int + phase_origin_ts: int + template: List[float] + status: str + clean_seconds: int + last_update_ts: int + last_seen_ts: int + y_min: float + y_max: float + + +BASELINE_STATES: Dict[str, BaselineState] = {} +LAST_REAL_TS_WRITTEN: Dict[str, int] = {} + + +# ============================================================================= +# VictoriaMetrics 读取 +# ============================================================================= + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + now = datetime.now() + start = now - timedelta(minutes=minutes) + + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": QUERY_STEP, + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + + if not result: + return [], [] + + values = result[0].get("values", []) + + ts = [] + ys = [] + + for item in values: + if len(item) < 2: + continue + + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + + if not math.isfinite(t) or not math.isfinite(y): + continue + + ts.append(t) + ys.append(y) + + return ts, ys + + +def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(sec) or not math.isfinite(val): + continue + + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in sorted_items], dtype=float) + + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +# ============================================================================= +# 周期估计 +# ============================================================================= + +def moving_average(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + window = int(window) + + if window % 2 == 0: + window += 1 + + kernel = np.ones(window, dtype=float) / window + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + return np.convolve(padded, kernel, mode="valid") + + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: + n = len(ys_arr) + + if n < 8: + return 60.0 + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + power = np.abs(fft_vals[1:]) + + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + + if dominant_freq <= 0: + return 60.0 + + period = 1.0 / dominant_freq + + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + n = len(ys_arr) + + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + corr = np.correlate(centered, centered, mode="full")[n - 1:] + + p0 = int(round(init_period)) + left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) + + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period_rough(ys_arr: np.ndarray) -> int: + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + + period = int(round(p_refined)) + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period) + + +# ============================================================================= +# 谷底检测与模板构建 +# ============================================================================= + +def find_valley_indices( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + expected_period: int, +) -> List[int]: + n = len(ys_grid) + + if n < max(10, expected_period * 2): + return [] + + period = max(3, int(expected_period)) + + smooth_window = max(3, int(round(period * 0.08))) + smooth_window = min(smooth_window, 21) + + ys_smooth = moving_average(ys_grid, smooth_window) + threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) + + candidates = [] + + for i in range(1, n - 1): + if ( + ys_smooth[i] <= ys_smooth[i - 1] + and ys_smooth[i] < ys_smooth[i + 1] + and ys_smooth[i] <= threshold + ): + candidates.append(i) + + if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: + candidates = [] + + for i in range(1, n - 1): + if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: + candidates.append(i) + + if not candidates: + return [] + + min_distance = max(2, int(round(period * 0.55))) + selected = [] + + for idx in candidates: + if not selected: + selected.append(idx) + continue + + if idx - selected[-1] >= min_distance: + selected.append(idx) + continue + + if ys_smooth[idx] < ys_smooth[selected[-1]]: + selected[-1] = idx + + if len(selected) < 2: + return selected + + cleaned = [selected[0]] + + for idx in selected[1:]: + diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) + + if int(period * 0.55) <= diff <= int(period * 1.60): + cleaned.append(idx) + continue + + if diff < int(period * 0.55): + if ys_smooth[idx] < ys_smooth[cleaned[-1]]: + cleaned[-1] = idx + continue + + cleaned.append(idx) + + return cleaned + + +def detect_period_and_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, List[int]]: + rough = estimate_period_rough(ys_grid) + valleys = find_valley_indices(ts_grid, ys_grid, rough) + + if len(valleys) >= 3: + diffs = np.diff(ts_grid[valleys]) + good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] + + if len(good) > 0: + period = int(round(float(np.median(good)))) + else: + period = rough + else: + period = rough + + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period), valleys + + +def build_template_from_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + period: int, + valleys: List[int], + max_cycles: int = MAX_CYCLES_FOR_TEMPLATE, +) -> Optional[np.ndarray]: + if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: + return None + + pairs = [] + + for a, b in zip(valleys[:-1], valleys[1:]): + cycle_len = float(ts_grid[b] - ts_grid[a]) + + if period * 0.55 <= cycle_len <= period * 1.60: + pairs.append((a, b, cycle_len)) + + if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + pairs = pairs[-max_cycles:] + + phase_grid = np.arange(period, dtype=float) + segments = [] + weights = [] + + for idx, (a, b, cycle_len) in enumerate(pairs): + seg_ts = ts_grid[a:b + 1] + seg_y = ys_grid[a:b + 1] + + if len(seg_y) < 3: + continue + + x_old = (seg_ts - seg_ts[0]) / cycle_len * period + seg = np.interp(phase_grid, x_old, seg_y) + + segments.append(seg.astype(float)) + + weight = 0.5 + 0.5 * ((idx + 1) / len(pairs)) + weights.append(weight) + + if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + arr = np.vstack(segments) + w_arr = np.array(weights, dtype=float) + + template = np.average(arr, axis=0, weights=w_arr) + + return template.astype(float) + + +def build_current_baseline( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + tail_seconds: Optional[int] = None, +) -> Optional[Tuple[int, int, np.ndarray]]: + if len(ys_grid) < MIN_POINTS: + return None + + if tail_seconds is not None and tail_seconds > 0: + cutoff = ts_grid[-1] - int(tail_seconds) + mask = ts_grid >= cutoff + ts_use = ts_grid[mask] + ys_use = ys_grid[mask] + else: + ts_use = ts_grid + ys_use = ys_grid + + if len(ys_use) < MIN_POINTS: + return None + + period, valleys = detect_period_and_valleys(ts_use, ys_use) + + template = build_template_from_valleys( + ts_grid=ts_use, + ys_grid=ys_use, + period=period, + valleys=valleys, + ) + + if template is None or len(valleys) == 0: + return None + + phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) + + return int(period), phase_origin_ts, template + + +# ============================================================================= +# 模板预测与重采样 +# ============================================================================= + +def circular_template_value(template: np.ndarray, phase: float) -> float: + period = len(template) + + if period == 0: + return 0.0 + + phase = float(phase) % period + + i0 = int(math.floor(phase)) % period + i1 = (i0 + 1) % period + frac = phase - math.floor(phase) + + return float((1.0 - frac) * template[i0] + frac * template[i1]) + + +def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: + old_period = len(old_template) + + if old_period == new_period: + return old_template.astype(float) + + if old_period <= 1 or new_period <= 1: + return np.full(new_period, float(np.mean(old_template)), dtype=float) + + old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) + new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) + + old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) + old_y_ext = np.concatenate([old_template, old_template, old_template]) + + return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + + +def predict_template_values( + template: np.ndarray, + period: int, + phase_origin_ts: int, + ts_list: List[int], +) -> np.ndarray: + if period <= 1: + return np.zeros(len(ts_list), dtype=float) + + if len(template) != period: + template = resample_template(template, period) + + values = [] + + for ts in ts_list: + phase = (int(ts) - int(phase_origin_ts)) % period + values.append(circular_template_value(template, phase)) + + return np.array(values, dtype=float) + + +def predict_with_state(state: BaselineState, ts_list: List[int]) -> np.ndarray: + template = np.array(state.template, dtype=float) + + return predict_template_values( + template=template, + period=int(state.period), + phase_origin_ts=int(state.phase_origin_ts), + ts_list=ts_list, + ) + + +def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: + if period <= 1: + return origin + + origin = int(origin) + period = int(period) + near_ts = int(near_ts) + + while origin + period <= near_ts: + origin += period + + while origin > near_ts: + origin -= period + + return origin + + +def align_new_template_to_old( + old_template: np.ndarray, + new_template: np.ndarray, +) -> np.ndarray: + if len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + period = len(new_template) + + if period <= 2: + return new_template.astype(float) + + max_shift = max(1, int(round(period * 0.10))) + old_norm = old_template - np.mean(old_template) + + best_score = None + best_template = new_template + + for shift in range(-max_shift, max_shift + 1): + shifted = np.roll(new_template, shift) + shifted_norm = shifted - np.mean(shifted) + score = float(np.dot(old_norm, shifted_norm)) + + if best_score is None or score > best_score: + best_score = score + best_template = shifted + + return best_template.astype(float) + + +def merge_template( + old_template: np.ndarray, + new_template: np.ndarray, + alpha: float, +) -> np.ndarray: + alpha = float(np.clip(alpha, 0.0, 1.0)) + + if len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + new_template = align_new_template_to_old(old_template, new_template) + + merged = (1.0 - alpha) * old_template + alpha * new_template + + return merged.astype(float) + + +# ============================================================================= +# Phase Lock +# ============================================================================= + +def phase_lock_recent( + state: BaselineState, + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, int, np.ndarray, float]: + base_period = int(state.period) + base_origin = int(state.phase_origin_ts) + base_template = np.array(state.template, dtype=float) + + if base_period <= 1 or len(base_template) <= 1: + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + pred = predict_with_state(state, ts_recent) + actual = ys_grid[-len(ts_recent):].astype(float) + mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 + return base_period, base_origin, pred, mae + + window_seconds = max( + PHASE_LOCK_MIN_WINDOW_SECONDS, + min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), + ) + + cutoff = ts_grid[-1] - window_seconds + mask = ts_grid >= cutoff + + ts_recent_arr = ts_grid[mask].astype(int) + actual = ys_grid[mask].astype(float) + + if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): + ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) + actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float) + + ts_recent = ts_recent_arr.tolist() + last_ts = int(ts_recent[-1]) + + p_min = max(int(MIN_PERIOD_SECONDS), int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO)))) + p_max = min(int(MAX_PERIOD_SECONDS), int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO)))) + + if p_max < p_min: + p_min = p_max = base_period + + best_period = base_period + best_origin = normalize_origin_near(base_origin, base_period, last_ts) + best_template = resample_template(base_template, best_period) + best_pred = predict_template_values(best_template, best_period, best_origin, ts_recent) + best_mae = float(np.mean(np.abs(actual - best_pred))) + + for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): + template = resample_template(base_template, period) + center_origin = normalize_origin_near(base_origin, period, last_ts) + + origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) + + for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): + origin = center_origin + shift + + pred = predict_template_values( + template=template, + period=period, + phase_origin_ts=origin, + ts_list=ts_recent, + ) + + mae = float(np.mean(np.abs(actual - pred))) + + # 轻微惩罚周期变化,避免过拟合抖动 + penalty = abs(period - base_period) * 0.5 + score = mae + penalty + + best_score = best_mae + abs(best_period - base_period) * 0.5 + + if score < best_score: + best_period = period + best_origin = origin + best_pred = pred + best_mae = mae + + best_origin = normalize_origin_near(best_origin, best_period, last_ts) + + return int(best_period), int(best_origin), best_pred, float(best_mae) + + +# ============================================================================= +# 异常检测 +# ============================================================================= + +def calc_threshold( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> np.ndarray: + return np.maximum(abs_threshold, np.abs(pred) * rel_threshold) + + +def calc_bounds( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[np.ndarray, np.ndarray]: + threshold = calc_threshold(pred, abs_threshold, rel_threshold) + + return pred - threshold, pred + threshold + + +def detect_anomaly( + state: BaselineState, + ts_grid: np.ndarray, + ys_grid: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[bool, float, float, float, int, int]: + best_period, best_origin, pred_recent, _ = phase_lock_recent( + state=state, + ts_grid=ts_grid, + ys_grid=ys_grid, + ) + + recent_len = len(pred_recent) + + if recent_len <= 0: + return False, 0.0, 0.0, 0.0, best_period, best_origin + + actual = ys_grid[-recent_len:].astype(float) + + threshold = calc_threshold(pred_recent, abs_threshold, rel_threshold) + + abs_err = np.abs(actual - pred_recent) + outside = abs_err > threshold + + outside_ratio = float(np.mean(outside)) + mean_abs_err = float(np.mean(abs_err)) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred_recent), 1.0))) + + is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD + + return ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + int(best_period), + int(best_origin), + ) + + +# ============================================================================= +# 健康基线状态管理 +# ============================================================================= + +def create_initial_state( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + now_sec: int, +) -> Optional[BaselineState]: + baseline = build_current_baseline(ts_grid, ys_grid) + + if baseline is None: + return None + + period, phase_origin_ts, template = baseline + + return BaselineState( + period=int(period), + phase_origin_ts=int(phase_origin_ts), + template=template.astype(float).tolist(), + status=BASELINE_STATUS_HEALTHY, + clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), + last_update_ts=now_sec, + last_seen_ts=now_sec, + y_min=float(np.min(ys_grid)), + y_max=float(np.max(ys_grid)), + ) + + +def apply_phase_lock_to_state( + state: BaselineState, + best_period: int, + best_origin: int, +) -> None: + best_period = int(best_period) + + if best_period <= 1: + return + + template = np.array(state.template, dtype=float) + + if len(template) != best_period: + template = resample_template(template, best_period) + + state.period = best_period + state.phase_origin_ts = int(best_origin) + state.template = template.astype(float).tolist() + + +def maybe_update_state( + key: str, + ts_grid: np.ndarray, + ys_grid: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[Optional[BaselineState], bool, float, float, float]: + now_sec = int(time.time()) + state = BASELINE_STATES.get(key) + + if state is None: + state = create_initial_state(ts_grid, ys_grid, now_sec) + + if state is None: + return None, False, 0.0, 0.0, 0.0 + + BASELINE_STATES[key] = state + + logger.info( + "初始化健康模板 key=%s period=%ss origin=%s clean=%ss", + key, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + ) + + return state, False, 0.0, 0.0, 0.0 + + elapsed = max(1, now_sec - int(state.last_seen_ts)) + elapsed = min(elapsed, POLL_INTERVAL * 2) + state.last_seen_ts = now_sec + + ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + best_period, + best_origin, + ) = detect_anomaly( + state=state, + ts_grid=ts_grid, + ys_grid=ys_grid, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + if is_anomaly: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + + BASELINE_STATES[key] = state + + logger.warning( + "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f", + key, + outside_ratio, + mean_abs_err, + mean_rel_err, + ) + + return state, True, outside_ratio, mean_abs_err, mean_rel_err + + old_period = int(state.period) + old_origin = int(state.phase_origin_ts) + + apply_phase_lock_to_state(state, best_period, best_origin) + + if old_period != state.period or old_origin != state.phase_origin_ts: + logger.info( + "phase-lock key=%s period %s -> %s origin %s -> %s", + key, + old_period, + state.period, + datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + ) + + if state.status == BASELINE_STATUS_ANOMALY: + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed + + BASELINE_STATES[key] = state + + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + if state.status == BASELINE_STATUS_RECOVERING: + state.clean_seconds += elapsed + else: + state.status = BASELINE_STATUS_HEALTHY + state.clean_seconds += elapsed + + min_clean_for_update = max( + RECOVERY_MIN_SECONDS, + int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, + ) + + if state.clean_seconds < min_clean_for_update: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + tail_seconds = min( + int(state.clean_seconds), + int(state.period) * MAX_CYCLES_FOR_TEMPLATE, + ) + + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_grid=ys_grid, + tail_seconds=tail_seconds, + ) + + if baseline is None: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + new_period, new_origin, new_template = baseline + + old_template = np.array(state.template, dtype=float) + + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA + + merged = merge_template( + old_template=old_template, + new_template=new_template, + alpha=alpha, + ) + + state.period = int(new_period) + state.phase_origin_ts = int(new_origin) + state.template = merged.astype(float).tolist() + state.status = BASELINE_STATUS_HEALTHY + state.last_update_ts = now_sec + + if tail_seconds > 0 and len(ys_grid) >= tail_seconds: + state.y_min = float(np.min(ys_grid[-tail_seconds:])) + state.y_max = float(np.max(ys_grid[-tail_seconds:])) + else: + state.y_min = float(np.min(ys_grid)) + state.y_max = float(np.max(ys_grid)) + + BASELINE_STATES[key] = state + + logger.info( + "更新健康模板 key=%s period=%ss origin=%s clean=%ss alpha=%.2f", + key, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + alpha, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + +# ============================================================================= +# Prometheus Exposition 写入 +# ============================================================================= + +def prom_escape_label_value(value: str) -> str: + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + + parts = [] + + for k in sorted(labels.keys()): + parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') + + return "{" + ",".join(parts) + "}" + + +def write_series( + metric_name: str, + labels: Dict[str, str], + ts_list: List[int], + values: List[float], +) -> bool: + if not ts_list or not values or len(ts_list) != len(values): + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_list, values): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") + + if not lines: + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={ + "Content-Type": "text/plain; version=0.0.4; charset=utf-8", + }, + timeout=10, + ) + resp.raise_for_status() + return True + + except requests.RequestException as e: + logger.error("写入数据失败 metric=%s: %s", metric_name, e) + return False + + +def write_prediction_bundle( + pred_metric: str, + anomaly_metric: str, + labels: Dict[str, str], + ts_future: List[int], + pred_values: np.ndarray, + lower_values: np.ndarray, + upper_values: np.ndarray, + is_anomaly: bool, + outside_ratio: float, + mean_abs_err: float, + mean_rel_err: float, + event_ts: int, +) -> bool: + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) + + anomaly_labels = dict(labels) + anomaly_labels["type"] = "prediction_deviation" + + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + ts_list=[event_ts], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_rel_err], + ) + + return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 + + +# ============================================================================= +# 标签解析 +# ============================================================================= + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def parse_labels_from_query(query: str) -> Dict[str, str]: + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 1:query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if d: + result.update(d) + + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +# ============================================================================= +# 状态持久化 +# ============================================================================= + +def load_state() -> None: + global BASELINE_STATES + + if not os.path.exists(STATE_FILE): + return + + try: + with open(STATE_FILE, "r", encoding="utf-8") as f: + raw = json.load(f) + + states = {} + + for key, value in raw.get("baseline_states", {}).items(): + required_fields = { + "period", + "phase_origin_ts", + "template", + "status", + "clean_seconds", + "last_update_ts", + "last_seen_ts", + "y_min", + "y_max", + } + + if not required_fields.issubset(set(value.keys())): + continue + + states[key] = BaselineState(**value) + + BASELINE_STATES = states + + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) + + except Exception as e: + logger.warning("加载预测状态文件失败,将重新学习: %s", e) + + +def save_state() -> None: + try: + raw = { + "baseline_states": { + key: asdict(value) + for key, value in BASELINE_STATES.items() + } + } + + tmp_file = STATE_FILE + ".tmp" + + with open(tmp_file, "w", encoding="utf-8") as f: + json.dump(raw, f, ensure_ascii=False, indent=2) + + os.replace(tmp_file, STATE_FILE) + + except Exception as e: + logger.warning("保存预测状态文件失败: %s", e) + + +# ============================================================================= +# 时间轴 +# ============================================================================= + +def build_prediction_timestamps( + key: str, + last_real_ts: int, + now_sec: int, +) -> Optional[List[int]]: + data_lag = now_sec - last_real_ts + + if data_lag > MAX_DATA_LAG_SECONDS: + logger.warning( + "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", + key, + data_lag, + MAX_DATA_LAG_SECONDS, + ) + return None + + last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) + + if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None + + if ALIGN_PREDICTION_TO_LAST_REAL_TS: + base_ts = last_real_ts + else: + base_ts = now_sec + + return [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] + + +# ============================================================================= +# 主流程 +# ============================================================================= + +def run_once() -> None: + now_str = datetime.now().strftime("%H:%M:%S") + + for target in PREDICT_TARGETS: + query = target["query"] + pred_metric = target["pred_metric"] + anomaly_metric = target["anomaly_metric"] + abs_threshold = float(target["abs_threshold"]) + rel_threshold = float(target["rel_threshold"]) + + ts, ys = fetch_history(query) + + if len(ys) < MIN_POINTS: + logger.info( + "[%s] %s 数据不足(%d 点),跳过", + now_str, + query, + len(ys), + ) + continue + + ts_grid, ys_grid = normalize_history(ts, ys) + + if len(ys_grid) < MIN_POINTS: + logger.info( + "[%s] %s 清洗后数据不足(%d 点),跳过", + now_str, + query, + len(ys_grid), + ) + continue + + base_labels = parse_labels_from_query(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( + key=key, + ts_grid=ts_grid, + ys_grid=ys_grid, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + if state is None: + logger.info( + "[%s] %s 暂无可用健康模板,等待学习", + now_str, + query, + ) + continue + + now_sec = int(time.time()) + last_real_ts = int(ts_grid[-1]) + data_lag = now_sec - last_real_ts + + ts_future = build_prediction_timestamps( + key=key, + last_real_ts=last_real_ts, + now_sec=now_sec, + ) + + if not ts_future: + continue + + pred_values = predict_with_state(state, ts_future) + + lower_values, upper_values = calc_bounds( + pred=pred_values, + abs_threshold=abs_threshold, + rel_threshold=rel_threshold, + ) + + ok = write_prediction_bundle( + pred_metric=pred_metric, + anomaly_metric=anomaly_metric, + labels=write_labels, + ts_future=ts_future, + pred_values=pred_values, + lower_values=lower_values, + upper_values=upper_values, + is_anomaly=is_anomaly, + outside_ratio=outside_ratio, + mean_abs_err=mean_abs_err, + mean_rel_err=mean_rel_err, + event_ts=last_real_ts, + ) + + if not ok: + logger.error( + "[%s] %s 写入预测数据失败", + now_str, + query, + ) + continue + + LAST_REAL_TS_WRITTEN[key] = last_real_ts + + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") + origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + state.status, + is_anomaly, + state.period, + origin_str, + last_real_str, + data_lag, + len(ts_future), + future_start, + future_end, + ) + + save_state() + + +def main() -> None: + load_state() + + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s align_to_last_real=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + STATE_FILE, + EXTRA_PREDICT_LABELS["forecast"], + ALIGN_PREDICTION_TO_LAST_REAL_TS, + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py new file mode 100644 index 0000000..8657944 --- /dev/null +++ b/ai/pridict_v4.py @@ -0,0 +1,1604 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge Predictor v11 + +核心能力: +1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 +2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 +3. vibration 类指标不再追求单点完全贴合,而是输出: + - xxx_predicted 中位数预测线 + - xxx_predicted_upper 正常上边界 + - xxx_predicted_lower 正常下边界 +4. 预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 +5. 异常期间冻结健康模板,不学习故障数据。 +6. 故障恢复后等待稳定,再恢复模板学习。 +""" + +import json +import logging +import math +import os +import re +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple + +import numpy as np +import requests + + +# ============================================================================= +# 日志配置 +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# 基础配置 +# ============================================================================= + +VM_URL = "http://localhost:8428" +STATE_FILE = "/tmp/protoforge_predictor_state_v11.json" + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 + +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + +QUERY_STEP = "1s" +MIN_POINTS = 120 + +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 8 + +DETECT_WINDOW_SECONDS = 20 +RECOVERY_MIN_SECONDS = 60 + +HEALTHY_EMA_ALPHA = 0.10 +RECOVERY_EMA_ALPHA = 0.25 + +OUTSIDE_RATIO_THRESHOLD = 0.60 + +VALLEY_QUANTILE = 45 + +MAX_DATA_LAG_SECONDS = 180 + +PHASE_LOCK_MIN_WINDOW_SECONDS = 45 +PHASE_LOCK_MAX_WINDOW_SECONDS = 180 +PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 +PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 +PHASE_LOCK_PERIOD_STEP = 1 +PHASE_LOCK_ORIGIN_STEP = 1 + + +# ============================================================================= +# 指标配置 +# ============================================================================= + +PREDICT_TARGETS = [ + { + "query": 'feed_rate{device_id="fanuc-cnc"}', + "pred_metric": "feed_rate_predicted", + "anomaly_metric": "feed_rate_anomaly", + "strategy": "phase_point", + "abs_threshold": 400.0, + "rel_threshold": 0.25, + "smooth_window": 1, + }, + { + "query": 'spindle_speed{device_id="fanuc-cnc"}', + "pred_metric": "spindle_speed_predicted", + "anomaly_metric": "spindle_speed_anomaly", + "strategy": "phase_point", + "abs_threshold": 500.0, + "rel_threshold": 0.25, + "smooth_window": 1, + }, + { + "query": 'spindle_current{device_id="fanuc-cnc"}', + "pred_metric": "spindle_current_predicted", + "anomaly_metric": "spindle_current_anomaly", + "strategy": "phase_point", + "abs_threshold": 5.0, + "rel_threshold": 0.25, + "smooth_window": 1, + }, + { + "query": 'vibration_x{device_id="fanuc-cnc"}', + "pred_metric": "vibration_x_predicted", + "anomaly_metric": "vibration_x_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.50, + "smooth_window": 5, + "band_low_q": 2, + "band_high_q": 98, + "band_pad_abs": 0.12, + }, + { + "query": 'vibration_y{device_id="fanuc-cnc"}', + "pred_metric": "vibration_y_predicted", + "anomaly_metric": "vibration_y_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.50, + "smooth_window": 5, + "band_low_q": 2, + "band_high_q": 98, + "band_pad_abs": 0.12, + }, + { + "query": 'vibration_z{device_id="fanuc-cnc"}', + "pred_metric": "vibration_z_predicted", + "anomaly_metric": "vibration_z_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.50, + "smooth_window": 5, + "band_low_q": 2, + "band_high_q": 98, + "band_pad_abs": 0.12, + } +] + +EXTRA_PREDICT_LABELS = { + "forecast": "phase_band_health_v11", + "source": "protoforge", +} + +BASELINE_STATUS_HEALTHY = "healthy" +BASELINE_STATUS_ANOMALY = "anomaly" +BASELINE_STATUS_RECOVERING = "recovering" + + +# ============================================================================= +# 状态结构 +# ============================================================================= + +@dataclass +class BaselineState: + period: int + phase_origin_ts: int + template: List[float] + lower_template: List[float] + upper_template: List[float] + strategy: str + status: str + clean_seconds: int + last_update_ts: int + last_seen_ts: int + y_min: float + y_max: float + + +BASELINE_STATES: Dict[str, BaselineState] = {} +LAST_REAL_TS_WRITTEN: Dict[str, int] = {} + + +# ============================================================================= +# VictoriaMetrics 读取 +# ============================================================================= + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + now = datetime.now() + start = now - timedelta(minutes=minutes) + + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": QUERY_STEP, + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + + if not result: + return [], [] + + values = result[0].get("values", []) + + ts = [] + ys = [] + + for item in values: + if len(item) < 2: + continue + + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + + if not math.isfinite(t) or not math.isfinite(y): + continue + + ts.append(t) + ys.append(y) + + return ts, ys + + +def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(sec) or not math.isfinite(val): + continue + + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in sorted_items], dtype=float) + + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +# ============================================================================= +# 平滑与预处理 +# ============================================================================= + +def rolling_median(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + if window % 2 == 0: + window += 1 + + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + result = [] + + for i in range(len(arr)): + result.append(float(np.median(padded[i:i + window]))) + + return np.array(result, dtype=float) + + +def moving_average(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + if window % 2 == 0: + window += 1 + + kernel = np.ones(window, dtype=float) / window + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + return np.convolve(padded, kernel, mode="valid") + + +def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray: + strategy = target.get("strategy", "phase_point") + smooth_window = int(target.get("smooth_window", 1)) + + if strategy == "phase_band": + return rolling_median(ys_grid, smooth_window) + + if smooth_window > 1: + return moving_average(ys_grid, smooth_window) + + return ys_grid.astype(float) + + +# ============================================================================= +# 周期估计 +# ============================================================================= + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: + n = len(ys_arr) + + if n < 8: + return 60.0 + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + power = np.abs(fft_vals[1:]) + + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + + if dominant_freq <= 0: + return 60.0 + + period = 1.0 / dominant_freq + + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + n = len(ys_arr) + + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + corr = np.correlate(centered, centered, mode="full")[n - 1:] + + p0 = int(round(init_period)) + left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) + + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period_rough(ys_arr: np.ndarray) -> int: + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + + period = int(round(p_refined)) + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period) + + +# ============================================================================= +# 谷底检测 +# ============================================================================= + +def find_valley_indices( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + expected_period: int, +) -> List[int]: + n = len(ys_grid) + + if n < max(10, expected_period * 2): + return [] + + period = max(3, int(expected_period)) + smooth_window = max(3, int(round(period * 0.08))) + smooth_window = min(smooth_window, 21) + + ys_smooth = moving_average(ys_grid, smooth_window) + threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) + + candidates = [] + + for i in range(1, n - 1): + if ( + ys_smooth[i] <= ys_smooth[i - 1] + and ys_smooth[i] < ys_smooth[i + 1] + and ys_smooth[i] <= threshold + ): + candidates.append(i) + + if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: + candidates = [] + + for i in range(1, n - 1): + if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: + candidates.append(i) + + if not candidates: + return [] + + min_distance = max(2, int(round(period * 0.55))) + selected = [] + + for idx in candidates: + if not selected: + selected.append(idx) + continue + + if idx - selected[-1] >= min_distance: + selected.append(idx) + continue + + if ys_smooth[idx] < ys_smooth[selected[-1]]: + selected[-1] = idx + + if len(selected) < 2: + return selected + + cleaned = [selected[0]] + + for idx in selected[1:]: + diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) + + if int(period * 0.55) <= diff <= int(period * 1.60): + cleaned.append(idx) + continue + + if diff < int(period * 0.55): + if ys_smooth[idx] < ys_smooth[cleaned[-1]]: + cleaned[-1] = idx + continue + + cleaned.append(idx) + + return cleaned + + +def detect_period_and_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, List[int]]: + rough = estimate_period_rough(ys_grid) + valleys = find_valley_indices(ts_grid, ys_grid, rough) + + if len(valleys) >= 3: + diffs = np.diff(ts_grid[valleys]) + good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] + + if len(good) > 0: + period = int(round(float(np.median(good)))) + else: + period = rough + else: + period = rough + + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period), valleys + + +# ============================================================================= +# 模板构建 +# ============================================================================= + +def build_templates_from_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + period: int, + valleys: List[int], + target: Dict, +) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]: + if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: + return None + + strategy = target.get("strategy", "phase_point") + low_q = float(target.get("band_low_q", 10)) + high_q = float(target.get("band_high_q", 90)) + + pairs = [] + + for a, b in zip(valleys[:-1], valleys[1:]): + cycle_len = float(ts_grid[b] - ts_grid[a]) + + if period * 0.55 <= cycle_len <= period * 1.60: + pairs.append((a, b, cycle_len)) + + if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:] + + phase_grid = np.arange(period, dtype=float) + segments = [] + weights = [] + + for idx, (a, b, cycle_len) in enumerate(pairs): + seg_ts = ts_grid[a:b + 1] + seg_y = ys_grid[a:b + 1] + + if len(seg_y) < 3: + continue + + x_old = (seg_ts - seg_ts[0]) / cycle_len * period + seg = np.interp(phase_grid, x_old, seg_y) + + segments.append(seg.astype(float)) + weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs))) + + if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + arr = np.vstack(segments) + w_arr = np.array(weights, dtype=float) + + if strategy == "phase_band": + mid_template = np.percentile(arr, 50, axis=0) + lower_template = np.percentile(arr, low_q, axis=0) + upper_template = np.percentile(arr, high_q, axis=0) + else: + mid_template = np.average(arr, axis=0, weights=w_arr) + lower_template = mid_template.copy() + upper_template = mid_template.copy() + + return ( + mid_template.astype(float), + lower_template.astype(float), + upper_template.astype(float), + ) + + +def build_current_baseline( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + target: Dict, + tail_seconds: Optional[int] = None, +) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]: + if len(ys_grid) < MIN_POINTS: + return None + + if tail_seconds is not None and tail_seconds > 0: + cutoff = ts_grid[-1] - int(tail_seconds) + mask = ts_grid >= cutoff + ts_use = ts_grid[mask] + ys_use = ys_grid[mask] + else: + ts_use = ts_grid + ys_use = ys_grid + + if len(ys_use) < MIN_POINTS: + return None + + period, valleys = detect_period_and_valleys(ts_use, ys_use) + + templates = build_templates_from_valleys( + ts_grid=ts_use, + ys_grid=ys_use, + period=period, + valleys=valleys, + target=target, + ) + + if templates is None or len(valleys) == 0: + return None + + template, lower_template, upper_template = templates + phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) + + return int(period), phase_origin_ts, template, lower_template, upper_template + + +# ============================================================================= +# 模板预测 +# ============================================================================= + +def circular_template_value(template: np.ndarray, phase: float) -> float: + period = len(template) + + if period == 0: + return 0.0 + + phase = float(phase) % period + i0 = int(math.floor(phase)) % period + i1 = (i0 + 1) % period + frac = phase - math.floor(phase) + + return float((1.0 - frac) * template[i0] + frac * template[i1]) + + +def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: + old_period = len(old_template) + + if old_period == new_period: + return old_template.astype(float) + + if old_period <= 1 or new_period <= 1: + return np.full(new_period, float(np.mean(old_template)), dtype=float) + + old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) + new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) + + old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) + old_y_ext = np.concatenate([old_template, old_template, old_template]) + + return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + + +def predict_template_values( + template: np.ndarray, + period: int, + phase_origin_ts: int, + ts_list: List[int], +) -> np.ndarray: + if period <= 1: + return np.zeros(len(ts_list), dtype=float) + + if len(template) != period: + template = resample_template(template, period) + + values = [] + + for ts in ts_list: + phase = (int(ts) - int(phase_origin_ts)) % period + values.append(circular_template_value(template, phase)) + + return np.array(values, dtype=float) + + +def predict_state_bundle( + state: BaselineState, + ts_list: List[int], +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + period = int(state.period) + origin = int(state.phase_origin_ts) + + mid = predict_template_values( + template=np.array(state.template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + + lower = predict_template_values( + template=np.array(state.lower_template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + + upper = predict_template_values( + template=np.array(state.upper_template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + + return mid, lower, upper + + +def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: + if period <= 1: + return origin + + origin = int(origin) + period = int(period) + near_ts = int(near_ts) + + while origin + period <= near_ts: + origin += period + + while origin > near_ts: + origin -= period + + return origin + + +def merge_template( + old_template: np.ndarray, + new_template: np.ndarray, + alpha: float, +) -> np.ndarray: + alpha = float(np.clip(alpha, 0.0, 1.0)) + + if len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + merged = (1.0 - alpha) * old_template + alpha * new_template + + return merged.astype(float) + + +# ============================================================================= +# Phase Lock +# ============================================================================= + +def phase_lock_recent( + state: BaselineState, + ts_grid: np.ndarray, + ys_model: np.ndarray, +) -> Tuple[int, int, np.ndarray, float]: + base_period = int(state.period) + base_origin = int(state.phase_origin_ts) + base_template = np.array(state.template, dtype=float) + + if base_period <= 1 or len(base_template) <= 1: + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + pred = predict_template_values(base_template, base_period, base_origin, ts_recent) + actual = ys_model[-len(ts_recent):].astype(float) + mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 + return base_period, base_origin, pred, mae + + window_seconds = max( + PHASE_LOCK_MIN_WINDOW_SECONDS, + min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), + ) + + cutoff = ts_grid[-1] - window_seconds + mask = ts_grid >= cutoff + + ts_recent_arr = ts_grid[mask].astype(int) + actual = ys_model[mask].astype(float) + + if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): + ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) + actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float) + + ts_recent = ts_recent_arr.tolist() + last_ts = int(ts_recent[-1]) + + p_min = max( + int(MIN_PERIOD_SECONDS), + int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))), + ) + p_max = min( + int(MAX_PERIOD_SECONDS), + int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))), + ) + + best_period = base_period + best_origin = normalize_origin_near(base_origin, base_period, last_ts) + best_template = resample_template(base_template, best_period) + + best_pred = predict_template_values( + template=best_template, + period=best_period, + phase_origin_ts=best_origin, + ts_list=ts_recent, + ) + + best_mae = float(np.mean(np.abs(actual - best_pred))) + + for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): + template = resample_template(base_template, period) + center_origin = normalize_origin_near(base_origin, period, last_ts) + origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) + + for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): + origin = center_origin + shift + + pred = predict_template_values( + template=template, + period=period, + phase_origin_ts=origin, + ts_list=ts_recent, + ) + + mae = float(np.mean(np.abs(actual - pred))) + penalty = abs(period - base_period) * 0.5 + score = mae + penalty + + best_score = best_mae + abs(best_period - base_period) * 0.5 + + if score < best_score: + best_period = period + best_origin = origin + best_pred = pred + best_mae = mae + + best_origin = normalize_origin_near(best_origin, best_period, last_ts) + + return int(best_period), int(best_origin), best_pred, float(best_mae) + + +# ============================================================================= +# 异常检测 +# ============================================================================= + +def calc_point_bounds( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[np.ndarray, np.ndarray]: + threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold) + return pred - threshold, pred + threshold + + +def calc_final_bounds( + state: BaselineState, + pred: np.ndarray, + lower_raw: np.ndarray, + upper_raw: np.ndarray, + target: Dict, +) -> Tuple[np.ndarray, np.ndarray]: + strategy = target.get("strategy", "phase_point") + abs_threshold = float(target.get("abs_threshold", 1.0)) + rel_threshold = float(target.get("rel_threshold", 0.25)) + + if strategy == "phase_band": + pad_abs = float(target.get("band_pad_abs", abs_threshold)) + dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.20) + lower = lower_raw - dynamic_pad + upper = upper_raw + dynamic_pad + return lower, upper + + return calc_point_bounds(pred, abs_threshold, rel_threshold) + + +def detect_anomaly( + state: BaselineState, + ts_grid: np.ndarray, + ys_model: np.ndarray, + target: Dict, +) -> Tuple[bool, float, float, float, int, int]: + best_period, best_origin, pred_recent, _ = phase_lock_recent( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + ) + + recent_len = len(pred_recent) + + if recent_len <= 0: + return False, 0.0, 0.0, 0.0, best_period, best_origin + + actual = ys_model[-recent_len:].astype(float) + + tmp_state = BaselineState( + period=best_period, + phase_origin_ts=best_origin, + template=state.template, + lower_template=state.lower_template, + upper_template=state.upper_template, + strategy=state.strategy, + status=state.status, + clean_seconds=state.clean_seconds, + last_update_ts=state.last_update_ts, + last_seen_ts=state.last_seen_ts, + y_min=state.y_min, + y_max=state.y_max, + ) + + recent_ts = ts_grid[-recent_len:].astype(int).tolist() + pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts) + + lower, upper = calc_final_bounds( + state=tmp_state, + pred=pred, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + outside = (actual < lower) | (actual > upper) + abs_err = np.abs(actual - pred) + + outside_ratio = float(np.mean(outside)) + mean_abs_err = float(np.mean(abs_err)) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6))) + + is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD + + return ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + int(best_period), + int(best_origin), + ) + + +# ============================================================================= +# 状态管理 +# ============================================================================= + +def create_initial_state( + ts_grid: np.ndarray, + ys_model: np.ndarray, + target: Dict, + now_sec: int, +) -> Optional[BaselineState]: + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_grid=ys_model, + target=target, + ) + + if baseline is None: + return None + + period, phase_origin_ts, template, lower_template, upper_template = baseline + + return BaselineState( + period=int(period), + phase_origin_ts=int(phase_origin_ts), + template=template.astype(float).tolist(), + lower_template=lower_template.astype(float).tolist(), + upper_template=upper_template.astype(float).tolist(), + strategy=str(target.get("strategy", "phase_point")), + status=BASELINE_STATUS_HEALTHY, + clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), + last_update_ts=now_sec, + last_seen_ts=now_sec, + y_min=float(np.min(ys_model)), + y_max=float(np.max(ys_model)), + ) + + +def apply_phase_lock_to_state( + state: BaselineState, + best_period: int, + best_origin: int, +) -> None: + best_period = int(best_period) + + if best_period <= 1: + return + + if len(state.template) != best_period: + state.template = resample_template( + np.array(state.template, dtype=float), + best_period, + ).astype(float).tolist() + + if len(state.lower_template) != best_period: + state.lower_template = resample_template( + np.array(state.lower_template, dtype=float), + best_period, + ).astype(float).tolist() + + if len(state.upper_template) != best_period: + state.upper_template = resample_template( + np.array(state.upper_template, dtype=float), + best_period, + ).astype(float).tolist() + + state.period = best_period + state.phase_origin_ts = int(best_origin) + + +def maybe_update_state( + key: str, + ts_grid: np.ndarray, + ys_model: np.ndarray, + target: Dict, +) -> Tuple[Optional[BaselineState], bool, float, float, float]: + now_sec = int(time.time()) + state = BASELINE_STATES.get(key) + + if state is None: + state = create_initial_state( + ts_grid=ts_grid, + ys_model=ys_model, + target=target, + now_sec=now_sec, + ) + + if state is None: + return None, False, 0.0, 0.0, 0.0 + + BASELINE_STATES[key] = state + + logger.info( + "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + ) + + return state, False, 0.0, 0.0, 0.0 + + elapsed = max(1, now_sec - int(state.last_seen_ts)) + elapsed = min(elapsed, POLL_INTERVAL * 2) + state.last_seen_ts = now_sec + + ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + best_period, + best_origin, + ) = detect_anomaly( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + target=target, + ) + + if is_anomaly: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + BASELINE_STATES[key] = state + + logger.warning( + "检测到异常,冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f", + key, + outside_ratio, + mean_abs_err, + mean_rel_err, + ) + + return state, True, outside_ratio, mean_abs_err, mean_rel_err + + old_period = int(state.period) + old_origin = int(state.phase_origin_ts) + + apply_phase_lock_to_state(state, best_period, best_origin) + + if old_period != state.period or old_origin != state.phase_origin_ts: + logger.info( + "phase-lock key=%s period %s -> %s origin %s -> %s", + key, + old_period, + state.period, + datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + ) + + if state.status == BASELINE_STATUS_ANOMALY: + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed + BASELINE_STATES[key] = state + + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + if state.status == BASELINE_STATUS_RECOVERING: + state.clean_seconds += elapsed + else: + state.status = BASELINE_STATUS_HEALTHY + state.clean_seconds += elapsed + + min_clean_for_update = max( + RECOVERY_MIN_SECONDS, + int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, + ) + + if state.clean_seconds < min_clean_for_update: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + tail_seconds = min( + int(state.clean_seconds), + int(state.period) * MAX_CYCLES_FOR_TEMPLATE, + ) + + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_grid=ys_model, + target=target, + tail_seconds=tail_seconds, + ) + + if baseline is None: + BASELINE_STATES[key] = state + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline + + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA + + state.template = merge_template( + np.array(state.template, dtype=float), + new_template, + alpha, + ).astype(float).tolist() + + state.lower_template = merge_template( + np.array(state.lower_template, dtype=float), + new_lower_template, + alpha, + ).astype(float).tolist() + + state.upper_template = merge_template( + np.array(state.upper_template, dtype=float), + new_upper_template, + alpha, + ).astype(float).tolist() + + state.period = int(new_period) + state.phase_origin_ts = int(new_origin) + state.status = BASELINE_STATUS_HEALTHY + state.last_update_ts = now_sec + + if tail_seconds > 0 and len(ys_model) >= tail_seconds: + state.y_min = float(np.min(ys_model[-tail_seconds:])) + state.y_max = float(np.max(ys_model[-tail_seconds:])) + else: + state.y_min = float(np.min(ys_model)) + state.y_max = float(np.max(ys_model)) + + BASELINE_STATES[key] = state + + logger.info( + "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + alpha, + ) + + return state, False, outside_ratio, mean_abs_err, mean_rel_err + + +# ============================================================================= +# Prometheus 写入 +# ============================================================================= + +def prom_escape_label_value(value: str) -> str: + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + + parts = [] + + for k in sorted(labels.keys()): + parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') + + return "{" + ",".join(parts) + "}" + + +def write_series( + metric_name: str, + labels: Dict[str, str], + ts_list: List[int], + values: List[float], +) -> bool: + if not ts_list or not values or len(ts_list) != len(values): + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_list, values): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") + + if not lines: + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + timeout=10, + ) + resp.raise_for_status() + return True + + except requests.RequestException as e: + logger.error("写入数据失败 metric=%s: %s", metric_name, e) + return False + + +def write_prediction_bundle( + pred_metric: str, + anomaly_metric: str, + labels: Dict[str, str], + ts_future: List[int], + pred_values: np.ndarray, + lower_values: np.ndarray, + upper_values: np.ndarray, + is_anomaly: bool, + outside_ratio: float, + mean_abs_err: float, + mean_rel_err: float, + event_ts: int, +) -> bool: + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) + + anomaly_labels = dict(labels) + anomaly_labels["type"] = "prediction_deviation" + + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + ts_list=[event_ts], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_rel_err], + ) + + return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 + + +# ============================================================================= +# 标签解析 +# ============================================================================= + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def parse_labels_from_query(query: str) -> Dict[str, str]: + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 1:query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if d: + result.update(d) + + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +# ============================================================================= +# 状态持久化 +# ============================================================================= + +def load_state() -> None: + global BASELINE_STATES + + if not os.path.exists(STATE_FILE): + return + + try: + with open(STATE_FILE, "r", encoding="utf-8") as f: + raw = json.load(f) + + states = {} + + for key, value in raw.get("baseline_states", {}).items(): + required_fields = { + "period", + "phase_origin_ts", + "template", + "lower_template", + "upper_template", + "strategy", + "status", + "clean_seconds", + "last_update_ts", + "last_seen_ts", + "y_min", + "y_max", + } + + if not required_fields.issubset(set(value.keys())): + continue + + states[key] = BaselineState(**value) + + BASELINE_STATES = states + + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) + + except Exception as e: + logger.warning("加载预测状态文件失败,将重新学习: %s", e) + + +def save_state() -> None: + try: + raw = { + "baseline_states": { + key: asdict(value) + for key, value in BASELINE_STATES.items() + } + } + + tmp_file = STATE_FILE + ".tmp" + + with open(tmp_file, "w", encoding="utf-8") as f: + json.dump(raw, f, ensure_ascii=False, indent=2) + + os.replace(tmp_file, STATE_FILE) + + except Exception as e: + logger.warning("保存预测状态文件失败: %s", e) + + +# ============================================================================= +# 时间轴 +# ============================================================================= + +def build_prediction_timestamps( + key: str, + last_real_ts: int, + now_sec: int, +) -> Optional[List[int]]: + data_lag = now_sec - last_real_ts + + if data_lag > MAX_DATA_LAG_SECONDS: + logger.warning( + "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", + key, + data_lag, + MAX_DATA_LAG_SECONDS, + ) + return None + + last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) + + if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None + + base_ts = last_real_ts + + return [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] + + +# ============================================================================= +# 主流程 +# ============================================================================= + +def run_once() -> None: + now_str = datetime.now().strftime("%H:%M:%S") + + for target in PREDICT_TARGETS: + query = target["query"] + pred_metric = target["pred_metric"] + anomaly_metric = target["anomaly_metric"] + + ts, ys = fetch_history(query) + + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + ts_grid, ys_grid_raw = normalize_history(ts, ys) + + if len(ys_grid_raw) < MIN_POINTS: + logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid_raw)) + continue + + ys_grid_model = preprocess_values(ys_grid_raw, target) + + base_labels = parse_labels_from_query(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state( + key=key, + ts_grid=ts_grid, + ys_model=ys_grid_model, + target=target, + ) + + if state is None: + logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + continue + + now_sec = int(time.time()) + last_real_ts = int(ts_grid[-1]) + data_lag = now_sec - last_real_ts + + ts_future = build_prediction_timestamps( + key=key, + last_real_ts=last_real_ts, + now_sec=now_sec, + ) + + if not ts_future: + continue + + pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future) + + lower_values, upper_values = calc_final_bounds( + state=state, + pred=pred_values, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + ok = write_prediction_bundle( + pred_metric=pred_metric, + anomaly_metric=anomaly_metric, + labels=write_labels, + ts_future=ts_future, + pred_values=pred_values, + lower_values=lower_values, + upper_values=upper_values, + is_anomaly=is_anomaly, + outside_ratio=outside_ratio, + mean_abs_err=mean_abs_err, + mean_rel_err=mean_rel_err, + event_ts=last_real_ts, + ) + + if not ok: + logger.error("[%s] %s 写入预测数据失败", now_str, query) + continue + + LAST_REAL_TS_WRITTEN[key] = last_real_ts + + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") + origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + state.strategy, + state.status, + is_anomaly, + state.period, + origin_str, + last_real_str, + data_lag, + len(ts_future), + future_start, + future_end, + ) + + save_state() + + +def main() -> None: + load_state() + + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + STATE_FILE, + EXTRA_PREDICT_LABELS["forecast"], + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py new file mode 100644 index 0000000..6894a66 --- /dev/null +++ b/ai/pridict_v5.py @@ -0,0 +1,1794 @@ +# -*- coding: utf-8 -*- +""" +ProtoForge Predictor v12 + +核心能力: +1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。 +2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。 +3. vibration 类指标: + - predicted 使用平滑后的中位数模板,用于趋势参考。 + - upper/lower 使用原始波动分位数模板 + padding,用于正常波动容忍带。 + - 偶发越界不直接报警,只有持续越界 / 高比例越界 / 严重越界才报警。 +4. 预测起点锚定最后一个真实点 last_real_ts,避免时间错位。 +5. 异常期间冻结健康模板,不学习故障数据。 +6. 故障恢复后等待稳定,再恢复模板学习。 +7. 写入: + - xxx_predicted + - xxx_predicted_upper + - xxx_predicted_lower + - xxx_anomaly + - xxx_anomaly_outside_ratio + - xxx_anomaly_mean_abs_error + - xxx_anomaly_mean_rel_error + - xxx_anomaly_max_consecutive_outside + - xxx_anomaly_max_exceed_ratio +""" + +import json +import logging +import math +import os +import re +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple + +import numpy as np +import requests + + +# ============================================================================= +# 日志配置 +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# 基础配置 +# ============================================================================= + +VM_URL = "http://localhost:8428" +STATE_FILE = "/tmp/protoforge_predictor_state_v12.json" + +HISTORY_MINUTES = 30 +HORIZON_SECONDS = 120 +POLL_INTERVAL = 30 + +WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL) + +QUERY_STEP = "1s" +MIN_POINTS = 120 + +MIN_PERIOD_SECONDS = 5 +MAX_PERIOD_SECONDS = 3600 + +MIN_FULL_CYCLES_FOR_TEMPLATE = 3 +MAX_CYCLES_FOR_TEMPLATE = 8 + +DETECT_WINDOW_SECONDS = 30 +RECOVERY_MIN_SECONDS = 60 + +HEALTHY_EMA_ALPHA = 0.10 +RECOVERY_EMA_ALPHA = 0.25 + +OUTSIDE_RATIO_THRESHOLD = 0.60 +MIN_CONSECUTIVE_OUTSIDE = 5 +SEVERE_EXCEED_RATIO = 1.8 + +VALLEY_QUANTILE = 45 + +MAX_DATA_LAG_SECONDS = 180 + +PHASE_LOCK_MIN_WINDOW_SECONDS = 45 +PHASE_LOCK_MAX_WINDOW_SECONDS = 180 +PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12 +PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35 +PHASE_LOCK_PERIOD_STEP = 1 +PHASE_LOCK_ORIGIN_STEP = 1 + + +# ============================================================================= +# 指标配置 +# ============================================================================= + +PREDICT_TARGETS = [ + { + "query": 'feed_rate{device_id="fanuc-cnc"}', + "pred_metric": "feed_rate_predicted", + "anomaly_metric": "feed_rate_anomaly", + "strategy": "phase_point", + "abs_threshold": 400.0, + "rel_threshold": 0.25, + "smooth_window": 1, + "outside_ratio_threshold": 0.60, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 1.8, + }, + { + "query": 'spindle_speed{device_id="fanuc-cnc"}', + "pred_metric": "spindle_speed_predicted", + "anomaly_metric": "spindle_speed_anomaly", + "strategy": "phase_point", + "abs_threshold": 500.0, + "rel_threshold": 0.25, + "smooth_window": 1, + "outside_ratio_threshold": 0.60, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 1.8, + }, + { + "query": 'spindle_current{device_id="fanuc-cnc"}', + "pred_metric": "spindle_current_predicted", + "anomaly_metric": "spindle_current_anomaly", + "strategy": "phase_point", + "abs_threshold": 5.0, + "rel_threshold": 0.25, + "smooth_window": 1, + "outside_ratio_threshold": 0.60, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 1.8, + }, + { + "query": 'vibration_x{device_id="fanuc-cnc"}', + "pred_metric": "vibration_x_predicted", + "anomaly_metric": "vibration_x_anomaly", + "strategy": "phase_band", + + # vibration 类指标噪声、尖峰较多,不建议用很窄的阈值。 + "abs_threshold": 0.18, + "rel_threshold": 0.55, + + # 平滑只用于相位锁定和 predicted 中位趋势。 + "smooth_window": 5, + + # upper/lower 用原始值分位数,范围放宽,覆盖正常尖峰。 + "band_low_q": 1, + "band_high_q": 99, + "band_pad_abs": 0.15, + + # 偶发越界容忍。 + "outside_ratio_threshold": 0.70, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 2.0, + }, + { + "query": 'vibration_y{device_id="fanuc-cnc"}', + "pred_metric": "vibration_y_predicted", + "anomaly_metric": "vibration_y_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.55, + "smooth_window": 5, + "band_low_q": 1, + "band_high_q": 99, + "band_pad_abs": 0.15, + "outside_ratio_threshold": 0.70, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 2.0, + }, + { + "query": 'vibration_z{device_id="fanuc-cnc"}', + "pred_metric": "vibration_z_predicted", + "anomaly_metric": "vibration_z_anomaly", + "strategy": "phase_band", + "abs_threshold": 0.18, + "rel_threshold": 0.55, + "smooth_window": 5, + "band_low_q": 1, + "band_high_q": 99, + "band_pad_abs": 0.15, + "outside_ratio_threshold": 0.70, + "min_consecutive_outside": 5, + "severe_exceed_ratio": 2.0, + }, +] + +EXTRA_PREDICT_LABELS = { + "forecast": "phase_band_health_v12", + "source": "protoforge", +} + +BASELINE_STATUS_HEALTHY = "healthy" +BASELINE_STATUS_ANOMALY = "anomaly" +BASELINE_STATUS_RECOVERING = "recovering" + + +# ============================================================================= +# 状态结构 +# ============================================================================= + +@dataclass +class BaselineState: + period: int + phase_origin_ts: int + template: List[float] + lower_template: List[float] + upper_template: List[float] + strategy: str + status: str + clean_seconds: int + last_update_ts: int + last_seen_ts: int + y_min: float + y_max: float + + +BASELINE_STATES: Dict[str, BaselineState] = {} +LAST_REAL_TS_WRITTEN: Dict[str, int] = {} + + +# ============================================================================= +# VictoriaMetrics 读取 +# ============================================================================= + +def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]: + now = datetime.now() + start = now - timedelta(minutes=minutes) + + try: + resp = requests.get( + f"{VM_URL}/api/v1/query_range", + params={ + "query": query, + "start": start.timestamp(), + "end": now.timestamp(), + "step": QUERY_STEP, + }, + timeout=10, + ) + resp.raise_for_status() + except requests.RequestException as e: + logger.error("拉取数据失败 query=%s: %s", query, e) + return [], [] + + try: + result = resp.json().get("data", {}).get("result", []) + except Exception as e: + logger.error("解析 VM 返回失败 query=%s: %s", query, e) + return [], [] + + if not result: + return [], [] + + values = result[0].get("values", []) + + ts = [] + ys = [] + + for item in values: + if len(item) < 2: + continue + + try: + t = float(item[0]) + y = float(item[1]) + except Exception: + continue + + if not math.isfinite(t) or not math.isfinite(y): + continue + + ts.append(t) + ys.append(y) + + return ts, ys + + +def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]: + if not ts or not ys or len(ts) != len(ys): + return np.array([]), np.array([]) + + data = {} + + for t, y in zip(ts, ys): + try: + sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(sec) or not math.isfinite(val): + continue + + data[sec] = val + + if not data: + return np.array([]), np.array([]) + + sorted_items = sorted(data.items(), key=lambda x: x[0]) + + ts_clean = np.array([x[0] for x in sorted_items], dtype=float) + ys_clean = np.array([x[1] for x in sorted_items], dtype=float) + + if len(ts_clean) < 2: + return ts_clean, ys_clean + + start_sec = int(ts_clean[0]) + end_sec = int(ts_clean[-1]) + + if end_sec <= start_sec: + return ts_clean, ys_clean + + ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float) + ys_grid = np.interp(ts_grid, ts_clean, ys_clean) + + return ts_grid, ys_grid + + +# ============================================================================= +# 平滑与预处理 +# ============================================================================= + +def rolling_median(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + if window % 2 == 0: + window += 1 + + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + result = [] + + for i in range(len(arr)): + result.append(float(np.median(padded[i:i + window]))) + + return np.array(result, dtype=float) + + +def moving_average(arr: np.ndarray, window: int) -> np.ndarray: + if window <= 1 or len(arr) < window: + return arr.astype(float) + + if window % 2 == 0: + window += 1 + + kernel = np.ones(window, dtype=float) / window + pad = window // 2 + padded = np.pad(arr.astype(float), (pad, pad), mode="edge") + + return np.convolve(padded, kernel, mode="valid") + + +def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray: + strategy = target.get("strategy", "phase_point") + smooth_window = int(target.get("smooth_window", 1)) + + if strategy == "phase_band": + return rolling_median(ys_grid, smooth_window) + + if smooth_window > 1: + return moving_average(ys_grid, smooth_window) + + return ys_grid.astype(float) + + +# ============================================================================= +# 周期估计 +# ============================================================================= + +def estimate_period_by_fft(ys_arr: np.ndarray) -> float: + n = len(ys_arr) + + if n < 8: + return 60.0 + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return 60.0 + + fft_vals = np.fft.rfft(centered) + freqs = np.fft.rfftfreq(n, d=1.0) + + if len(freqs) <= 1: + return 60.0 + + power = np.abs(fft_vals[1:]) + + if len(power) == 0 or np.max(power) <= 0: + return 60.0 + + dominant_idx = int(np.argmax(power)) + 1 + dominant_freq = float(freqs[dominant_idx]) + + if dominant_freq <= 0: + return 60.0 + + period = 1.0 / dominant_freq + + return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float: + n = len(ys_arr) + + if n < 20: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + centered = ys_arr - np.mean(ys_arr) + + if np.allclose(centered, 0): + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + corr = np.correlate(centered, centered, mode="full")[n - 1:] + + p0 = int(round(init_period)) + left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7))) + right = min(n // 2, int(max(left + 1, p0 * 1.3))) + + if right <= left: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + search = corr[left:right + 1] + + if len(search) == 0: + return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + best_lag = left + int(np.argmax(search)) + + return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS)) + + +def estimate_period_rough(ys_arr: np.ndarray) -> int: + p_fft = estimate_period_by_fft(ys_arr) + p_refined = refine_period_by_autocorr(ys_arr, p_fft) + + period = int(round(p_refined)) + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period) + + +# ============================================================================= +# 谷底检测 +# ============================================================================= + +def find_valley_indices( + ts_grid: np.ndarray, + ys_grid: np.ndarray, + expected_period: int, +) -> List[int]: + n = len(ys_grid) + + if n < max(10, expected_period * 2): + return [] + + period = max(3, int(expected_period)) + smooth_window = max(3, int(round(period * 0.08))) + smooth_window = min(smooth_window, 21) + + ys_smooth = moving_average(ys_grid, smooth_window) + threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE)) + + candidates = [] + + for i in range(1, n - 1): + if ( + ys_smooth[i] <= ys_smooth[i - 1] + and ys_smooth[i] < ys_smooth[i + 1] + and ys_smooth[i] <= threshold + ): + candidates.append(i) + + if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE: + candidates = [] + + for i in range(1, n - 1): + if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]: + candidates.append(i) + + if not candidates: + return [] + + min_distance = max(2, int(round(period * 0.55))) + selected = [] + + for idx in candidates: + if not selected: + selected.append(idx) + continue + + if idx - selected[-1] >= min_distance: + selected.append(idx) + continue + + if ys_smooth[idx] < ys_smooth[selected[-1]]: + selected[-1] = idx + + if len(selected) < 2: + return selected + + cleaned = [selected[0]] + + for idx in selected[1:]: + diff = int(ts_grid[idx] - ts_grid[cleaned[-1]]) + + if int(period * 0.55) <= diff <= int(period * 1.60): + cleaned.append(idx) + continue + + if diff < int(period * 0.55): + if ys_smooth[idx] < ys_smooth[cleaned[-1]]: + cleaned[-1] = idx + continue + + cleaned.append(idx) + + return cleaned + + +def detect_period_and_valleys( + ts_grid: np.ndarray, + ys_grid: np.ndarray, +) -> Tuple[int, List[int]]: + rough = estimate_period_rough(ys_grid) + valleys = find_valley_indices(ts_grid, ys_grid, rough) + + if len(valleys) >= 3: + diffs = np.diff(ts_grid[valleys]) + good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)] + + if len(good) > 0: + period = int(round(float(np.median(good)))) + else: + period = rough + else: + period = rough + + period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period)) + + return int(period), valleys + + +# ============================================================================= +# 模板构建 +# ============================================================================= + +def build_templates_from_valleys( + ts_grid: np.ndarray, + ys_mid_grid: np.ndarray, + ys_band_grid: np.ndarray, + period: int, + valleys: List[int], + target: Dict, +) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]: + if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1: + return None + + strategy = target.get("strategy", "phase_point") + low_q = float(target.get("band_low_q", 10)) + high_q = float(target.get("band_high_q", 90)) + + pairs = [] + + for a, b in zip(valleys[:-1], valleys[1:]): + cycle_len = float(ts_grid[b] - ts_grid[a]) + + if period * 0.55 <= cycle_len <= period * 1.60: + pairs.append((a, b, cycle_len)) + + if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:] + + phase_grid = np.arange(period, dtype=float) + mid_segments = [] + band_segments = [] + weights = [] + + for idx, (a, b, cycle_len) in enumerate(pairs): + seg_ts = ts_grid[a:b + 1] + seg_mid_y = ys_mid_grid[a:b + 1] + seg_band_y = ys_band_grid[a:b + 1] + + if len(seg_mid_y) < 3 or len(seg_band_y) < 3: + continue + + x_old = (seg_ts - seg_ts[0]) / cycle_len * period + + mid_seg = np.interp(phase_grid, x_old, seg_mid_y) + band_seg = np.interp(phase_grid, x_old, seg_band_y) + + mid_segments.append(mid_seg.astype(float)) + band_segments.append(band_seg.astype(float)) + weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs))) + + if len(mid_segments) < MIN_FULL_CYCLES_FOR_TEMPLATE: + return None + + mid_arr = np.vstack(mid_segments) + band_arr = np.vstack(band_segments) + w_arr = np.array(weights, dtype=float) + + if strategy == "phase_band": + mid_template = np.percentile(mid_arr, 50, axis=0) + + # upper/lower 使用原始值分布,而不是平滑值分布。 + lower_template = np.percentile(band_arr, low_q, axis=0) + upper_template = np.percentile(band_arr, high_q, axis=0) + else: + mid_template = np.average(mid_arr, axis=0, weights=w_arr) + lower_template = mid_template.copy() + upper_template = mid_template.copy() + + return ( + mid_template.astype(float), + lower_template.astype(float), + upper_template.astype(float), + ) + + +def build_current_baseline( + ts_grid: np.ndarray, + ys_mid_grid: np.ndarray, + ys_band_grid: np.ndarray, + target: Dict, + tail_seconds: Optional[int] = None, +) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]: + if len(ys_mid_grid) < MIN_POINTS or len(ys_band_grid) < MIN_POINTS: + return None + + if tail_seconds is not None and tail_seconds > 0: + cutoff = ts_grid[-1] - int(tail_seconds) + mask = ts_grid >= cutoff + ts_use = ts_grid[mask] + ys_mid_use = ys_mid_grid[mask] + ys_band_use = ys_band_grid[mask] + else: + ts_use = ts_grid + ys_mid_use = ys_mid_grid + ys_band_use = ys_band_grid + + if len(ys_mid_use) < MIN_POINTS or len(ys_band_use) < MIN_POINTS: + return None + + period, valleys = detect_period_and_valleys(ts_use, ys_mid_use) + + templates = build_templates_from_valleys( + ts_grid=ts_use, + ys_mid_grid=ys_mid_use, + ys_band_grid=ys_band_use, + period=period, + valleys=valleys, + target=target, + ) + + if templates is None or len(valleys) == 0: + return None + + template, lower_template, upper_template = templates + phase_origin_ts = int(round(float(ts_use[valleys[-1]]))) + + return int(period), phase_origin_ts, template, lower_template, upper_template + + +# ============================================================================= +# 模板预测 +# ============================================================================= + +def circular_template_value(template: np.ndarray, phase: float) -> float: + period = len(template) + + if period == 0: + return 0.0 + + phase = float(phase) % period + i0 = int(math.floor(phase)) % period + i1 = (i0 + 1) % period + frac = phase - math.floor(phase) + + return float((1.0 - frac) * template[i0] + frac * template[i1]) + + +def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray: + old_period = len(old_template) + + if old_period == new_period: + return old_template.astype(float) + + if old_period <= 1 or new_period <= 1: + return np.full(new_period, float(np.mean(old_template)), dtype=float) + + old_x = np.linspace(0.0, 1.0, old_period, endpoint=False) + new_x = np.linspace(0.0, 1.0, new_period, endpoint=False) + + old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0]) + old_y_ext = np.concatenate([old_template, old_template, old_template]) + + return np.interp(new_x, old_x_ext, old_y_ext).astype(float) + + +def predict_template_values( + template: np.ndarray, + period: int, + phase_origin_ts: int, + ts_list: List[int], +) -> np.ndarray: + if period <= 1: + return np.zeros(len(ts_list), dtype=float) + + if len(template) != period: + template = resample_template(template, period) + + values = [] + + for ts in ts_list: + phase = (int(ts) - int(phase_origin_ts)) % period + values.append(circular_template_value(template, phase)) + + return np.array(values, dtype=float) + + +def predict_state_bundle( + state: BaselineState, + ts_list: List[int], +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + period = int(state.period) + origin = int(state.phase_origin_ts) + + mid = predict_template_values( + template=np.array(state.template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + + lower = predict_template_values( + template=np.array(state.lower_template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + + upper = predict_template_values( + template=np.array(state.upper_template, dtype=float), + period=period, + phase_origin_ts=origin, + ts_list=ts_list, + ) + + return mid, lower, upper + + +def normalize_origin_near(origin: int, period: int, near_ts: int) -> int: + if period <= 1: + return origin + + origin = int(origin) + period = int(period) + near_ts = int(near_ts) + + while origin + period <= near_ts: + origin += period + + while origin > near_ts: + origin -= period + + return origin + + +def merge_template( + old_template: np.ndarray, + new_template: np.ndarray, + alpha: float, +) -> np.ndarray: + alpha = float(np.clip(alpha, 0.0, 1.0)) + + if len(old_template) != len(new_template): + old_template = resample_template(old_template, len(new_template)) + + merged = (1.0 - alpha) * old_template + alpha * new_template + + return merged.astype(float) + + +# ============================================================================= +# Phase Lock +# ============================================================================= + +def phase_lock_recent( + state: BaselineState, + ts_grid: np.ndarray, + ys_model: np.ndarray, +) -> Tuple[int, int, np.ndarray, float]: + base_period = int(state.period) + base_origin = int(state.phase_origin_ts) + base_template = np.array(state.template, dtype=float) + + if base_period <= 1 or len(base_template) <= 1: + ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist() + pred = predict_template_values(base_template, base_period, base_origin, ts_recent) + actual = ys_model[-len(ts_recent):].astype(float) + mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0 + return base_period, base_origin, pred, mae + + window_seconds = max( + PHASE_LOCK_MIN_WINDOW_SECONDS, + min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)), + ) + + cutoff = ts_grid[-1] - window_seconds + mask = ts_grid >= cutoff + + ts_recent_arr = ts_grid[mask].astype(int) + actual = ys_model[mask].astype(float) + + if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS): + ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int) + actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float) + + ts_recent = ts_recent_arr.tolist() + last_ts = int(ts_recent[-1]) + + p_min = max( + int(MIN_PERIOD_SECONDS), + int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))), + ) + p_max = min( + int(MAX_PERIOD_SECONDS), + int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))), + ) + + best_period = base_period + best_origin = normalize_origin_near(base_origin, base_period, last_ts) + best_template = resample_template(base_template, best_period) + + best_pred = predict_template_values( + template=best_template, + period=best_period, + phase_origin_ts=best_origin, + ts_list=ts_recent, + ) + + best_mae = float(np.mean(np.abs(actual - best_pred))) + + for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP): + template = resample_template(base_template, period) + center_origin = normalize_origin_near(base_origin, period, last_ts) + origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO))) + + for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP): + origin = center_origin + shift + + pred = predict_template_values( + template=template, + period=period, + phase_origin_ts=origin, + ts_list=ts_recent, + ) + + mae = float(np.mean(np.abs(actual - pred))) + penalty = abs(period - base_period) * 0.5 + score = mae + penalty + + best_score = best_mae + abs(best_period - base_period) * 0.5 + + if score < best_score: + best_period = period + best_origin = origin + best_pred = pred + best_mae = mae + + best_origin = normalize_origin_near(best_origin, best_period, last_ts) + + return int(best_period), int(best_origin), best_pred, float(best_mae) + + +# ============================================================================= +# 异常检测 +# ============================================================================= + +def max_consecutive_true(flags: np.ndarray) -> int: + max_count = 0 + current = 0 + + for flag in flags: + if bool(flag): + current += 1 + max_count = max(max_count, current) + else: + current = 0 + + return int(max_count) + + +def calc_point_bounds( + pred: np.ndarray, + abs_threshold: float, + rel_threshold: float, +) -> Tuple[np.ndarray, np.ndarray]: + threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold) + return pred - threshold, pred + threshold + + +def calc_final_bounds( + state: BaselineState, + pred: np.ndarray, + lower_raw: np.ndarray, + upper_raw: np.ndarray, + target: Dict, +) -> Tuple[np.ndarray, np.ndarray]: + strategy = target.get("strategy", "phase_point") + abs_threshold = float(target.get("abs_threshold", 1.0)) + rel_threshold = float(target.get("rel_threshold", 0.25)) + + if strategy == "phase_band": + pad_abs = float(target.get("band_pad_abs", abs_threshold)) + + # 对 vibration 类指标:边界更像正常波动容忍带,不是硬边界。 + dynamic_pad = np.maximum( + pad_abs, + np.abs(pred) * rel_threshold * 0.25, + ) + + lower = lower_raw - dynamic_pad + upper = upper_raw + dynamic_pad + + return lower, upper + + return calc_point_bounds(pred, abs_threshold, rel_threshold) + + +def detect_anomaly( + state: BaselineState, + ts_grid: np.ndarray, + ys_model: np.ndarray, + ys_actual: np.ndarray, + target: Dict, +) -> Tuple[bool, float, float, float, int, int, int, float]: + best_period, best_origin, pred_recent, _ = phase_lock_recent( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + ) + + recent_len = len(pred_recent) + + if recent_len <= 0: + return False, 0.0, 0.0, 0.0, best_period, best_origin, 0, 0.0 + + if target.get("strategy", "phase_point") == "phase_band": + actual = ys_actual[-recent_len:].astype(float) + else: + actual = ys_model[-recent_len:].astype(float) + + tmp_state = BaselineState( + period=best_period, + phase_origin_ts=best_origin, + template=state.template, + lower_template=state.lower_template, + upper_template=state.upper_template, + strategy=state.strategy, + status=state.status, + clean_seconds=state.clean_seconds, + last_update_ts=state.last_update_ts, + last_seen_ts=state.last_seen_ts, + y_min=state.y_min, + y_max=state.y_max, + ) + + recent_ts = ts_grid[-recent_len:].astype(int).tolist() + pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts) + + lower, upper = calc_final_bounds( + state=tmp_state, + pred=pred, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + above_upper = actual - upper + below_lower = lower - actual + + exceed = np.maximum(above_upper, below_lower) + exceed = np.maximum(exceed, 0.0) + + outside = exceed > 0 + + band_width = np.maximum(upper - lower, 1e-6) + exceed_ratio = exceed / band_width + + abs_err = np.abs(actual - pred) + + outside_ratio = float(np.mean(outside)) + mean_abs_err = float(np.mean(abs_err)) + mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6))) + + max_outside_seconds = max_consecutive_true(outside) + max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0 + + outside_ratio_threshold = float( + target.get("outside_ratio_threshold", OUTSIDE_RATIO_THRESHOLD) + ) + min_consecutive_outside = int( + target.get("min_consecutive_outside", MIN_CONSECUTIVE_OUTSIDE) + ) + severe_exceed_ratio = float( + target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO) + ) + + # 核心优化: + # 1. 偶发 1~3 个点越界不报警。 + # 2. 持续越界才报警。 + # 3. 高比例越界才报警。 + # 4. 严重越界才立即报警。 + is_anomaly = ( + outside_ratio >= outside_ratio_threshold + or max_outside_seconds >= min_consecutive_outside + or max_exceed_ratio >= severe_exceed_ratio + ) + + return ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + int(best_period), + int(best_origin), + int(max_outside_seconds), + float(max_exceed_ratio), + ) + + +# ============================================================================= +# 状态管理 +# ============================================================================= + +def create_initial_state( + ts_grid: np.ndarray, + ys_model: np.ndarray, + ys_actual: np.ndarray, + target: Dict, + now_sec: int, +) -> Optional[BaselineState]: + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_mid_grid=ys_model, + ys_band_grid=ys_actual, + target=target, + ) + + if baseline is None: + return None + + period, phase_origin_ts, template, lower_template, upper_template = baseline + + return BaselineState( + period=int(period), + phase_origin_ts=int(phase_origin_ts), + template=template.astype(float).tolist(), + lower_template=lower_template.astype(float).tolist(), + upper_template=upper_template.astype(float).tolist(), + strategy=str(target.get("strategy", "phase_point")), + status=BASELINE_STATUS_HEALTHY, + clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE), + last_update_ts=now_sec, + last_seen_ts=now_sec, + y_min=float(np.min(ys_actual)), + y_max=float(np.max(ys_actual)), + ) + + +def apply_phase_lock_to_state( + state: BaselineState, + best_period: int, + best_origin: int, +) -> None: + best_period = int(best_period) + + if best_period <= 1: + return + + if len(state.template) != best_period: + state.template = resample_template( + np.array(state.template, dtype=float), + best_period, + ).astype(float).tolist() + + if len(state.lower_template) != best_period: + state.lower_template = resample_template( + np.array(state.lower_template, dtype=float), + best_period, + ).astype(float).tolist() + + if len(state.upper_template) != best_period: + state.upper_template = resample_template( + np.array(state.upper_template, dtype=float), + best_period, + ).astype(float).tolist() + + state.period = best_period + state.phase_origin_ts = int(best_origin) + + +def maybe_update_state( + key: str, + ts_grid: np.ndarray, + ys_model: np.ndarray, + ys_actual: np.ndarray, + target: Dict, +) -> Tuple[Optional[BaselineState], bool, float, float, float, int, float]: + now_sec = int(time.time()) + state = BASELINE_STATES.get(key) + + if state is None: + state = create_initial_state( + ts_grid=ts_grid, + ys_model=ys_model, + ys_actual=ys_actual, + target=target, + now_sec=now_sec, + ) + + if state is None: + return None, False, 0.0, 0.0, 0.0, 0, 0.0 + + BASELINE_STATES[key] = state + + logger.info( + "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + ) + + return state, False, 0.0, 0.0, 0.0, 0, 0.0 + + elapsed = max(1, now_sec - int(state.last_seen_ts)) + elapsed = min(elapsed, POLL_INTERVAL * 2) + state.last_seen_ts = now_sec + + ( + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + best_period, + best_origin, + max_outside_seconds, + max_exceed_ratio, + ) = detect_anomaly( + state=state, + ts_grid=ts_grid, + ys_model=ys_model, + ys_actual=ys_actual, + target=target, + ) + + if is_anomaly: + state.status = BASELINE_STATUS_ANOMALY + state.clean_seconds = 0 + BASELINE_STATES[key] = state + + logger.warning( + "检测到异常,冻结模板 key=%s outside_ratio=%.2f max_outside=%ss max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f", + key, + outside_ratio, + max_outside_seconds, + max_exceed_ratio, + mean_abs_err, + mean_rel_err, + ) + + return ( + state, + True, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + old_period = int(state.period) + old_origin = int(state.phase_origin_ts) + + apply_phase_lock_to_state(state, best_period, best_origin) + + if old_period != state.period or old_origin != state.phase_origin_ts: + logger.info( + "phase-lock key=%s period %s -> %s origin %s -> %s", + key, + old_period, + state.period, + datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"), + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + ) + + if state.status == BASELINE_STATUS_ANOMALY: + state.status = BASELINE_STATUS_RECOVERING + state.clean_seconds = elapsed + BASELINE_STATES[key] = state + + logger.info( + "异常开始恢复 key=%s clean_seconds=%ss", + key, + state.clean_seconds, + ) + + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + if state.status == BASELINE_STATUS_RECOVERING: + state.clean_seconds += elapsed + else: + state.status = BASELINE_STATUS_HEALTHY + state.clean_seconds += elapsed + + min_clean_for_update = max( + RECOVERY_MIN_SECONDS, + int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE, + ) + + if state.clean_seconds < min_clean_for_update: + BASELINE_STATES[key] = state + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + tail_seconds = min( + int(state.clean_seconds), + int(state.period) * MAX_CYCLES_FOR_TEMPLATE, + ) + + baseline = build_current_baseline( + ts_grid=ts_grid, + ys_mid_grid=ys_model, + ys_band_grid=ys_actual, + target=target, + tail_seconds=tail_seconds, + ) + + if baseline is None: + BASELINE_STATES[key] = state + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline + + alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA + + state.template = merge_template( + np.array(state.template, dtype=float), + new_template, + alpha, + ).astype(float).tolist() + + state.lower_template = merge_template( + np.array(state.lower_template, dtype=float), + new_lower_template, + alpha, + ).astype(float).tolist() + + state.upper_template = merge_template( + np.array(state.upper_template, dtype=float), + new_upper_template, + alpha, + ).astype(float).tolist() + + state.period = int(new_period) + state.phase_origin_ts = int(new_origin) + state.status = BASELINE_STATUS_HEALTHY + state.last_update_ts = now_sec + + if tail_seconds > 0 and len(ys_actual) >= tail_seconds: + state.y_min = float(np.min(ys_actual[-tail_seconds:])) + state.y_max = float(np.max(ys_actual[-tail_seconds:])) + else: + state.y_min = float(np.min(ys_actual)) + state.y_max = float(np.max(ys_actual)) + + BASELINE_STATES[key] = state + + logger.info( + "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f", + key, + state.strategy, + state.period, + datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"), + state.clean_seconds, + alpha, + ) + + return ( + state, + False, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) + + +# ============================================================================= +# Prometheus 写入 +# ============================================================================= + +def prom_escape_label_value(value: str) -> str: + return ( + str(value) + .replace("\\", "\\\\") + .replace("\n", "\\n") + .replace('"', '\\"') + ) + + +def labels_to_str(labels: Dict[str, str]) -> str: + if not labels: + return "" + + parts = [] + + for k in sorted(labels.keys()): + parts.append(f'{k}="{prom_escape_label_value(labels[k])}"') + + return "{" + ",".join(parts) + "}" + + +def write_series( + metric_name: str, + labels: Dict[str, str], + ts_list: List[int], + values: List[float], +) -> bool: + if not ts_list or not values or len(ts_list) != len(values): + return False + + label_str = labels_to_str(labels) + lines = [] + + for t, y in zip(ts_list, values): + try: + ts_sec = int(round(float(t))) + val = float(y) + except Exception: + continue + + if not math.isfinite(ts_sec) or not math.isfinite(val): + continue + + lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}") + + if not lines: + return False + + payload = "\n".join(lines) + "\n" + + try: + resp = requests.post( + f"{VM_URL}/api/v1/import/prometheus", + data=payload.encode("utf-8"), + headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"}, + timeout=10, + ) + resp.raise_for_status() + return True + + except requests.RequestException as e: + logger.error("写入数据失败 metric=%s: %s", metric_name, e) + return False + + +def write_prediction_bundle( + pred_metric: str, + anomaly_metric: str, + labels: Dict[str, str], + ts_future: List[int], + pred_values: np.ndarray, + lower_values: np.ndarray, + upper_values: np.ndarray, + is_anomaly: bool, + outside_ratio: float, + mean_abs_err: float, + mean_rel_err: float, + max_outside_seconds: int, + max_exceed_ratio: float, + event_ts: int, +) -> bool: + ok1 = write_series( + metric_name=pred_metric, + labels=labels, + ts_list=ts_future, + values=pred_values.astype(float).tolist(), + ) + + ok2 = write_series( + metric_name=f"{pred_metric}_lower", + labels=labels, + ts_list=ts_future, + values=lower_values.astype(float).tolist(), + ) + + ok3 = write_series( + metric_name=f"{pred_metric}_upper", + labels=labels, + ts_list=ts_future, + values=upper_values.astype(float).tolist(), + ) + + anomaly_labels = dict(labels) + anomaly_labels["type"] = "prediction_deviation" + + ok4 = write_series( + metric_name=anomaly_metric, + labels=anomaly_labels, + ts_list=[event_ts], + values=[1.0 if is_anomaly else 0.0], + ) + + ok5 = write_series( + metric_name=f"{anomaly_metric}_outside_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[outside_ratio], + ) + + ok6 = write_series( + metric_name=f"{anomaly_metric}_mean_abs_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_abs_err], + ) + + ok7 = write_series( + metric_name=f"{anomaly_metric}_mean_rel_error", + labels=anomaly_labels, + ts_list=[event_ts], + values=[mean_rel_err], + ) + + ok8 = write_series( + metric_name=f"{anomaly_metric}_max_consecutive_outside", + labels=anomaly_labels, + ts_list=[event_ts], + values=[float(max_outside_seconds)], + ) + + ok9 = write_series( + metric_name=f"{anomaly_metric}_max_exceed_ratio", + labels=anomaly_labels, + ts_list=[event_ts], + values=[float(max_exceed_ratio)], + ) + + return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7 and ok8 and ok9 + + +# ============================================================================= +# 标签解析 +# ============================================================================= + +_LABEL_PATTERN = re.compile( + r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*' +) + + +def parse_labels_from_query(query: str) -> Dict[str, str]: + labels = {} + + if "{" not in query or "}" not in query: + return labels + + try: + label_part = query[query.index("{") + 1:query.rindex("}")] + except Exception: + return labels + + for match in _LABEL_PATTERN.finditer(label_part): + key = match.group(1) + value = match.group(2) + + value = ( + value + .replace('\\"', '"') + .replace("\\n", "\n") + .replace("\\\\", "\\") + ) + + labels[key] = value + + return labels + + +def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]: + result = {} + + for d in dicts: + if d: + result.update(d) + + return result + + +def series_key(metric_name: str, labels: Dict[str, str]) -> str: + return metric_name + labels_to_str(labels) + + +# ============================================================================= +# 状态持久化 +# ============================================================================= + +def load_state() -> None: + global BASELINE_STATES + + if not os.path.exists(STATE_FILE): + return + + try: + with open(STATE_FILE, "r", encoding="utf-8") as f: + raw = json.load(f) + + states = {} + + for key, value in raw.get("baseline_states", {}).items(): + required_fields = { + "period", + "phase_origin_ts", + "template", + "lower_template", + "upper_template", + "strategy", + "status", + "clean_seconds", + "last_update_ts", + "last_seen_ts", + "y_min", + "y_max", + } + + if not required_fields.issubset(set(value.keys())): + continue + + states[key] = BaselineState(**value) + + BASELINE_STATES = states + + logger.info( + "已加载预测状态文件 %s,状态数量=%d", + STATE_FILE, + len(BASELINE_STATES), + ) + + except Exception as e: + logger.warning("加载预测状态文件失败,将重新学习: %s", e) + + +def save_state() -> None: + try: + raw = { + "baseline_states": { + key: asdict(value) + for key, value in BASELINE_STATES.items() + } + } + + tmp_file = STATE_FILE + ".tmp" + + with open(tmp_file, "w", encoding="utf-8") as f: + json.dump(raw, f, ensure_ascii=False, indent=2) + + os.replace(tmp_file, STATE_FILE) + + except Exception as e: + logger.warning("保存预测状态文件失败: %s", e) + + +# ============================================================================= +# 时间轴 +# ============================================================================= + +def build_prediction_timestamps( + key: str, + last_real_ts: int, + now_sec: int, +) -> Optional[List[int]]: + data_lag = now_sec - last_real_ts + + if data_lag > MAX_DATA_LAG_SECONDS: + logger.warning( + "真实数据延迟过大,跳过预测 key=%s data_lag=%ss max=%ss", + key, + data_lag, + MAX_DATA_LAG_SECONDS, + ) + return None + + last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key) + + if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts): + logger.info( + "真实数据时间戳未推进,跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s", + key, + last_real_ts, + last_written_real_ts, + ) + return None + + base_ts = last_real_ts + + return [ + base_ts + i + 1 + for i in range(WRITE_HORIZON_SECONDS) + ] + + +# ============================================================================= +# 主流程 +# ============================================================================= + +def run_once() -> None: + now_str = datetime.now().strftime("%H:%M:%S") + + for target in PREDICT_TARGETS: + query = target["query"] + pred_metric = target["pred_metric"] + anomaly_metric = target["anomaly_metric"] + + ts, ys = fetch_history(query) + + if len(ys) < MIN_POINTS: + logger.info("[%s] %s 数据不足(%d 点),跳过", now_str, query, len(ys)) + continue + + ts_grid, ys_grid_raw = normalize_history(ts, ys) + + if len(ys_grid_raw) < MIN_POINTS: + logger.info("[%s] %s 清洗后数据不足(%d 点),跳过", now_str, query, len(ys_grid_raw)) + continue + + ys_grid_model = preprocess_values(ys_grid_raw, target) + + base_labels = parse_labels_from_query(query) + write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS) + + key = series_key(pred_metric, write_labels) + + ( + state, + is_anomaly, + outside_ratio, + mean_abs_err, + mean_rel_err, + max_outside_seconds, + max_exceed_ratio, + ) = maybe_update_state( + key=key, + ts_grid=ts_grid, + ys_model=ys_grid_model, + ys_actual=ys_grid_raw, + target=target, + ) + + if state is None: + logger.info("[%s] %s 暂无可用健康模板,等待学习", now_str, query) + continue + + now_sec = int(time.time()) + last_real_ts = int(ts_grid[-1]) + data_lag = now_sec - last_real_ts + + ts_future = build_prediction_timestamps( + key=key, + last_real_ts=last_real_ts, + now_sec=now_sec, + ) + + if not ts_future: + continue + + pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future) + + lower_values, upper_values = calc_final_bounds( + state=state, + pred=pred_values, + lower_raw=lower_raw, + upper_raw=upper_raw, + target=target, + ) + + ok = write_prediction_bundle( + pred_metric=pred_metric, + anomaly_metric=anomaly_metric, + labels=write_labels, + ts_future=ts_future, + pred_values=pred_values, + lower_values=lower_values, + upper_values=upper_values, + is_anomaly=is_anomaly, + outside_ratio=outside_ratio, + mean_abs_err=mean_abs_err, + mean_rel_err=mean_rel_err, + max_outside_seconds=max_outside_seconds, + max_exceed_ratio=max_exceed_ratio, + event_ts=last_real_ts, + ) + + if not ok: + logger.error("[%s] %s 写入预测数据失败", now_str, query) + continue + + LAST_REAL_TS_WRITTEN[key] = last_real_ts + + future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S") + future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S") + last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S") + origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S") + + logger.info( + "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点,预测区间 %s ~ %s", + now_str, + query, + pred_metric, + state.strategy, + state.status, + is_anomaly, + outside_ratio, + max_outside_seconds, + max_exceed_ratio, + state.period, + origin_str, + last_real_str, + data_lag, + len(ts_future), + future_start, + future_end, + ) + + save_state() + + +def main() -> None: + load_state() + + logger.info( + "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s", + VM_URL, + HISTORY_MINUTES, + HORIZON_SECONDS, + WRITE_HORIZON_SECONDS, + POLL_INTERVAL, + STATE_FILE, + EXTRA_PREDICT_LABELS["forecast"], + ) + + while True: + run_once() + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/docs/curl.md b/docs/curl.md new file mode 100644 index 0000000..ae3cbb1 --- /dev/null +++ b/docs/curl.md @@ -0,0 +1,6 @@ +# 更新设备测试点请求 + +```bash +# /api/v1/devices/{device_id}/sync-from-template +curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc数控系统/sync-from-template +``` diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py index e06b966..7a6c050 100644 --- a/protoforge/api/v1/router.py +++ b/protoforge/api/v1/router.py @@ -8,6 +8,7 @@ from fastapi.responses import PlainTextResponse from protoforge.models.device import DeviceConfig, DeviceInfo, PointValue +from protoforge.models.fault import FaultInjectRequest from protoforge.models.scenario import ScenarioConfig, ScenarioInfo from protoforge.models.template import TemplateDetail, TemplateInfo @@ -202,6 +203,38 @@ async def batch_stop_devices(device_ids: list[str]): return {"status": "ok", "stopped": stopped, "errors": errors} +@router.post("/devices/{device_id}/sync-from-template") +async def sync_device_from_template(device_id: str): + engine = _get_engine() + db = _get_database() + tm = _get_template_manager() + try: + instance = engine._devices.get(device_id) + if not instance: + raise HTTPException(status_code=404, detail=f"Device not found: {device_id}") + template_id = instance.config.template_id + if not template_id: + raise HTTPException(status_code=400, detail="Device has no associated template") + template = tm.get_template(template_id) + if not template: + raise HTTPException(status_code=404, detail=f"Template not found: {template_id}") + new_config = DeviceConfig( + id=device_id, + name=instance.config.name, + protocol=instance.config.protocol, + template_id=template_id, + points=template.points, + protocol_config=instance.config.protocol_config, + ) + result = await engine.update_device(device_id, new_config) + await db.save_device(new_config) + return {"status": "ok", "point_count": len(template.points), "device": result} + except HTTPException: + raise + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + @router.get("/devices/{device_id}", response_model=DeviceInfo) async def get_device(device_id: str): engine = _get_engine() @@ -1175,6 +1208,62 @@ async def setup_demo(): raise HTTPException(status_code=500, detail=get_friendly_error(str(e))) +@router.get("/faults/types") +async def list_fault_types(): + engine = _get_engine() + types = engine.list_fault_types() + return [t.model_dump() for t in types] + + +@router.get("/faults/types/{fault_type_id}") +async def get_fault_type(fault_type_id: str): + from protoforge.core.fault import fault_injector + ft = fault_injector.get_fault_type(fault_type_id) + if not ft: + raise HTTPException(status_code=404, detail=f"Fault type not found: {fault_type_id}") + return ft.model_dump() + + +@router.get("/faults/active") +async def list_active_faults(): + engine = _get_engine() + return [f.model_dump() for f in engine.list_active_faults()] + + +@router.post("/devices/{device_id}/fault") +async def inject_fault(device_id: str, request: FaultInjectRequest): + engine = _get_engine() + log_bus = _get_log_bus() + try: + info = engine.inject_fault(device_id, request) + log_bus.emit("", "system", device_id, "fault_injected", + f"Fault {request.fault_type_id} injected into {device_id}", + {"fault_type": request.fault_type_id, "duration": info.duration}) + return info.model_dump() + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@router.get("/devices/{device_id}/fault") +async def get_device_fault(device_id: str): + engine = _get_engine() + info = engine.get_fault(device_id) + if not info: + return {"status": "none"} + return info.model_dump() + + +@router.delete("/devices/{device_id}/fault") +async def clear_device_fault(device_id: str): + engine = _get_engine() + log_bus = _get_log_bus() + cleared = engine.clear_fault(device_id) + if cleared: + log_bus.emit("", "system", device_id, "fault_cleared", + f"Fault cleared on {device_id}") + return {"status": "ok", "cleared": cleared} + + @router.get("/setup/status") async def setup_status(): engine = _get_engine() diff --git a/protoforge/core/device.py b/protoforge/core/device.py index a344c7c..f04414a 100644 --- a/protoforge/core/device.py +++ b/protoforge/core/device.py @@ -1,5 +1,5 @@ import time -from typing import Any, Optional +from typing import Any, Callable, Optional from protoforge.core.generator import DataGenerator from protoforge.models.device import DeviceConfig, DeviceStatus, GeneratorType, PointConfig, PointValue @@ -13,6 +13,8 @@ def __init__(self, config: DeviceConfig, generator: DataGenerator): self._point_values: dict[str, Any] = {} self._point_configs: dict[str, PointConfig] = {} self._start_time: Optional[float] = None + # 可选的 tick 后处理钩子,由外部模块(如 FaultInjector)注册 + self._post_tick_hooks: list[Callable[["DeviceInstance"], None]] = [] for point in config.points: self._point_configs[point.name] = point @@ -21,6 +23,14 @@ def __init__(self, config: DeviceConfig, generator: DataGenerator): else: self._point_values[point.name] = self._generator.generate(point) + def register_post_tick_hook(self, hook: Callable[["DeviceInstance"], None]) -> None: + """注册 tick 后处理钩子,外部模块通过此接口介入,不修改 tick 逻辑本身""" + if hook not in self._post_tick_hooks: + self._post_tick_hooks.append(hook) + + def unregister_post_tick_hook(self, hook: Callable[["DeviceInstance"], None]) -> None: + self._post_tick_hooks = [h for h in self._post_tick_hooks if h != hook] + @property def id(self) -> str: return self.config.id @@ -51,6 +61,12 @@ def tick(self) -> None: for name, point in self._point_configs.items(): if point.generator_type != GeneratorType.FIXED: self._point_values[name] = self._generator.generate(point) + # 执行后处理钩子(故障注入等外部模块在此覆盖测点值) + for hook in self._post_tick_hooks: + try: + hook(self) + except Exception: + pass def read_point(self, point_name: str) -> Optional[PointValue]: if point_name not in self._point_values: @@ -59,9 +75,11 @@ def read_point(self, point_name: str) -> Optional[PointValue]: name=point_name, value=self._point_values[point_name], timestamp=time.time(), + quality="good" if self._status == DeviceStatus.ONLINE else "bad", ) def read_all_points(self) -> list[PointValue]: + quality = "good" if self._status == DeviceStatus.ONLINE else "bad" result = [] now = time.time() for name in self._point_values: @@ -70,6 +88,7 @@ def read_all_points(self) -> list[PointValue]: name=name, value=self._point_values[name], timestamp=now, + quality=quality, ) ) return result diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py index f289425..059f10e 100644 --- a/protoforge/core/engine.py +++ b/protoforge/core/engine.py @@ -4,9 +4,11 @@ from typing import Any, Optional from protoforge.core.device import DeviceInstance +from protoforge.core.fault import fault_injector from protoforge.core.generator import DataGenerator from protoforge.core.scenario import Scenario from protoforge.models.device import DeviceConfig, DeviceInfo, DeviceStatus, PointValue +from protoforge.models.fault import FaultInfo, FaultInjectRequest, FaultTypeDefinition from protoforge.models.scenario import ScenarioConfig, ScenarioInfo, ScenarioStatus from protoforge.protocols.base import ProtocolServer, ProtocolStatus @@ -56,6 +58,8 @@ async def stop_protocol(self, protocol_name: str) -> None: async def create_device(self, config: DeviceConfig) -> DeviceInfo: instance = DeviceInstance(config, self._generator) self._devices[config.id] = instance + # 注册故障注入钩子 + instance.register_post_tick_hook(fault_injector.apply) server = self._protocol_servers.get(config.protocol) if server and server.status == ProtocolStatus.RUNNING: @@ -70,6 +74,9 @@ async def remove_device(self, device_id: str) -> None: if not instance: raise ValueError(f"Device not found: {device_id}") + # 清除该设备的故障 + fault_injector.clear(device_id) + server = self._protocol_servers.get(instance.protocol) if server and server.status == ProtocolStatus.RUNNING: await server.remove_device(device_id) @@ -299,3 +306,28 @@ def _get_device_info(self, instance: DeviceInstance) -> DeviceInfo: status=instance.status, points=instance.read_all_points(), ) + + # ------------------------------------------------------------------ + # 故障管理 + # ------------------------------------------------------------------ + + def inject_fault(self, device_id: str, request: FaultInjectRequest) -> FaultInfo: + instance = self._devices.get(device_id) + if not instance: + raise ValueError(f"Device not found: {device_id}") + if instance.status != DeviceStatus.ONLINE: + raise ValueError(f"Device {device_id} is not online") + return fault_injector.inject(instance, request) + + def clear_fault(self, device_id: str) -> bool: + return fault_injector.clear(device_id) + + def get_fault(self, device_id: str) -> Optional[FaultInfo]: + return fault_injector.get_fault(device_id) + + def list_active_faults(self) -> list[FaultInfo]: + return fault_injector.list_active() + + @staticmethod + def list_fault_types() -> list[FaultTypeDefinition]: + return fault_injector.list_fault_types() diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py new file mode 100644 index 0000000..e72842d --- /dev/null +++ b/protoforge/core/fault.py @@ -0,0 +1,419 @@ +""" +故障注入模块 (FaultInjector) + +设计原则: +- 完全独立,不修改 device.py / engine.py 现有逻辑 +- 通过 apply(device) 在每次 tick 后覆盖测点值,device 本身无感知 +- 支持四种场景:异常注入、自动恢复、多指标联动、渐进式劣化 +""" +import logging +import random +import time +import uuid +from typing import Any, Optional + +from protoforge.models.fault import ( + ActiveFault, + FaultInfo, + FaultInjectRequest, + FaultMode, + FaultStatus, + FaultTypeDefinition, + PointFaultConfig, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# 内置故障类型定义(基于真实工业场景) +# --------------------------------------------------------------------------- + +BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [ + + # ------------------------------------------------------------------ + # 刀具磨损 — 最常见的机加工故障 + # 特征:切削阻力增大 → 主轴电流缓慢爬升,振动幅度增大,进给速率被系统压低 + # 模式:渐进式,持续数分钟,模拟刀具从轻度磨损到需要换刀的过程 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_wear", + name="刀具磨损", + description="刀具切削刃磨损,切削阻力增大,主轴电流升高,振动增大,进给速率下降", + category="mechanical", + default_duration=300.0, + tags=["刀具", "磨损", "渐进"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=2.2, noise_scale=0.8), + PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, + multiplier=3.0, noise_scale=0.3), + PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL, + multiplier=3.0, noise_scale=0.3), + PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, + multiplier=3.5, noise_scale=0.4), + PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL, + multiplier=0.45, noise_scale=20.0), + ], + ), + + # ------------------------------------------------------------------ + # 刀具崩刃 — 突发性刀具失效 + # 特征:瞬间冲击 → 振动突增,电流瞬间峰值,进给立即停止 + # 模式:瞬间注入,持续时间短(机床通常会触发报警停机) + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="tool_breakage", + name="刀具崩刃", + description="刀具突发性崩刃,振动剧烈突增,主轴电流峰值,进给停止", + category="mechanical", + default_duration=15.0, + tags=["刀具", "崩刃", "突发"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=4.5, noise_scale=2.0), + PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, + multiplier=8.0, noise_scale=1.5), + PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, + multiplier=8.0, noise_scale=1.5), + PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, + multiplier=10.0, noise_scale=2.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + ], + ), + + # ------------------------------------------------------------------ + # 主轴过热 — 长时间高负荷或冷却系统故障 + # 特征:主轴电流持续偏高,转速因热保护逐渐降低 + # 模式:渐进式,持续时间较长 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="spindle_overheat", + name="主轴过热", + description="主轴长时间高负荷运转或冷却不足,电流持续偏高,转速因热保护下降", + category="thermal", + default_duration=240.0, + tags=["主轴", "过热", "渐进"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=1.8, noise_scale=1.2), + PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL, + multiplier=0.6, noise_scale=50.0), + PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, + multiplier=1.5, noise_scale=0.2), + PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, + multiplier=1.5, noise_scale=0.2), + ], + ), + + # ------------------------------------------------------------------ + # 主轴轴承故障 — 轴承磨损或润滑不足 + # 特征:振动频率特征变化,整体振动幅度升高,电流略升 + # 模式:渐进式 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="spindle_bearing_fault", + name="主轴轴承故障", + description="主轴轴承磨损或润滑不足,振动幅度持续升高,伴随电流轻微上升", + category="mechanical", + default_duration=360.0, + tags=["主轴", "轴承", "渐进"], + point_faults=[ + PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, + multiplier=4.0, noise_scale=0.5), + PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL, + multiplier=4.0, noise_scale=0.5), + PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, + multiplier=5.0, noise_scale=0.8), + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=1.3, noise_scale=0.5), + ], + ), + + # ------------------------------------------------------------------ + # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死 + # 特征:进给速率瞬间降为 0,主轴电流急剧升高 + # 模式:瞬间注入 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="feed_stall", + name="进给堵转", + description="进给轴卡死,进给速率降为零,主轴电流急剧升高", + category="process", + default_duration=20.0, + tags=["进给", "堵转", "突发"], + point_faults=[ + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + target_value=0.0, noise_scale=0.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=3.8, noise_scale=1.5), + PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, + multiplier=5.0, noise_scale=1.0), + ], + ), + + # ------------------------------------------------------------------ + # 振动异常 — 工件装夹松动或共振 + # 特征:三轴振动突然大幅增加,其他指标基本正常 + # 模式:瞬间注入 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="vibration_spike", + name="振动异常", + description="工件装夹松动或切削共振,三轴振动突然大幅增加", + category="mechanical", + default_duration=60.0, + tags=["振动", "装夹", "突发"], + point_faults=[ + PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT, + multiplier=6.0, noise_scale=1.0), + PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT, + multiplier=6.0, noise_scale=1.0), + PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT, + multiplier=7.0, noise_scale=1.2), + ], + ), + + # ------------------------------------------------------------------ + # 切削液不足 — 冷却润滑失效 + # 特征:热量积累 → 振动缓慢升高,电流缓慢升高,进给略降 + # 模式:渐进式,速度较慢 + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="coolant_failure", + name="切削液不足", + description="切削液供给不足,冷却润滑失效,热量积累导致振动和电流缓慢升高", + category="process", + default_duration=480.0, + tags=["切削液", "冷却", "渐进"], + point_faults=[ + PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL, + multiplier=1.6, noise_scale=0.8), + PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL, + multiplier=2.0, noise_scale=0.3), + PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL, + multiplier=2.0, noise_scale=0.3), + PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL, + multiplier=2.5, noise_scale=0.4), + PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL, + multiplier=0.75, noise_scale=15.0), + ], + ), + + # ------------------------------------------------------------------ + # 电源波动 — 供电不稳定 + # 特征:主轴转速和进给速率出现随机波动,电流不稳定 + # 模式:瞬间注入(持续期间持续抖动) + # ------------------------------------------------------------------ + FaultTypeDefinition( + id="power_fluctuation", + name="电源波动", + description="供电电压不稳定,主轴转速和进给速率出现随机波动", + category="electrical", + default_duration=90.0, + tags=["电源", "波动", "突发"], + point_faults=[ + PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=300.0), + PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=5.0), + PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT, + multiplier=1.0, noise_scale=150.0), + ], + ), +] + +# 按 id 索引 +_FAULT_TYPE_MAP: dict[str, FaultTypeDefinition] = {ft.id: ft for ft in BUILTIN_FAULT_TYPES} + + +# --------------------------------------------------------------------------- +# FaultInjector +# --------------------------------------------------------------------------- + +class FaultInjector: + """ + 故障注入器,完全独立于 DeviceInstance。 + + 使用方式: + injector = FaultInjector() + injector.inject(device, request) # 注入故障 + injector.apply(device) # 每次 tick 后调用,覆盖测点值 + injector.clear(device_id) # 手动清除 + """ + + def __init__(self): + # device_id -> ActiveFault + self._active: dict[str, ActiveFault] = {} + + # ------------------------------------------------------------------ + # 公开接口 + # ------------------------------------------------------------------ + + def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo: + """向设备注入故障,返回故障信息""" + fault_type = _FAULT_TYPE_MAP.get(request.fault_type_id) + if not fault_type: + raise ValueError(f"Unknown fault type: {request.fault_type_id}") + + duration = request.duration if request.duration is not None else fault_type.default_duration + + # 记录注入时各测点的当前基线值 + baseline: dict[str, float] = {} + for pf in fault_type.point_faults: + val = device._point_values.get(pf.point) + if val is not None: + try: + baseline[pf.point] = float(val) + except (TypeError, ValueError): + baseline[pf.point] = 0.0 + + fault = ActiveFault( + fault_id=uuid.uuid4().hex[:12], + device_id=device.id, + fault_type_id=fault_type.id, + fault_type_name=fault_type.name, + intensity=max(0.0, min(1.0, request.intensity)), + duration=duration, + started_at=time.time(), + baseline_values=baseline, + ) + self._active[device.id] = fault + logger.info("Fault injected: device=%s type=%s duration=%.0fs", + device.id, fault_type.id, duration) + return self._to_info(fault, fault_type) + + def apply(self, device: Any) -> None: + """ + 在 device.tick() 之后调用,将故障效果覆盖到 _point_values。 + 故障超时后自动清除。 + """ + fault = self._active.get(device.id) + if not fault: + return + + now = time.time() + elapsed = now - fault.started_at + + if elapsed >= fault.duration: + self._expire(device, fault) + return + + fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id) + if not fault_type: + return + + # progress: 0.0(刚注入)→ 1.0(达到峰值) + progress = min(elapsed / fault.duration, 1.0) + + for pf in fault_type.point_faults: + if pf.point not in device._point_values: + continue + baseline = fault.baseline_values.get(pf.point, 0.0) + if baseline == 0.0: + # 基线为 0 时用当前值兜底,避免乘法无效 + try: + baseline = float(device._point_values[pf.point]) or 1.0 + except (TypeError, ValueError): + continue + + device._point_values[pf.point] = self._compute_value( + pf, baseline, progress, fault.intensity + ) + + def clear(self, device_id: str) -> bool: + """手动清除故障,不恢复基线(让生成器自然恢复)""" + if device_id not in self._active: + return False + fault = self._active.pop(device_id) + fault.status = FaultStatus.CLEARED + fault.cleared_at = time.time() + logger.info("Fault cleared manually: device=%s type=%s", device_id, fault.fault_type_id) + return True + + def get_fault(self, device_id: str) -> Optional[FaultInfo]: + fault = self._active.get(device_id) + if not fault: + return None + fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id) + return self._to_info(fault, fault_type) + + def list_active(self) -> list[FaultInfo]: + result = [] + for fault in self._active.values(): + fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id) + result.append(self._to_info(fault, fault_type)) + return result + + @staticmethod + def list_fault_types() -> list[FaultTypeDefinition]: + return BUILTIN_FAULT_TYPES + + @staticmethod + def get_fault_type(fault_type_id: str) -> Optional[FaultTypeDefinition]: + return _FAULT_TYPE_MAP.get(fault_type_id) + + # ------------------------------------------------------------------ + # 内部逻辑 + # ------------------------------------------------------------------ + + def _compute_value( + self, + pf: PointFaultConfig, + baseline: float, + progress: float, + intensity: float, + ) -> float: + """根据故障配置和当前进度计算覆盖值""" + if pf.mode == FaultMode.INSTANT: + # 瞬间模式:直接用目标值,不随时间变化 + if pf.target_value is not None: + target = pf.target_value + elif pf.multiplier is not None: + target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity) + else: + target = baseline + else: + # 渐进模式:随 progress 线性劣化 + if pf.target_value is not None: + target = baseline + (pf.target_value - baseline) * progress * intensity + elif pf.multiplier is not None: + target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity) + else: + target = baseline + + # 叠加随机噪声,模拟真实信号抖动 + if pf.noise_scale > 0: + target += random.gauss(0, pf.noise_scale * intensity) + + return round(max(0.0, target), 4) + + def _expire(self, device: Any, fault: ActiveFault) -> None: + """故障到期,从 active 中移除,让生成器自然恢复正常值""" + self._active.pop(device.id, None) + logger.info("Fault expired: device=%s type=%s", device.id, fault.fault_type_id) + + @staticmethod + def _to_info(fault: ActiveFault, fault_type: Optional[FaultTypeDefinition]) -> FaultInfo: + now = time.time() + elapsed = now - fault.started_at + progress = min(elapsed / fault.duration, 1.0) + affected = [pf.point for pf in fault_type.point_faults] if fault_type else [] + return FaultInfo( + fault_id=fault.fault_id, + device_id=fault.device_id, + fault_type_id=fault.fault_type_id, + fault_type_name=fault.fault_type_name, + status=fault.status, + intensity=fault.intensity, + duration=fault.duration, + elapsed=round(elapsed, 1), + progress=round(progress, 3), + affected_points=affected, + started_at=fault.started_at, + ) + + +# 全局单例 +fault_injector = FaultInjector() diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py index 9300372..9670525 100644 --- a/protoforge/core/metrics.py +++ b/protoforge/core/metrics.py @@ -46,6 +46,25 @@ def collect_from_engine(self, engine: Any) -> None: if p.status.value == "running") self.set_gauge("protoforge_protocols_running", protocols_running) + for device in engine._devices.values(): + labels_base = { + "device_id": device.config.id, + "device_name": device.config.name, + "protocol": device.config.protocol, + } + for point in device.read_all_points(): + labels = {**labels_base, "point": point.name} + point_config = next( + (p for p in device.config.points if p.name == point.name), None + ) + if point_config and point_config.unit: + labels["unit"] = point_config.unit + key = self._make_key(point.name, labels) + if point.quality != "good": + self._gauges.pop(key, None) + elif isinstance(point.value, (int, float)): + self.set_gauge(point.name, float(point.value), labels) + def collect_from_test_runner(self, runner: Any) -> None: self.set_gauge("protoforge_test_cases_total", len(runner._test_cases)) self.set_gauge("protoforge_test_suites_total", len(runner._test_suites)) diff --git a/protoforge/main.py b/protoforge/main.py index 894c5bc..73489ae 100644 --- a/protoforge/main.py +++ b/protoforge/main.py @@ -1,8 +1,12 @@ import logging +import os from contextlib import asynccontextmanager +from pathlib import Path from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse +from fastapi.staticfiles import StaticFiles from protoforge.api.v1.router import router from protoforge.core.engine import SimulationEngine @@ -139,7 +143,6 @@ async def lifespan(app: FastAPI): except Exception as e: logger.warning("Failed to start webhook manager: %s", e) - import os if os.environ.get("PROTOFORGE_DEMO_MODE"): try: from protoforge.core.demo import seed_demo_data @@ -182,14 +185,28 @@ def create_app() -> FastAPI: app.include_router(router) + # 按优先级查找静态文件目录:环境变量 > 容器路径 > 本地构建产物 + _repo_root = Path(__file__).parent.parent + _static_candidates = [ + Path(os.environ["STATIC_DIR"]) if "STATIC_DIR" in os.environ else None, + Path("/app/static"), + _repo_root / "web" / "dist", + ] + static_dir = next((p for p in _static_candidates if p and p.is_dir()), None) + @app.get("/") async def root(): + if static_dir and (static_dir / "index.html").exists(): + return FileResponse(static_dir / "index.html") return { "name": "ProtoForge", "version": "0.1.0", "description": "物联网协议仿真与测试平台", } + if static_dir and (static_dir / "assets").is_dir(): + app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets") + @app.get("/health") async def health(): return {"status": "ok"} diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py new file mode 100644 index 0000000..cc038e0 --- /dev/null +++ b/protoforge/models/fault.py @@ -0,0 +1,77 @@ +from enum import Enum +from typing import Any, Optional + +from pydantic import BaseModel, Field + + +class FaultMode(str, Enum): + """故障注入模式""" + INSTANT = "instant" # 瞬间跳变到异常值,持续 duration 后恢复 + GRADUAL = "gradual" # 渐进式劣化,随时间线性恶化,到 duration 时达到峰值后恢复 + + +class FaultStatus(str, Enum): + ACTIVE = "active" + RECOVERING = "recovering" + CLEARED = "cleared" + + +class PointFaultConfig(BaseModel): + """单个测点的故障行为定义""" + point: str + mode: FaultMode = FaultMode.INSTANT + + # INSTANT 模式:直接设置为 target_value(若为 None 则用 multiplier 乘以当前值) + target_value: Optional[float] = None + multiplier: Optional[float] = None # 异常值 = 当前正常值 × multiplier + + # GRADUAL 模式:从当前值线性劣化到 target_value 或 multiplier 倍 + # 劣化程度 = progress(0~1) × (target - baseline) + noise_scale: float = 0.0 # 叠加随机噪声幅度,模拟真实抖动 + + +class FaultTypeDefinition(BaseModel): + """故障类型定义,描述一种真实故障场景""" + id: str + name: str + description: str + category: str # 故障分类:mechanical / electrical / thermal / process + default_duration: float = 120.0 # 默认持续时间(秒) + point_faults: list[PointFaultConfig] = Field(default_factory=list) + tags: list[str] = Field(default_factory=list) + + +class FaultInjectRequest(BaseModel): + """故障注入请求""" + fault_type_id: str + duration: Optional[float] = None # 覆盖默认持续时间,None 表示用类型默认值 + intensity: float = 1.0 # 故障强度系数 0~1,影响劣化幅度 + + +class ActiveFault(BaseModel): + """当前激活的故障实例""" + fault_id: str # 唯一实例 ID + device_id: str + fault_type_id: str + fault_type_name: str + status: FaultStatus = FaultStatus.ACTIVE + intensity: float = 1.0 + duration: float = 120.0 + started_at: float = 0.0 + cleared_at: Optional[float] = None + baseline_values: dict[str, float] = Field(default_factory=dict) # 注入时的正常基线值 + + +class FaultInfo(BaseModel): + """故障状态信息(API 响应用)""" + fault_id: str + device_id: str + fault_type_id: str + fault_type_name: str + status: FaultStatus + intensity: float + duration: float + elapsed: float + progress: float # 0~1,故障进度 + affected_points: list[str] + started_at: float diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json index 3231d9b..39437f3 100644 --- a/protoforge/templates/fanuc/fanuc_0if_cnc.json +++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json @@ -64,9 +64,10 @@ "unit": "RPM", "description": "主轴转速", "access": "r", - "generator_type": "random", + "generator_type": "sawtooth", "min_value": 1000, - "max_value": 8000 + "max_value": 8000, + "generator_config": {"period": 120} }, { "name": "feed_rate", @@ -75,9 +76,64 @@ "unit": "mm/min", "description": "进给速度", "access": "r", - "generator_type": "random", + "generator_type": "sine", "min_value": 100, - "max_value": 5000 + "max_value": 5000, + "generator_config": {"period": 60, "phase": 0.0} + }, + { + "name": "spindle_current", + "address": "spindle_current", + "data_type": "float32", + "unit": "A", + "description": "主轴电流", + "access": "r", + "generator_type": "sine", + "min_value": 8.0, + "max_value": 32.0, + "generator_config": {"period": 120, "phase": 0.5} + }, + { + "name": "vibration_x", + "address": "vibration_x", + "data_type": "float32", + "unit": "m/s²", + "description": "X轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.5, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 90); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_y", + "address": "vibration_y", + "data_type": "float32", + "unit": "m/s²", + "description": "Y轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.5, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 75 + 1.0); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_z", + "address": "vibration_z", + "data_type": "float32", + "unit": "m/s²", + "description": "Z轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 3.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 60 + 2.1); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" + } }, { "name": "alarm_status", diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json index 89f6ef1..808fbc6 100644 --- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json +++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json @@ -66,9 +66,10 @@ "unit": "RPM", "description": "主轴转速", "access": "r", - "generator_type": "random", + "generator_type": "sawtooth", "min_value": 2000, - "max_value": 15000 + "max_value": 15000, + "generator_config": {"period": 150} }, { "name": "feed_override", @@ -80,6 +81,60 @@ "generator_type": "fixed", "fixed_value": 100 }, + { + "name": "spindle_current", + "address": "spindle_current", + "data_type": "float32", + "unit": "A", + "description": "主轴电流", + "access": "r", + "generator_type": "sine", + "min_value": 10.0, + "max_value": 45.0, + "generator_config": {"period": 120, "phase": 1.2} + }, + { + "name": "vibration_x", + "address": "vibration_x", + "data_type": "float32", + "unit": "m/s²", + "description": "X轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 3.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.6 + 0.4 * math.sin(2 * math.pi * elapsed / 80 + 0.5); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_y", + "address": "vibration_y", + "data_type": "float32", + "unit": "m/s²", + "description": "Y轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 3.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.6 + 0.4 * math.sin(2 * math.pi * elapsed / 65 + 1.5); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_z", + "address": "vibration_z", + "data_type": "float32", + "unit": "m/s²", + "description": "Z轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 4.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.8 + 0.5 * math.sin(2 * math.pi * elapsed / 55 + 2.5); noise = random.uniform(-0.25, 0.25); result = round(max(0.1, base + noise), 3)" + } + }, { "name": "tool_number", "address": "tool_number", diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json index 843151e..43622cf 100644 --- a/protoforge/templates/modbus/fanuc_cnc.json +++ b/protoforge/templates/modbus/fanuc_cnc.json @@ -13,9 +13,10 @@ "unit": "RPM", "description": "主轴实际转速", "access": "r", - "generator_type": "random", + "generator_type": "sawtooth", "min_value": 0, - "max_value": 12000 + "max_value": 12000, + "generator_config": {"period": 180} }, { "name": "feed_rate", @@ -24,9 +25,64 @@ "unit": "mm/min", "description": "实际进给速度", "access": "r", - "generator_type": "random", - "min_value": 0.0, - "max_value": 10000.0 + "generator_type": "sine", + "min_value": 200.0, + "max_value": 3000.0, + "generator_config": {"period": 90, "phase": 1.0} + }, + { + "name": "spindle_current", + "address": "2", + "data_type": "float32", + "unit": "A", + "description": "主轴电流", + "access": "r", + "generator_type": "sine", + "min_value": 8.0, + "max_value": 35.0, + "generator_config": {"period": 120, "phase": 2.0} + }, + { + "name": "vibration_x", + "address": "23", + "data_type": "float32", + "unit": "m/s²", + "description": "X轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.5, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 85 + 0.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_y", + "address": "25", + "data_type": "float32", + "unit": "m/s²", + "description": "Y轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.5, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 70 + 1.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_z", + "address": "27", + "data_type": "float32", + "unit": "m/s²", + "description": "Z轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 3.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)" + } }, { "name": "spindle_override", @@ -147,9 +203,12 @@ "data_type": "uint16", "description": "加工计数", "access": "r", - "generator_type": "random", + "generator_type": "script", "min_value": 0, - "max_value": 99999 + "max_value": 99999, + "generator_config": { + "script": "elapsed = context['elapsed']; result = min(int(elapsed / 45), 99999)" + } }, { "name": "cycle_time", diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json index eaeef74..fd08bdd 100644 --- a/protoforge/templates/mtconnect/mill_machine.json +++ b/protoforge/templates/mtconnect/mill_machine.json @@ -55,9 +55,10 @@ "unit": "RPM", "description": "主轴转速", "access": "r", - "generator_type": "random", + "generator_type": "sawtooth", "min_value": 3000, - "max_value": 12000 + "max_value": 12000, + "generator_config": {"period": 135} }, { "name": "feed_rate", @@ -66,9 +67,64 @@ "unit": "mm/min", "description": "进给速度", "access": "r", - "generator_type": "random", + "generator_type": "sine", "min_value": 200, - "max_value": 3000 + "max_value": 3000, + "generator_config": {"period": 75, "phase": 2.1} + }, + { + "name": "spindle_current", + "address": "SpindleCurrent", + "data_type": "float32", + "unit": "A", + "description": "主轴电流", + "access": "r", + "generator_type": "sine", + "min_value": 6.0, + "max_value": 28.0, + "generator_config": {"period": 120, "phase": 3.1} + }, + { + "name": "vibration_x", + "address": "VibrationX", + "data_type": "float32", + "unit": "m/s²", + "description": "X轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.4 + 0.25 * math.sin(2 * math.pi * elapsed / 95 + 0.3); noise = random.uniform(-0.12, 0.12); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_y", + "address": "VibrationY", + "data_type": "float32", + "unit": "m/s²", + "description": "Y轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.0, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.4 + 0.25 * math.sin(2 * math.pi * elapsed / 78 + 1.3); noise = random.uniform(-0.12, 0.12); result = round(max(0.1, base + noise), 3)" + } + }, + { + "name": "vibration_z", + "address": "VibrationZ", + "data_type": "float32", + "unit": "m/s²", + "description": "Z轴振动加速度", + "access": "r", + "generator_type": "script", + "min_value": 0.1, + "max_value": 2.5, + "generator_config": { + "script": "elapsed = context['elapsed']; base = 0.6 + 0.35 * math.sin(2 * math.pi * elapsed / 62 + 2.3); noise = random.uniform(-0.18, 0.18); result = round(max(0.1, base + noise), 3)" + } }, { "name": "part_count", @@ -77,9 +133,12 @@ "unit": "件", "description": "加工件数", "access": "r", - "generator_type": "sawtooth", + "generator_type": "script", "min_value": 0, - "max_value": 999 + "max_value": 999, + "generator_config": { + "script": "elapsed = context['elapsed']; result = min(int(elapsed / 60), 999)" + } } ], "protocol_config": { diff --git a/web/src/App.vue b/web/src/App.vue index ae6d315..22eabe9 100644 --- a/web/src/App.vue +++ b/web/src/App.vue @@ -1,4 +1,7 @@