Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces several enhancements and bug fixes across the LMeterX platform. Key changes include the addition of Claude Code Skills for LLM, HTTP, and Web load testing, along with corresponding backend API whitelist updates and frontend localization. Additionally, the load testing engine now supports tracking success/failure outcome splits, bounded memory guards for captured subprocess outputs, and stop_reason error validations.
Critical feedback has been provided regarding three issues in the new implementation: first, the frontend metricsDetailRows logic only extracts the first endpoint name, which will hide other endpoints in multi-endpoint or web load tests; second, terminate_locust_processes_by_task_id double-counts terminated processes, potentially masking termination failures; and third, _safe_repr_truncate still materializes the full string representation of large dicts/lists in memory before slicing, failing to prevent the OOM risks described in its documentation.
| const metricsDetailRows = useMemo(() => { | ||
| const endpointBaseName = | ||
| results.find( | ||
| r => | ||
| r.metric_type !== 'total' && | ||
| !r.metric_type?.endsWith('::success') && | ||
| !r.metric_type?.endsWith('::failure') | ||
| )?.metric_type || | ||
| results | ||
| .find( | ||
| r => | ||
| r.metric_type?.endsWith('::success') && | ||
| r.metric_type !== 'total::success' | ||
| ) | ||
| ?.metric_type.replace('::success', '') || | ||
| results | ||
| .find( | ||
| r => | ||
| r.metric_type?.endsWith('::failure') && | ||
| r.metric_type !== 'total::failure' | ||
| ) | ||
| ?.metric_type.replace('::failure', ''); | ||
|
|
||
| const orderedMetricTypes = [ | ||
| endpointBaseName ? `${endpointBaseName}::success` : null, | ||
| endpointBaseName ? `${endpointBaseName}::failure` : null, | ||
| 'total', | ||
| ].filter(Boolean); | ||
|
|
||
| return orderedMetricTypes | ||
| .map(metricType => results.find(r => r.metric_type === metricType)) | ||
| .filter(Boolean); | ||
| }, [results]); |
There was a problem hiding this comment.
在 HttpResults.tsx 中,metricsDetailRows 的实现仅提取了第一个端点的名称(endpointBaseName),这会导致在多端点压测(例如使用包含多个不同 URL 的数据集或进行 Web 网页压测时)中,除第一个端点外的其他所有端点数据在表格中被完全过滤和隐藏。
建议支持提取所有唯一的端点基础名称,并按端点分组展示其 ::success 和 ::failure 行,最后追加 total 行。
const metricsDetailRows = useMemo(() => {
const baseNames = Array.from(
new Set(
results
.map(r => {
const mt = r.metric_type || '';
if (mt === 'total' || mt === 'total::success' || mt === 'total::failure') {
return null;
}
return mt.replace('::success', '').replace('::failure', '');
})
.filter(Boolean)
)
);
const orderedMetricTypes: string[] = [];
baseNames.forEach(base => {
orderedMetricTypes.push(`${base}::success`, `${base}::failure`);
});
orderedMetricTypes.push('total');
return orderedMetricTypes
.map(metricType => results.find(r => r.metric_type === metricType))
.filter(Boolean);
}, [results]);
| def terminate_locust_processes_by_task_id( | ||
| self, task_id: str, timeout: float = 5.0 | ||
| ) -> int: | ||
| """Terminate any Locust processes associated with a task id.""" | ||
| processes = self.find_locust_processes_by_task_id(task_id) | ||
| terminated_count = 0 | ||
|
|
||
| for process in processes: | ||
| try: | ||
| if not process.is_running(): | ||
| terminated_count += 1 | ||
| continue | ||
| process.terminate() | ||
| except (psutil.NoSuchProcess, psutil.AccessDenied): | ||
| terminated_count += 1 | ||
|
|
||
| for process in processes: | ||
| try: | ||
| if not process.is_running(): | ||
| terminated_count += 1 | ||
| continue | ||
| try: | ||
| process.wait(timeout=timeout) | ||
| except psutil.TimeoutExpired: | ||
| process.kill() | ||
| process.wait(timeout=timeout) | ||
| terminated_count += 1 | ||
| except (psutil.NoSuchProcess, psutil.AccessDenied): | ||
| terminated_count += 1 | ||
| except psutil.TimeoutExpired: | ||
| logger.warning( | ||
| f"Timed out while force killing Locust process {process.pid} " | ||
| f"for task {task_id}" | ||
| ) | ||
|
|
||
| return min(terminated_count, len(processes)) |
There was a problem hiding this comment.
在 terminate_locust_processes_by_task_id 中,对于已经停止运行的进程,terminated_count 会在第一个循环(第 271 行)和第二个循环(第 280 行)中被重复累加两次。
这会导致 terminated_count 的值被异常放大,从而在 min(terminated_count, len(processes)) 返回时,可能会掩盖其他进程终止失败(例如发生 TimeoutExpired)的情况,导致方法错误地返回所有进程都已成功终止。
建议在第一个循环中不要累加 terminated_count,仅在第二个循环中进行统计。
def terminate_locust_processes_by_task_id(
self, task_id: str, timeout: float = 5.0
) -> int:
"""Terminate any Locust processes associated with a task id."""
processes = self.find_locust_processes_by_task_id(task_id)
terminated_count = 0
for process in processes:
try:
if process.is_running():
process.terminate()
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
for process in processes:
try:
if not process.is_running():
terminated_count += 1
continue
try:
process.wait(timeout=timeout)
except psutil.TimeoutExpired:
process.kill()
process.wait(timeout=timeout)
terminated_count += 1
except (psutil.NoSuchProcess, psutil.AccessDenied):
terminated_count += 1
except psutil.TimeoutExpired:
logger.warning(
f"Timed out while force killing Locust process {process.pid} "
f"for task {task_id}"
)
return terminated_count| def _safe_repr_truncate(obj: Any, limit: int = _PAYLOAD_LOG_LIMIT) -> str: | ||
| """Return a truncated repr without allocating the full string first. | ||
|
|
||
| For large objects (e.g. dicts containing base64 images), calling repr() | ||
| then slicing still creates the entire multi-MB string in memory. Instead | ||
| we convert to str with a hard size cap using iterative key inspection for | ||
| dicts and a direct str() fallback for other types. | ||
| """ | ||
| try: | ||
| if isinstance(obj, dict): | ||
| preview = str(obj)[:limit] | ||
| elif isinstance(obj, (list, tuple)): | ||
| preview = str(obj)[:limit] | ||
| else: | ||
| preview = repr(obj)[:limit] | ||
| except Exception: | ||
| preview = "<unrepresentable>" | ||
| if len(preview) >= limit: | ||
| return preview[:limit] + "... (truncated)" | ||
| return preview |
There was a problem hiding this comment.
在 _safe_repr_truncate 中,文档注释提到为了避免大对象(如包含 base64 图片的字典)在内存中分配完整的巨型字符串,将使用迭代键检查(iterative key inspection)来限制大小。
然而,目前的实际代码实现仍然是直接调用 str(obj)[:limit],这依然会在内存中先完整构建并分配整个字典的字符串表示,然后才进行切片,这并没有达到避免大内存分配的优化效果。
建议真正实现迭代或限制深度的字典/列表序列化,以避免大内存分配导致的 OOM 风险。
def _safe_repr_truncate(obj: Any, limit: int = _PAYLOAD_LOG_LIMIT) -> str:
"""Return a truncated repr without allocating the full string first.
For large objects (e.g. dicts containing base64 images), calling repr()
then slicing still creates the entire multi-MB string in memory. Instead
we convert to str with a hard size cap using iterative key inspection for
dicts and a direct str() fallback for other types.
"""
try:
if isinstance(obj, dict):
parts = []
current_len = 0
for k, v in obj.items():
k_repr = repr(k)
v_repr = repr(v)
if len(v_repr) > limit:
v_repr = v_repr[:limit] + "... (truncated)"
item_str = f"{k_repr}: {v_repr}"
parts.append(item_str)
current_len += len(item_str) + 2
if current_len > limit:
break
preview = "{" + ", ".join(parts) + "}"
elif isinstance(obj, (list, tuple)):
parts = []
current_len = 0
for item in obj:
item_repr = repr(item)
if len(item_repr) > limit:
item_repr = item_repr[:limit] + "... (truncated)"
parts.append(item_repr)
current_len += len(item_repr) + 2
if current_len > limit:
break
preview = ("[" if isinstance(obj, list) else "(") + ", ".join(parts) + ("]" if isinstance(obj, list) else ")")
else:
preview = repr(obj)[:limit]
except Exception:
preview = "<unrepresentable>"
if len(preview) >= limit:
return preview[:limit] + "... (truncated)"
return preview
No description provided.