From 2cf24061e473cdaaaad3a13777178ad97c530f80 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Wed, 6 Aug 2025 23:43:48 -0700 Subject: [PATCH 01/31] =?UTF-8?q?don't=20just=20log=20and=20continue=20any?= =?UTF-8?q?more=E2=80=94user=20should=20know?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- eval_protocol/common_utils.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/eval_protocol/common_utils.py b/eval_protocol/common_utils.py index e6b14018..d2030344 100644 --- a/eval_protocol/common_utils.py +++ b/eval_protocol/common_utils.py @@ -1,9 +1,6 @@ import json -import logging from typing import Any, Dict, List -logger = logging.getLogger(__name__) - def load_jsonl(file_path: str) -> List[Dict[str, Any]]: """ @@ -14,23 +11,10 @@ def load_jsonl(file_path: str) -> List[Dict[str, Any]]: Returns: A list of dictionaries, where each dictionary is a parsed JSON object from a line. - Returns an empty list if the file is not found or if errors occur during parsing, - with errors logged. + Returns an empty list if the file is not found or if errors occur during parsing. """ data: List[Dict[str, Any]] = [] - try: - with open(file_path, "r", encoding="utf-8") as f: - for i, line in enumerate(f): - try: - data.append(json.loads(line.strip())) - except json.JSONDecodeError as e: - logger.error(f"Error decoding JSON on line {i+1} in {file_path}: {e} - Line: '{line.strip()}'") - # Optionally, re-raise, or return partial data, or handle as per desired strictness - # For now, we'll log and continue, returning successfully parsed lines. 
- except FileNotFoundError: - logger.error(f"File not found: {file_path}") - return [] - except Exception as e: - logger.error(f"An unexpected error occurred while reading {file_path}: {e}") - return [] + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + data.append(json.loads(line.strip())) return data From a6c9de279ed5cf75a70629656336e74af9026820 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 00:56:47 -0700 Subject: [PATCH 02/31] Add PEP 440 versioning support - Introduced a new module `get_pep440_version.py` to generate PEP 440 compliant version strings based on git information, caching results to minimize repeated calls. - Updated `EvalMetadata` in `models.py` to use the new versioning function for the version field, replacing the previous method of using commit hashes. - Removed dependency on `versioneer` in tests, streamlining version retrieval for evaluations. --- eval_protocol/get_pep440_version.py | 133 ++++++++++++++++++++++++ eval_protocol/models.py | 8 +- eval_protocol/pytest/evaluation_test.py | 3 - 3 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 eval_protocol/get_pep440_version.py diff --git a/eval_protocol/get_pep440_version.py b/eval_protocol/get_pep440_version.py new file mode 100644 index 00000000..8ebb33c9 --- /dev/null +++ b/eval_protocol/get_pep440_version.py @@ -0,0 +1,133 @@ +# Cache for PEP 440 version string +import subprocess + +_version_cache = {"version": None, "base_version": None} + + +def get_pep440_version(base_version=None): + """ + Generate a PEP 440 compliant version string based on git information. + + This function is inspired by versioneer but doesn't require the full versioneer + setup, making it easier for downstream users to adopt without additional dependencies. + + The result is cached statically to avoid repeated git calls. + + Args: + base_version: The base version string (e.g., "1.0.0"). If None, will try to + find the most recent version tag in git. 
+ + Returns: + A PEP 440 compliant version string that includes: + - Development release number (devN) based on commit count since base_version + - Local version identifier with git commit hash + - Dirty indicator if there are uncommitted changes + + Examples: + >>> get_pep440_version("1.0.0") + "1.0.0.dev42+g1234567" # 42 commits since 1.0.0, commit hash 1234567 + >>> get_pep440_version("1.0.0") # with uncommitted changes + "1.0.0.dev42+g1234567.dirty" # indicates dirty working directory + >>> get_pep440_version("1.0.0") # no git available + "1.0.0+unknown" # indicates git info not available + """ + # Check if we have a cached version for this base_version + if _version_cache["version"] is not None and _version_cache["base_version"] == base_version: + return _version_cache["version"] + try: + # Check if we're in a git repository + subprocess.run( + ["git", "rev-parse", "--git-dir"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + + # If base_version is None, try to find the most recent version tag + if base_version is None: + try: + base_version = subprocess.check_output( + ["git", "describe", "--tags", "--abbrev=0"], universal_newlines=True, stderr=subprocess.DEVNULL + ).strip() + except subprocess.CalledProcessError: + # No tags found, we'll handle this case specially + base_version = None + + # Get commit count since base_version + if base_version is None: + # No base version (no tags), just count all commits + count = subprocess.check_output( + ["git", "rev-list", "--count", "HEAD"], universal_newlines=True, stderr=subprocess.DEVNULL + ).strip() + base_version = "0.0.0" # Use this for the final version string + else: + try: + count = subprocess.check_output( + ["git", "rev-list", "--count", f"{base_version}..HEAD"], + universal_newlines=True, + stderr=subprocess.DEVNULL, + ).strip() + # If no commits found, try counting from the beginning + if count == "0" or not count: + count = subprocess.check_output( + 
["git", "rev-list", "--count", "HEAD"], universal_newlines=True, stderr=subprocess.DEVNULL + ).strip() + except subprocess.CalledProcessError: + # If base_version tag doesn't exist, count all commits + count = subprocess.check_output( + ["git", "rev-list", "--count", "HEAD"], universal_newlines=True, stderr=subprocess.DEVNULL + ).strip() + + # Get short commit hash + commit_hash = subprocess.check_output( + ["git", "rev-parse", "--short", "HEAD"], universal_newlines=True, stderr=subprocess.DEVNULL + ).strip() + + # Check for uncommitted changes (dirty working directory) + try: + subprocess.run( + ["git", "diff-index", "--quiet", "HEAD", "--"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + dirty_suffix = "" + except subprocess.CalledProcessError: + dirty_suffix = ".dirty" + + # Ensure count is a valid integer + try: + dev_count = int(count) + except (ValueError, TypeError): + dev_count = 0 + + # Build PEP 440 compliant version string + # Format: .dev+g[.dirty] + version_parts = [base_version] + + if dev_count > 0: + version_parts.append(f".dev{dev_count}") + + version_parts.append(f"+g{commit_hash}") + + if dirty_suffix: + version_parts.append(dirty_suffix) + + result = "".join(version_parts) + + # Cache the result + _version_cache["version"] = result + _version_cache["base_version"] = base_version + + return result + + except (subprocess.CalledProcessError, FileNotFoundError, OSError): + # Git is not available or not a git repository + result = f"{base_version}+unknown" + + # Cache the result + _version_cache["version"] = result + _version_cache["base_version"] = base_version + + return result diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 74e030cc..8be3a40a 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -8,6 +8,7 @@ ) from pydantic import BaseModel, ConfigDict, Field +from eval_protocol.get_pep440_version import get_pep440_version from eval_protocol.human_id import generate_id @@ -206,9 +207,12 
@@ class EvalMetadata(BaseModel): name: str = Field(..., description="Name of the evaluation") description: Optional[str] = Field(None, description="Description of the evaluation") version: str = Field( - ..., description="Version of the evaluation. By default, we will populate this with the current commit hash." + default_factory=get_pep440_version, + description="Version of the evaluation. Should be populated with a PEP 440 version string.", + ) + status: Literal["running", "finished", "error", "stopped"] = Field( + "running", description="Status of the evaluation" ) - status: Literal["running", "finished", "error"] = Field("running", description="Status of the evaluation") num_runs: int = Field(..., description="Number of times the evaluation was repeated") aggregation_method: str = Field(..., description="Method used to aggregate scores across runs") threshold_of_success: Optional[float] = Field(None, description="Threshold score for test success") diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 17fb9d4d..35a069d7 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -3,8 +3,6 @@ import pytest -# Import versioneer for getting version information -import versioneer from eval_protocol.dataset_logger import default_logger from eval_protocol.models import CompletionParams, EvalMetadata, EvaluationRow, InputMetadata from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter @@ -216,7 +214,6 @@ def wrapper_body(**kwargs): eval_metadata = EvalMetadata( name=test_func.__name__, description=test_func.__doc__, - version=versioneer.get_version(), status="running", num_runs=num_runs, aggregation_method=aggregation_method, From c22dcc90ff3a0808c639b8759cd0844a4f63b85f Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 00:57:02 -0700 Subject: [PATCH 03/31] Enhance default_single_turn_rollout_processor to log messages - Added logging 
functionality using `default_logger` to track processed messages in `default_single_turn_rollout_processor`. - Updated the return structure to include the modified row with messages instead of creating a new `EvaluationRow` instance. - Ensured dataset is returned as a list after processing all rows concurrently. --- .../pytest/default_single_turn_rollout_process.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eval_protocol/pytest/default_single_turn_rollout_process.py b/eval_protocol/pytest/default_single_turn_rollout_process.py index 1d6d9f7c..654ffc4b 100644 --- a/eval_protocol/pytest/default_single_turn_rollout_process.py +++ b/eval_protocol/pytest/default_single_turn_rollout_process.py @@ -4,6 +4,7 @@ from openai import AsyncOpenAI from eval_protocol.auth import get_fireworks_api_base, get_fireworks_api_key +from eval_protocol.dataset_logger import default_logger from eval_protocol.models import EvaluationRow, Message from eval_protocol.pytest.types import RolloutProcessorConfig @@ -38,13 +39,12 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: ) ] - return EvaluationRow( - messages=messages, - **row.model_dump(exclude={"messages"}), - ) + row.messages = messages + default_logger.log(row) + return row # Process all rows concurrently tasks = [process_row(row) for row in rows] - dataset = await asyncio.gather(*tasks) + dataset = list(await asyncio.gather(*tasks)) return dataset From 5e2a497270b8d2f0a94be507962b4ae421d1b249 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 00:57:10 -0700 Subject: [PATCH 04/31] Add pytest as a dependency in pyproject.toml - Included `pytest>=6.0.0` in the main dependencies section to ensure compatibility with testing requirements. - Removed `pytest>=6.0.0` from the dev dependencies to streamline the development environment. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9d587cd5..8274e558 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ dependencies = [ "watchdog>=2.1.0", "websockets>=15.0.1", "fastapi>=0.116.1", + "pytest>=6.0.0", ] [project.urls] @@ -58,7 +59,6 @@ Homepage = "https://github.com/fireworks-ai/eval-protocol" dev = [ "build", "twine", - "pytest>=6.0.0", "pytest-asyncio", "pytest-httpserver", "werkzeug>=2.0.0", From 6e22d3561c47a3e6f32e749c4eecb9f127a7c33e Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 01:15:27 -0700 Subject: [PATCH 05/31] Remove unused imports in utils.py to clean up the codebase. --- eval_protocol/pytest/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/eval_protocol/pytest/utils.py b/eval_protocol/pytest/utils.py index 3cfe6bfb..c57a6fb8 100644 --- a/eval_protocol/pytest/utils.py +++ b/eval_protocol/pytest/utils.py @@ -2,8 +2,6 @@ import inspect from typing import Any, Callable, List, Literal -from ..models import EvaluateResult, EvaluationRow - def execute_function(func: Callable, **kwargs) -> Any: """ From eeda2a683539ad3064249603564927ea48074a76 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 02:53:23 -0700 Subject: [PATCH 06/31] Add directory utility functions for finding and creating evaluation protocol directories - Introduced `find_eval_protocol_dir` and `find_eval_protocol_datasets_dir` functions to streamline the discovery and creation of the `.eval_protocol` and its `datasets` subdirectory. - Updated `LocalFSDatasetLoggerAdapter` to utilize these new utility functions, simplifying the initialization process for logging directories. 
--- .../dataset_logger/directory_utils.py | 55 +++++++++++++++++++ .../local_fs_dataset_logger_adapter.py | 37 ++----------- 2 files changed, 61 insertions(+), 31 deletions(-) create mode 100644 eval_protocol/dataset_logger/directory_utils.py diff --git a/eval_protocol/dataset_logger/directory_utils.py b/eval_protocol/dataset_logger/directory_utils.py new file mode 100644 index 00000000..74f691b9 --- /dev/null +++ b/eval_protocol/dataset_logger/directory_utils.py @@ -0,0 +1,55 @@ +import os +from typing import Optional + +# Shared constants for directory discovery +EVAL_PROTOCOL_DIR = ".eval_protocol" +PYTHON_FILES = ["pyproject.toml", "requirements.txt"] +DATASETS_DIR = "datasets" + + +def find_eval_protocol_dir() -> str: + """ + Find the .eval_protocol directory by looking up the directory tree. + + Returns: + Path to the .eval_protocol directory + """ + # recursively look up for a .eval_protocol directory + current_dir = os.path.dirname(os.path.abspath(__file__)) + while current_dir != "/": + if os.path.exists(os.path.join(current_dir, EVAL_PROTOCOL_DIR)): + log_dir = os.path.join(current_dir, EVAL_PROTOCOL_DIR) + break + current_dir = os.path.dirname(current_dir) + else: + # if not found, recursively look up until a pyproject.toml or requirements.txt is found + current_dir = os.path.dirname(os.path.abspath(__file__)) + while current_dir != "/": + if any(os.path.exists(os.path.join(current_dir, f)) for f in PYTHON_FILES): + log_dir = os.path.join(current_dir, EVAL_PROTOCOL_DIR) + break + current_dir = os.path.dirname(current_dir) + else: + # get the PWD that this python process is running in + log_dir = os.path.join(os.getcwd(), EVAL_PROTOCOL_DIR) + + # create the .eval_protocol directory if it doesn't exist + os.makedirs(log_dir, exist_ok=True) + + return log_dir + + +def find_eval_protocol_datasets_dir() -> str: + """ + Find the .eval_protocol/datasets directory by looking up the directory tree. 
+ + Returns: + Path to the .eval_protocol/datasets directory + """ + log_dir = find_eval_protocol_dir() + + # create the datasets subdirectory + datasets_dir = os.path.join(log_dir, DATASETS_DIR) + os.makedirs(datasets_dir, exist_ok=True) + + return datasets_dir diff --git a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py index 3cb47030..0aef4f8c 100644 --- a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py @@ -1,11 +1,13 @@ -from datetime import datetime, timezone import json import os -import tempfile import shutil +import tempfile +from datetime import datetime, timezone from typing import TYPE_CHECKING, List, Optional + from eval_protocol.common_utils import load_jsonl from eval_protocol.dataset_logger.dataset_logger import DatasetLogger +from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_datasets_dir if TYPE_CHECKING: from eval_protocol.models import EvaluationRow @@ -16,36 +18,9 @@ class LocalFSDatasetLoggerAdapter(DatasetLogger): Logger that stores logs in the local filesystem. 
""" - EVAL_PROTOCOL_DIR = ".eval_protocol" - PYTHON_FILES = ["pyproject.toml", "requirements.txt"] - DATASETS_DIR = "datasets" - def __init__(self): - # recursively look up for a .eval_protocol directory - current_dir = os.path.dirname(os.path.abspath(__file__)) - while current_dir != "/": - if os.path.exists(os.path.join(current_dir, self.EVAL_PROTOCOL_DIR)): - self.log_dir = os.path.join(current_dir, self.EVAL_PROTOCOL_DIR) - break - current_dir = os.path.dirname(current_dir) - - # if not found, recursively look up until a pyproject.toml or requirements.txt is found - current_dir = os.path.dirname(os.path.abspath(__file__)) - while current_dir != "/": - if any(os.path.exists(os.path.join(current_dir, f)) for f in self.PYTHON_FILES): - self.log_dir = os.path.join(current_dir, self.EVAL_PROTOCOL_DIR) - break - current_dir = os.path.dirname(current_dir) - - # get the PWD that this python process is running in - self.log_dir = os.path.join(os.getcwd(), self.EVAL_PROTOCOL_DIR) - - # create the .eval_protocol directory if it doesn't exist - os.makedirs(self.log_dir, exist_ok=True) - - # create the datasets subdirectory - self.datasets_dir = os.path.join(self.log_dir, self.DATASETS_DIR) - os.makedirs(self.datasets_dir, exist_ok=True) + self.log_dir = os.path.dirname(find_eval_protocol_datasets_dir()) + self.datasets_dir = find_eval_protocol_datasets_dir() # ensure that log file exists if not os.path.exists(self.current_jsonl_path): From 429bd8b8fa19e3926d6896203f9806bcdb97c03b Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 02:53:33 -0700 Subject: [PATCH 07/31] Add PID field to EvaluationRow model - Introduced a new optional field `pid` in the `EvaluationRow` model to store the process ID of the evaluation creator. This addition aids the evaluation watcher in detecting stopped evaluations. 
--- eval_protocol/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 8be3a40a..977b3c0b 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -1,3 +1,4 @@ +import os from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Union @@ -264,6 +265,11 @@ class EvaluationRow(BaseModel): default=None, description="Metadata about the evaluation that was run." ) + pid: Optional[int] = Field( + default_factory=os.getpid, + description="The PID of the process that created the row. This is used by the evaluation watcher to detect stopped evaluations.", + ) + def is_trajectory_evaluation(self) -> bool: """ Returns True if this represents a trajectory evaluation (has step_outputs), From 33d054aa6ac76127a284db5ef7ba1e7727598a24 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 02:53:40 -0700 Subject: [PATCH 08/31] Add 'stopped' status to evaluation protocol and update StatusIndicator component - Extended the `status` enum in `eval-protocol.ts` to include a new 'stopped' state, enhancing the evaluation status tracking. - Updated the `StatusIndicator` component to handle the new 'stopped' status, providing appropriate visual feedback with updated colors and text. 
--- vite-app/src/components/StatusIndicator.tsx | 6 ++++++ vite-app/src/types/eval-protocol.ts | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/vite-app/src/components/StatusIndicator.tsx b/vite-app/src/components/StatusIndicator.tsx index 0e2068e7..451e7fe6 100644 --- a/vite-app/src/components/StatusIndicator.tsx +++ b/vite-app/src/components/StatusIndicator.tsx @@ -49,6 +49,12 @@ const StatusIndicator: React.FC = ({ textColor: "text-red-700", text: "error", }; + case "stopped": + return { + dotColor: "bg-yellow-500", + textColor: "text-yellow-700", + text: "stopped", + }; default: return { dotColor: "bg-gray-500", diff --git a/vite-app/src/types/eval-protocol.ts b/vite-app/src/types/eval-protocol.ts index ee283e71..f87f243b 100644 --- a/vite-app/src/types/eval-protocol.ts +++ b/vite-app/src/types/eval-protocol.ts @@ -78,7 +78,7 @@ export const EvalMetadataSchema = z.object({ name: z.string().describe('Name of the evaluation'), description: z.string().optional().describe('Description of the evaluation'), version: z.string().describe('Version of the evaluation. By default, we will populate this with the current commit hash.'), - status: z.enum(['running', 'finished', 'error']).default('running').describe('Status of the evaluation'), + status: z.enum(['running', 'finished', 'error', 'stopped']).default('running').describe('Status of the evaluation'), num_runs: z.number().int().describe('Number of times the evaluation was repeated'), aggregation_method: z.string().describe('Method used to aggregate scores across runs'), threshold_of_success: z.number().optional().describe('Threshold score for test success'), From 94b0e469852bd09d39758b66bcc4d70ae6522aaf Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 02:53:47 -0700 Subject: [PATCH 09/31] Update uv.lock to modify pytest dependency and revision number - Changed the revision number from 3 to 2. - Added `pytest` to the main dependencies section. 
- Removed `pytest` from the dev dependencies while retaining its version specification. --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index 134e2ce4..09e3cda9 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13'", @@ -1144,6 +1144,7 @@ dependencies = [ { name = "pandas" }, { name = "psutil" }, { name = "pydantic" }, + { name = "pytest" }, { name = "python-dotenv" }, { name = "pyyaml" }, { name = "requests" }, @@ -1180,7 +1181,6 @@ dev = [ { name = "openai" }, { name = "pip" }, { name = "pre-commit" }, - { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, { name = "pytest-httpserver" }, @@ -1272,7 +1272,7 @@ requires-dist = [ { name = "pre-commit", marker = "extra == 'dev'" }, { name = "psutil", specifier = ">=5.8.0" }, { name = "pydantic", specifier = ">=2.0.0" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=6.0.0" }, + { name = "pytest", specifier = ">=6.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'" }, { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "pytest-httpserver", marker = "extra == 'dev'" }, From 1fe333803ce158d89acc262d4a3d1c82b476f98d Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 03:45:04 -0700 Subject: [PATCH 10/31] Ensure evaluation watcher is running at the start of evaluation tests --- eval_protocol/pytest/evaluation_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 35a069d7..61d5f643 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -7,6 +7,7 @@ from eval_protocol.models import CompletionParams, EvalMetadata, EvaluationRow, InputMetadata from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter from 
eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor +from eval_protocol.pytest.eval_watcher import ensure_singleton_watcher from eval_protocol.pytest.types import ( Dataset, DatasetPathParam, @@ -28,6 +29,9 @@ from ..common_utils import load_jsonl +# Ensure the evaluation watcher is running (OS-level singleton) +ensure_singleton_watcher() + def evaluation_test( *, From ee9957fc69b1a4fa63ead5048ced7360cea285ed Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 04:32:29 -0700 Subject: [PATCH 11/31] Add optional PID field to EvaluationRowSchema - Introduced a new optional field `pid` in the `EvaluationRowSchema` to store the process ID of the evaluation creator. This enhancement supports the evaluation watcher in detecting stopped evaluations, improving overall tracking and management of evaluation processes. --- vite-app/src/types/eval-protocol.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vite-app/src/types/eval-protocol.ts b/vite-app/src/types/eval-protocol.ts index f87f243b..f57f3d26 100644 --- a/vite-app/src/types/eval-protocol.ts +++ b/vite-app/src/types/eval-protocol.ts @@ -96,7 +96,8 @@ export const EvaluationRowSchema = z.object({ (val) => typeof val === "string" ? new Date(val) : val, z.date() ).describe('The timestamp when the row was created. Accepts string and parses to Date.'), - eval_metadata: EvalMetadataSchema.optional().describe('Metadata about the evaluation that was run.') + eval_metadata: EvalMetadataSchema.optional().describe('Metadata about the evaluation that was run.'), + pid: z.number().optional().describe('The PID of the process that created the row. 
This is used by the evaluation watcher to detect stopped evaluations.') }); // Agent Evaluation Framework (V2) schemas From 1c12956d7ac0ff53b9b8aaf38b0337a767f606a3 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 04:51:38 -0700 Subject: [PATCH 12/31] Enhance evaluation logging and error handling - Updated the `load_jsonl` function to include error handling for JSON parsing, logging the line number of any errors encountered. - Modified the `status` field in `EvalMetadata` to be optional, allowing for more flexible evaluation states. - Improved the `LocalFSDatasetLoggerAdapter` to check for existing rows across multiple JSONL files before appending new entries, ensuring no duplicates are logged. - Increased the `word_count` parameter in the `generate_id` function to 5 for more diverse ID generation. - Introduced a new `eval_watcher.py` script to monitor evaluation processes, updating their status if the associated process has terminated. --- eval_protocol/common_utils.py | 8 +- .../local_fs_dataset_logger_adapter.py | 56 ++-- eval_protocol/human_id/__init__.py | 5 +- eval_protocol/models.py | 6 +- eval_protocol/pytest/eval_watcher.py | 304 ++++++++++++++++++ eval_protocol/pytest/evaluation_test.py | 5 + 6 files changed, 353 insertions(+), 31 deletions(-) create mode 100644 eval_protocol/pytest/eval_watcher.py diff --git a/eval_protocol/common_utils.py b/eval_protocol/common_utils.py index d2030344..6596133d 100644 --- a/eval_protocol/common_utils.py +++ b/eval_protocol/common_utils.py @@ -15,6 +15,10 @@ def load_jsonl(file_path: str) -> List[Dict[str, Any]]: """ data: List[Dict[str, Any]] = [] with open(file_path, "r", encoding="utf-8") as f: - for line in f: - data.append(json.loads(line.strip())) + for line_number, line in enumerate(f): + try: + data.append(json.loads(line.strip())) + except json.JSONDecodeError as e: + print(f"Error parsing JSON line for file {file_path} at line {line_number}") + raise e return data diff --git 
a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py index 0aef4f8c..bbcf3e45 100644 --- a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py @@ -43,25 +43,29 @@ def log(self, row: "EvaluationRow") -> None: """Log a row, updating existing row with same ID or appending new row.""" row_id = row.input_metadata.row_id - # Check if row with this ID already exists - if os.path.exists(self.current_jsonl_path): - with open(self.current_jsonl_path, "r") as f: - lines = f.readlines() - - # Find the line with matching ID - for i, line in enumerate(lines): - try: - line_data = json.loads(line.strip()) - if line_data["input_metadata"]["row_id"] == row_id: - # Update existing row - lines[i] = row.model_dump_json(exclude_none=True) + os.linesep - with open(self.current_jsonl_path, "w") as f: - f.writelines(lines) - return - except json.JSONDecodeError: - continue - - # If no existing row found, append new row + # Check if row with this ID already exists in any JSONL file + if os.path.exists(self.datasets_dir): + for filename in os.listdir(self.datasets_dir): + if filename.endswith(".jsonl"): + file_path = os.path.join(self.datasets_dir, filename) + if os.path.exists(file_path): + with open(file_path, "r") as f: + lines = f.readlines() + + # Find the line with matching ID + for i, line in enumerate(lines): + try: + line_data = json.loads(line.strip()) + if line_data["input_metadata"]["row_id"] == row_id: + # Update existing row + lines[i] = row.model_dump_json(exclude_none=True) + os.linesep + with open(file_path, "w") as f: + f.writelines(lines) + return + except json.JSONDecodeError: + continue + + # If no existing row found, append new row to current file with open(self.current_jsonl_path, "a") as f: f.write(row.model_dump_json(exclude_none=True) + os.linesep) @@ -73,14 +77,18 @@ def read(self, row_id: Optional[str] = 
None) -> List["EvaluationRow"]: return [] all_rows = [] + existing_row_ids = set() for filename in os.listdir(self.datasets_dir): if filename.endswith(".jsonl"): file_path = os.path.join(self.datasets_dir, filename) - try: - data = load_jsonl(file_path) - all_rows.extend([EvaluationRow(**r) for r in data]) - except Exception: - continue # skip files that can't be read/parsed + data = load_jsonl(file_path) + for r in data: + row = EvaluationRow(**r) + if row.input_metadata.row_id not in existing_row_ids: + existing_row_ids.add(row.input_metadata.row_id) + else: + raise ValueError(f"Duplicate Row ID {row.input_metadata.row_id} already exists") + all_rows.append(row) if row_id: # Filter by row_id if specified diff --git a/eval_protocol/human_id/__init__.py b/eval_protocol/human_id/__init__.py index a48460f2..8b5d447c 100644 --- a/eval_protocol/human_id/__init__.py +++ b/eval_protocol/human_id/__init__.py @@ -1,6 +1,7 @@ -import random import itertools +import random from typing import Hashable + from . import dictionary __all__ = ["generate_id"] @@ -8,7 +9,7 @@ system_random = random.SystemRandom() -def generate_id(separator="-", seed: int | float | str | bytes | bytearray | None = None, word_count=4) -> str: +def generate_id(separator="-", seed: int | float | str | bytes | bytearray | None = None, word_count=5) -> str: """ Generate a human readable ID diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 977b3c0b..60f75975 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -211,8 +211,8 @@ class EvalMetadata(BaseModel): default_factory=get_pep440_version, description="Version of the evaluation. 
Should be populated with a PEP 440 version string.", ) - status: Literal["running", "finished", "error", "stopped"] = Field( - "running", description="Status of the evaluation" + status: Optional[Literal["running", "finished", "error", "stopped"]] = Field( + None, description="Status of the evaluation" ) num_runs: int = Field(..., description="Number of times the evaluation was repeated") aggregation_method: str = Field(..., description="Method used to aggregate scores across runs") @@ -266,7 +266,7 @@ class EvaluationRow(BaseModel): ) pid: Optional[int] = Field( - default_factory=os.getpid, + None, description="The PID of the process that created the row. This is used by the evaluation watcher to detect stopped evaluations.", ) diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py new file mode 100644 index 00000000..d0cf4235 --- /dev/null +++ b/eval_protocol/pytest/eval_watcher.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +""" +Evaluation Watcher Process + +This process monitors all evaluation rows and updates any evaluations that are still +"running" but whose associated process has terminated. 
+ +Usage: + python -m eval_protocol.pytest.eval_watcher [--check-interval ] +""" + +import argparse +import fcntl +import json +import multiprocessing +import os +import sys +import time +from pathlib import Path +from typing import Dict, List, Optional + +# Add freeze_support for multiprocessing compatibility +if __name__ == "__main__": + multiprocessing.freeze_support() + +from eval_protocol.dataset_logger import default_logger +from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir +from eval_protocol.models import EvaluationRow + + +def get_lock_file_paths() -> tuple[Path, Path]: + """Get the lock file paths using the same directory discovery logic.""" + eval_protocol_dir = Path(find_eval_protocol_dir()) + lock_file_path = eval_protocol_dir / "watcher.lock" + pid_file_path = eval_protocol_dir / "watcher.pid" + return lock_file_path, pid_file_path + + +def acquire_singleton_lock() -> Optional[int]: + """ + Try to acquire the singleton lock. Returns the PID of the current holder if failed. 
+ + Returns: + None if lock acquired successfully, otherwise the PID of the current holder + """ + lock_file_path, pid_file_path = get_lock_file_paths() + + try: + # Try to acquire an exclusive lock on the lock file + with open(lock_file_path, "w") as lock_file: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + + # Write our PID to the PID file + with open(pid_file_path, "w") as pid_file: + pid_file.write(str(os.getpid())) + + return None # Successfully acquired lock + + except (IOError, OSError): + # Lock is held by another process + try: + if pid_file_path.exists(): + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + return int(content) + except (IOError, OSError): + pass + return None + + +def release_singleton_lock(): + """Release the singleton lock.""" + lock_file_path, pid_file_path = get_lock_file_paths() + try: + if pid_file_path.exists(): + pid_file_path.unlink() + if lock_file_path.exists(): + lock_file_path.unlink() + except (IOError, OSError): + pass + + +def is_process_running(pid: int) -> bool: + """Check if a process is still running.""" + try: + os.kill(pid, 0) + return True + except OSError: + return False + + +def find_running_evaluations() -> List[EvaluationRow]: + """Find all evaluations currently in 'running' status .""" + running_evaluations = [] + all_rows = default_logger.read() + + for row in all_rows: + if row.eval_metadata and row.eval_metadata.status == "running": + running_evaluations.append(row) + + return running_evaluations + + +def update_evaluation_to_stopped(row: EvaluationRow, reason: str) -> None: + """Update an evaluation row to 'stopped' status.""" + try: + if row.eval_metadata: + row.eval_metadata.status = "stopped" + row.eval_metadata.passed = False + + if row.evaluation_result is not None: + row.evaluation_result.error = reason + else: + from eval_protocol.models import EvaluateResult + + row.evaluation_result = EvaluateResult( + score=0.0, 
is_score_valid=False, reason=f"Evaluation stopped: {reason}", error=reason + ) + + default_logger.log(row) + print( + f" šŸ“ Updated evaluation '{row.eval_metadata.name if row.eval_metadata else 'Unknown'}' (Row ID: {row.input_metadata.row_id}) (PID: {row.pid}) to stopped status" + ) + + except Exception as e: + print(f" āš ļø Error updating evaluation row: {e}") + + +def check_and_update_terminated_evaluations() -> int: + """Check for evaluations with terminated processes and update them to stopped status.""" + running_evaluations = find_running_evaluations() + + if not running_evaluations: + return 0 + + print(f"šŸ” Checking {len(running_evaluations)} running evaluations for terminated processes...") + for row in running_evaluations: + print(f" Row ID: {row.input_metadata.row_id} PID: {row.pid}") + + terminated_count = 0 + for row in running_evaluations: + if row.pid: + if not is_process_running(row.pid): + update_evaluation_to_stopped(row, f"Process {row.pid} terminated") + terminated_count += 1 + else: + update_evaluation_to_stopped(row, f"Process {row.pid} terminated") + terminated_count += 1 + + if terminated_count > 0: + print(f" āœ… Updated {terminated_count} evaluations to stopped status") + + return terminated_count + + +def run_watcher_loop(check_interval: float) -> None: + """Main monitoring loop.""" + print(f"šŸ” Starting evaluation watcher (PID: {os.getpid()})") + print(f" Check interval: {check_interval} seconds") + print(" Monitoring all evaluation rows for terminated processes") + + consecutive_empty_checks = 0 + max_empty_checks = 3 + + try: + while True: + running_evaluations = find_running_evaluations() + + if running_evaluations: + consecutive_empty_checks = 0 + check_and_update_terminated_evaluations() + else: + consecutive_empty_checks += 1 + if consecutive_empty_checks >= max_empty_checks: + print( + f"šŸ” No running evaluations found for {consecutive_empty_checks} consecutive checks. Exiting watcher." 
+ ) + break + else: + print( + f"šŸ” No running evaluations found ({consecutive_empty_checks}/{max_empty_checks} consecutive checks)" + ) + + time.sleep(check_interval) + + except KeyboardInterrupt: + print("\nšŸ›‘ Evaluation watcher interrupted by user") + except Exception as e: + print(f"\nāŒ Evaluation watcher error: {e}") + finally: + print("šŸ” Evaluation watcher stopped") + + +def _start_watcher_process(check_interval: float) -> multiprocessing.Process: + """Start the watcher in a background process.""" + # Ensure we're not in a frozen state and multiprocessing is properly initialized + if multiprocessing.current_process().name != "MainProcess": + raise RuntimeError("Cannot start watcher process from within another process") + + process = multiprocessing.Process(target=_watcher_process_main, args=(check_interval,), name="eval-watcher") + process.start() + return process + + +def _watcher_process_main(check_interval: float) -> None: + """Main entry point for the watcher process - acquires lock and runs the loop.""" + # Try to acquire the lock in this process + current_holder_pid = acquire_singleton_lock() + + if current_holder_pid is not None: + # Another process is already running + print(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") + return + + # We acquired the lock, run the watcher loop + try: + run_watcher_loop(check_interval) + finally: + # Always release the lock when we exit + release_singleton_lock() + + +def ensure_singleton_watcher(check_interval: float = 5.0) -> bool: + """ + Ensure the singleton EvaluationWatcher instance exists and is running. + This function is OS-level global - only one watcher will run across all processes. 
+ + Args: + check_interval: How often to check for terminated processes (seconds) + + Returns: + True if watcher was started successfully, False if another watcher is already running + """ + # Check if we're already in a subprocess + if multiprocessing.current_process().name != "MainProcess": + return False + + # Start the watcher in a background process + try: + process = _start_watcher_process(check_interval) + print(f"šŸ” Started evaluation watcher in background process (PID: {process.pid})") + return True + except Exception as e: + print(f"āŒ Failed to start evaluation watcher: {e}") + return False + + +def is_watcher_running() -> bool: + """Check if the evaluation watcher is currently running.""" + current_holder_pid = acquire_singleton_lock() + if current_holder_pid is None: + # We acquired the lock, so no one else is running + release_singleton_lock() + return False + + # Check if the holder is still alive + assert current_holder_pid is not None # For type checker + is_alive = is_process_running(current_holder_pid) + if not is_alive: + # Clean up stale lock + release_singleton_lock() + + return is_alive + + +def get_watcher_pid() -> Optional[int]: + """Get the PID of the currently running evaluation watcher.""" + _, pid_file_path = get_lock_file_paths() + try: + if pid_file_path.exists(): + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + if is_process_running(pid): + return pid + except (IOError, OSError): + pass + return None + + +def main(): + """Main entry point for the evaluation watcher.""" + parser = argparse.ArgumentParser( + description="Monitor all evaluation rows and update those with terminated processes to stopped status" + ) + parser.add_argument( + "--check-interval", + type=float, + default=1.0, + help="How often to check for terminated processes (seconds, default: 1.0)", + ) + + args = parser.parse_args() + + # Run the watcher directly (not as a background 
process) + run_watcher_loop(args.check_interval) + + +if __name__ == "__main__": + # Ensure multiprocessing is properly initialized + multiprocessing.freeze_support() + main() diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 61d5f643..e5289316 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -1,4 +1,5 @@ import inspect +import os from typing import Any, Callable, Dict, List, Optional import pytest @@ -244,6 +245,10 @@ def wrapper_body(**kwargs): # Initialize eval_metadata for each row row.eval_metadata = eval_metadata + # has to be done in the pytest main process since it's + # used to determine whether this eval has stopped + row.pid = os.getpid() + # Now run the rollout processor with metadata-initialized data config = RolloutProcessorConfig( model=model_name, From a8a193d19211a7ef5babde3c6825580d76f9ab1e Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 04:57:17 -0700 Subject: [PATCH 13/31] Refactor eval_watcher.py to use structured logging - Replaced print statements with structured logging using the `get_logger` utility for improved log management and consistency. - Enhanced error handling and status updates within the evaluation watcher, ensuring better tracking of evaluation processes and clearer output during execution. 
--- eval_protocol/pytest/eval_watcher.py | 40 +++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py index d0cf4235..1be3fc4a 100644 --- a/eval_protocol/pytest/eval_watcher.py +++ b/eval_protocol/pytest/eval_watcher.py @@ -11,13 +11,11 @@ import argparse import fcntl -import json import multiprocessing import os -import sys import time from pathlib import Path -from typing import Dict, List, Optional +from typing import List, Optional # Add freeze_support for multiprocessing compatibility if __name__ == "__main__": @@ -25,8 +23,12 @@ from eval_protocol.dataset_logger import default_logger from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir +from eval_protocol.logging_utils import get_logger from eval_protocol.models import EvaluationRow +# Initialize logger +logger = get_logger("eval_watcher") + def get_lock_file_paths() -> tuple[Path, Path]: """Get the lock file paths using the same directory discovery logic.""" @@ -119,12 +121,12 @@ def update_evaluation_to_stopped(row: EvaluationRow, reason: str) -> None: ) default_logger.log(row) - print( + logger.info( f" šŸ“ Updated evaluation '{row.eval_metadata.name if row.eval_metadata else 'Unknown'}' (Row ID: {row.input_metadata.row_id}) (PID: {row.pid}) to stopped status" ) except Exception as e: - print(f" āš ļø Error updating evaluation row: {e}") + logger.error(f" āš ļø Error updating evaluation row: {e}") def check_and_update_terminated_evaluations() -> int: @@ -134,9 +136,9 @@ def check_and_update_terminated_evaluations() -> int: if not running_evaluations: return 0 - print(f"šŸ” Checking {len(running_evaluations)} running evaluations for terminated processes...") + logger.info(f"šŸ” Checking {len(running_evaluations)} running evaluations for terminated processes...") for row in running_evaluations: - print(f" Row ID: {row.input_metadata.row_id} PID: {row.pid}") + 
logger.info(f" Row ID: {row.input_metadata.row_id} PID: {row.pid}") terminated_count = 0 for row in running_evaluations: @@ -149,16 +151,16 @@ def check_and_update_terminated_evaluations() -> int: terminated_count += 1 if terminated_count > 0: - print(f" āœ… Updated {terminated_count} evaluations to stopped status") + logger.info(f" āœ… Updated {terminated_count} evaluations to stopped status") return terminated_count def run_watcher_loop(check_interval: float) -> None: """Main monitoring loop.""" - print(f"šŸ” Starting evaluation watcher (PID: {os.getpid()})") - print(f" Check interval: {check_interval} seconds") - print(" Monitoring all evaluation rows for terminated processes") + logger.info(f"šŸ” Starting evaluation watcher (PID: {os.getpid()})") + logger.info(f" Check interval: {check_interval} seconds") + logger.info(" Monitoring all evaluation rows for terminated processes") consecutive_empty_checks = 0 max_empty_checks = 3 @@ -173,23 +175,23 @@ def run_watcher_loop(check_interval: float) -> None: else: consecutive_empty_checks += 1 if consecutive_empty_checks >= max_empty_checks: - print( + logger.info( f"šŸ” No running evaluations found for {consecutive_empty_checks} consecutive checks. Exiting watcher." 
) break else: - print( + logger.info( f"šŸ” No running evaluations found ({consecutive_empty_checks}/{max_empty_checks} consecutive checks)" ) time.sleep(check_interval) except KeyboardInterrupt: - print("\nšŸ›‘ Evaluation watcher interrupted by user") + logger.info("\nšŸ›‘ Evaluation watcher interrupted by user") except Exception as e: - print(f"\nāŒ Evaluation watcher error: {e}") + logger.error(f"\nāŒ Evaluation watcher error: {e}") finally: - print("šŸ” Evaluation watcher stopped") + logger.info("šŸ” Evaluation watcher stopped") def _start_watcher_process(check_interval: float) -> multiprocessing.Process: @@ -210,7 +212,7 @@ def _watcher_process_main(check_interval: float) -> None: if current_holder_pid is not None: # Another process is already running - print(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") + logger.info(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") return # We acquired the lock, run the watcher loop @@ -239,10 +241,10 @@ def ensure_singleton_watcher(check_interval: float = 5.0) -> bool: # Start the watcher in a background process try: process = _start_watcher_process(check_interval) - print(f"šŸ” Started evaluation watcher in background process (PID: {process.pid})") + logger.info(f"šŸ” Started evaluation watcher in background process (PID: {process.pid})") return True except Exception as e: - print(f"āŒ Failed to start evaluation watcher: {e}") + logger.error(f"āŒ Failed to start evaluation watcher: {e}") return False From 33d2ce5dd25615a0d41309ea1b6b67fb952d62b3 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 04:57:32 -0700 Subject: [PATCH 14/31] Add logging utilities for eval_protocol package - Introduced a new module `logging_utils.py` to provide centralized logging configuration and utilities. - Implemented functions for setting up loggers, logging evaluation events, performance metrics, and errors with context. 
- Enhanced logging consistency across the package by utilizing structured logging practices. --- eval_protocol/logging_utils.py | 154 +++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 eval_protocol/logging_utils.py diff --git a/eval_protocol/logging_utils.py b/eval_protocol/logging_utils.py new file mode 100644 index 00000000..147b96d0 --- /dev/null +++ b/eval_protocol/logging_utils.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +""" +Logging utilities for the eval_protocol package. + +This module provides centralized logging configuration and utilities +for consistent logging across the eval_protocol package. +""" + +import logging +import os +from pathlib import Path +from typing import Optional + +from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir + + +def setup_logger( + name: str, + log_file: Optional[str] = None, + level: int = logging.INFO, + console_level: int = logging.INFO, + file_level: int = logging.DEBUG, +) -> logging.Logger: + """ + Set up a logger with both console and file handlers. 
+ + Args: + name: Logger name + log_file: Optional log file name (will be created in logs directory) + level: Overall logger level + console_level: Console handler level + file_level: File handler level + + Returns: + Configured logger instance + """ + # Create logs directory under eval_protocol + eval_protocol_dir = Path(find_eval_protocol_dir()) + logs_dir = eval_protocol_dir / "logs" + logs_dir.mkdir(exist_ok=True) + + # Create logger + logger = logging.getLogger(name) + logger.setLevel(level) + + # Clear existing handlers to avoid duplicates + logger.handlers.clear() + + # Create formatters + file_formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + console_formatter = logging.Formatter("%(levelname)s - %(message)s") + + # Console handler + console_handler = logging.StreamHandler() + console_handler.setLevel(console_level) + console_handler.setFormatter(console_formatter) + logger.addHandler(console_handler) + + # File handler (if log_file specified) + if log_file: + log_file_path = logs_dir / log_file + file_handler = logging.FileHandler(log_file_path) + file_handler.setLevel(file_level) + file_handler.setFormatter(file_formatter) + logger.addHandler(file_handler) + + return logger + + +def get_logger(name: str) -> logging.Logger: + """ + Get a logger instance. If it doesn't exist, create it with default settings. + + Args: + name: Logger name + + Returns: + Logger instance + """ + logger = logging.getLogger(name) + + # If logger doesn't have handlers, set it up with defaults + if not logger.handlers: + logger = setup_logger(name, f"{name}.log") + + return logger + + +def log_evaluation_event( + event_type: str, evaluation_id: str, message: str, level: int = logging.INFO, **kwargs +) -> None: + """ + Log evaluation-specific events to a dedicated evaluation log file. 
+ + Args: + event_type: Type of event (e.g., 'start', 'complete', 'error') + evaluation_id: Evaluation identifier + message: Log message + level: Log level + **kwargs: Additional context to include in log + """ + logger = get_logger("evaluation_events") + + # Create structured log entry + log_entry = {"event_type": event_type, "evaluation_id": evaluation_id, "message": message, **kwargs} + + if level == logging.DEBUG: + logger.debug(f"EVENT: {log_entry}") + elif level == logging.INFO: + logger.info(f"EVENT: {log_entry}") + elif level == logging.WARNING: + logger.warning(f"EVENT: {log_entry}") + elif level == logging.ERROR: + logger.error(f"EVENT: {log_entry}") + elif level == logging.CRITICAL: + logger.critical(f"EVENT: {log_entry}") + + +def log_performance_metric(metric_name: str, value: float, unit: str = "", context: Optional[dict] = None) -> None: + """ + Log performance metrics to a dedicated metrics log file. + + Args: + metric_name: Name of the metric + value: Metric value + unit: Unit of measurement + context: Additional context information + """ + logger = get_logger("performance_metrics") + + metric_entry = {"metric": metric_name, "value": value, "unit": unit, "context": context or {}} + + logger.info(f"METRIC: {metric_entry}") + + +def log_error_with_context(error: Exception, context: str, additional_info: Optional[dict] = None) -> None: + """ + Log errors with additional context information. 
+ + Args: + error: The exception that occurred + context: Context where the error occurred + additional_info: Additional information about the error + """ + logger = get_logger("errors") + + error_entry = { + "error_type": type(error).__name__, + "error_message": str(error), + "context": context, + "additional_info": additional_info or {}, + } + + logger.error(f"ERROR: {error_entry}", exc_info=True) From 5ca5d658a1292e1a4b0e48f547ef4f29b14f0fe5 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 10:27:57 -0700 Subject: [PATCH 15/31] Enhance LocalFSDatasetLoggerAdapter to prevent duplicate row IDs - Updated the `read` method to ensure that no duplicate row IDs are logged when reading from JSONL files in the datasets directory. This improvement enhances data integrity and consistency in the evaluation logging process. --- .../dataset_logger/local_fs_dataset_logger_adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py index bbcf3e45..01760b0a 100644 --- a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py @@ -70,7 +70,8 @@ def log(self, row: "EvaluationRow") -> None: f.write(row.model_dump_json(exclude_none=True) + os.linesep) def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]: - """Read rows from all JSONL files in the datasets directory.""" + """Read rows from all JSONL files in the datasets directory. 
Also + ensures that there are no duplicate row IDs.""" from eval_protocol.models import EvaluationRow if not os.path.exists(self.datasets_dir): From 755c017d58c84f0f1c1d916e32d3b13c2e2c8049 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 13:43:27 -0700 Subject: [PATCH 16/31] Add singleton lock functionality for process management - Introduced a new module `singleton_lock.py` that implements file-based singleton lock management to ensure only one instance of a process can run at a time. - Added functions for acquiring, releasing, and checking the status of locks, along with mechanisms for handling stale locks. - Implemented tests in `test_singleton_lock.py` and `test_singleton_lock_multiprocessing.py` to validate the lock behavior under various scenarios, including concurrent access and cleanup of stale locks. --- eval_protocol/utils/singleton_lock.py | 225 +++++++ tests/utils/test_singleton_lock.py | 259 ++++++++ .../test_singleton_lock_multiprocessing.py | 606 ++++++++++++++++++ 3 files changed, 1090 insertions(+) create mode 100644 eval_protocol/utils/singleton_lock.py create mode 100644 tests/utils/test_singleton_lock.py create mode 100644 tests/utils/test_singleton_lock_multiprocessing.py diff --git a/eval_protocol/utils/singleton_lock.py b/eval_protocol/utils/singleton_lock.py new file mode 100644 index 00000000..ccf6ce30 --- /dev/null +++ b/eval_protocol/utils/singleton_lock.py @@ -0,0 +1,225 @@ +""" +Singleton Lock Management + +This module provides file-based singleton lock functionality for ensuring only one +instance of a process can run at a time across the system. + +The lock mechanism uses two files: +- A PID file that contains the process ID of the lock holder +- A lock file that serves as a marker for the lock + +This approach provides atomic lock acquisition and proper cleanup of stale locks +from terminated processes. 
+""" + +import os +from pathlib import Path +from typing import Optional, Tuple + + +def get_lock_file_paths(base_dir: Path, lock_name: str) -> Tuple[Path, Path]: + """ + Get the lock file paths for a given lock name. + + Args: + base_dir: Base directory where lock files should be stored + lock_name: Name identifier for the lock (e.g., "watcher", "server") + + Returns: + Tuple of (lock_file_path, pid_file_path) + """ + lock_file_path = base_dir / f"{lock_name}.lock" + pid_file_path = base_dir / f"{lock_name}.pid" + return lock_file_path, pid_file_path + + +def acquire_singleton_lock(base_dir: Path, lock_name: str) -> Optional[int]: + """ + Try to acquire the singleton lock. Returns the PID of the current holder if failed. + + Args: + base_dir: Base directory where lock files should be stored + lock_name: Name identifier for the lock + + Returns: + None if lock acquired successfully, otherwise the PID of the current holder + """ + lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) + + # First, check if PID file exists and contains a running process + if pid_file_path.exists(): + try: + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + # Check if the process is still running + try: + os.kill(pid, 0) + # Process is running, we can't acquire the lock + return pid + except OSError: + # Process is not running, clean up stale files + pass + except (IOError, OSError): + pass + + # Try to create the PID file atomically + temp_pid_file = None + try: + # Use atomic file creation + temp_pid_file = pid_file_path.with_suffix(".tmp") + with open(temp_pid_file, "w") as temp_file: + temp_file.write(str(os.getpid())) + temp_file.flush() + os.fsync(temp_file.fileno()) + + # Atomically move the temp file to the final location + temp_pid_file.rename(pid_file_path) + + # Create the lock file to indicate we have the lock + with open(lock_file_path, "w") as lock_file: + 
lock_file.write(str(os.getpid())) + lock_file.flush() + os.fsync(lock_file.fileno()) + + return None # Successfully acquired lock + + except (IOError, OSError) as e: + # Failed to acquire lock + try: + if temp_pid_file and temp_pid_file.exists(): + temp_pid_file.unlink() + except (IOError, OSError): + pass + + # Check if someone else got the lock + if pid_file_path.exists(): + try: + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + return int(content) + except (IOError, OSError): + pass + + return 999999 # Dummy PID to indicate lock is held + + +def release_singleton_lock(base_dir: Path, lock_name: str) -> None: + """ + Release the singleton lock. + + Args: + base_dir: Base directory where lock files are stored + lock_name: Name identifier for the lock + """ + lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) + try: + if pid_file_path.exists(): + pid_file_path.unlink() + if lock_file_path.exists(): + lock_file_path.unlink() + except (IOError, OSError): + pass + + +def is_process_running(pid: int) -> bool: + """ + Check if a process is still running. + + Args: + pid: Process ID to check + + Returns: + True if the process is running, False otherwise + """ + try: + os.kill(pid, 0) + return True + except OSError: + return False + + +def is_lock_held(base_dir: Path, lock_name: str) -> bool: + """ + Check if a lock is currently held by a running process. 
+ + Args: + base_dir: Base directory where lock files are stored + lock_name: Name identifier for the lock + + Returns: + True if the lock is held by a running process, False otherwise + """ + _, pid_file_path = get_lock_file_paths(base_dir, lock_name) + + try: + if pid_file_path.exists(): + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + if is_process_running(pid): + return True + except (IOError, OSError): + pass + + return False + + +def get_lock_holder_pid(base_dir: Path, lock_name: str) -> Optional[int]: + """ + Get the PID of the process currently holding the lock. + + Args: + base_dir: Base directory where lock files are stored + lock_name: Name identifier for the lock + + Returns: + PID of the lock holder if the lock is held by a running process, None otherwise + """ + _, pid_file_path = get_lock_file_paths(base_dir, lock_name) + try: + if pid_file_path.exists(): + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + if is_process_running(pid): + return pid + except (IOError, OSError): + pass + return None + + +def cleanup_stale_lock(base_dir: Path, lock_name: str) -> bool: + """ + Clean up a stale lock (lock files exist but process is not running). 
+ + Args: + base_dir: Base directory where lock files are stored + lock_name: Name identifier for the lock + + Returns: + True if stale lock was cleaned up, False if no cleanup was needed + """ + lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) + + # Check if PID file exists but process is not running + if pid_file_path.exists(): + try: + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + if not is_process_running(pid): + # Process is not running, clean up stale files + release_singleton_lock(base_dir, lock_name) + return True + except (IOError, OSError): + # If we can't read the PID file, clean it up + release_singleton_lock(base_dir, lock_name) + return True + + return False diff --git a/tests/utils/test_singleton_lock.py b/tests/utils/test_singleton_lock.py new file mode 100644 index 00000000..e31dd146 --- /dev/null +++ b/tests/utils/test_singleton_lock.py @@ -0,0 +1,259 @@ +""" +Tests for the singleton lock functionality. + +This module tests the file-based singleton lock mechanism that ensures only one +instance of a process can run at a time across the system. 
+""" + +import os +import tempfile +import time +from pathlib import Path +from unittest.mock import patch + +import pytest + +from eval_protocol.utils.singleton_lock import ( + acquire_singleton_lock, + cleanup_stale_lock, + get_lock_file_paths, + get_lock_holder_pid, + is_lock_held, + is_process_running, + release_singleton_lock, +) + + +class TestSingletonLock: + """Test cases for singleton lock functionality.""" + + @pytest.fixture + def temp_dir(self): + """Create a temporary directory for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + yield Path(temp_dir) + + @pytest.fixture + def lock_name(self): + """Test lock name.""" + return "test_lock" + + def test_get_lock_file_paths(self, temp_dir, lock_name): + """Test that lock file paths are generated correctly.""" + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + assert lock_file_path == temp_dir / f"{lock_name}.lock" + assert pid_file_path == temp_dir / f"{lock_name}.pid" + + def test_is_process_running_current_pid(self): + """Test that current process is detected as running.""" + current_pid = os.getpid() + assert is_process_running(current_pid) is True + + def test_is_process_running_invalid_pid(self): + """Test that invalid PID is detected as not running.""" + # Use a very high PID that shouldn't exist + invalid_pid = 999999 + assert is_process_running(invalid_pid) is False + + def test_acquire_singleton_lock_success(self, temp_dir, lock_name): + """Test successful lock acquisition.""" + result = acquire_singleton_lock(temp_dir, lock_name) + assert result is None # Successfully acquired lock + + # Verify lock files were created + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + assert lock_file_path.exists() + assert pid_file_path.exists() + + # Verify PID file contains current PID + with open(pid_file_path, "r") as f: + stored_pid = int(f.read().strip()) + assert stored_pid == os.getpid() + + def 
test_acquire_singleton_lock_already_held(self, temp_dir, lock_name): + """Test that lock acquisition fails when lock is already held.""" + # First process acquires the lock + result1 = acquire_singleton_lock(temp_dir, lock_name) + assert result1 is None + + # Second process tries to acquire the lock + # Since the lock is already held by the current process, it should return the current PID + result2 = acquire_singleton_lock(temp_dir, lock_name) + assert result2 == os.getpid() # Should return current holder's PID + + def test_acquire_singleton_lock_stale_lock_cleanup(self, temp_dir, lock_name): + """Test that stale locks are cleaned up during acquisition.""" + # Create a stale lock file with a non-existent PID + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + # Create PID file with non-existent PID + with open(pid_file_path, "w") as f: + f.write("999999") # Non-existent PID + + # Try to acquire lock - should clean up stale lock and succeed + result = acquire_singleton_lock(temp_dir, lock_name) + assert result is None # Should succeed after cleaning up stale lock + + def test_release_singleton_lock(self, temp_dir, lock_name): + """Test lock release functionality.""" + # Acquire the lock first + acquire_singleton_lock(temp_dir, lock_name) + + # Verify lock files exist + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + assert lock_file_path.exists() + assert pid_file_path.exists() + + # Release the lock + release_singleton_lock(temp_dir, lock_name) + + # Verify lock files were removed + assert not lock_file_path.exists() + assert not pid_file_path.exists() + + def test_release_singleton_lock_no_files(self, temp_dir, lock_name): + """Test that release doesn't fail when lock files don't exist.""" + # Release lock without acquiring it first + release_singleton_lock(temp_dir, lock_name) + # Should not raise any exceptions + + def test_is_lock_held_true(self, temp_dir, lock_name): + """Test that lock is detected as held 
when it exists and process is running.""" + # Acquire the lock + acquire_singleton_lock(temp_dir, lock_name) + + # Check if lock is held + assert is_lock_held(temp_dir, lock_name) is True + + def test_is_lock_held_false_no_lock(self, temp_dir, lock_name): + """Test that lock is detected as not held when no lock files exist.""" + assert is_lock_held(temp_dir, lock_name) is False + + def test_is_lock_held_false_stale_lock(self, temp_dir, lock_name): + """Test that stale lock is detected as not held.""" + # Create a stale lock file with a non-existent PID + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + with open(pid_file_path, "w") as f: + f.write("999999") # Non-existent PID + + assert is_lock_held(temp_dir, lock_name) is False + + def test_get_lock_holder_pid_success(self, temp_dir, lock_name): + """Test getting PID of lock holder.""" + # Acquire the lock + acquire_singleton_lock(temp_dir, lock_name) + + # Get holder PID + holder_pid = get_lock_holder_pid(temp_dir, lock_name) + assert holder_pid == os.getpid() + + def test_get_lock_holder_pid_no_lock(self, temp_dir, lock_name): + """Test getting PID when no lock exists.""" + holder_pid = get_lock_holder_pid(temp_dir, lock_name) + assert holder_pid is None + + def test_get_lock_holder_pid_stale_lock(self, temp_dir, lock_name): + """Test getting PID when lock is stale.""" + # Create a stale lock file with a non-existent PID + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + with open(pid_file_path, "w") as f: + f.write("999999") # Non-existent PID + + holder_pid = get_lock_holder_pid(temp_dir, lock_name) + assert holder_pid is None + + def test_cleanup_stale_lock_success(self, temp_dir, lock_name): + """Test successful cleanup of stale lock.""" + # Create a stale lock file with a non-existent PID + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + with open(pid_file_path, "w") as f: + f.write("999999") # Non-existent PID + + # Clean 
up stale lock + result = cleanup_stale_lock(temp_dir, lock_name) + assert result is True + + # Verify lock files were removed + assert not lock_file_path.exists() + assert not pid_file_path.exists() + + def test_cleanup_stale_lock_no_cleanup_needed(self, temp_dir, lock_name): + """Test cleanup when no stale lock exists.""" + result = cleanup_stale_lock(temp_dir, lock_name) + assert result is False + + def test_cleanup_stale_lock_active_lock(self, temp_dir, lock_name): + """Test cleanup when lock is actively held.""" + # Acquire the lock + acquire_singleton_lock(temp_dir, lock_name) + + # Try to clean up active lock + result = cleanup_stale_lock(temp_dir, lock_name) + assert result is False + + # Verify lock files still exist + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + assert lock_file_path.exists() + assert pid_file_path.exists() + + def test_concurrent_lock_acquisition_race_condition(self, temp_dir, lock_name): + """Test race condition handling in concurrent lock acquisition.""" + # This test simulates a race condition by creating the PID file + # after the first process checks but before it creates its own + + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + # Create PID file as if another process got there first + with open(pid_file_path, "w") as f: + f.write("999999") + + # Try to acquire lock - should detect the existing PID file and return that PID + # Note: The function automatically cleans up stale locks, so it will succeed + result = acquire_singleton_lock(temp_dir, lock_name) + assert result is None # Should succeed after cleaning up stale lock + + def test_atomic_file_creation(self, temp_dir, lock_name): + """Test that PID file creation is atomic.""" + lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) + + # Acquire lock + acquire_singleton_lock(temp_dir, lock_name) + + # Verify no temporary file exists + temp_pid_file = pid_file_path.with_suffix(".tmp") + assert not 
temp_pid_file.exists() + + # Verify final PID file exists and contains correct PID + assert pid_file_path.exists() + with open(pid_file_path, "r") as f: + stored_pid = int(f.read().strip()) + assert stored_pid == os.getpid() + + def test_multiple_lock_names_independence(self, temp_dir): + """Test that different lock names are independent.""" + lock_name1 = "lock1" + lock_name2 = "lock2" + + # Acquire first lock + result1 = acquire_singleton_lock(temp_dir, lock_name1) + assert result1 is None + + # Acquire second lock (should succeed since different name) + result2 = acquire_singleton_lock(temp_dir, lock_name2) + assert result2 is None + + # Verify both locks are held + assert is_lock_held(temp_dir, lock_name1) is True + assert is_lock_held(temp_dir, lock_name2) is True + + # Verify lock files exist for both + lock_file1, pid_file1 = get_lock_file_paths(temp_dir, lock_name1) + lock_file2, pid_file2 = get_lock_file_paths(temp_dir, lock_name2) + assert lock_file1.exists() + assert pid_file1.exists() + assert lock_file2.exists() + assert pid_file2.exists() diff --git a/tests/utils/test_singleton_lock_multiprocessing.py b/tests/utils/test_singleton_lock_multiprocessing.py new file mode 100644 index 00000000..963dbc01 --- /dev/null +++ b/tests/utils/test_singleton_lock_multiprocessing.py @@ -0,0 +1,606 @@ +""" +Multiprocessing tests for the singleton lock functionality. + +This module tests the file-based singleton lock mechanism using actual +multiprocessing to simulate real concurrent scenarios. 
+""" + +import multiprocessing +import os +import tempfile +import time +from pathlib import Path +from typing import Optional + +import pytest + +from eval_protocol.utils.singleton_lock import ( + acquire_singleton_lock, + is_lock_held, + release_singleton_lock, +) + + +def worker_process_acquire_lock(base_dir: Path, lock_name: str, result_queue: multiprocessing.Queue, worker_id: int): + """Worker process that tries to acquire a lock.""" + try: + print(f"Worker {worker_id} (PID: {os.getpid()}) attempting to acquire lock") + result = acquire_singleton_lock(base_dir, lock_name) + result_queue.put((worker_id, result)) + print(f"Worker {worker_id} (PID: {os.getpid()}) got result: {result}") + + # If we got the lock, hold it for a bit then release + if result is None: + print(f"Worker {worker_id} (PID: {os.getpid()}) acquired lock, holding for 2 seconds") + time.sleep(2) + release_singleton_lock(base_dir, lock_name) + print(f"Worker {worker_id} (PID: {os.getpid()}) released lock") + except Exception as e: + print(f"Worker {worker_id} (PID: {os.getpid()}) got exception: {e}") + result_queue.put((worker_id, f"ERROR: {e}")) + + +def worker_process_check_lock(base_dir: Path, lock_name: str, result_queue: multiprocessing.Queue, worker_id: int): + """Worker process that checks if a lock is held.""" + try: + print(f"Worker {worker_id} (PID: {os.getpid()}) checking if lock is held") + is_held = is_lock_held(base_dir, lock_name) + result_queue.put((worker_id, is_held)) + print(f"Worker {worker_id} (PID: {os.getpid()}) lock held: {is_held}") + except Exception as e: + print(f"Worker {worker_id} (PID: {os.getpid()}) got exception: {e}") + result_queue.put((worker_id, f"ERROR: {e}")) + + +def worker_process_hold_lock( + base_dir: Path, lock_name: str, result_queue: multiprocessing.Queue, worker_id: int, hold_time: float = 5.0 +): + """Worker process that acquires and holds a lock for a specified time.""" + try: + print(f"Worker {worker_id} (PID: {os.getpid()}) attempting to 
acquire lock") + result = acquire_singleton_lock(base_dir, lock_name) + + if result is None: + print(f"Worker {worker_id} (PID: {os.getpid()}) acquired lock, holding for {hold_time} seconds") + result_queue.put((worker_id, "ACQUIRED")) + time.sleep(hold_time) + release_singleton_lock(base_dir, lock_name) + print(f"Worker {worker_id} (PID: {os.getpid()}) released lock") + result_queue.put((worker_id, "RELEASED")) + else: + print(f"Worker {worker_id} (PID: {os.getpid()}) failed to acquire lock, holder PID: {result}") + result_queue.put((worker_id, f"FAILED: {result}")) + except Exception as e: + print(f"Worker {worker_id} (PID: {os.getpid()}) got exception: {e}") + result_queue.put((worker_id, f"ERROR: {e}")) + + +class TestSingletonLockMultiprocessing: + """Test cases for singleton lock functionality using multiprocessing.""" + + @pytest.fixture + def temp_dir(self): + """Create a temporary directory for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + yield Path(temp_dir) + + @pytest.fixture + def lock_name(self): + """Test lock name.""" + return "test_multiprocessing_lock" + + def test_multiprocessing_lock_acquisition_sequential(self, temp_dir, lock_name): + """Test that only one process can acquire the lock at a time.""" + # Use multiprocessing.Manager() for better compatibility + manager = multiprocessing.Manager() + result_queue = manager.Queue() + + # Start multiple processes trying to acquire the lock + processes = [] + num_workers = 3 + + for i in range(num_workers): + p = multiprocessing.Process( + target=worker_process_acquire_lock, + args=(temp_dir, lock_name, result_queue, i), + daemon=False, # Don't use daemon processes + ) + processes.append(p) + p.start() + time.sleep(0.1) # Small delay to create race condition + + # Wait for all processes to complete + for p in processes: + p.join(timeout=10) + if p.is_alive(): + p.terminate() + p.join() + + # Collect results + results = [] + while not result_queue.empty(): + 
results.append(result_queue.get()) + + # Sort by worker ID for consistent ordering + results.sort(key=lambda x: x[0]) + + print(f"Results: {results}") + + # Verify that exactly one process acquired the lock (got None) + acquired_results = [r for r in results if r[1] is None] + assert len(acquired_results) == 1, f"Expected exactly one process to acquire lock, got {len(acquired_results)}" + + # Verify that other processes got the PID of the lock holder + acquired_worker_id = acquired_results[0][0] + for worker_id, result in results: + if worker_id != acquired_worker_id: + assert isinstance(result, int), f"Expected PID, got {result}" + assert result > 0, f"Expected positive PID, got {result}" + + def test_multiprocessing_lock_holder_detection(self, temp_dir, lock_name): + """Test that processes can detect when a lock is held.""" + manager = multiprocessing.Manager() + result_queue = manager.Queue() + + # Start a process that holds the lock + holder_process = multiprocessing.Process( + target=worker_process_hold_lock, + args=(temp_dir, lock_name, result_queue, 0, 3.0), # Hold for 3 seconds + daemon=False, + ) + holder_process.start() + + # Wait a moment for the holder to acquire the lock + time.sleep(0.5) + + # Start multiple processes checking if the lock is held + checker_processes = [] + num_checkers = 3 + + for i in range(num_checkers): + p = multiprocessing.Process( + target=worker_process_check_lock, + args=(temp_dir, lock_name, result_queue, i + 100), # Use different IDs + daemon=False, + ) + checker_processes.append(p) + p.start() + + # Wait for all processes to complete + holder_process.join(timeout=10) + for p in checker_processes: + p.join(timeout=5) + + # Collect results + results = [] + while not result_queue.empty(): + results.append(result_queue.get()) + + print(f"Results: {results}") + + # Verify that the holder process acquired the lock + holder_results = [r for r in results if r[0] == 0] + assert len(holder_results) >= 1, "Holder process should have 
reported acquiring the lock" + + # Verify that checker processes detected the lock as held + checker_results = [r for r in results if r[0] >= 100] + for worker_id, is_held in checker_results: + assert is_held is True, f"Checker {worker_id} should have detected lock as held" + + def test_multiprocessing_lock_cleanup_after_process_termination(self, temp_dir, lock_name): + """Test that locks are properly cleaned up when processes terminate.""" + manager = multiprocessing.Manager() + result_queue = manager.Queue() + + # Start a process that holds the lock + holder_process = multiprocessing.Process( + target=worker_process_hold_lock, + args=(temp_dir, lock_name, result_queue, 0, 10.0), # Hold for 10 seconds + daemon=False, + ) + holder_process.start() + + # Wait for the holder to acquire the lock and check results + time.sleep(1.0) + + # Check if the process actually acquired the lock + results = [] + while not result_queue.empty(): + results.append(result_queue.get()) + + # Look for the ACQUIRED message + acquired_results = [r for r in results if r[1] == "ACQUIRED"] + if not acquired_results: + # If no acquisition happened, this test is not meaningful + holder_process.terminate() + holder_process.join(timeout=5) + pytest.skip("Process did not acquire lock, skipping cleanup test") + + # Verify the lock is held + assert is_lock_held(temp_dir, lock_name) is True + + # Terminate the holder process + holder_process.terminate() + holder_process.join(timeout=5) + + # Wait a moment for cleanup + time.sleep(0.5) + + # Verify the lock is no longer held + assert is_lock_held(temp_dir, lock_name) is False + + # Try to acquire the lock - should succeed + result = acquire_singleton_lock(temp_dir, lock_name) + assert result is None, "Should be able to acquire lock after process termination" + + # Clean up + release_singleton_lock(temp_dir, lock_name) + + def test_multiprocessing_daemon_vs_non_daemon(self, temp_dir, lock_name): + """Test lock behavior with daemon vs non-daemon 
processes.""" + manager = multiprocessing.Manager() + result_queue = manager.Queue() + + # Test with daemon=True + print("Testing with daemon=True") + daemon_process = multiprocessing.Process( + target=worker_process_hold_lock, args=(temp_dir, lock_name, result_queue, 0, 2.0), daemon=True + ) + daemon_process.start() + time.sleep(0.5) + + # Check if lock is held + is_held_daemon = is_lock_held(temp_dir, lock_name) + print(f"Lock held with daemon process: {is_held_daemon}") + + daemon_process.join(timeout=5) + + # Test with daemon=False + print("Testing with daemon=False") + non_daemon_process = multiprocessing.Process( + target=worker_process_hold_lock, args=(temp_dir, lock_name, result_queue, 1, 2.0), daemon=False + ) + non_daemon_process.start() + time.sleep(0.5) + + # Check if lock is held + is_held_non_daemon = is_lock_held(temp_dir, lock_name) + print(f"Lock held with non-daemon process: {is_held_non_daemon}") + + non_daemon_process.join(timeout=5) + + # Both should work the same way for lock acquisition + assert ( + is_held_daemon == is_held_non_daemon + ), "Lock behavior should be the same for daemon and non-daemon processes" + + def test_multiprocessing_concurrent_acquisition_race_condition(self, temp_dir, lock_name): + """Test race condition handling with multiple processes trying to acquire simultaneously.""" + manager = multiprocessing.Manager() + result_queue = manager.Queue() + + # Start multiple processes simultaneously + processes = [] + num_workers = 5 + + # Start all processes at nearly the same time + for i in range(num_workers): + p = multiprocessing.Process( + target=worker_process_acquire_lock, args=(temp_dir, lock_name, result_queue, i), daemon=False + ) + processes.append(p) + + # Start all processes with minimal delay + for p in processes: + p.start() + + # Wait for all processes to complete + for p in processes: + p.join(timeout=10) + if p.is_alive(): + p.terminate() + p.join() + + # Collect results + results = [] + while not 
result_queue.empty(): + results.append(result_queue.get()) + + print(f"Race condition test results: {results}") + + # Verify that exactly one process acquired the lock + acquired_results = [r for r in results if r[1] is None] + assert ( + len(acquired_results) == 1 + ), f"Expected exactly one process to acquire lock in race condition, got {len(acquired_results)}" + + # Verify that other processes got valid PIDs + acquired_worker_id = acquired_results[0][0] + for worker_id, result in results: + if worker_id != acquired_worker_id: + assert isinstance(result, int), f"Expected PID, got {result}" + assert result > 0, f"Expected positive PID, got {result}" + + def test_multiprocessing_lock_independence(self, temp_dir): + """Test that different lock names are independent across processes.""" + lock_name1 = "lock1" + lock_name2 = "lock2" + + manager = multiprocessing.Manager() + result_queue = manager.Queue() + + # Start processes trying to acquire different locks + process1 = multiprocessing.Process( + target=worker_process_acquire_lock, args=(temp_dir, lock_name1, result_queue, 1), daemon=False + ) + process2 = multiprocessing.Process( + target=worker_process_acquire_lock, args=(temp_dir, lock_name2, result_queue, 2), daemon=False + ) + + process1.start() + process2.start() + + process1.join(timeout=5) + process2.join(timeout=5) + + # Collect results + results = [] + while not result_queue.empty(): + results.append(result_queue.get()) + + print(f"Lock independence test results: {results}") + + # Both processes should have acquired their respective locks + assert len(results) == 2, "Expected results from both processes" + for worker_id, result in results: + assert result is None, f"Process {worker_id} should have acquired its lock" + + def test_daemon_off_process_survives_parent_termination(self, temp_dir, lock_name): + """Test that a daemon=Off process continues to run and hold the lock when parent is killed.""" + import signal + import subprocess + import sys + + # 
Create a script that will be run as a separate process + script_content = f''' +import multiprocessing +import os +import sys +import time +from pathlib import Path +from typing import Optional, Tuple + +# Copy the singleton lock functions directly to avoid import issues +def get_lock_file_paths(base_dir: Path, lock_name: str) -> Tuple[Path, Path]: + lock_file_path = base_dir / f"{{lock_name}}.lock" + pid_file_path = base_dir / f"{{lock_name}}.pid" + return lock_file_path, pid_file_path + +def is_process_running(pid: int) -> bool: + try: + os.kill(pid, 0) + return True + except OSError: + return False + +def acquire_singleton_lock(base_dir: Path, lock_name: str) -> Optional[int]: + lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) + + # First, check if PID file exists and contains a running process + if pid_file_path.exists(): + try: + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + # Check if the process is still running + try: + os.kill(pid, 0) + # Process is running, we can't acquire the lock + return pid + except OSError: + # Process is not running, clean up stale files + pass + except (IOError, OSError): + pass + + # Try to create the PID file atomically + temp_pid_file = None + try: + # Use atomic file creation + temp_pid_file = pid_file_path.with_suffix(".tmp") + with open(temp_pid_file, "w") as temp_file: + temp_file.write(str(os.getpid())) + temp_file.flush() + os.fsync(temp_file.fileno()) + + # Atomically move the temp file to the final location + temp_pid_file.rename(pid_file_path) + + # Create the lock file to indicate we have the lock + with open(lock_file_path, "w") as lock_file: + lock_file.write(str(os.getpid())) + lock_file.flush() + os.fsync(lock_file.fileno()) + + return None # Successfully acquired lock + + except (IOError, OSError) as e: + # Failed to acquire lock + try: + if temp_pid_file and temp_pid_file.exists(): + temp_pid_file.unlink() + 
except (IOError, OSError): + pass + + # Check if someone else got the lock + if pid_file_path.exists(): + try: + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + return int(content) + except (IOError, OSError): + pass + + return 999999 # Dummy PID to indicate lock is held + +def release_singleton_lock(base_dir: Path, lock_name: str) -> None: + lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) + try: + if pid_file_path.exists(): + pid_file_path.unlink() + if lock_file_path.exists(): + lock_file_path.unlink() + except (IOError, OSError): + pass + +def is_lock_held(base_dir: Path, lock_name: str) -> bool: + _, pid_file_path = get_lock_file_paths(base_dir, lock_name) + + try: + if pid_file_path.exists(): + with open(pid_file_path, "r") as pid_file: + content = pid_file.read().strip() + if content.isdigit(): + pid = int(content) + if is_process_running(pid): + return True + except (IOError, OSError): + pass + + return False + +def child_process_holder(base_dir, lock_name, pid_file): + """Child process that acquires and holds a lock.""" + try: + # Write our PID to a file so parent can read it + with open(pid_file, 'w') as f: + f.write(str(os.getpid())) + + result = acquire_singleton_lock(Path(base_dir), lock_name) + + if result is None: + # Keep the lock held by sleeping in a loop + while True: + time.sleep(1) + # Verify we still hold the lock + if not is_lock_held(Path(base_dir), lock_name): + break + else: + sys.exit(1) + except Exception as e: + sys.exit(1) + +if __name__ == "__main__": + base_dir = "{temp_dir}" + lock_name = "{lock_name}" + pid_file = "{temp_dir}/child_pid.txt" + + # Start child process with daemon=False + child = multiprocessing.Process( + target=child_process_holder, + args=(base_dir, lock_name, pid_file), + daemon=False + ) + child.start() + + # Wait for child to start and acquire lock + time.sleep(2) + + # Verify child is still running + if not child.is_alive(): + 
sys.exit(1) + + # Write parent PID to file + with open("{temp_dir}/parent_pid.txt", 'w') as f: + f.write(str(os.getpid())) + + # Sleep indefinitely - parent will be killed by test + while True: + time.sleep(1) +''' + + # Write the script to a temporary file + script_path = temp_dir / "test_daemon_off_script.py" + with open(script_path, "w") as f: + f.write(script_content) + + # Start the script as a separate process using uvx to ensure correct environment + process = subprocess.Popen( + ["uvx", "python", str(script_path)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + + # Wait for the script to start and child process to acquire lock + time.sleep(3) + + # Read the PIDs + parent_pid_file = temp_dir / "parent_pid.txt" + child_pid_file = temp_dir / "child_pid.txt" + + # Wait for PID files to be created + for _ in range(10): + if parent_pid_file.exists() and child_pid_file.exists(): + break + time.sleep(0.5) + else: + process.terminate() + process.wait(timeout=5) + pytest.fail("PID files were not created in time") + + with open(parent_pid_file, "r") as f: + parent_pid = int(f.read().strip()) + with open(child_pid_file, "r") as f: + child_pid = int(f.read().strip()) + + print(f"Parent PID: {parent_pid}, Child PID: {child_pid}") + + # Verify the lock is held by the child process + assert is_lock_held(temp_dir, lock_name) is True, "Lock should be held by child process" + + # Try to acquire the lock - should fail and return child's PID + result = acquire_singleton_lock(temp_dir, lock_name) + assert result == child_pid, f"Should get child PID {child_pid}, got {result}" + + # Kill the parent process + print(f"Killing parent process {parent_pid}") + os.kill(parent_pid, signal.SIGTERM) + + # Wait a moment for the parent to terminate + time.sleep(2) + + # Verify the child process is still running + try: + # Check if child process is still alive + os.kill(child_pid, 0) # This will raise OSError if process doesn't exist + child_still_alive = True + except 
OSError: + child_still_alive = False + + assert child_still_alive, "Child process should still be alive after parent termination" + + # Verify the lock is still held by the child process + assert is_lock_held(temp_dir, lock_name) is True, "Lock should still be held by child process" + + # Try to acquire the lock again - should still fail and return child's PID + result = acquire_singleton_lock(temp_dir, lock_name) + assert result == child_pid, f"Should still get child PID {child_pid}, got {result}" + + # Clean up by killing the child process + print(f"Killing child process {child_pid}") + os.kill(child_pid, signal.SIGTERM) + + # Wait for child to terminate + time.sleep(2) + + # Verify the lock is no longer held + assert is_lock_held(temp_dir, lock_name) is False, "Lock should be released after child termination" + + # Clean up the subprocess + process.terminate() + process.wait(timeout=5) + + +if __name__ == "__main__": + # Run the tests directly + pytest.main([__file__, "-v"]) From ca126dd1ca887150069bff8661e76f15e8aabc29 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 14:20:06 -0700 Subject: [PATCH 17/31] works --- eval_protocol/logging_utils.py | 29 ++- eval_protocol/pytest/eval_watcher.py | 296 ++++++++++++++++----------- 2 files changed, 200 insertions(+), 125 deletions(-) diff --git a/eval_protocol/logging_utils.py b/eval_protocol/logging_utils.py index 147b96d0..36519a7c 100644 --- a/eval_protocol/logging_utils.py +++ b/eval_protocol/logging_utils.py @@ -8,6 +8,7 @@ import logging import os +import sys from pathlib import Path from typing import Optional @@ -41,6 +42,11 @@ def setup_logger( # Create logger logger = logging.getLogger(name) + + # Only configure if not already configured (has handlers and proper level) + if logger.handlers and logger.level != logging.NOTSET: + return logger + logger.setLevel(level) # Clear existing handlers to avoid duplicates @@ -50,13 +56,13 @@ def setup_logger( file_formatter = logging.Formatter("%(asctime)s 
- %(name)s - %(levelname)s - %(message)s") console_formatter = logging.Formatter("%(levelname)s - %(message)s") - # Console handler - console_handler = logging.StreamHandler() + # Console handler - explicitly write to sys.stdout + console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(console_level) console_handler.setFormatter(console_formatter) logger.addHandler(console_handler) - # File handler (if log_file specified) + # File handler (if log_file specified) - explicitly write to file only if log_file: log_file_path = logs_dir / log_file file_handler = logging.FileHandler(log_file_path) @@ -64,6 +70,9 @@ def setup_logger( file_handler.setFormatter(file_formatter) logger.addHandler(file_handler) + # Prevent propagation to avoid duplicate logging + logger.propagate = False + return logger @@ -81,7 +90,19 @@ def get_logger(name: str) -> logging.Logger: # If logger doesn't have handlers, set it up with defaults if not logger.handlers: - logger = setup_logger(name, f"{name}.log") + # For eval_watcher, check if running in daemon mode + if name == "eval_watcher": + import sys + + # Check if running in daemon mode (subprocess) + if "--daemon" in sys.argv: + # Subprocess: log to file only + logger = setup_logger(name, f"{name}.log", console_level=logging.CRITICAL) + else: + # Top-level: log to console only + logger = setup_logger(name, None) + else: + logger = setup_logger(name, f"{name}.log") return logger diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py index 1be3fc4a..96b3c6ac 100644 --- a/eval_protocol/pytest/eval_watcher.py +++ b/eval_protocol/pytest/eval_watcher.py @@ -11,85 +11,37 @@ import argparse import fcntl -import multiprocessing import os +import signal +import subprocess +import sys import time from pathlib import Path -from typing import List, Optional - -# Add freeze_support for multiprocessing compatibility -if __name__ == "__main__": - multiprocessing.freeze_support() +from typing import 
Any, List, Optional from eval_protocol.dataset_logger import default_logger from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir from eval_protocol.logging_utils import get_logger from eval_protocol.models import EvaluationRow +from eval_protocol.utils.singleton_lock import ( + acquire_singleton_lock, + get_lock_file_paths, + get_lock_holder_pid, + is_lock_held, + is_process_running, + release_singleton_lock, +) # Initialize logger logger = get_logger("eval_watcher") +# Lock configuration +LOCK_NAME = "eval_watcher" -def get_lock_file_paths() -> tuple[Path, Path]: - """Get the lock file paths using the same directory discovery logic.""" - eval_protocol_dir = Path(find_eval_protocol_dir()) - lock_file_path = eval_protocol_dir / "watcher.lock" - pid_file_path = eval_protocol_dir / "watcher.pid" - return lock_file_path, pid_file_path - - -def acquire_singleton_lock() -> Optional[int]: - """ - Try to acquire the singleton lock. Returns the PID of the current holder if failed. 
- - Returns: - None if lock acquired successfully, otherwise the PID of the current holder - """ - lock_file_path, pid_file_path = get_lock_file_paths() - - try: - # Try to acquire an exclusive lock on the lock file - with open(lock_file_path, "w") as lock_file: - fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) - - # Write our PID to the PID file - with open(pid_file_path, "w") as pid_file: - pid_file.write(str(os.getpid())) - - return None # Successfully acquired lock - - except (IOError, OSError): - # Lock is held by another process - try: - if pid_file_path.exists(): - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - return int(content) - except (IOError, OSError): - pass - return None - - -def release_singleton_lock(): - """Release the singleton lock.""" - lock_file_path, pid_file_path = get_lock_file_paths() - try: - if pid_file_path.exists(): - pid_file_path.unlink() - if lock_file_path.exists(): - lock_file_path.unlink() - except (IOError, OSError): - pass - -def is_process_running(pid: int) -> bool: - """Check if a process is still running.""" - try: - os.kill(pid, 0) - return True - except OSError: - return False +def get_eval_protocol_dir() -> Path: + """Get the evaluation protocol directory for lock files.""" + return Path(find_eval_protocol_dir()) def find_running_evaluations() -> List[EvaluationRow]: @@ -156,12 +108,27 @@ def check_and_update_terminated_evaluations() -> int: return terminated_count +def signal_handler(signum, frame): + """Handle termination signals gracefully.""" + signal_name = signal.Signals(signum).name + logger.info(f"\nšŸ›‘ Evaluation watcher received signal {signum} (Signal: {signal_name})") + if signum == signal.SIGTERM: + logger.info("SIGTERM received: ignoring to avoid exit during VSCode pytest debugging.") + return + logger.info("Shutting down gracefully.") + sys.exit(0) + + def run_watcher_loop(check_interval: float) -> None: """Main monitoring loop.""" 
logger.info(f"šŸ” Starting evaluation watcher (PID: {os.getpid()})") logger.info(f" Check interval: {check_interval} seconds") logger.info(" Monitoring all evaluation rows for terminated processes") + # Set up signal handlers for graceful shutdown + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + consecutive_empty_checks = 0 max_empty_checks = 3 @@ -194,39 +161,75 @@ def run_watcher_loop(check_interval: float) -> None: logger.info("šŸ” Evaluation watcher stopped") -def _start_watcher_process(check_interval: float) -> multiprocessing.Process: - """Start the watcher in a background process.""" - # Ensure we're not in a frozen state and multiprocessing is properly initialized - if multiprocessing.current_process().name != "MainProcess": - raise RuntimeError("Cannot start watcher process from within another process") +def _watcher_process_target(check_interval: float) -> None: + """Target function for the watcher process.""" + try: + # Detach from parent process group to become independent + try: + os.setsid() + except OSError: + # On Windows or if already detached, this might fail + pass - process = multiprocessing.Process(target=_watcher_process_main, args=(check_interval,), name="eval-watcher") - process.start() - return process + # Try to acquire the lock + current_holder_pid = acquire_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) + if current_holder_pid is not None: + # Another process is already running + logger.info(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") + return -def _watcher_process_main(check_interval: float) -> None: - """Main entry point for the watcher process - acquires lock and runs the loop.""" - # Try to acquire the lock in this process - current_holder_pid = acquire_singleton_lock() + # We acquired the lock, run the watcher loop + try: + run_watcher_loop(check_interval) + except SystemExit: + # Graceful shutdown + pass + finally: + # Always release the lock when 
we exit + release_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) - if current_holder_pid is not None: - # Another process is already running - logger.info(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") - return + except Exception as e: + logger.error(f"āŒ Error in watcher process: {e}") - # We acquired the lock, run the watcher loop + +def _start_watcher_process(check_interval: float) -> Optional[int]: + """Start the watcher in a completely independent background process using subprocess. + + We use subprocess.Popen with start_new_session=True instead of multiprocessing.Process + because VSCode's test debugger kill button sends SIGTERM/SIGKILL to the entire process + tree, including child processes. By using subprocess with a new session, we create + a truly independent process that survives when the parent pytest process is killed. + """ + + # Use subprocess to create a completely independent process + # This ensures the process survives even if the parent pytest process is killed try: - run_watcher_loop(check_interval) - finally: - # Always release the lock when we exit - release_singleton_lock() + # Get the current script path + current_script = __file__ + + # Create the subprocess with complete independence + process = subprocess.Popen( + [sys.executable, current_script, "--daemon", "--check-interval", str(check_interval)], + # These flags make the process completely independent + start_new_session=True, # Creates a new session, detaching from parent + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + stdin=subprocess.DEVNULL, + ) + + return process.pid + except Exception as e: + logger.error(f"āŒ Failed to start watcher process: {e}") + return None -def ensure_singleton_watcher(check_interval: float = 5.0) -> bool: + +def ensure_singleton_watcher(check_interval: float = 2.0) -> bool: """ Ensure the singleton EvaluationWatcher instance exists and is running. 
This function is OS-level global - only one watcher will run across all processes. + The watcher runs as a completely independent process that survives if the main process dies. Args: check_interval: How often to check for terminated processes (seconds) @@ -234,14 +237,37 @@ def ensure_singleton_watcher(check_interval: float = 5.0) -> bool: Returns: True if watcher was started successfully, False if another watcher is already running """ - # Check if we're already in a subprocess - if multiprocessing.current_process().name != "MainProcess": + + # Check if a watcher is already running before attempting to start a new one + if is_watcher_running(): + logger.info("šŸ” Evaluation watcher is already running") return False - # Start the watcher in a background process + # Start the watcher in a completely independent background process try: - process = _start_watcher_process(check_interval) - logger.info(f"šŸ” Started evaluation watcher in background process (PID: {process.pid})") + pid = _start_watcher_process(check_interval) + if pid is None: + logger.error("āŒ Failed to start evaluation watcher: process creation failed") + return False + + logger.info(f"šŸ” Started evaluation watcher in independent background process (PID: {pid})") + + # Spin until the watcher is running, or timeout after 10 seconds + timeout = 10.0 + interval = 0.1 + waited = 0.0 + while waited < timeout: + if is_watcher_running(): + break + time.sleep(interval) + waited += interval + else: + logger.error( + f"āŒ Watcher process (PID: {pid}) started but didn't acquire the lock after {timeout} seconds" + ) + return False + + # Don't wait for the process - let it run independently return True except Exception as e: logger.error(f"āŒ Failed to start evaluation watcher: {e}") @@ -250,36 +276,28 @@ def ensure_singleton_watcher(check_interval: float = 5.0) -> bool: def is_watcher_running() -> bool: """Check if the evaluation watcher is currently running.""" - current_holder_pid = 
acquire_singleton_lock() - if current_holder_pid is None: - # We acquired the lock, so no one else is running - release_singleton_lock() - return False - - # Check if the holder is still alive - assert current_holder_pid is not None # For type checker - is_alive = is_process_running(current_holder_pid) - if not is_alive: - # Clean up stale lock - release_singleton_lock() - - return is_alive + return is_lock_held(get_eval_protocol_dir(), LOCK_NAME) def get_watcher_pid() -> Optional[int]: """Get the PID of the currently running evaluation watcher.""" - _, pid_file_path = get_lock_file_paths() + return get_lock_holder_pid(get_eval_protocol_dir(), LOCK_NAME) + + +def stop_watcher() -> bool: + """Stop the currently running evaluation watcher.""" + pid = get_watcher_pid() + if pid is None: + logger.info("šŸ” No evaluation watcher is currently running") + return False + try: - if pid_file_path.exists(): - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - if is_process_running(pid): - return pid - except (IOError, OSError): - pass - return None + os.kill(pid, signal.SIGTERM) + logger.info(f"šŸ” Sent SIGTERM to evaluation watcher process {pid}") + return True + except OSError as e: + logger.error(f"āŒ Failed to stop evaluation watcher process {pid}: {e}") + return False def main(): @@ -293,14 +311,50 @@ def main(): default=1.0, help="How often to check for terminated processes (seconds, default: 1.0)", ) + parser.add_argument( + "--daemon", + action="store_true", + help="Run in daemon mode (internal use only)", + ) + parser.add_argument( + "--stop", + action="store_true", + help="Stop the currently running evaluation watcher", + ) args = parser.parse_args() - # Run the watcher directly (not as a background process) - run_watcher_loop(args.check_interval) + # Handle stop command + if args.stop: + stop_watcher() + return + + # If running in daemon mode, try to acquire the lock and run the watcher 
loop + if args.daemon: + logger.info(f"šŸ” Daemon mode: attempting to acquire lock (PID: {os.getpid()})") + # Try to acquire the lock in this process + current_holder_pid = acquire_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) + + if current_holder_pid is not None: + # Another process is already running + logger.info(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") + return + + logger.info(f"šŸ” Daemon mode: acquired lock successfully (PID: {os.getpid()})") + # We acquired the lock, run the watcher loop + try: + run_watcher_loop(args.check_interval) + except SystemExit: + # Graceful shutdown + pass + finally: + # Always release the lock when we exit + logger.info(f"šŸ” Daemon mode: releasing lock (PID: {os.getpid()})") + release_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) + else: + # Run the watcher directly (not as a background process) + run_watcher_loop(args.check_interval) if __name__ == "__main__": - # Ensure multiprocessing is properly initialized - multiprocessing.freeze_support() main() From b980964cb0639c895de982fb704d96a5cec3c955 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 14:33:23 -0700 Subject: [PATCH 18/31] Enhance JSON line error handling in load_jsonl function - Added regex-based extraction of "row_id" to provide more context in error messages when JSON parsing fails. This improvement aids in debugging by including the problematic row ID in the raised ValueError. 
--- eval_protocol/common_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/eval_protocol/common_utils.py b/eval_protocol/common_utils.py index 6596133d..e39f80d0 100644 --- a/eval_protocol/common_utils.py +++ b/eval_protocol/common_utils.py @@ -1,4 +1,5 @@ import json +import re from typing import Any, Dict, List @@ -20,5 +21,10 @@ def load_jsonl(file_path: str) -> List[Dict[str, Any]]: data.append(json.loads(line.strip())) except json.JSONDecodeError as e: print(f"Error parsing JSON line for file {file_path} at line {line_number}") + # Try to extract "row_id" from the failing line for error context. NOTE(review): re.search returns a Match object (or None), so interpolating row_id below prints its repr — use row_id.group(1) with a None check. + row_id_index = line.find("row_id") + if row_id_index != -1: + row_id = re.search(r'"row_id": (.*),', line[row_id_index:]) + raise ValueError(f"{e.msg} at line {line_number}: {line} ({row_id})") raise e return data From 598c12ac07daa46b68eba8f84f36d03a3754a03a Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 14:39:42 -0700 Subject: [PATCH 19/31] works! 
--- eval_protocol/pytest/eval_watcher.py | 34 +--------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py index 96b3c6ac..c2400115 100644 --- a/eval_protocol/pytest/eval_watcher.py +++ b/eval_protocol/pytest/eval_watcher.py @@ -130,7 +130,7 @@ def run_watcher_loop(check_interval: float) -> None: signal.signal(signal.SIGINT, signal_handler) consecutive_empty_checks = 0 - max_empty_checks = 3 + max_empty_checks = 30 try: while True: @@ -161,38 +161,6 @@ def run_watcher_loop(check_interval: float) -> None: logger.info("šŸ” Evaluation watcher stopped") -def _watcher_process_target(check_interval: float) -> None: - """Target function for the watcher process.""" - try: - # Detach from parent process group to become independent - try: - os.setsid() - except OSError: - # On Windows or if already detached, this might fail - pass - - # Try to acquire the lock - current_holder_pid = acquire_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) - - if current_holder_pid is not None: - # Another process is already running - logger.info(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") - return - - # We acquired the lock, run the watcher loop - try: - run_watcher_loop(check_interval) - except SystemExit: - # Graceful shutdown - pass - finally: - # Always release the lock when we exit - release_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) - - except Exception as e: - logger.error(f"āŒ Error in watcher process: {e}") - - def _start_watcher_process(check_interval: float) -> Optional[int]: """Start the watcher in a completely independent background process using subprocess. 
From 2f07296709ec630bc54f5967d426e4090e3ab162 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 14:51:54 -0700 Subject: [PATCH 20/31] Refactor evaluation_test.py to ensure singleton watcher is initialized - Moved the call to `ensure_singleton_watcher()` into the `evaluation_test` function to ensure the evaluation watcher is running before processing begins. This change enhances the reliability of the evaluation process by ensuring the watcher is active during execution. --- eval_protocol/pytest/evaluation_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index e5289316..f781dfc4 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -30,9 +30,6 @@ from ..common_utils import load_jsonl -# Ensure the evaluation watcher is running (OS-level singleton) -ensure_singleton_watcher() - def evaluation_test( *, @@ -200,6 +197,9 @@ def wrapper_body(**kwargs): eval_metadata = None all_results: List[EvaluationRow] = [] + # Ensure the evaluation watcher is running (OS-level singleton) + ensure_singleton_watcher() + try: # Handle dataset loading data: List[EvaluationRow] = [] From dab44da17d3ef0aaf09ce2bce576f48b3a8c0867 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 15:02:12 -0700 Subject: [PATCH 21/31] Update CI workflow to ignore test_eval_watcher.py in coverage reports - Added an ignore rule for `tests/test_eval_watcher.py` in the coverage command to streamline coverage reporting and focus on relevant tests. 
--- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0b21795..217e51b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,6 +92,7 @@ jobs: --ignore=tests/pytest/test_frozen_lake.py \ --ignore=tests/pytest/test_lunar_lander.py \ --ignore=tests/pytest/test_tau_bench_airline.py \ + --ignore=tests/test_eval_watcher.py \ --cov=eval_protocol --cov-append --cov-report=xml --cov-report=term-missing -v --durations=10 - name: Store coverage file From bf3786e30d9a3d6867403eda4f6f64a6cf8da7fc Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 15:19:55 -0700 Subject: [PATCH 22/31] Add signal handler to manage zombie processes in eval_watcher.py - Implemented a signal handler to automatically reap zombie child processes, preventing accumulation and potential resource leaks. - Enhanced process management by setting up the signal handler for SIGCHLD if available, ensuring better stability during evaluation execution. 
--- eval_protocol/pytest/eval_watcher.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py index c2400115..09fcb2ab 100644 --- a/eval_protocol/pytest/eval_watcher.py +++ b/eval_protocol/pytest/eval_watcher.py @@ -39,6 +39,25 @@ LOCK_NAME = "eval_watcher" +# Signal handler to automatically reap zombie processes +def _reap_zombies(signum, frame): + """Reap zombie child processes to prevent them from accumulating.""" + try: + while True: + # Wait for any child process, but don't block + pid, status = os.waitpid(-1, os.WNOHANG) + if pid == 0: # No more children to reap + break + except OSError: + # No child processes + pass + + +# Set up signal handler for SIGCHLD if available +if hasattr(signal, "SIGCHLD"): + signal.signal(signal.SIGCHLD, _reap_zombies) + + def get_eval_protocol_dir() -> Path: """Get the evaluation protocol directory for lock files.""" return Path(find_eval_protocol_dir()) @@ -184,6 +203,7 @@ def _start_watcher_process(check_interval: float) -> Optional[int]: stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, + close_fds=True, ) return process.pid From 4ff7912062bc4dba3baea97e75790b10ca5a0c50 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 15:23:38 -0700 Subject: [PATCH 23/31] move --- eval_protocol/{utils => }/singleton_lock.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename eval_protocol/{utils => }/singleton_lock.py (100%) diff --git a/eval_protocol/utils/singleton_lock.py b/eval_protocol/singleton_lock.py similarity index 100% rename from eval_protocol/utils/singleton_lock.py rename to eval_protocol/singleton_lock.py From cc0abb2375291dbf429f16b18a4f83bdda6afb8c Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 15:58:30 -0700 Subject: [PATCH 24/31] Enhance singleton lock functionality and file locking in LocalFSDatasetLoggerAdapter - Updated `is_process_running` to include a 
timeout parameter, allowing for more flexible process monitoring. - Implemented file locking mechanisms in `LocalFSDatasetLoggerAdapter` to prevent race conditions during logging operations, ensuring data integrity when multiple processes access log files. - Added methods for acquiring and releasing file locks, improving the robustness of the logging process. --- .../local_fs_dataset_logger_adapter.py | 107 +++- eval_protocol/pytest/eval_watcher.py | 55 +- eval_protocol/singleton_lock.py | 21 +- tests/test_eval_watcher.py | 488 ++++++++++++++++++ 4 files changed, 601 insertions(+), 70 deletions(-) create mode 100644 tests/test_eval_watcher.py diff --git a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py index 01760b0a..a8a976fa 100644 --- a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py @@ -1,13 +1,14 @@ import json import os -import shutil -import tempfile +import time from datetime import datetime, timezone +from pathlib import Path from typing import TYPE_CHECKING, List, Optional from eval_protocol.common_utils import load_jsonl from eval_protocol.dataset_logger.dataset_logger import DatasetLogger from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_datasets_dir +from eval_protocol.singleton_lock import acquire_singleton_lock, release_singleton_lock if TYPE_CHECKING: from eval_protocol.models import EvaluationRow @@ -15,7 +16,7 @@ class LocalFSDatasetLoggerAdapter(DatasetLogger): """ - Logger that stores logs in the local filesystem. + Logger that stores logs in the local filesystem with file locking to prevent race conditions. 
""" def __init__(self): @@ -39,6 +40,44 @@ def current_jsonl_path(self) -> str: """ return os.path.join(self.datasets_dir, f"{self.current_date}.jsonl") + def _acquire_file_lock(self, file_path: str, timeout: float = 30.0) -> bool: + """ + Acquire a lock for a specific file using the singleton lock mechanism. + + Args: + file_path: Path to the file to lock + timeout: Maximum time to wait for lock acquisition in seconds + + Returns: + True if lock was acquired, False if timeout occurred + """ + # Create a lock name based on the file path + lock_name = f"file_lock_{os.path.basename(file_path)}" + base_dir = Path(os.path.dirname(file_path)) + + start_time = time.time() + while time.time() - start_time < timeout: + result = acquire_singleton_lock(base_dir, lock_name) + if result is None: + # Successfully acquired lock + return True + else: + # Lock is held by another process, wait and retry + time.sleep(0.1) + + return False + + def _release_file_lock(self, file_path: str) -> None: + """ + Release the lock for a specific file. 
+ + Args: + file_path: Path to the file to unlock + """ + lock_name = f"file_lock_{os.path.basename(file_path)}" + base_dir = Path(os.path.dirname(file_path)) + release_singleton_lock(base_dir, lock_name) + def log(self, row: "EvaluationRow") -> None: """Log a row, updating existing row with same ID or appending new row.""" row_id = row.input_metadata.row_id @@ -49,25 +88,35 @@ def log(self, row: "EvaluationRow") -> None: if filename.endswith(".jsonl"): file_path = os.path.join(self.datasets_dir, filename) if os.path.exists(file_path): - with open(file_path, "r") as f: - lines = f.readlines() - - # Find the line with matching ID - for i, line in enumerate(lines): + if self._acquire_file_lock(file_path): try: - line_data = json.loads(line.strip()) - if line_data["input_metadata"]["row_id"] == row_id: - # Update existing row - lines[i] = row.model_dump_json(exclude_none=True) + os.linesep - with open(file_path, "w") as f: - f.writelines(lines) - return - except json.JSONDecodeError: - continue + with open(file_path, "r") as f: + lines = f.readlines() + + # Find the line with matching ID + for i, line in enumerate(lines): + try: + line_data = json.loads(line.strip()) + if line_data["input_metadata"]["row_id"] == row_id: + # Update existing row + lines[i] = row.model_dump_json(exclude_none=True) + os.linesep + with open(file_path, "w") as f: + f.writelines(lines) + return + except json.JSONDecodeError: + continue + finally: + self._release_file_lock(file_path) # If no existing row found, append new row to current file - with open(self.current_jsonl_path, "a") as f: - f.write(row.model_dump_json(exclude_none=True) + os.linesep) + if self._acquire_file_lock(self.current_jsonl_path): + try: + with open(self.current_jsonl_path, "a") as f: + f.write(row.model_dump_json(exclude_none=True) + os.linesep) + finally: + self._release_file_lock(self.current_jsonl_path) + else: + raise RuntimeError(f"Failed to acquire lock for log file {self.current_jsonl_path}") def read(self, 
row_id: Optional[str] = None) -> List["EvaluationRow"]: """Read rows from all JSONL files in the datasets directory. Also @@ -82,14 +131,18 @@ def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]: for filename in os.listdir(self.datasets_dir): if filename.endswith(".jsonl"): file_path = os.path.join(self.datasets_dir, filename) - data = load_jsonl(file_path) - for r in data: - row = EvaluationRow(**r) - if row.input_metadata.row_id not in existing_row_ids: - existing_row_ids.add(row.input_metadata.row_id) - else: - raise ValueError(f"Duplicate Row ID {row.input_metadata.row_id} already exists") - all_rows.append(row) + if self._acquire_file_lock(file_path): + try: + data = load_jsonl(file_path) + for r in data: + row = EvaluationRow(**r) + if row.input_metadata.row_id not in existing_row_ids: + existing_row_ids.add(row.input_metadata.row_id) + else: + raise ValueError(f"Duplicate Row ID {row.input_metadata.row_id} already exists") + all_rows.append(row) + finally: + self._release_file_lock(file_path) if row_id: # Filter by row_id if specified diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py index 09fcb2ab..464e807b 100644 --- a/eval_protocol/pytest/eval_watcher.py +++ b/eval_protocol/pytest/eval_watcher.py @@ -23,7 +23,7 @@ from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir from eval_protocol.logging_utils import get_logger from eval_protocol.models import EvaluationRow -from eval_protocol.utils.singleton_lock import ( +from eval_protocol.singleton_lock import ( acquire_singleton_lock, get_lock_file_paths, get_lock_holder_pid, @@ -213,7 +213,7 @@ def _start_watcher_process(check_interval: float) -> Optional[int]: return None -def ensure_singleton_watcher(check_interval: float = 2.0) -> bool: +def ensure_singleton_watcher(check_interval: float = 2.0) -> Optional[int]: """ Ensure the singleton EvaluationWatcher instance exists and is running. 
This function is OS-level global - only one watcher will run across all processes. @@ -223,43 +223,17 @@ def ensure_singleton_watcher(check_interval: float = 2.0) -> bool: check_interval: How often to check for terminated processes (seconds) Returns: - True if watcher was started successfully, False if another watcher is already running + PID of the watcher process if it was started successfully, None if it failed to start """ - # Check if a watcher is already running before attempting to start a new one if is_watcher_running(): logger.info("šŸ” Evaluation watcher is already running") - return False + return None # Start the watcher in a completely independent background process - try: - pid = _start_watcher_process(check_interval) - if pid is None: - logger.error("āŒ Failed to start evaluation watcher: process creation failed") - return False - - logger.info(f"šŸ” Started evaluation watcher in independent background process (PID: {pid})") - - # Spin until the watcher is running, or timeout after 10 seconds - timeout = 10.0 - interval = 0.1 - waited = 0.0 - while waited < timeout: - if is_watcher_running(): - break - time.sleep(interval) - waited += interval - else: - logger.error( - f"āŒ Watcher process (PID: {pid}) started but didn't acquire the lock after {timeout} seconds" - ) - return False - - # Don't wait for the process - let it run independently - return True - except Exception as e: - logger.error(f"āŒ Failed to start evaluation watcher: {e}") - return False + pid = _start_watcher_process(check_interval) + logger.info(f"šŸ” Started evaluation watcher in independent background process (PID: {pid})") + return pid def is_watcher_running() -> bool: @@ -267,9 +241,16 @@ def is_watcher_running() -> bool: return is_lock_held(get_eval_protocol_dir(), LOCK_NAME) -def get_watcher_pid() -> Optional[int]: - """Get the PID of the currently running evaluation watcher.""" - return get_lock_holder_pid(get_eval_protocol_dir(), LOCK_NAME) +def 
get_watcher_pid(timeout: float = 10.0) -> Optional[int]: + """Get the PID of the currently running evaluation watcher. Tries for 10 seconds.""" + interval = 0.1 + started = time.time() + while time.time() - started < timeout: + pid = get_lock_holder_pid(get_eval_protocol_dir(), LOCK_NAME) + if pid is not None: + return pid + time.sleep(interval) + return None def stop_watcher() -> bool: @@ -280,7 +261,7 @@ def stop_watcher() -> bool: return False try: - os.kill(pid, signal.SIGTERM) + os.kill(pid, signal.SIGKILL) logger.info(f"šŸ” Sent SIGTERM to evaluation watcher process {pid}") return True except OSError as e: diff --git a/eval_protocol/singleton_lock.py b/eval_protocol/singleton_lock.py index ccf6ce30..3715047d 100644 --- a/eval_protocol/singleton_lock.py +++ b/eval_protocol/singleton_lock.py @@ -13,6 +13,7 @@ """ import os +import time from pathlib import Path from typing import Optional, Tuple @@ -124,7 +125,7 @@ def release_singleton_lock(base_dir: Path, lock_name: str) -> None: pass -def is_process_running(pid: int) -> bool: +def is_process_running(pid: int, timeout: float = 10.0) -> bool: """ Check if a process is still running. @@ -134,11 +135,19 @@ def is_process_running(pid: int) -> bool: Returns: True if the process is running, False otherwise """ - try: - os.kill(pid, 0) - return True - except OSError: - return False + start = time.time() + + def _is_process_running(pid: int) -> bool: + try: + os.kill(pid, 0) + return True + except OSError: + return False + + while time.time() - start < timeout: + if not _is_process_running(pid): + return False + return True def is_lock_held(base_dir: Path, lock_name: str) -> bool: diff --git a/tests/test_eval_watcher.py b/tests/test_eval_watcher.py new file mode 100644 index 00000000..ec45424e --- /dev/null +++ b/tests/test_eval_watcher.py @@ -0,0 +1,488 @@ +#!/usr/bin/env python3 +""" +Tests for the evaluation watcher functionality. + +This module tests: +1. 
Singleton behavior - ensuring only one watcher can run at a time +2. Process termination detection - ensuring evaluations are updated to stopped when processes die +""" + +import os +import signal +import subprocess +import sys +import tempfile +import time +from pathlib import Path +from typing import Optional +from unittest.mock import patch + +import pytest + +from eval_protocol.dataset_logger import default_logger +from eval_protocol.logging_utils import get_logger +from eval_protocol.models import EvalMetadata, EvaluationRow, Message +from eval_protocol.pytest.eval_watcher import ( + ensure_singleton_watcher, + get_watcher_pid, + is_watcher_running, + stop_watcher, +) +from eval_protocol.singleton_lock import ( + acquire_singleton_lock, + is_process_running, + release_singleton_lock, +) + +# Initialize logger +logger = get_logger("test_eval_watcher") + + +class TestEvalWatcherSingleton: + """Test that the evaluation watcher behaves as a singleton.""" + + def test_singleton_behavior(self): + """Test that only one watcher can run at a time.""" + # stop any existing watcher + stop_watcher() + + # Check if watcher is already running (likely due to evaluation_test.py import) + initial_running = is_watcher_running() + + assert not initial_running, "No watcher should be running" + + # Try to start a new watcher + result = ensure_singleton_watcher(check_interval=1.0) + + assert isinstance(result, int), "Should start watcher when none is running" + assert is_watcher_running(), "Watcher should be running" + current_pid = get_watcher_pid() + assert current_pid is not None, "Should get PID of running watcher" + + def test_singleton_lock_cleanup(self): + """Test that singleton lock is properly cleaned up when watcher stops.""" + + ensure_singleton_watcher(check_interval=1.0) + + assert is_watcher_running(), "Watcher should be running" + + # Get current PID + original_pid = get_watcher_pid() + assert original_pid is not None + + # Stop the watcher using SIGKILL (since 
SIGTERM is ignored) + try: + os.kill(original_pid, signal.SIGABRT) + logger.info(f"šŸ” Sent SIGKILL to evaluation watcher process {original_pid}") + except OSError as e: + logger.error(f"āŒ Failed to stop evaluation watcher process {original_pid}: {e}") + pytest.skip("Could not kill watcher process") + + # Wait longer for cleanup - the watcher process needs time to exit + max_wait = 10.0 + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + if not is_watcher_running(): + break + time.sleep(wait_interval) + waited += wait_interval + + # Verify lock is released + assert not is_watcher_running(), "Watcher should no longer be running" + pid = get_watcher_pid() + assert pid is None, "Should not have a PID after stopping" + + def test_multiple_start_stop_cycles(self): + """Test multiple start/stop cycles work correctly.""" + for i in range(2): # Reduced cycles to avoid interfering with other tests + ensure_singleton_watcher(check_interval=1.0) + # Get current PID + current_pid = get_watcher_pid() + assert current_pid is not None + + # Stop watcher using SIGKILL (since SIGTERM is ignored) + try: + os.kill(current_pid, signal.SIGKILL) + logger.info(f"šŸ” Sent SIGKILL to evaluation watcher process {current_pid}") + except OSError as e: + logger.error(f"āŒ Failed to stop evaluation watcher process {current_pid}: {e}") + pytest.skip("Could not kill watcher process") + + # Wait longer for cleanup - SIGKILL should be immediate but give some time + max_wait = 15.0 + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + if not is_watcher_running(): + break + time.sleep(wait_interval) + waited += wait_interval + + assert not is_watcher_running(), f"Watcher should not be running on cycle {i}" + + def test_watcher_pid_consistency(self): + """Test that watcher PID is consistent and valid.""" + # Ensure watcher is running + ensure_singleton_watcher(check_interval=1.0) + + # Get PID multiple times + pid1 = get_watcher_pid() + pid2 = get_watcher_pid() + + 
assert pid1 is not None + assert pid2 is not None + assert pid1 == pid2, "PID should be consistent" + assert is_process_running(pid1), "PID should correspond to a running process" + + +class TestEvalWatcherProcessTermination: + """Test that the evaluation watcher detects terminated processes and updates evaluations.""" + + def setup_method(self): + """Set up test environment.""" + # Create a temporary directory for test data + self.temp_dir = tempfile.mkdtemp() + self.original_datasets_dir = os.environ.get("EVAL_PROTOCOL_DATASETS_DIR") + os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.temp_dir + + def teardown_method(self): + """Clean up after each test.""" + # Restore original environment + if self.original_datasets_dir: + os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.original_datasets_dir + else: + os.environ.pop("EVAL_PROTOCOL_DATASETS_DIR", None) + + # Clean up temporary directory + import shutil + + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def create_running_evaluation_row(self, pid: int) -> EvaluationRow: + """Create an evaluation row with 'running' status and specified PID.""" + from eval_protocol.models import InputMetadata + + row = EvaluationRow( + messages=[Message(role="user", content="Test message")], + input_metadata=InputMetadata(row_id=f"test_row_{pid}"), + eval_metadata=EvalMetadata( + name="test_evaluation", status="running", num_runs=1, aggregation_method="mean" + ), + pid=pid, + ) + + # Log the row + default_logger.log(row) + return row + + def test_detects_terminated_process(self): + """Test that watcher detects when a process terminates and updates evaluation.""" + # Ensure watcher is running + ensure_singleton_watcher(check_interval=0.5) + + # Give the watcher time to fully start + time.sleep(1.0) + + # Create a short-lived process and get its PID + process = subprocess.Popen( + [sys.executable, "-c", "import time; time.sleep(0.1)"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + pid = process.pid + + # Create 
evaluation row with running status for this PID + row = self.create_running_evaluation_row(pid) + + # Wait for process to terminate + process.wait() + + # Wait for watcher to detect the terminated process + max_wait = 15.0 # Increased wait time + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + # Read the evaluation row + rows = default_logger.read() + test_row = None + for r in rows: + if r.input_metadata.row_id == row.input_metadata.row_id: + test_row = r + break + + if test_row and test_row.eval_metadata and test_row.eval_metadata.status == "stopped": + break + + time.sleep(wait_interval) + waited += wait_interval + + # Verify the evaluation was updated + assert test_row is not None, "Should find the test row" + assert test_row.eval_metadata is not None, "Should have eval metadata" + assert test_row.eval_metadata.status == "stopped", "Status should be updated to stopped" + assert test_row.eval_metadata.passed is False, "Should be marked as not passed" + + # Verify error information is set + assert test_row.evaluation_result is not None, "Should have evaluation result" + assert test_row.evaluation_result.error is not None, "Should have error message" + assert "terminated" in test_row.evaluation_result.error.lower(), "Error should mention termination" + + def test_detects_multiple_terminated_processes(self): + """Test that watcher detects multiple terminated processes.""" + # Ensure watcher is running + ensure_singleton_watcher(check_interval=0.5) + + # Create multiple short-lived processes + processes = [] + pids = [] + rows = [] + + for i in range(3): + process = subprocess.Popen( + [sys.executable, "-c", f"import time; time.sleep({0.1 + i * 0.1})"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + pid = process.pid + pids.append(pid) + processes.append(process) + + # Create evaluation row for this PID + row = self.create_running_evaluation_row(pid) + rows.append(row) + + # Wait for all processes to terminate + for process in 
processes: + process.wait() + + # Wait for watcher to detect all terminated processes + max_wait = 15.0 # 15 seconds max wait + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + # Read all evaluation rows + all_rows = default_logger.read() + stopped_count = 0 + + for row in rows: + for r in all_rows: + if r.input_metadata.row_id == row.input_metadata.row_id: + if r.eval_metadata and r.eval_metadata.status == "stopped": + stopped_count += 1 + break + + if stopped_count == len(rows): + break + + time.sleep(wait_interval) + waited += wait_interval + + # Verify all evaluations were updated + assert stopped_count == len(rows), f"Expected {len(rows)} stopped evaluations, got {stopped_count}" + + # Verify each row was properly updated + all_rows = default_logger.read() + for original_row in rows: + for r in all_rows: + if r.input_metadata.row_id == original_row.input_metadata.row_id: + assert r.eval_metadata is not None + assert r.eval_metadata.status == "stopped" + assert r.eval_metadata.passed is False + assert r.evaluation_result is not None + assert r.evaluation_result.error is not None + break + + def test_ignores_running_processes(self): + """Test that watcher doesn't update evaluations for running processes.""" + # Ensure watcher is running + if not is_watcher_running(): + ensure_singleton_watcher(check_interval=0.5) + + # Create a long-running process + process = subprocess.Popen( + [sys.executable, "-c", "import time; time.sleep(30)"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + pid = process.pid + + # Create evaluation row with running status for this PID + row = self.create_running_evaluation_row(pid) + + # Wait a bit for watcher to run + time.sleep(2.0) + + # Verify the evaluation is still running + rows = default_logger.read() + test_row = None + for r in rows: + if r.input_metadata.row_id == row.input_metadata.row_id: + test_row = r + break + + assert test_row is not None, "Should find the test row" + assert 
test_row.eval_metadata is not None, "Should have eval metadata" + assert test_row.eval_metadata.status == "running", "Status should still be running" + + # Clean up + process.terminate() + process.wait() + + def test_handles_none_pid(self): + """Test that watcher handles evaluation rows with None PID.""" + # Ensure watcher is running + ensure_singleton_watcher(check_interval=0.5) + + # Create evaluation row with None PID + row = self.create_running_evaluation_row(None) + + # Wait for watcher to process the row + max_wait = 5.0 + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + rows = default_logger.read() + test_row = None + for r in rows: + if r.input_metadata.row_id == row.input_metadata.row_id: + test_row = r + break + + if test_row and test_row.eval_metadata and test_row.eval_metadata.status == "stopped": + break + + time.sleep(wait_interval) + waited += wait_interval + + # Verify the evaluation was updated + assert test_row is not None, "Should find the test row" + assert test_row.eval_metadata is not None, "Should have eval metadata" + assert test_row.eval_metadata.status == "stopped", "Status should be updated to stopped" + + +class TestEvalWatcherIntegration: + """Integration tests for the evaluation watcher.""" + + def setup_method(self): + """Set up test environment.""" + # Create a temporary directory for test data + self.temp_dir = tempfile.mkdtemp() + self.original_datasets_dir = os.environ.get("EVAL_PROTOCOL_DATASETS_DIR") + os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.temp_dir + + def teardown_method(self): + """Clean up after each test.""" + # Restore original environment + if self.original_datasets_dir: + os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.original_datasets_dir + else: + os.environ.pop("EVAL_PROTOCOL_DATASETS_DIR", None) + + # Clean up temporary directory + import shutil + + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_watcher_survives_parent_process_termination(self): + """Test that watcher 
survives when parent process is killed (simulated).""" + # Ensure watcher is running + ensure_singleton_watcher(check_interval=1.0) + + original_pid = get_watcher_pid() + assert original_pid is not None + + # Simulate parent process termination by directly killing the watcher process + # and then checking if it restarts (in a real scenario, the watcher would be + # in a separate session and survive parent termination) + os.kill(original_pid, signal.SIGKILL) + + # Wait for the process to be killed + max_wait = 10.0 + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + if not is_watcher_running(): + break + time.sleep(wait_interval) + waited += wait_interval + + # The watcher should no longer be running + assert not is_watcher_running() + + # We can start a new watcher + result = ensure_singleton_watcher(check_interval=1.0) + assert isinstance(result, int) + new_pid = get_watcher_pid() + assert new_pid is not None + assert new_pid != original_pid + + def test_watcher_handles_signal_gracefully(self): + """Test that watcher handles termination signals gracefully.""" + # Ensure watcher is running + if not is_watcher_running(): + ensure_singleton_watcher(check_interval=1.0) + + pid = get_watcher_pid() + assert pid is not None + + # Send SIGKILL to the watcher (SIGTERM is ignored) + os.kill(pid, signal.SIGKILL) + + # Wait for the process to be killed + max_wait = 10.0 + wait_interval = 0.5 + waited = 0.0 + + while waited < max_wait: + if not is_watcher_running(): + break + time.sleep(wait_interval) + waited += wait_interval + + # Verify watcher has stopped + assert not is_watcher_running() + assert get_watcher_pid() is None + + def test_concurrent_watcher_startup(self): + """Test that concurrent attempts to start watchers are handled correctly.""" + import queue + import threading + + # stop any existing watcher + stop_watcher() + + results = queue.Queue() + + def start_watcher(): + try: + result = ensure_singleton_watcher(check_interval=1.0) + 
results.put(result) + except Exception as e: + results.put(e) + + # Start multiple threads trying to start watchers simultaneously + threads = [] + for _ in range(3): + thread = threading.Thread(target=start_watcher) + threads.append(thread) + thread.start() + + # Wait for all threads to complete + for thread in threads: + thread.join() + + # Check results + success_count = 0 + while not results.empty(): + result = results.get() + if is_process_running(result): + success_count += 1 + + # Only one should succeed (or none if already running) + assert success_count == 1, f"Expected 1 successful start, got {success_count}" + assert is_watcher_running(), "Watcher should be running" From 0fdd59464398c49f216da122ec1f2312e07a6b56 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 15:59:50 -0700 Subject: [PATCH 25/31] build --- .../{index-CRkZ6JGL.js => index-BMc_e8JT.js} | 22 +++++++++---------- ...-CRkZ6JGL.js.map => index-BMc_e8JT.js.map} | 2 +- vite-app/dist/assets/index-BySN1scz.css | 1 - vite-app/dist/assets/index-Dp7ms4NJ.css | 1 + vite-app/dist/index.html | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) rename vite-app/dist/assets/{index-CRkZ6JGL.js => index-BMc_e8JT.js} (89%) rename vite-app/dist/assets/{index-CRkZ6JGL.js.map => index-BMc_e8JT.js.map} (64%) delete mode 100644 vite-app/dist/assets/index-BySN1scz.css create mode 100644 vite-app/dist/assets/index-Dp7ms4NJ.css diff --git a/vite-app/dist/assets/index-CRkZ6JGL.js b/vite-app/dist/assets/index-BMc_e8JT.js similarity index 89% rename from vite-app/dist/assets/index-CRkZ6JGL.js rename to vite-app/dist/assets/index-BMc_e8JT.js index 8b877c26..67e5a177 100644 --- a/vite-app/dist/assets/index-CRkZ6JGL.js +++ b/vite-app/dist/assets/index-BMc_e8JT.js @@ -14,7 +14,7 @@ * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
- */var Nm;function Q0(){if(Nm)return oe;Nm=1;var n=Symbol.for("react.transitional.element"),l=Symbol.for("react.portal"),i=Symbol.for("react.fragment"),r=Symbol.for("react.strict_mode"),c=Symbol.for("react.profiler"),f=Symbol.for("react.consumer"),d=Symbol.for("react.context"),v=Symbol.for("react.forward_ref"),m=Symbol.for("react.suspense"),p=Symbol.for("react.memo"),y=Symbol.for("react.lazy"),x=Symbol.iterator;function z(b){return b===null||typeof b!="object"?null:(b=x&&b[x]||b["@@iterator"],typeof b=="function"?b:null)}var B={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},H=Object.assign,L={};function q(b,Z,X){this.props=b,this.context=Z,this.refs=L,this.updater=X||B}q.prototype.isReactComponent={},q.prototype.setState=function(b,Z){if(typeof b!="object"&&typeof b!="function"&&b!=null)throw Error("takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,b,Z,"setState")},q.prototype.forceUpdate=function(b){this.updater.enqueueForceUpdate(this,b,"forceUpdate")};function k(){}k.prototype=q.prototype;function F(b,Z,X){this.props=b,this.context=Z,this.refs=L,this.updater=X||B}var G=F.prototype=new k;G.constructor=F,H(G,q.prototype),G.isPureReactComponent=!0;var I=Array.isArray,K={H:null,A:null,T:null,S:null,V:null},me=Object.prototype.hasOwnProperty;function Re(b,Z,X,V,ee,pe){return X=pe.ref,{$$typeof:n,type:b,key:Z,ref:X!==void 0?X:null,props:pe}}function Ye(b,Z){return Re(b.type,Z,void 0,void 0,void 0,b.props)}function ae(b){return typeof b=="object"&&b!==null&&b.$$typeof===n}function $e(b){var Z={"=":"=0",":":"=2"};return"$"+b.replace(/[=:]/g,function(X){return Z[X]})}var Fe=/\/+/g;function Ge(b,Z){return typeof b=="object"&&b!==null&&b.key!=null?$e(""+b.key):Z.toString(36)}function Jt(){}function qn(b){switch(b.status){case"fulfilled":return b.value;case"rejected":throw b.reason;default:switch(typeof 
b.status=="string"?b.then(Jt,Jt):(b.status="pending",b.then(function(Z){b.status==="pending"&&(b.status="fulfilled",b.value=Z)},function(Z){b.status==="pending"&&(b.status="rejected",b.reason=Z)})),b.status){case"fulfilled":return b.value;case"rejected":throw b.reason}}throw b}function ut(b,Z,X,V,ee){var pe=typeof b;(pe==="undefined"||pe==="boolean")&&(b=null);var re=!1;if(b===null)re=!0;else switch(pe){case"bigint":case"string":case"number":re=!0;break;case"object":switch(b.$$typeof){case n:case l:re=!0;break;case y:return re=b._init,ut(re(b._payload),Z,X,V,ee)}}if(re)return ee=ee(b),re=V===""?"."+Ge(b,0):V,I(ee)?(X="",re!=null&&(X=re.replace(Fe,"$&/")+"/"),ut(ee,Z,X,"",function(Vn){return Vn})):ee!=null&&(ae(ee)&&(ee=Ye(ee,X+(ee.key==null||b&&b.key===ee.key?"":(""+ee.key).replace(Fe,"$&/")+"/")+re)),Z.push(ee)),1;re=0;var St=V===""?".":V+":";if(I(b))for(var Ne=0;Ne>>1,b=M[Ae];if(0>>1;Aec(V,le))eec(pe,V)?(M[Ae]=pe,M[ee]=le,Ae=ee):(M[Ae]=V,M[X]=le,Ae=X);else if(eec(pe,le))M[Ae]=pe,M[ee]=le,Ae=ee;else break e}}return Y}function c(M,Y){var le=M.sortIndex-Y.sortIndex;return le!==0?le:M.id-Y.id}if(n.unstable_now=void 0,typeof performance=="object"&&typeof performance.now=="function"){var f=performance;n.unstable_now=function(){return f.now()}}else{var d=Date,v=d.now();n.unstable_now=function(){return d.now()-v}}var m=[],p=[],y=1,x=null,z=3,B=!1,H=!1,L=!1,q=!1,k=typeof setTimeout=="function"?setTimeout:null,F=typeof clearTimeout=="function"?clearTimeout:null,G=typeof setImmediate<"u"?setImmediate:null;function I(M){for(var Y=i(p);Y!==null;){if(Y.callback===null)r(p);else if(Y.startTime<=M)r(p),Y.sortIndex=Y.expirationTime,l(m,Y);else break;Y=i(p)}}function K(M){if(L=!1,I(M),!H)if(i(m)!==null)H=!0,me||(me=!0,Ge());else{var Y=i(p);Y!==null&&ut(K,Y.startTime-M)}}var me=!1,Re=-1,Ye=5,ae=-1;function $e(){return q?!0:!(n.unstable_now()-aeM&&$e());){var Ae=x.callback;if(typeof Ae=="function"){x.callback=null,z=x.priorityLevel;var 
b=Ae(x.expirationTime<=M);if(M=n.unstable_now(),typeof b=="function"){x.callback=b,I(M),Y=!0;break t}x===i(m)&&r(m),I(M)}else r(m);x=i(m)}if(x!==null)Y=!0;else{var Z=i(p);Z!==null&&ut(K,Z.startTime-M),Y=!1}}break e}finally{x=null,z=le,B=!1}Y=void 0}}finally{Y?Ge():me=!1}}}var Ge;if(typeof G=="function")Ge=function(){G(Fe)};else if(typeof MessageChannel<"u"){var Jt=new MessageChannel,qn=Jt.port2;Jt.port1.onmessage=Fe,Ge=function(){qn.postMessage(null)}}else Ge=function(){k(Fe,0)};function ut(M,Y){Re=k(function(){M(n.unstable_now())},Y)}n.unstable_IdlePriority=5,n.unstable_ImmediatePriority=1,n.unstable_LowPriority=4,n.unstable_NormalPriority=3,n.unstable_Profiling=null,n.unstable_UserBlockingPriority=2,n.unstable_cancelCallback=function(M){M.callback=null},n.unstable_forceFrameRate=function(M){0>M||125Ae?(M.sortIndex=le,l(p,M),i(m)===null&&M===i(p)&&(L?(F(Re),Re=-1):L=!0,ut(K,le-Ae))):(M.sortIndex=b,l(m,M),H||B||(H=!0,me||(me=!0,Ge()))),M},n.unstable_shouldYield=$e,n.unstable_wrapCallback=function(M){var Y=z;return function(){var le=z;z=Y;try{return M.apply(this,arguments)}finally{z=le}}}}(Gs)),Gs}var Um;function J0(){return Um||(Um=1,Ys.exports=K0()),Ys.exports}var Xs={exports:{}},ft={};/** + */var Cm;function K0(){return Cm||(Cm=1,function(n){function l(M,Y){var le=M.length;M.push(Y);e:for(;0>>1,b=M[Ae];if(0>>1;Aec(V,le))eec(pe,V)?(M[Ae]=pe,M[ee]=le,Ae=ee):(M[Ae]=V,M[X]=le,Ae=X);else if(eec(pe,le))M[Ae]=pe,M[ee]=le,Ae=ee;else break e}}return Y}function c(M,Y){var le=M.sortIndex-Y.sortIndex;return le!==0?le:M.id-Y.id}if(n.unstable_now=void 0,typeof performance=="object"&&typeof performance.now=="function"){var f=performance;n.unstable_now=function(){return f.now()}}else{var d=Date,v=d.now();n.unstable_now=function(){return d.now()-v}}var m=[],p=[],y=1,x=null,z=3,B=!1,H=!1,L=!1,q=!1,k=typeof setTimeout=="function"?setTimeout:null,F=typeof clearTimeout=="function"?clearTimeout:null,G=typeof setImmediate<"u"?setImmediate:null;function I(M){for(var 
Y=i(p);Y!==null;){if(Y.callback===null)r(p);else if(Y.startTime<=M)r(p),Y.sortIndex=Y.expirationTime,l(m,Y);else break;Y=i(p)}}function K(M){if(L=!1,I(M),!H)if(i(m)!==null)H=!0,me||(me=!0,Ge());else{var Y=i(p);Y!==null&&ut(K,Y.startTime-M)}}var me=!1,Re=-1,Ye=5,ae=-1;function $e(){return q?!0:!(n.unstable_now()-aeM&&$e());){var Ae=x.callback;if(typeof Ae=="function"){x.callback=null,z=x.priorityLevel;var b=Ae(x.expirationTime<=M);if(M=n.unstable_now(),typeof b=="function"){x.callback=b,I(M),Y=!0;break t}x===i(m)&&r(m),I(M)}else r(m);x=i(m)}if(x!==null)Y=!0;else{var Z=i(p);Z!==null&&ut(K,Z.startTime-M),Y=!1}}break e}finally{x=null,z=le,B=!1}Y=void 0}}finally{Y?Ge():me=!1}}}var Ge;if(typeof G=="function")Ge=function(){G(Ie)};else if(typeof MessageChannel<"u"){var Jt=new MessageChannel,qn=Jt.port2;Jt.port1.onmessage=Ie,Ge=function(){qn.postMessage(null)}}else Ge=function(){k(Ie,0)};function ut(M,Y){Re=k(function(){M(n.unstable_now())},Y)}n.unstable_IdlePriority=5,n.unstable_ImmediatePriority=1,n.unstable_LowPriority=4,n.unstable_NormalPriority=3,n.unstable_Profiling=null,n.unstable_UserBlockingPriority=2,n.unstable_cancelCallback=function(M){M.callback=null},n.unstable_forceFrameRate=function(M){0>M||125Ae?(M.sortIndex=le,l(p,M),i(m)===null&&M===i(p)&&(L?(F(Re),Re=-1):L=!0,ut(K,le-Ae))):(M.sortIndex=b,l(m,M),H||B||(H=!0,me||(me=!0,Ge()))),M},n.unstable_shouldYield=$e,n.unstable_wrapCallback=function(M){var Y=z;return function(){var le=z;z=Y;try{return M.apply(this,arguments)}finally{z=le}}}}(Gs)),Gs}var Um;function J0(){return Um||(Um=1,Ys.exports=K0()),Ys.exports}var Xs={exports:{}},ft={};/** * @license React * react-dom.production.js * @@ -38,15 +38,15 @@ * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
- */var Lm;function W0(){if(Lm)return Gu;Lm=1;var n=J0(),l=uo(),i=$p();function r(e){var t="https://react.dev/errors/"+e;if(1b||(e.current=Ae[b],Ae[b]=null,b--)}function V(e,t){b++,Ae[b]=e.current,e.current=t}var ee=Z(null),pe=Z(null),re=Z(null),St=Z(null);function Ne(e,t){switch(V(re,t),V(pe,e),V(ee,null),t.nodeType){case 9:case 11:e=(e=t.documentElement)&&(e=e.namespaceURI)?lm(e):0;break;default:if(e=t.tagName,t=t.namespaceURI)t=lm(t),e=um(t,e);else switch(e){case"svg":e=1;break;case"math":e=2;break;default:e=0}}X(ee),V(ee,e)}function Vn(){X(ee),X(pe),X(re)}function To(e){e.memoizedState!==null&&V(St,e);var t=ee.current,a=um(t,e.type);t!==a&&(V(pe,e),V(ee,a))}function pi(e){pe.current===e&&(X(ee),X(pe)),St.current===e&&(X(St),Hu._currentValue=le)}var zo=Object.prototype.hasOwnProperty,Ro=n.unstable_scheduleCallback,wo=n.unstable_cancelCallback,E_=n.unstable_shouldYield,x_=n.unstable_requestPaint,ln=n.unstable_now,A_=n.unstable_getCurrentPriorityLevel,Lf=n.unstable_ImmediatePriority,Hf=n.unstable_UserBlockingPriority,gi=n.unstable_NormalPriority,T_=n.unstable_LowPriority,kf=n.unstable_IdlePriority,z_=n.log,R_=n.unstable_setDisableYieldValue,Ql=null,Ot=null;function Yn(e){if(typeof z_=="function"&&R_(e),Ot&&typeof Ot.setStrictMode=="function")try{Ot.setStrictMode(Ql,e)}catch{}}var Et=Math.clz32?Math.clz32:M_,w_=Math.log,D_=Math.LN2;function M_(e){return e>>>=0,e===0?32:31-(w_(e)/D_|0)|0}var _i=256,yi=4194304;function ya(e){var t=e&42;if(t!==0)return t;switch(e&-e){case 1:return 1;case 2:return 2;case 4:return 4;case 8:return 8;case 16:return 16;case 32:return 32;case 64:return 64;case 128:return 128;case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return e&4194048;case 4194304:case 8388608:case 16777216:case 33554432:return e&62914560;case 67108864:return 67108864;case 134217728:return 134217728;case 268435456:return 268435456;case 536870912:return 536870912;case 
1073741824:return 0;default:return e}}function bi(e,t,a){var u=e.pendingLanes;if(u===0)return 0;var o=0,s=e.suspendedLanes,h=e.pingedLanes;e=e.warmLanes;var g=u&134217727;return g!==0?(u=g&~s,u!==0?o=ya(u):(h&=g,h!==0?o=ya(h):a||(a=g&~e,a!==0&&(o=ya(a))))):(g=u&~s,g!==0?o=ya(g):h!==0?o=ya(h):a||(a=u&~e,a!==0&&(o=ya(a)))),o===0?0:t!==0&&t!==o&&(t&s)===0&&(s=o&-o,a=t&-t,s>=a||s===32&&(a&4194048)!==0)?t:o}function Kl(e,t){return(e.pendingLanes&~(e.suspendedLanes&~e.pingedLanes)&t)===0}function N_(e,t){switch(e){case 1:case 2:case 4:case 8:case 64:return t+250;case 16:case 32:case 128:case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return t+5e3;case 4194304:case 8388608:case 16777216:case 33554432:return-1;case 67108864:case 134217728:case 268435456:case 536870912:case 1073741824:return-1;default:return-1}}function $f(){var e=_i;return _i<<=1,(_i&4194048)===0&&(_i=256),e}function qf(){var e=yi;return yi<<=1,(yi&62914560)===0&&(yi=4194304),e}function Do(e){for(var t=[],a=0;31>a;a++)t.push(e);return t}function Jl(e,t){e.pendingLanes|=t,t!==268435456&&(e.suspendedLanes=0,e.pingedLanes=0,e.warmLanes=0)}function j_(e,t,a,u,o,s){var h=e.pendingLanes;e.pendingLanes=a,e.suspendedLanes=0,e.pingedLanes=0,e.warmLanes=0,e.expiredLanes&=a,e.entangledLanes&=a,e.errorRecoveryDisabledLanes&=a,e.shellSuspendCounter=0;var g=e.entanglements,_=e.expirationTimes,A=e.hiddenUpdates;for(a=h&~a;0b||(e.current=Ae[b],Ae[b]=null,b--)}function V(e,t){b++,Ae[b]=e.current,e.current=t}var ee=Z(null),pe=Z(null),re=Z(null),St=Z(null);function Ne(e,t){switch(V(re,t),V(pe,e),V(ee,null),t.nodeType){case 9:case 11:e=(e=t.documentElement)&&(e=e.namespaceURI)?lm(e):0;break;default:if(e=t.tagName,t=t.namespaceURI)t=lm(t),e=um(t,e);else switch(e){case"svg":e=1;break;case"math":e=2;break;default:e=0}}X(ee),V(ee,e)}function Vn(){X(ee),X(pe),X(re)}function To(e){e.memoizedState!==null&&V(St,e);var 
t=ee.current,a=um(t,e.type);t!==a&&(V(pe,e),V(ee,a))}function pi(e){pe.current===e&&(X(ee),X(pe)),St.current===e&&(X(St),Hu._currentValue=le)}var zo=Object.prototype.hasOwnProperty,Ro=n.unstable_scheduleCallback,wo=n.unstable_cancelCallback,E_=n.unstable_shouldYield,x_=n.unstable_requestPaint,ln=n.unstable_now,A_=n.unstable_getCurrentPriorityLevel,Lf=n.unstable_ImmediatePriority,Hf=n.unstable_UserBlockingPriority,gi=n.unstable_NormalPriority,T_=n.unstable_LowPriority,kf=n.unstable_IdlePriority,z_=n.log,R_=n.unstable_setDisableYieldValue,Ql=null,Ot=null;function Yn(e){if(typeof z_=="function"&&R_(e),Ot&&typeof Ot.setStrictMode=="function")try{Ot.setStrictMode(Ql,e)}catch{}}var Et=Math.clz32?Math.clz32:M_,w_=Math.log,D_=Math.LN2;function M_(e){return e>>>=0,e===0?32:31-(w_(e)/D_|0)|0}var _i=256,yi=4194304;function ya(e){var t=e&42;if(t!==0)return t;switch(e&-e){case 1:return 1;case 2:return 2;case 4:return 4;case 8:return 8;case 16:return 16;case 32:return 32;case 64:return 64;case 128:return 128;case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return e&4194048;case 4194304:case 8388608:case 16777216:case 33554432:return e&62914560;case 67108864:return 67108864;case 134217728:return 134217728;case 268435456:return 268435456;case 536870912:return 536870912;case 1073741824:return 0;default:return e}}function bi(e,t,a){var u=e.pendingLanes;if(u===0)return 0;var o=0,s=e.suspendedLanes,h=e.pingedLanes;e=e.warmLanes;var g=u&134217727;return g!==0?(u=g&~s,u!==0?o=ya(u):(h&=g,h!==0?o=ya(h):a||(a=g&~e,a!==0&&(o=ya(a))))):(g=u&~s,g!==0?o=ya(g):h!==0?o=ya(h):a||(a=u&~e,a!==0&&(o=ya(a)))),o===0?0:t!==0&&t!==o&&(t&s)===0&&(s=o&-o,a=t&-t,s>=a||s===32&&(a&4194048)!==0)?t:o}function Kl(e,t){return(e.pendingLanes&~(e.suspendedLanes&~e.pingedLanes)&t)===0}function N_(e,t){switch(e){case 1:case 2:case 4:case 8:case 64:return t+250;case 16:case 32:case 128:case 256:case 512:case 
1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return t+5e3;case 4194304:case 8388608:case 16777216:case 33554432:return-1;case 67108864:case 134217728:case 268435456:case 536870912:case 1073741824:return-1;default:return-1}}function $f(){var e=_i;return _i<<=1,(_i&4194048)===0&&(_i=256),e}function qf(){var e=yi;return yi<<=1,(yi&62914560)===0&&(yi=4194304),e}function Do(e){for(var t=[],a=0;31>a;a++)t.push(e);return t}function Jl(e,t){e.pendingLanes|=t,t!==268435456&&(e.suspendedLanes=0,e.pingedLanes=0,e.warmLanes=0)}function j_(e,t,a,u,o,s){var h=e.pendingLanes;e.pendingLanes=a,e.suspendedLanes=0,e.pingedLanes=0,e.warmLanes=0,e.expiredLanes&=a,e.entangledLanes&=a,e.errorRecoveryDisabledLanes&=a,e.shellSuspendCounter=0;var g=e.entanglements,_=e.expirationTimes,A=e.hiddenUpdates;for(a=h&~a;0)":-1o||_[u]!==A[o]){var N=` `+_[u].replace(" at new "," at ");return e.displayName&&N.includes("")&&(N=N.replace("",e.displayName)),N}while(1<=u&&0<=o);break}}}finally{Zo=!1,Error.prepareStackTrace=a}return(a=e?e.displayName||e.name:"")?el(a):""}function H_(e){switch(e.tag){case 26:case 27:case 5:return el(e.type);case 16:return el("Lazy");case 13:return el("Suspense");case 19:return el("SuspenseList");case 0:case 15:return Bo(e.type,!1);case 11:return Bo(e.type.render,!1);case 1:return Bo(e.type,!0);case 31:return el("Activity");default:return""}}function Ff(e){try{var t="";do t+=H_(e),e=e.return;while(e);return t}catch(a){return` Error generating stack: `+a.message+` -`+a.stack}}function Ut(e){switch(typeof e){case"bigint":case"boolean":case"number":case"string":case"undefined":return e;case"object":return e;default:return""}}function If(e){var t=e.type;return(e=e.nodeName)&&e.toLowerCase()==="input"&&(t==="checkbox"||t==="radio")}function k_(e){var t=If(e)?"checked":"value",a=Object.getOwnPropertyDescriptor(e.constructor.prototype,t),u=""+e[t];if(!e.hasOwnProperty(t)&&typeof a<"u"&&typeof 
a.get=="function"&&typeof a.set=="function"){var o=a.get,s=a.set;return Object.defineProperty(e,t,{configurable:!0,get:function(){return o.call(this)},set:function(h){u=""+h,s.call(this,h)}}),Object.defineProperty(e,t,{enumerable:a.enumerable}),{getValue:function(){return u},setValue:function(h){u=""+h},stopTracking:function(){e._valueTracker=null,delete e[t]}}}}function Ei(e){e._valueTracker||(e._valueTracker=k_(e))}function ed(e){if(!e)return!1;var t=e._valueTracker;if(!t)return!0;var a=t.getValue(),u="";return e&&(u=If(e)?e.checked?"true":"false":e.value),e=u,e!==a?(t.setValue(e),!0):!1}function xi(e){if(e=e||(typeof document<"u"?document:void 0),typeof e>"u")return null;try{return e.activeElement||e.body}catch{return e.body}}var $_=/[\n"\\]/g;function Zt(e){return e.replace($_,function(t){return"\\"+t.charCodeAt(0).toString(16)+" "})}function Lo(e,t,a,u,o,s,h,g){e.name="",h!=null&&typeof h!="function"&&typeof h!="symbol"&&typeof h!="boolean"?e.type=h:e.removeAttribute("type"),t!=null?h==="number"?(t===0&&e.value===""||e.value!=t)&&(e.value=""+Ut(t)):e.value!==""+Ut(t)&&(e.value=""+Ut(t)):h!=="submit"&&h!=="reset"||e.removeAttribute("value"),t!=null?Ho(e,h,Ut(t)):a!=null?Ho(e,h,Ut(a)):u!=null&&e.removeAttribute("value"),o==null&&s!=null&&(e.defaultChecked=!!s),o!=null&&(e.checked=o&&typeof o!="function"&&typeof o!="symbol"),g!=null&&typeof g!="function"&&typeof g!="symbol"&&typeof g!="boolean"?e.name=""+Ut(g):e.removeAttribute("name")}function td(e,t,a,u,o,s,h,g){if(s!=null&&typeof s!="function"&&typeof s!="symbol"&&typeof s!="boolean"&&(e.type=s),t!=null||a!=null){if(!(s!=="submit"&&s!=="reset"||t!=null))return;a=a!=null?""+Ut(a):"",t=t!=null?""+Ut(t):a,g||t===e.value||(e.value=t),e.defaultValue=t}u=u??o,u=typeof u!="function"&&typeof u!="symbol"&&!!u,e.checked=g?e.checked:!!u,e.defaultChecked=!!u,h!=null&&typeof h!="function"&&typeof h!="symbol"&&typeof h!="boolean"&&(e.name=h)}function 
Ho(e,t,a){t==="number"&&xi(e.ownerDocument)===e||e.defaultValue===""+a||(e.defaultValue=""+a)}function tl(e,t,a,u){if(e=e.options,t){t={};for(var o=0;o"u"||typeof window.document>"u"||typeof window.document.createElement>"u"),Yo=!1;if(bn)try{var Il={};Object.defineProperty(Il,"passive",{get:function(){Yo=!0}}),window.addEventListener("test",Il,Il),window.removeEventListener("test",Il,Il)}catch{Yo=!1}var Xn=null,Go=null,Ti=null;function od(){if(Ti)return Ti;var e,t=Go,a=t.length,u,o="value"in Xn?Xn.value:Xn.textContent,s=o.length;for(e=0;e=nu),vd=" ",md=!1;function pd(e,t){switch(e){case"keyup":return my.indexOf(t.keyCode)!==-1;case"keydown":return t.keyCode!==229;case"keypress":case"mousedown":case"focusout":return!0;default:return!1}}function gd(e){return e=e.detail,typeof e=="object"&&"data"in e?e.data:null}var ul=!1;function gy(e,t){switch(e){case"compositionend":return gd(t);case"keypress":return t.which!==32?null:(md=!0,vd);case"textInput":return e=t.data,e===vd&&md?null:e;default:return null}}function _y(e,t){if(ul)return e==="compositionend"||!Po&&pd(e,t)?(e=od(),Ti=Go=Xn=null,ul=!1,e):null;switch(e){case"paste":return null;case"keypress":if(!(t.ctrlKey||t.altKey||t.metaKey)||t.ctrlKey&&t.altKey){if(t.char&&1=t)return{node:a,offset:t-e};e=u}e:{for(;a;){if(a.nextSibling){a=a.nextSibling;break e}a=a.parentNode}a=void 0}a=Ad(a)}}function zd(e,t){return e&&t?e===t?!0:e&&e.nodeType===3?!1:t&&t.nodeType===3?zd(e,t.parentNode):"contains"in e?e.contains(t):e.compareDocumentPosition?!!(e.compareDocumentPosition(t)&16):!1:!1}function Rd(e){e=e!=null&&e.ownerDocument!=null&&e.ownerDocument.defaultView!=null?e.ownerDocument.defaultView:window;for(var t=xi(e.document);t instanceof e.HTMLIFrameElement;){try{var a=typeof t.contentWindow.location.href=="string"}catch{a=!1}if(a)e=t.contentWindow;else break;t=xi(e.document)}return t}function Io(e){var t=e&&e.nodeName&&e.nodeName.toLowerCase();return 
t&&(t==="input"&&(e.type==="text"||e.type==="search"||e.type==="tel"||e.type==="url"||e.type==="password")||t==="textarea"||e.contentEditable==="true")}var Ty=bn&&"documentMode"in document&&11>=document.documentMode,il=null,ec=null,iu=null,tc=!1;function wd(e,t,a){var u=a.window===a?a.document:a.nodeType===9?a:a.ownerDocument;tc||il==null||il!==xi(u)||(u=il,"selectionStart"in u&&Io(u)?u={start:u.selectionStart,end:u.selectionEnd}:(u=(u.ownerDocument&&u.ownerDocument.defaultView||window).getSelection(),u={anchorNode:u.anchorNode,anchorOffset:u.anchorOffset,focusNode:u.focusNode,focusOffset:u.focusOffset}),iu&&uu(iu,u)||(iu=u,u=pr(ec,"onSelect"),0>=h,o-=h,On=1<<32-Et(t)+o|a<s?s:8;var h=M.T,g={};M.T=g,kc(e,!1,t,a);try{var _=o(),A=M.S;if(A!==null&&A(g,_),_!==null&&typeof _=="object"&&typeof _.then=="function"){var N=Uy(_,u);Su(e,t,N,wt(e))}else Su(e,t,u,wt(e))}catch(U){Su(e,t,{then:function(){},status:"rejected",reason:U},wt())}finally{Y.p=s,M.T=h}}function ky(){}function Lc(e,t,a,u){if(e.tag!==5)throw Error(r(476));var o=Dh(e).queue;wh(e,o,t,le,a===null?ky:function(){return Mh(e),a(u)})}function Dh(e){var t=e.memoizedState;if(t!==null)return t;t={memoizedState:le,baseState:le,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Tn,lastRenderedState:le},next:null};var a={};return t.next={memoizedState:a,baseState:a,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Tn,lastRenderedState:a},next:null},e.memoizedState=t,e=e.alternate,e!==null&&(e.memoizedState=t),t}function Mh(e){var t=Dh(e).next.queue;Su(e,t,{},wt())}function Hc(){return st(Hu)}function Nh(){return Qe().memoizedState}function jh(){return Qe().memoizedState}function $y(e){for(var t=e.return;t!==null;){switch(t.tag){case 24:case 3:var a=wt();e=Jn(a);var u=Pn(t,e,a);u!==null&&(Dt(u,t,a),mu(u,t,a)),t={cache:mc()},e.payload=t;return}t=t.return}}function qy(e,t,a){var 
u=wt();a={lane:u,revertLane:0,action:a,hasEagerState:!1,eagerState:null,next:null},Pi(e)?Uh(t,a):(a=uc(e,t,a,u),a!==null&&(Dt(a,e,u),Zh(a,t,u)))}function Ch(e,t,a){var u=wt();Su(e,t,a,u)}function Su(e,t,a,u){var o={lane:u,revertLane:0,action:a,hasEagerState:!1,eagerState:null,next:null};if(Pi(e))Uh(t,o);else{var s=e.alternate;if(e.lanes===0&&(s===null||s.lanes===0)&&(s=t.lastRenderedReducer,s!==null))try{var h=t.lastRenderedState,g=s(h,a);if(o.hasEagerState=!0,o.eagerState=g,xt(g,h))return ji(e,t,o,0),we===null&&Ni(),!1}catch{}finally{}if(a=uc(e,t,o,u),a!==null)return Dt(a,e,u),Zh(a,t,u),!0}return!1}function kc(e,t,a,u){if(u={lane:2,revertLane:_s(),action:u,hasEagerState:!1,eagerState:null,next:null},Pi(e)){if(t)throw Error(r(479))}else t=uc(e,a,u,2),t!==null&&Dt(t,e,2)}function Pi(e){var t=e.alternate;return e===ce||t!==null&&t===ce}function Uh(e,t){pl=Yi=!0;var a=e.pending;a===null?t.next=t:(t.next=a.next,a.next=t),e.pending=t}function Zh(e,t,a){if((a&4194048)!==0){var u=t.lanes;u&=e.pendingLanes,a|=u,t.lanes=a,Yf(e,a)}}var Wi={readContext:st,use:Xi,useCallback:qe,useContext:qe,useEffect:qe,useImperativeHandle:qe,useLayoutEffect:qe,useInsertionEffect:qe,useMemo:qe,useReducer:qe,useRef:qe,useState:qe,useDebugValue:qe,useDeferredValue:qe,useTransition:qe,useSyncExternalStore:qe,useId:qe,useHostTransitionStatus:qe,useFormState:qe,useActionState:qe,useOptimistic:qe,useMemoCache:qe,useCacheRefresh:qe},Bh={readContext:st,use:Xi,useCallback:function(e,t){return mt().memoizedState=[e,t===void 0?null:t],e},useContext:st,useEffect:bh,useImperativeHandle:function(e,t,a){a=a!=null?a.concat([e]):null,Ji(4194308,4,xh.bind(null,t,e),a)},useLayoutEffect:function(e,t){return Ji(4194308,4,e,t)},useInsertionEffect:function(e,t){Ji(4,2,e,t)},useMemo:function(e,t){var a=mt();t=t===void 0?null:t;var u=e();if(Na){Yn(!0);try{e()}finally{Yn(!1)}}return a.memoizedState=[u,t],u},useReducer:function(e,t,a){var u=mt();if(a!==void 0){var o=a(t);if(Na){Yn(!0);try{a(t)}finally{Yn(!1)}}}else 
o=t;return u.memoizedState=u.baseState=o,e={pending:null,lanes:0,dispatch:null,lastRenderedReducer:e,lastRenderedState:o},u.queue=e,e=e.dispatch=qy.bind(null,ce,e),[u.memoizedState,e]},useRef:function(e){var t=mt();return e={current:e},t.memoizedState=e},useState:function(e){e=Cc(e);var t=e.queue,a=Ch.bind(null,ce,t);return t.dispatch=a,[e.memoizedState,a]},useDebugValue:Zc,useDeferredValue:function(e,t){var a=mt();return Bc(a,e,t)},useTransition:function(){var e=Cc(!1);return e=wh.bind(null,ce,e.queue,!0,!1),mt().memoizedState=e,[!1,e]},useSyncExternalStore:function(e,t,a){var u=ce,o=mt();if(ye){if(a===void 0)throw Error(r(407));a=a()}else{if(a=t(),we===null)throw Error(r(349));(he&124)!==0||lh(u,t,a)}o.memoizedState=a;var s={value:a,getSnapshot:t};return o.queue=s,bh(ih.bind(null,u,s,e),[e]),u.flags|=2048,_l(9,Ki(),uh.bind(null,u,s,a,t),null),a},useId:function(){var e=mt(),t=we.identifierPrefix;if(ye){var a=En,u=On;a=(u&~(1<<32-Et(u)-1)).toString(32)+a,t="Ā«"+t+"R"+a,a=Gi++,0ne?(nt=W,W=null):nt=W.sibling;var ge=T(O,W,E[ne],C);if(ge===null){W===null&&(W=nt);break}e&&W&&ge.alternate===null&&t(O,W),S=s(ge,S,ne),se===null?Q=ge:se.sibling=ge,se=ge,W=nt}if(ne===E.length)return a(O,W),ye&&Ta(O,ne),Q;if(W===null){for(;nene?(nt=W,W=null):nt=W.sibling;var ha=T(O,W,ge.value,C);if(ha===null){W===null&&(W=nt);break}e&&W&&ha.alternate===null&&t(O,W),S=s(ha,S,ne),se===null?Q=ha:se.sibling=ha,se=ha,W=nt}if(ge.done)return a(O,W),ye&&Ta(O,ne),Q;if(W===null){for(;!ge.done;ne++,ge=E.next())ge=U(O,ge.value,C),ge!==null&&(S=s(ge,S,ne),se===null?Q=ge:se.sibling=ge,se=ge);return ye&&Ta(O,ne),Q}for(W=u(W);!ge.done;ne++,ge=E.next())ge=R(W,O,ne,ge.value,C),ge!==null&&(e&&ge.alternate!==null&&W.delete(ge.key===null?ne:ge.key),S=s(ge,S,ne),se===null?Q=ge:se.sibling=ge,se=ge);return e&&W.forEach(function(Y0){return t(O,Y0)}),ye&&Ta(O,ne),Q}function xe(O,S,E,C){if(typeof E=="object"&&E!==null&&E.type===H&&E.key===null&&(E=E.props.children),typeof E=="object"&&E!==null){switch(E.$$typeof){case 
z:e:{for(var Q=E.key;S!==null;){if(S.key===Q){if(Q=E.type,Q===H){if(S.tag===7){a(O,S.sibling),C=o(S,E.props.children),C.return=O,O=C;break e}}else if(S.elementType===Q||typeof Q=="object"&&Q!==null&&Q.$$typeof===Ye&&Hh(Q)===S.type){a(O,S.sibling),C=o(S,E.props),Eu(C,E),C.return=O,O=C;break e}a(O,S);break}else t(O,S);S=S.sibling}E.type===H?(C=xa(E.props.children,O.mode,C,E.key),C.return=O,O=C):(C=Ui(E.type,E.key,E.props,null,O.mode,C),Eu(C,E),C.return=O,O=C)}return h(O);case B:e:{for(Q=E.key;S!==null;){if(S.key===Q)if(S.tag===4&&S.stateNode.containerInfo===E.containerInfo&&S.stateNode.implementation===E.implementation){a(O,S.sibling),C=o(S,E.children||[]),C.return=O,O=C;break e}else{a(O,S);break}else t(O,S);S=S.sibling}C=oc(E,O.mode,C),C.return=O,O=C}return h(O);case Ye:return Q=E._init,E=Q(E._payload),xe(O,S,E,C)}if(ut(E))return ue(O,S,E,C);if(Ge(E)){if(Q=Ge(E),typeof Q!="function")throw Error(r(150));return E=Q.call(E),te(O,S,E,C)}if(typeof E.then=="function")return xe(O,S,Fi(E),C);if(E.$$typeof===G)return xe(O,S,Hi(O,E),C);Ii(O,E)}return typeof E=="string"&&E!==""||typeof E=="number"||typeof E=="bigint"?(E=""+E,S!==null&&S.tag===6?(a(O,S.sibling),C=o(S,E),C.return=O,O=C):(a(O,S),C=rc(E,O.mode,C),C.return=O,O=C),h(O)):a(O,S)}return function(O,S,E,C){try{Ou=0;var Q=xe(O,S,E,C);return yl=null,Q}catch(W){if(W===hu||W===$i)throw W;var se=At(29,W,null,O.mode);return se.lanes=C,se.return=O,se}finally{}}}var bl=kh(!0),$h=kh(!1),$t=Z(null),rn=null;function Fn(e){var t=e.alternate;V(Pe,Pe.current&1),V($t,e),rn===null&&(t===null||ml.current!==null||t.memoizedState!==null)&&(rn=e)}function qh(e){if(e.tag===22){if(V(Pe,Pe.current),V($t,e),rn===null){var t=e.alternate;t!==null&&t.memoizedState!==null&&(rn=e)}}else In()}function In(){V(Pe,Pe.current),V($t,$t.current)}function zn(e){X($t),rn===e&&(rn=null),X(Pe)}var Pe=Z(0);function er(e){for(var t=e;t!==null;){if(t.tag===13){var a=t.memoizedState;if(a!==null&&(a=a.dehydrated,a===null||a.data==="$?"||Ds(a)))return t}else 
if(t.tag===19&&t.memoizedProps.revealOrder!==void 0){if((t.flags&128)!==0)return t}else if(t.child!==null){t.child.return=t,t=t.child;continue}if(t===e)break;for(;t.sibling===null;){if(t.return===null||t.return===e)return null;t=t.return}t.sibling.return=t.return,t=t.sibling}return null}function $c(e,t,a,u){t=e.memoizedState,a=a(u,t),a=a==null?t:y({},t,a),e.memoizedState=a,e.lanes===0&&(e.updateQueue.baseState=a)}var qc={enqueueSetState:function(e,t,a){e=e._reactInternals;var u=wt(),o=Jn(u);o.payload=t,a!=null&&(o.callback=a),t=Pn(e,o,u),t!==null&&(Dt(t,e,u),mu(t,e,u))},enqueueReplaceState:function(e,t,a){e=e._reactInternals;var u=wt(),o=Jn(u);o.tag=1,o.payload=t,a!=null&&(o.callback=a),t=Pn(e,o,u),t!==null&&(Dt(t,e,u),mu(t,e,u))},enqueueForceUpdate:function(e,t){e=e._reactInternals;var a=wt(),u=Jn(a);u.tag=2,t!=null&&(u.callback=t),t=Pn(e,u,a),t!==null&&(Dt(t,e,a),mu(t,e,a))}};function Vh(e,t,a,u,o,s,h){return e=e.stateNode,typeof e.shouldComponentUpdate=="function"?e.shouldComponentUpdate(u,s,h):t.prototype&&t.prototype.isPureReactComponent?!uu(a,u)||!uu(o,s):!0}function Yh(e,t,a,u){e=t.state,typeof t.componentWillReceiveProps=="function"&&t.componentWillReceiveProps(a,u),typeof t.UNSAFE_componentWillReceiveProps=="function"&&t.UNSAFE_componentWillReceiveProps(a,u),t.state!==e&&qc.enqueueReplaceState(t,t.state,null)}function ja(e,t){var a=t;if("ref"in t){a={};for(var u in t)u!=="ref"&&(a[u]=t[u])}if(e=e.defaultProps){a===t&&(a=y({},a));for(var o in e)a[o]===void 0&&(a[o]=e[o])}return a}var tr=typeof reportError=="function"?reportError:function(e){if(typeof window=="object"&&typeof window.ErrorEvent=="function"){var t=new window.ErrorEvent("error",{bubbles:!0,cancelable:!0,message:typeof e=="object"&&e!==null&&typeof e.message=="string"?String(e.message):String(e),error:e});if(!window.dispatchEvent(t))return}else if(typeof process=="object"&&typeof process.emit=="function"){process.emit("uncaughtException",e);return}console.error(e)};function Gh(e){tr(e)}function 
Xh(e){console.error(e)}function Qh(e){tr(e)}function nr(e,t){try{var a=e.onUncaughtError;a(t.value,{componentStack:t.stack})}catch(u){setTimeout(function(){throw u})}}function Kh(e,t,a){try{var u=e.onCaughtError;u(a.value,{componentStack:a.stack,errorBoundary:t.tag===1?t.stateNode:null})}catch(o){setTimeout(function(){throw o})}}function Vc(e,t,a){return a=Jn(a),a.tag=3,a.payload={element:null},a.callback=function(){nr(e,t)},a}function Jh(e){return e=Jn(e),e.tag=3,e}function Ph(e,t,a,u){var o=a.type.getDerivedStateFromError;if(typeof o=="function"){var s=u.value;e.payload=function(){return o(s)},e.callback=function(){Kh(t,a,u)}}var h=a.stateNode;h!==null&&typeof h.componentDidCatch=="function"&&(e.callback=function(){Kh(t,a,u),typeof o!="function"&&(ua===null?ua=new Set([this]):ua.add(this));var g=u.stack;this.componentDidCatch(u.value,{componentStack:g!==null?g:""})})}function Yy(e,t,a,u,o){if(a.flags|=32768,u!==null&&typeof u=="object"&&typeof u.then=="function"){if(t=a.alternate,t!==null&&su(t,a,o,!0),a=$t.current,a!==null){switch(a.tag){case 13:return rn===null?hs():a.alternate===null&&ke===0&&(ke=3),a.flags&=-257,a.flags|=65536,a.lanes=o,u===_c?a.flags|=16384:(t=a.updateQueue,t===null?a.updateQueue=new Set([u]):t.add(u),ms(e,u,o)),!1;case 22:return a.flags|=65536,u===_c?a.flags|=16384:(t=a.updateQueue,t===null?(t={transitions:null,markerInstances:null,retryQueue:new Set([u])},a.updateQueue=t):(a=t.retryQueue,a===null?t.retryQueue=new Set([u]):a.add(u)),ms(e,u,o)),!1}throw Error(r(435,a.tag))}return ms(e,u,o),hs(),!1}if(ye)return t=$t.current,t!==null?((t.flags&65536)===0&&(t.flags|=256),t.flags|=65536,t.lanes=o,u!==fc&&(e=Error(r(422),{cause:u}),cu(Bt(e,a)))):(u!==fc&&(t=Error(r(423),{cause:u}),cu(Bt(t,a))),e=e.current.alternate,e.flags|=65536,o&=-o,e.lanes|=o,u=Bt(u,a),o=Vc(e.stateNode,u,o),Sc(e,o),ke!==4&&(ke=2)),!1;var s=Error(r(520),{cause:u});if(s=Bt(s,a),Du===null?Du=[s]:Du.push(s),ke!==4&&(ke=2),t===null)return!0;u=Bt(u,a),a=t;do{switch(a.tag){case 
3:return a.flags|=65536,e=o&-o,a.lanes|=e,e=Vc(a.stateNode,u,e),Sc(a,e),!1;case 1:if(t=a.type,s=a.stateNode,(a.flags&128)===0&&(typeof t.getDerivedStateFromError=="function"||s!==null&&typeof s.componentDidCatch=="function"&&(ua===null||!ua.has(s))))return a.flags|=65536,o&=-o,a.lanes|=o,o=Jh(o),Ph(o,e,a,u),Sc(a,o),!1}a=a.return}while(a!==null);return!1}var Wh=Error(r(461)),et=!1;function it(e,t,a,u){t.child=e===null?$h(t,null,a,u):bl(t,e.child,a,u)}function Fh(e,t,a,u,o){a=a.render;var s=t.ref;if("ref"in u){var h={};for(var g in u)g!=="ref"&&(h[g]=u[g])}else h=u;return Da(t),u=Tc(e,t,a,h,s,o),g=zc(),e!==null&&!et?(Rc(e,t,o),Rn(e,t,o)):(ye&&g&&cc(t),t.flags|=1,it(e,t,u,o),t.child)}function Ih(e,t,a,u,o){if(e===null){var s=a.type;return typeof s=="function"&&!ic(s)&&s.defaultProps===void 0&&a.compare===null?(t.tag=15,t.type=s,ev(e,t,s,u,o)):(e=Ui(a.type,null,u,t,t.mode,o),e.ref=t.ref,e.return=t,t.child=e)}if(s=e.child,!Wc(e,o)){var h=s.memoizedProps;if(a=a.compare,a=a!==null?a:uu,a(h,u)&&e.ref===t.ref)return Rn(e,t,o)}return t.flags|=1,e=Sn(s,u),e.ref=t.ref,e.return=t,t.child=e}function ev(e,t,a,u,o){if(e!==null){var s=e.memoizedProps;if(uu(s,u)&&e.ref===t.ref)if(et=!1,t.pendingProps=u=s,Wc(e,o))(e.flags&131072)!==0&&(et=!0);else return t.lanes=e.lanes,Rn(e,t,o)}return Yc(e,t,a,u,o)}function tv(e,t,a){var u=t.pendingProps,o=u.children,s=e!==null?e.memoizedState:null;if(u.mode==="hidden"){if((t.flags&128)!==0){if(u=s!==null?s.baseLanes|a:a,e!==null){for(o=t.child=e.child,s=0;o!==null;)s=s|o.lanes|o.childLanes,o=o.sibling;t.childLanes=s&~u}else t.childLanes=0,t.child=null;return nv(e,t,u,a)}if((a&536870912)!==0)t.memoizedState={baseLanes:0,cachePool:null},e!==null&&ki(t,s!==null?s.cachePool:null),s!==null?eh(t,s):Ec(),qh(t);else return t.lanes=t.childLanes=536870912,nv(e,t,s!==null?s.baseLanes|a:a,a)}else s!==null?(ki(t,s.cachePool),eh(t,s),In(),t.memoizedState=null):(e!==null&&ki(t,null),Ec(),In());return it(e,t,o,a),t.child}function nv(e,t,a,u){var o=gc();return 
o=o===null?null:{parent:Je._currentValue,pool:o},t.memoizedState={baseLanes:a,cachePool:o},e!==null&&ki(t,null),Ec(),qh(t),e!==null&&su(e,t,u,!0),null}function ar(e,t){var a=t.ref;if(a===null)e!==null&&e.ref!==null&&(t.flags|=4194816);else{if(typeof a!="function"&&typeof a!="object")throw Error(r(284));(e===null||e.ref!==a)&&(t.flags|=4194816)}}function Yc(e,t,a,u,o){return Da(t),a=Tc(e,t,a,u,void 0,o),u=zc(),e!==null&&!et?(Rc(e,t,o),Rn(e,t,o)):(ye&&u&&cc(t),t.flags|=1,it(e,t,a,o),t.child)}function av(e,t,a,u,o,s){return Da(t),t.updateQueue=null,a=nh(t,u,a,o),th(e),u=zc(),e!==null&&!et?(Rc(e,t,s),Rn(e,t,s)):(ye&&u&&cc(t),t.flags|=1,it(e,t,a,s),t.child)}function lv(e,t,a,u,o){if(Da(t),t.stateNode===null){var s=sl,h=a.contextType;typeof h=="object"&&h!==null&&(s=st(h)),s=new a(u,s),t.memoizedState=s.state!==null&&s.state!==void 0?s.state:null,s.updater=qc,t.stateNode=s,s._reactInternals=t,s=t.stateNode,s.props=u,s.state=t.memoizedState,s.refs={},yc(t),h=a.contextType,s.context=typeof h=="object"&&h!==null?st(h):sl,s.state=t.memoizedState,h=a.getDerivedStateFromProps,typeof h=="function"&&($c(t,a,h,u),s.state=t.memoizedState),typeof a.getDerivedStateFromProps=="function"||typeof s.getSnapshotBeforeUpdate=="function"||typeof s.UNSAFE_componentWillMount!="function"&&typeof s.componentWillMount!="function"||(h=s.state,typeof s.componentWillMount=="function"&&s.componentWillMount(),typeof s.UNSAFE_componentWillMount=="function"&&s.UNSAFE_componentWillMount(),h!==s.state&&qc.enqueueReplaceState(s,s.state,null),gu(t,u,s,o),pu(),s.state=t.memoizedState),typeof s.componentDidMount=="function"&&(t.flags|=4194308),u=!0}else if(e===null){s=t.stateNode;var g=t.memoizedProps,_=ja(a,g);s.props=_;var A=s.context,N=a.contextType;h=sl,typeof N=="object"&&N!==null&&(h=st(N));var U=a.getDerivedStateFromProps;N=typeof U=="function"||typeof s.getSnapshotBeforeUpdate=="function",g=t.pendingProps!==g,N||typeof s.UNSAFE_componentWillReceiveProps!="function"&&typeof 
s.componentWillReceiveProps!="function"||(g||A!==h)&&Yh(t,s,u,h),Kn=!1;var T=t.memoizedState;s.state=T,gu(t,u,s,o),pu(),A=t.memoizedState,g||T!==A||Kn?(typeof U=="function"&&($c(t,a,U,u),A=t.memoizedState),(_=Kn||Vh(t,a,_,u,T,A,h))?(N||typeof s.UNSAFE_componentWillMount!="function"&&typeof s.componentWillMount!="function"||(typeof s.componentWillMount=="function"&&s.componentWillMount(),typeof s.UNSAFE_componentWillMount=="function"&&s.UNSAFE_componentWillMount()),typeof s.componentDidMount=="function"&&(t.flags|=4194308)):(typeof s.componentDidMount=="function"&&(t.flags|=4194308),t.memoizedProps=u,t.memoizedState=A),s.props=u,s.state=A,s.context=h,u=_):(typeof s.componentDidMount=="function"&&(t.flags|=4194308),u=!1)}else{s=t.stateNode,bc(e,t),h=t.memoizedProps,N=ja(a,h),s.props=N,U=t.pendingProps,T=s.context,A=a.contextType,_=sl,typeof A=="object"&&A!==null&&(_=st(A)),g=a.getDerivedStateFromProps,(A=typeof g=="function"||typeof s.getSnapshotBeforeUpdate=="function")||typeof s.UNSAFE_componentWillReceiveProps!="function"&&typeof s.componentWillReceiveProps!="function"||(h!==U||T!==_)&&Yh(t,s,u,_),Kn=!1,T=t.memoizedState,s.state=T,gu(t,u,s,o),pu();var R=t.memoizedState;h!==U||T!==R||Kn||e!==null&&e.dependencies!==null&&Li(e.dependencies)?(typeof g=="function"&&($c(t,a,g,u),R=t.memoizedState),(N=Kn||Vh(t,a,N,u,T,R,_)||e!==null&&e.dependencies!==null&&Li(e.dependencies))?(A||typeof s.UNSAFE_componentWillUpdate!="function"&&typeof s.componentWillUpdate!="function"||(typeof s.componentWillUpdate=="function"&&s.componentWillUpdate(u,R,_),typeof s.UNSAFE_componentWillUpdate=="function"&&s.UNSAFE_componentWillUpdate(u,R,_)),typeof s.componentDidUpdate=="function"&&(t.flags|=4),typeof s.getSnapshotBeforeUpdate=="function"&&(t.flags|=1024)):(typeof s.componentDidUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=4),typeof 
s.getSnapshotBeforeUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=1024),t.memoizedProps=u,t.memoizedState=R),s.props=u,s.state=R,s.context=_,u=N):(typeof s.componentDidUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=4),typeof s.getSnapshotBeforeUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=1024),u=!1)}return s=u,ar(e,t),u=(t.flags&128)!==0,s||u?(s=t.stateNode,a=u&&typeof a.getDerivedStateFromError!="function"?null:s.render(),t.flags|=1,e!==null&&u?(t.child=bl(t,e.child,null,o),t.child=bl(t,null,a,o)):it(e,t,a,o),t.memoizedState=s.state,e=t.child):e=Rn(e,t,o),e}function uv(e,t,a,u){return ou(),t.flags|=256,it(e,t,a,u),t.child}var Gc={dehydrated:null,treeContext:null,retryLane:0,hydrationErrors:null};function Xc(e){return{baseLanes:e,cachePool:Xd()}}function Qc(e,t,a){return e=e!==null?e.childLanes&~a:0,t&&(e|=qt),e}function iv(e,t,a){var u=t.pendingProps,o=!1,s=(t.flags&128)!==0,h;if((h=s)||(h=e!==null&&e.memoizedState===null?!1:(Pe.current&2)!==0),h&&(o=!0,t.flags&=-129),h=(t.flags&32)!==0,t.flags&=-33,e===null){if(ye){if(o?Fn(t):In(),ye){var g=He,_;if(_=g){e:{for(_=g,g=un;_.nodeType!==8;){if(!g){g=null;break e}if(_=Ft(_.nextSibling),_===null){g=null;break e}}g=_}g!==null?(t.memoizedState={dehydrated:g,treeContext:Aa!==null?{id:On,overflow:En}:null,retryLane:536870912,hydrationErrors:null},_=At(18,null,null,0),_.stateNode=g,_.return=t,t.child=_,dt=t,He=null,_=!0):_=!1}_||Ra(t)}if(g=t.memoizedState,g!==null&&(g=g.dehydrated,g!==null))return Ds(g)?t.lanes=32:t.lanes=536870912,null;zn(t)}return 
g=u.children,u=u.fallback,o?(In(),o=t.mode,g=lr({mode:"hidden",children:g},o),u=xa(u,o,a,null),g.return=t,u.return=t,g.sibling=u,t.child=g,o=t.child,o.memoizedState=Xc(a),o.childLanes=Qc(e,h,a),t.memoizedState=Gc,u):(Fn(t),Kc(t,g))}if(_=e.memoizedState,_!==null&&(g=_.dehydrated,g!==null)){if(s)t.flags&256?(Fn(t),t.flags&=-257,t=Jc(e,t,a)):t.memoizedState!==null?(In(),t.child=e.child,t.flags|=128,t=null):(In(),o=u.fallback,g=t.mode,u=lr({mode:"visible",children:u.children},g),o=xa(o,g,a,null),o.flags|=2,u.return=t,o.return=t,u.sibling=o,t.child=u,bl(t,e.child,null,a),u=t.child,u.memoizedState=Xc(a),u.childLanes=Qc(e,h,a),t.memoizedState=Gc,t=o);else if(Fn(t),Ds(g)){if(h=g.nextSibling&&g.nextSibling.dataset,h)var A=h.dgst;h=A,u=Error(r(419)),u.stack="",u.digest=h,cu({value:u,source:null,stack:null}),t=Jc(e,t,a)}else if(et||su(e,t,a,!1),h=(a&e.childLanes)!==0,et||h){if(h=we,h!==null&&(u=a&-a,u=(u&42)!==0?1:Mo(u),u=(u&(h.suspendedLanes|a))!==0?0:u,u!==0&&u!==_.retryLane))throw _.retryLane=u,cl(e,u),Dt(h,e,u),Wh;g.data==="$?"||hs(),t=Jc(e,t,a)}else g.data==="$?"?(t.flags|=192,t.child=e.child,t=null):(e=_.treeContext,He=Ft(g.nextSibling),dt=t,ye=!0,za=null,un=!1,e!==null&&(Ht[kt++]=On,Ht[kt++]=En,Ht[kt++]=Aa,On=e.id,En=e.overflow,Aa=t),t=Kc(t,u.children),t.flags|=4096);return t}return o?(In(),o=u.fallback,g=t.mode,_=e.child,A=_.sibling,u=Sn(_,{mode:"hidden",children:u.children}),u.subtreeFlags=_.subtreeFlags&65011712,A!==null?o=Sn(A,o):(o=xa(o,g,a,null),o.flags|=2),o.return=t,u.return=t,u.sibling=o,t.child=u,u=o,o=t.child,g=e.child.memoizedState,g===null?g=Xc(a):(_=g.cachePool,_!==null?(A=Je._currentValue,_=_.parent!==A?{parent:A,pool:A}:_):_=Xd(),g={baseLanes:g.baseLanes|a,cachePool:_}),o.memoizedState=g,o.childLanes=Qc(e,h,a),t.memoizedState=Gc,u):(Fn(t),a=e.child,e=a.sibling,a=Sn(a,{mode:"visible",children:u.children}),a.return=t,a.sibling=null,e!==null&&(h=t.deletions,h===null?(t.deletions=[e],t.flags|=16):h.push(e)),t.child=a,t.memoizedState=null,a)}function 
Kc(e,t){return t=lr({mode:"visible",children:t},e.mode),t.return=e,e.child=t}function lr(e,t){return e=At(22,e,null,t),e.lanes=0,e.stateNode={_visibility:1,_pendingMarkers:null,_retryCache:null,_transitions:null},e}function Jc(e,t,a){return bl(t,e.child,null,a),e=Kc(t,t.pendingProps.children),e.flags|=2,t.memoizedState=null,e}function rv(e,t,a){e.lanes|=t;var u=e.alternate;u!==null&&(u.lanes|=t),hc(e.return,t,a)}function Pc(e,t,a,u,o){var s=e.memoizedState;s===null?e.memoizedState={isBackwards:t,rendering:null,renderingStartTime:0,last:u,tail:a,tailMode:o}:(s.isBackwards=t,s.rendering=null,s.renderingStartTime=0,s.last=u,s.tail=a,s.tailMode=o)}function ov(e,t,a){var u=t.pendingProps,o=u.revealOrder,s=u.tail;if(it(e,t,u.children,a),u=Pe.current,(u&2)!==0)u=u&1|2,t.flags|=128;else{if(e!==null&&(e.flags&128)!==0)e:for(e=t.child;e!==null;){if(e.tag===13)e.memoizedState!==null&&rv(e,a,t);else if(e.tag===19)rv(e,a,t);else if(e.child!==null){e.child.return=e,e=e.child;continue}if(e===t)break e;for(;e.sibling===null;){if(e.return===null||e.return===t)break e;e=e.return}e.sibling.return=e.return,e=e.sibling}u&=1}switch(V(Pe,u),o){case"forwards":for(a=t.child,o=null;a!==null;)e=a.alternate,e!==null&&er(e)===null&&(o=a),a=a.sibling;a=o,a===null?(o=t.child,t.child=null):(o=a.sibling,a.sibling=null),Pc(t,!1,o,a,s);break;case"backwards":for(a=null,o=t.child,t.child=null;o!==null;){if(e=o.alternate,e!==null&&er(e)===null){t.child=o;break}e=o.sibling,o.sibling=a,a=o,o=e}Pc(t,!0,a,null,s);break;case"together":Pc(t,!1,null,null,void 0);break;default:t.memoizedState=null}return t.child}function Rn(e,t,a){if(e!==null&&(t.dependencies=e.dependencies),la|=t.lanes,(a&t.childLanes)===0)if(e!==null){if(su(e,t,a,!1),(a&t.childLanes)===0)return null}else return null;if(e!==null&&t.child!==e.child)throw Error(r(153));if(t.child!==null){for(e=t.child,a=Sn(e,e.pendingProps),t.child=a,a.return=t;e.sibling!==null;)e=e.sibling,a=a.sibling=Sn(e,e.pendingProps),a.return=t;a.sibling=null}return 
t.child}function Wc(e,t){return(e.lanes&t)!==0?!0:(e=e.dependencies,!!(e!==null&&Li(e)))}function Gy(e,t,a){switch(t.tag){case 3:Ne(t,t.stateNode.containerInfo),Qn(t,Je,e.memoizedState.cache),ou();break;case 27:case 5:To(t);break;case 4:Ne(t,t.stateNode.containerInfo);break;case 10:Qn(t,t.type,t.memoizedProps.value);break;case 13:var u=t.memoizedState;if(u!==null)return u.dehydrated!==null?(Fn(t),t.flags|=128,null):(a&t.child.childLanes)!==0?iv(e,t,a):(Fn(t),e=Rn(e,t,a),e!==null?e.sibling:null);Fn(t);break;case 19:var o=(e.flags&128)!==0;if(u=(a&t.childLanes)!==0,u||(su(e,t,a,!1),u=(a&t.childLanes)!==0),o){if(u)return ov(e,t,a);t.flags|=128}if(o=t.memoizedState,o!==null&&(o.rendering=null,o.tail=null,o.lastEffect=null),V(Pe,Pe.current),u)break;return null;case 22:case 23:return t.lanes=0,tv(e,t,a);case 24:Qn(t,Je,e.memoizedState.cache)}return Rn(e,t,a)}function cv(e,t,a){if(e!==null)if(e.memoizedProps!==t.pendingProps)et=!0;else{if(!Wc(e,a)&&(t.flags&128)===0)return et=!1,Gy(e,t,a);et=(e.flags&131072)!==0}else et=!1,ye&&(t.flags&1048576)!==0&&Hd(t,Bi,t.index);switch(t.lanes=0,t.tag){case 16:e:{e=t.pendingProps;var u=t.elementType,o=u._init;if(u=o(u._payload),t.type=u,typeof u=="function")ic(u)?(e=ja(u,e),t.tag=1,t=lv(null,t,u,e,a)):(t.tag=0,t=Yc(null,t,u,e,a));else{if(u!=null){if(o=u.$$typeof,o===I){t.tag=11,t=Fh(null,t,u,e,a);break e}else if(o===Re){t.tag=14,t=Ih(null,t,u,e,a);break e}}throw t=qn(u)||u,Error(r(306,t,""))}}return t;case 0:return Yc(e,t,t.type,t.pendingProps,a);case 1:return u=t.type,o=ja(u,t.pendingProps),lv(e,t,u,o,a);case 3:e:{if(Ne(t,t.stateNode.containerInfo),e===null)throw Error(r(387));u=t.pendingProps;var s=t.memoizedState;o=s.element,bc(e,t),gu(t,u,null,a);var h=t.memoizedState;if(u=h.cache,Qn(t,Je,u),u!==s.cache&&vc(t,[Je],a,!0),pu(),u=h.element,s.isDehydrated)if(s={element:u,isDehydrated:!1,cache:h.cache},t.updateQueue.baseState=s,t.memoizedState=s,t.flags&256){t=uv(e,t,u,a);break e}else 
if(u!==o){o=Bt(Error(r(424)),t),cu(o),t=uv(e,t,u,a);break e}else{switch(e=t.stateNode.containerInfo,e.nodeType){case 9:e=e.body;break;default:e=e.nodeName==="HTML"?e.ownerDocument.body:e}for(He=Ft(e.firstChild),dt=t,ye=!0,za=null,un=!0,a=$h(t,null,u,a),t.child=a;a;)a.flags=a.flags&-3|4096,a=a.sibling}else{if(ou(),u===o){t=Rn(e,t,a);break e}it(e,t,u,a)}t=t.child}return t;case 26:return ar(e,t),e===null?(a=hm(t.type,null,t.pendingProps,null))?t.memoizedState=a:ye||(a=t.type,e=t.pendingProps,u=_r(re.current).createElement(a),u[ct]=t,u[ht]=e,ot(u,a,e),Ie(u),t.stateNode=u):t.memoizedState=hm(t.type,e.memoizedProps,t.pendingProps,e.memoizedState),null;case 27:return To(t),e===null&&ye&&(u=t.stateNode=sm(t.type,t.pendingProps,re.current),dt=t,un=!0,o=He,oa(t.type)?(Ms=o,He=Ft(u.firstChild)):He=o),it(e,t,t.pendingProps.children,a),ar(e,t),e===null&&(t.flags|=4194304),t.child;case 5:return e===null&&ye&&((o=u=He)&&(u=y0(u,t.type,t.pendingProps,un),u!==null?(t.stateNode=u,dt=t,He=Ft(u.firstChild),un=!1,o=!0):o=!1),o||Ra(t)),To(t),o=t.type,s=t.pendingProps,h=e!==null?e.memoizedProps:null,u=s.children,zs(o,s)?u=null:h!==null&&zs(o,h)&&(t.flags|=32),t.memoizedState!==null&&(o=Tc(e,t,By,null,null,a),Hu._currentValue=o),ar(e,t),it(e,t,u,a),t.child;case 6:return e===null&&ye&&((e=a=He)&&(a=b0(a,t.pendingProps,un),a!==null?(t.stateNode=a,dt=t,He=null,e=!0):e=!1),e||Ra(t)),null;case 13:return iv(e,t,a);case 4:return Ne(t,t.stateNode.containerInfo),u=t.pendingProps,e===null?t.child=bl(t,null,u,a):it(e,t,u,a),t.child;case 11:return Fh(e,t,t.type,t.pendingProps,a);case 7:return it(e,t,t.pendingProps,a),t.child;case 8:return it(e,t,t.pendingProps.children,a),t.child;case 12:return it(e,t,t.pendingProps.children,a),t.child;case 10:return u=t.pendingProps,Qn(t,t.type,u.value),it(e,t,u.children,a),t.child;case 9:return o=t.type._context,u=t.pendingProps.children,Da(t),o=st(o),u=u(o),t.flags|=1,it(e,t,u,a),t.child;case 14:return Ih(e,t,t.type,t.pendingProps,a);case 15:return 
ev(e,t,t.type,t.pendingProps,a);case 19:return ov(e,t,a);case 31:return u=t.pendingProps,a=t.mode,u={mode:u.mode,children:u.children},e===null?(a=lr(u,a),a.ref=t.ref,t.child=a,a.return=t,t=a):(a=Sn(e.child,u),a.ref=t.ref,t.child=a,a.return=t,t=a),t;case 22:return tv(e,t,a);case 24:return Da(t),u=st(Je),e===null?(o=gc(),o===null&&(o=we,s=mc(),o.pooledCache=s,s.refCount++,s!==null&&(o.pooledCacheLanes|=a),o=s),t.memoizedState={parent:u,cache:o},yc(t),Qn(t,Je,o)):((e.lanes&a)!==0&&(bc(e,t),gu(t,null,null,a),pu()),o=e.memoizedState,s=t.memoizedState,o.parent!==u?(o={parent:u,cache:u},t.memoizedState=o,t.lanes===0&&(t.memoizedState=t.updateQueue.baseState=o),Qn(t,Je,u)):(u=s.cache,Qn(t,Je,u),u!==o.cache&&vc(t,[Je],a,!0))),it(e,t,t.pendingProps.children,a),t.child;case 29:throw t.pendingProps}throw Error(r(156,t.tag))}function wn(e){e.flags|=4}function sv(e,t){if(t.type!=="stylesheet"||(t.state.loading&4)!==0)e.flags&=-16777217;else if(e.flags|=16777216,!_m(t)){if(t=$t.current,t!==null&&((he&4194048)===he?rn!==null:(he&62914560)!==he&&(he&536870912)===0||t!==rn))throw vu=_c,Qd;e.flags|=8192}}function ur(e,t){t!==null&&(e.flags|=4),e.flags&16384&&(t=e.tag!==22?qf():536870912,e.lanes|=t,xl|=t)}function xu(e,t){if(!ye)switch(e.tailMode){case"hidden":t=e.tail;for(var a=null;t!==null;)t.alternate!==null&&(a=t),t=t.sibling;a===null?e.tail=null:a.sibling=null;break;case"collapsed":a=e.tail;for(var u=null;a!==null;)a.alternate!==null&&(u=a),a=a.sibling;u===null?t||e.tail===null?e.tail=null:e.tail.sibling=null:u.sibling=null}}function Ze(e){var t=e.alternate!==null&&e.alternate.child===e.child,a=0,u=0;if(t)for(var o=e.child;o!==null;)a|=o.lanes|o.childLanes,u|=o.subtreeFlags&65011712,u|=o.flags&65011712,o.return=e,o=o.sibling;else for(o=e.child;o!==null;)a|=o.lanes|o.childLanes,u|=o.subtreeFlags,u|=o.flags,o.return=e,o=o.sibling;return e.subtreeFlags|=u,e.childLanes=a,t}function Xy(e,t,a){var u=t.pendingProps;switch(sc(t),t.tag){case 31:case 16:case 15:case 0:case 11:case 7:case 
8:case 12:case 9:case 14:return Ze(t),null;case 1:return Ze(t),null;case 3:return a=t.stateNode,u=null,e!==null&&(u=e.memoizedState.cache),t.memoizedState.cache!==u&&(t.flags|=2048),An(Je),Vn(),a.pendingContext&&(a.context=a.pendingContext,a.pendingContext=null),(e===null||e.child===null)&&(ru(t)?wn(t):e===null||e.memoizedState.isDehydrated&&(t.flags&256)===0||(t.flags|=1024,qd())),Ze(t),null;case 26:return a=t.memoizedState,e===null?(wn(t),a!==null?(Ze(t),sv(t,a)):(Ze(t),t.flags&=-16777217)):a?a!==e.memoizedState?(wn(t),Ze(t),sv(t,a)):(Ze(t),t.flags&=-16777217):(e.memoizedProps!==u&&wn(t),Ze(t),t.flags&=-16777217),null;case 27:pi(t),a=re.current;var o=t.type;if(e!==null&&t.stateNode!=null)e.memoizedProps!==u&&wn(t);else{if(!u){if(t.stateNode===null)throw Error(r(166));return Ze(t),null}e=ee.current,ru(t)?kd(t):(e=sm(o,u,a),t.stateNode=e,wn(t))}return Ze(t),null;case 5:if(pi(t),a=t.type,e!==null&&t.stateNode!=null)e.memoizedProps!==u&&wn(t);else{if(!u){if(t.stateNode===null)throw Error(r(166));return Ze(t),null}if(e=ee.current,ru(t))kd(t);else{switch(o=_r(re.current),e){case 1:e=o.createElementNS("http://www.w3.org/2000/svg",a);break;case 2:e=o.createElementNS("http://www.w3.org/1998/Math/MathML",a);break;default:switch(a){case"svg":e=o.createElementNS("http://www.w3.org/2000/svg",a);break;case"math":e=o.createElementNS("http://www.w3.org/1998/Math/MathML",a);break;case"script":e=o.createElement("div"),e.innerHTML=" - + +
From e37b0783899bad1085281b59b1c422479a83358f Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 16:01:14 -0700 Subject: [PATCH 26/31] Add script alias for eval_protocol CLI in pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8274e558..9e6112a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,7 @@ adapters = [ [project.scripts] fireworks-reward = "eval_protocol.cli:main" eval-protocol = "eval_protocol.cli:main" +ep = "eval_protocol.cli:main" [tool.setuptools.packages.find] include = ["eval_protocol*", "development*", "vendor*"] From 698b04d06c006c04e431cd02a0e1f99b92612c8d Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 16:19:47 -0700 Subject: [PATCH 27/31] Fix import path for braintrust adapters in eval_protocol module --- eval_protocol/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/eval_protocol/__init__.py b/eval_protocol/__init__.py index 4fea58cb..5939896f 100644 --- a/eval_protocol/__init__.py +++ b/eval_protocol/__init__.py @@ -10,15 +10,16 @@ import warnings -from .adapters.braintrust import reward_fn_to_scorer, scorer_to_reward_fn +from eval_protocol.adapters.braintrust import reward_fn_to_scorer, scorer_to_reward_fn + from .auth import get_fireworks_account_id, get_fireworks_api_key from .common_utils import load_jsonl from .config import RewardKitConfig, get_config, load_config from .mcp_env import ( AnthropicPolicy, - OpenAIPolicy, - LiteLLMPolicy, FireworksPolicy, + LiteLLMPolicy, + OpenAIPolicy, make, rollout, test_mcp, From a7b76d47fe0e9ad62135996478fec620130dfd33 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 16:19:51 -0700 Subject: [PATCH 28/31] Update broadcast_file_update method to restrict broadcasting to .jsonl files only, preventing unnecessary updates for .lock files. 
--- eval_protocol/utils/logs_server.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eval_protocol/utils/logs_server.py b/eval_protocol/utils/logs_server.py index 545fef3d..98310ba0 100644 --- a/eval_protocol/utils/logs_server.py +++ b/eval_protocol/utils/logs_server.py @@ -93,7 +93,11 @@ def disconnect(self, websocket: WebSocket): def broadcast_file_update(self, update_type: str, file_path: str): """Broadcast file update to all connected clients.""" - if not file_path.startswith(default_logger.datasets_dir): + if not file_path.startswith(default_logger.datasets_dir) and not file_path.endswith(".jsonl"): + """ + .lock files are often created and deleted by the singleton lock + mechanism so we only broadcast .jsonl files + """ return logger.info(f"Broadcasting file update: {update_type} {file_path}") From a0a487ff5278c8c07e74bbd37be594673db68846 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 16:26:24 -0700 Subject: [PATCH 29/31] Fix broadcast_file_update logic to ensure only .jsonl files are broadcasted, preventing unnecessary updates for .lock files. 
--- eval_protocol/utils/logs_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval_protocol/utils/logs_server.py b/eval_protocol/utils/logs_server.py index 98310ba0..ce48205a 100644 --- a/eval_protocol/utils/logs_server.py +++ b/eval_protocol/utils/logs_server.py @@ -93,7 +93,7 @@ def disconnect(self, websocket: WebSocket): def broadcast_file_update(self, update_type: str, file_path: str): """Broadcast file update to all connected clients.""" - if not file_path.startswith(default_logger.datasets_dir) and not file_path.endswith(".jsonl"): + if not file_path.startswith(default_logger.datasets_dir) or not file_path.endswith(".jsonl"): """ .lock files are often created and deleted by the singleton lock mechanism so we only broadcast .jsonl files From f84360fe091f9df7c0a8159214de3f1b6dfe9f77 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 16:44:52 -0700 Subject: [PATCH 30/31] remove a bunch of stuff --- .../local_fs_dataset_logger_adapter.py | 101 +-- eval_protocol/pytest/eval_watcher.py | 329 ---------- eval_protocol/pytest/evaluation_test.py | 4 - eval_protocol/singleton_lock.py | 234 ------- tests/test_eval_watcher.py | 488 -------------- tests/utils/test_singleton_lock.py | 259 -------- .../test_singleton_lock_multiprocessing.py | 606 ------------------ 7 files changed, 24 insertions(+), 1997 deletions(-) delete mode 100644 eval_protocol/pytest/eval_watcher.py delete mode 100644 eval_protocol/singleton_lock.py delete mode 100644 tests/test_eval_watcher.py delete mode 100644 tests/utils/test_singleton_lock.py delete mode 100644 tests/utils/test_singleton_lock_multiprocessing.py diff --git a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py index a8a976fa..a482a47a 100644 --- a/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py @@ -8,7 +8,6 @@ from 
eval_protocol.common_utils import load_jsonl from eval_protocol.dataset_logger.dataset_logger import DatasetLogger from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_datasets_dir -from eval_protocol.singleton_lock import acquire_singleton_lock, release_singleton_lock if TYPE_CHECKING: from eval_protocol.models import EvaluationRow @@ -40,44 +39,6 @@ def current_jsonl_path(self) -> str: """ return os.path.join(self.datasets_dir, f"{self.current_date}.jsonl") - def _acquire_file_lock(self, file_path: str, timeout: float = 30.0) -> bool: - """ - Acquire a lock for a specific file using the singleton lock mechanism. - - Args: - file_path: Path to the file to lock - timeout: Maximum time to wait for lock acquisition in seconds - - Returns: - True if lock was acquired, False if timeout occurred - """ - # Create a lock name based on the file path - lock_name = f"file_lock_{os.path.basename(file_path)}" - base_dir = Path(os.path.dirname(file_path)) - - start_time = time.time() - while time.time() - start_time < timeout: - result = acquire_singleton_lock(base_dir, lock_name) - if result is None: - # Successfully acquired lock - return True - else: - # Lock is held by another process, wait and retry - time.sleep(0.1) - - return False - - def _release_file_lock(self, file_path: str) -> None: - """ - Release the lock for a specific file. 
- - Args: - file_path: Path to the file to unlock - """ - lock_name = f"file_lock_{os.path.basename(file_path)}" - base_dir = Path(os.path.dirname(file_path)) - release_singleton_lock(base_dir, lock_name) - def log(self, row: "EvaluationRow") -> None: """Log a row, updating existing row with same ID or appending new row.""" row_id = row.input_metadata.row_id @@ -88,35 +49,25 @@ def log(self, row: "EvaluationRow") -> None: if filename.endswith(".jsonl"): file_path = os.path.join(self.datasets_dir, filename) if os.path.exists(file_path): - if self._acquire_file_lock(file_path): + with open(file_path, "r") as f: + lines = f.readlines() + + # Find the line with matching ID + for i, line in enumerate(lines): try: - with open(file_path, "r") as f: - lines = f.readlines() - - # Find the line with matching ID - for i, line in enumerate(lines): - try: - line_data = json.loads(line.strip()) - if line_data["input_metadata"]["row_id"] == row_id: - # Update existing row - lines[i] = row.model_dump_json(exclude_none=True) + os.linesep - with open(file_path, "w") as f: - f.writelines(lines) - return - except json.JSONDecodeError: - continue - finally: - self._release_file_lock(file_path) + line_data = json.loads(line.strip()) + if line_data["input_metadata"]["row_id"] == row_id: + # Update existing row + lines[i] = row.model_dump_json(exclude_none=True) + os.linesep + with open(file_path, "w") as f: + f.writelines(lines) + return + except json.JSONDecodeError: + continue # If no existing row found, append new row to current file - if self._acquire_file_lock(self.current_jsonl_path): - try: - with open(self.current_jsonl_path, "a") as f: - f.write(row.model_dump_json(exclude_none=True) + os.linesep) - finally: - self._release_file_lock(self.current_jsonl_path) - else: - raise RuntimeError(f"Failed to acquire lock for log file {self.current_jsonl_path}") + with open(self.current_jsonl_path, "a") as f: + f.write(row.model_dump_json(exclude_none=True) + os.linesep) def read(self, 
row_id: Optional[str] = None) -> List["EvaluationRow"]: """Read rows from all JSONL files in the datasets directory. Also @@ -131,18 +82,14 @@ def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]: for filename in os.listdir(self.datasets_dir): if filename.endswith(".jsonl"): file_path = os.path.join(self.datasets_dir, filename) - if self._acquire_file_lock(file_path): - try: - data = load_jsonl(file_path) - for r in data: - row = EvaluationRow(**r) - if row.input_metadata.row_id not in existing_row_ids: - existing_row_ids.add(row.input_metadata.row_id) - else: - raise ValueError(f"Duplicate Row ID {row.input_metadata.row_id} already exists") - all_rows.append(row) - finally: - self._release_file_lock(file_path) + data = load_jsonl(file_path) + for r in data: + row = EvaluationRow(**r) + if row.input_metadata.row_id not in existing_row_ids: + existing_row_ids.add(row.input_metadata.row_id) + else: + raise ValueError(f"Duplicate Row ID {row.input_metadata.row_id} already exists") + all_rows.append(row) if row_id: # Filter by row_id if specified diff --git a/eval_protocol/pytest/eval_watcher.py b/eval_protocol/pytest/eval_watcher.py deleted file mode 100644 index 464e807b..00000000 --- a/eval_protocol/pytest/eval_watcher.py +++ /dev/null @@ -1,329 +0,0 @@ -#!/usr/bin/env python3 -""" -Evaluation Watcher Process - -This process monitors all evaluation rows and updates any evaluations that are still -"running" but whose associated process has terminated. 
- -Usage: - python -m eval_protocol.pytest.eval_watcher [--check-interval ] -""" - -import argparse -import fcntl -import os -import signal -import subprocess -import sys -import time -from pathlib import Path -from typing import Any, List, Optional - -from eval_protocol.dataset_logger import default_logger -from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir -from eval_protocol.logging_utils import get_logger -from eval_protocol.models import EvaluationRow -from eval_protocol.singleton_lock import ( - acquire_singleton_lock, - get_lock_file_paths, - get_lock_holder_pid, - is_lock_held, - is_process_running, - release_singleton_lock, -) - -# Initialize logger -logger = get_logger("eval_watcher") - -# Lock configuration -LOCK_NAME = "eval_watcher" - - -# Signal handler to automatically reap zombie processes -def _reap_zombies(signum, frame): - """Reap zombie child processes to prevent them from accumulating.""" - try: - while True: - # Wait for any child process, but don't block - pid, status = os.waitpid(-1, os.WNOHANG) - if pid == 0: # No more children to reap - break - except OSError: - # No child processes - pass - - -# Set up signal handler for SIGCHLD if available -if hasattr(signal, "SIGCHLD"): - signal.signal(signal.SIGCHLD, _reap_zombies) - - -def get_eval_protocol_dir() -> Path: - """Get the evaluation protocol directory for lock files.""" - return Path(find_eval_protocol_dir()) - - -def find_running_evaluations() -> List[EvaluationRow]: - """Find all evaluations currently in 'running' status .""" - running_evaluations = [] - all_rows = default_logger.read() - - for row in all_rows: - if row.eval_metadata and row.eval_metadata.status == "running": - running_evaluations.append(row) - - return running_evaluations - - -def update_evaluation_to_stopped(row: EvaluationRow, reason: str) -> None: - """Update an evaluation row to 'stopped' status.""" - try: - if row.eval_metadata: - row.eval_metadata.status = "stopped" - 
row.eval_metadata.passed = False - - if row.evaluation_result is not None: - row.evaluation_result.error = reason - else: - from eval_protocol.models import EvaluateResult - - row.evaluation_result = EvaluateResult( - score=0.0, is_score_valid=False, reason=f"Evaluation stopped: {reason}", error=reason - ) - - default_logger.log(row) - logger.info( - f" šŸ“ Updated evaluation '{row.eval_metadata.name if row.eval_metadata else 'Unknown'}' (Row ID: {row.input_metadata.row_id}) (PID: {row.pid}) to stopped status" - ) - - except Exception as e: - logger.error(f" āš ļø Error updating evaluation row: {e}") - - -def check_and_update_terminated_evaluations() -> int: - """Check for evaluations with terminated processes and update them to stopped status.""" - running_evaluations = find_running_evaluations() - - if not running_evaluations: - return 0 - - logger.info(f"šŸ” Checking {len(running_evaluations)} running evaluations for terminated processes...") - for row in running_evaluations: - logger.info(f" Row ID: {row.input_metadata.row_id} PID: {row.pid}") - - terminated_count = 0 - for row in running_evaluations: - if row.pid: - if not is_process_running(row.pid): - update_evaluation_to_stopped(row, f"Process {row.pid} terminated") - terminated_count += 1 - else: - update_evaluation_to_stopped(row, f"Process {row.pid} terminated") - terminated_count += 1 - - if terminated_count > 0: - logger.info(f" āœ… Updated {terminated_count} evaluations to stopped status") - - return terminated_count - - -def signal_handler(signum, frame): - """Handle termination signals gracefully.""" - signal_name = signal.Signals(signum).name - logger.info(f"\nšŸ›‘ Evaluation watcher received signal {signum} (Signal: {signal_name})") - if signum == signal.SIGTERM: - logger.info("SIGTERM received: ignoring to avoid exit during VSCode pytest debugging.") - return - logger.info("Shutting down gracefully.") - sys.exit(0) - - -def run_watcher_loop(check_interval: float) -> None: - """Main monitoring 
loop.""" - logger.info(f"šŸ” Starting evaluation watcher (PID: {os.getpid()})") - logger.info(f" Check interval: {check_interval} seconds") - logger.info(" Monitoring all evaluation rows for terminated processes") - - # Set up signal handlers for graceful shutdown - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - - consecutive_empty_checks = 0 - max_empty_checks = 30 - - try: - while True: - running_evaluations = find_running_evaluations() - - if running_evaluations: - consecutive_empty_checks = 0 - check_and_update_terminated_evaluations() - else: - consecutive_empty_checks += 1 - if consecutive_empty_checks >= max_empty_checks: - logger.info( - f"šŸ” No running evaluations found for {consecutive_empty_checks} consecutive checks. Exiting watcher." - ) - break - else: - logger.info( - f"šŸ” No running evaluations found ({consecutive_empty_checks}/{max_empty_checks} consecutive checks)" - ) - - time.sleep(check_interval) - - except KeyboardInterrupt: - logger.info("\nšŸ›‘ Evaluation watcher interrupted by user") - except Exception as e: - logger.error(f"\nāŒ Evaluation watcher error: {e}") - finally: - logger.info("šŸ” Evaluation watcher stopped") - - -def _start_watcher_process(check_interval: float) -> Optional[int]: - """Start the watcher in a completely independent background process using subprocess. - - We use subprocess.Popen with start_new_session=True instead of multiprocessing.Process - because VSCode's test debugger kill button sends SIGTERM/SIGKILL to the entire process - tree, including child processes. By using subprocess with a new session, we create - a truly independent process that survives when the parent pytest process is killed. 
- """ - - # Use subprocess to create a completely independent process - # This ensures the process survives even if the parent pytest process is killed - try: - # Get the current script path - current_script = __file__ - - # Create the subprocess with complete independence - process = subprocess.Popen( - [sys.executable, current_script, "--daemon", "--check-interval", str(check_interval)], - # These flags make the process completely independent - start_new_session=True, # Creates a new session, detaching from parent - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - stdin=subprocess.DEVNULL, - close_fds=True, - ) - - return process.pid - - except Exception as e: - logger.error(f"āŒ Failed to start watcher process: {e}") - return None - - -def ensure_singleton_watcher(check_interval: float = 2.0) -> Optional[int]: - """ - Ensure the singleton EvaluationWatcher instance exists and is running. - This function is OS-level global - only one watcher will run across all processes. - The watcher runs as a completely independent process that survives if the main process dies. - - Args: - check_interval: How often to check for terminated processes (seconds) - - Returns: - PID of the watcher process if it was started successfully, None if it failed to start - """ - # Check if a watcher is already running before attempting to start a new one - if is_watcher_running(): - logger.info("šŸ” Evaluation watcher is already running") - return None - - # Start the watcher in a completely independent background process - pid = _start_watcher_process(check_interval) - logger.info(f"šŸ” Started evaluation watcher in independent background process (PID: {pid})") - return pid - - -def is_watcher_running() -> bool: - """Check if the evaluation watcher is currently running.""" - return is_lock_held(get_eval_protocol_dir(), LOCK_NAME) - - -def get_watcher_pid(timeout: float = 10.0) -> Optional[int]: - """Get the PID of the currently running evaluation watcher. 
Tries for 10 seconds.""" - interval = 0.1 - started = time.time() - while time.time() - started < timeout: - pid = get_lock_holder_pid(get_eval_protocol_dir(), LOCK_NAME) - if pid is not None: - return pid - time.sleep(interval) - return None - - -def stop_watcher() -> bool: - """Stop the currently running evaluation watcher.""" - pid = get_watcher_pid() - if pid is None: - logger.info("šŸ” No evaluation watcher is currently running") - return False - - try: - os.kill(pid, signal.SIGKILL) - logger.info(f"šŸ” Sent SIGTERM to evaluation watcher process {pid}") - return True - except OSError as e: - logger.error(f"āŒ Failed to stop evaluation watcher process {pid}: {e}") - return False - - -def main(): - """Main entry point for the evaluation watcher.""" - parser = argparse.ArgumentParser( - description="Monitor all evaluation rows and update those with terminated processes to stopped status" - ) - parser.add_argument( - "--check-interval", - type=float, - default=1.0, - help="How often to check for terminated processes (seconds, default: 1.0)", - ) - parser.add_argument( - "--daemon", - action="store_true", - help="Run in daemon mode (internal use only)", - ) - parser.add_argument( - "--stop", - action="store_true", - help="Stop the currently running evaluation watcher", - ) - - args = parser.parse_args() - - # Handle stop command - if args.stop: - stop_watcher() - return - - # If running in daemon mode, try to acquire the lock and run the watcher loop - if args.daemon: - logger.info(f"šŸ” Daemon mode: attempting to acquire lock (PID: {os.getpid()})") - # Try to acquire the lock in this process - current_holder_pid = acquire_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) - - if current_holder_pid is not None: - # Another process is already running - logger.info(f"šŸ” Evaluation watcher already running in process {current_holder_pid}") - return - - logger.info(f"šŸ” Daemon mode: acquired lock successfully (PID: {os.getpid()})") - # We acquired the lock, run 
the watcher loop - try: - run_watcher_loop(args.check_interval) - except SystemExit: - # Graceful shutdown - pass - finally: - # Always release the lock when we exit - logger.info(f"šŸ” Daemon mode: releasing lock (PID: {os.getpid()})") - release_singleton_lock(get_eval_protocol_dir(), LOCK_NAME) - else: - # Run the watcher directly (not as a background process) - run_watcher_loop(args.check_interval) - - -if __name__ == "__main__": - main() diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index f781dfc4..925e8f55 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -8,7 +8,6 @@ from eval_protocol.models import CompletionParams, EvalMetadata, EvaluationRow, InputMetadata from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter from eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor -from eval_protocol.pytest.eval_watcher import ensure_singleton_watcher from eval_protocol.pytest.types import ( Dataset, DatasetPathParam, @@ -197,9 +196,6 @@ def wrapper_body(**kwargs): eval_metadata = None all_results: List[EvaluationRow] = [] - # Ensure the evaluation watcher is running (OS-level singleton) - ensure_singleton_watcher() - try: # Handle dataset loading data: List[EvaluationRow] = [] diff --git a/eval_protocol/singleton_lock.py b/eval_protocol/singleton_lock.py deleted file mode 100644 index 3715047d..00000000 --- a/eval_protocol/singleton_lock.py +++ /dev/null @@ -1,234 +0,0 @@ -""" -Singleton Lock Management - -This module provides file-based singleton lock functionality for ensuring only one -instance of a process can run at a time across the system. - -The lock mechanism uses two files: -- A PID file that contains the process ID of the lock holder -- A lock file that serves as a marker for the lock - -This approach provides atomic lock acquisition and proper cleanup of stale locks -from terminated processes. 
-""" - -import os -import time -from pathlib import Path -from typing import Optional, Tuple - - -def get_lock_file_paths(base_dir: Path, lock_name: str) -> Tuple[Path, Path]: - """ - Get the lock file paths for a given lock name. - - Args: - base_dir: Base directory where lock files should be stored - lock_name: Name identifier for the lock (e.g., "watcher", "server") - - Returns: - Tuple of (lock_file_path, pid_file_path) - """ - lock_file_path = base_dir / f"{lock_name}.lock" - pid_file_path = base_dir / f"{lock_name}.pid" - return lock_file_path, pid_file_path - - -def acquire_singleton_lock(base_dir: Path, lock_name: str) -> Optional[int]: - """ - Try to acquire the singleton lock. Returns the PID of the current holder if failed. - - Args: - base_dir: Base directory where lock files should be stored - lock_name: Name identifier for the lock - - Returns: - None if lock acquired successfully, otherwise the PID of the current holder - """ - lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) - - # First, check if PID file exists and contains a running process - if pid_file_path.exists(): - try: - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - # Check if the process is still running - try: - os.kill(pid, 0) - # Process is running, we can't acquire the lock - return pid - except OSError: - # Process is not running, clean up stale files - pass - except (IOError, OSError): - pass - - # Try to create the PID file atomically - temp_pid_file = None - try: - # Use atomic file creation - temp_pid_file = pid_file_path.with_suffix(".tmp") - with open(temp_pid_file, "w") as temp_file: - temp_file.write(str(os.getpid())) - temp_file.flush() - os.fsync(temp_file.fileno()) - - # Atomically move the temp file to the final location - temp_pid_file.rename(pid_file_path) - - # Create the lock file to indicate we have the lock - with open(lock_file_path, "w") as lock_file: - 
lock_file.write(str(os.getpid())) - lock_file.flush() - os.fsync(lock_file.fileno()) - - return None # Successfully acquired lock - - except (IOError, OSError) as e: - # Failed to acquire lock - try: - if temp_pid_file and temp_pid_file.exists(): - temp_pid_file.unlink() - except (IOError, OSError): - pass - - # Check if someone else got the lock - if pid_file_path.exists(): - try: - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - return int(content) - except (IOError, OSError): - pass - - return 999999 # Dummy PID to indicate lock is held - - -def release_singleton_lock(base_dir: Path, lock_name: str) -> None: - """ - Release the singleton lock. - - Args: - base_dir: Base directory where lock files are stored - lock_name: Name identifier for the lock - """ - lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) - try: - if pid_file_path.exists(): - pid_file_path.unlink() - if lock_file_path.exists(): - lock_file_path.unlink() - except (IOError, OSError): - pass - - -def is_process_running(pid: int, timeout: float = 10.0) -> bool: - """ - Check if a process is still running. - - Args: - pid: Process ID to check - - Returns: - True if the process is running, False otherwise - """ - start = time.time() - - def _is_process_running(pid: int) -> bool: - try: - os.kill(pid, 0) - return True - except OSError: - return False - - while time.time() - start < timeout: - if not _is_process_running(pid): - return False - return True - - -def is_lock_held(base_dir: Path, lock_name: str) -> bool: - """ - Check if a lock is currently held by a running process. 
- - Args: - base_dir: Base directory where lock files are stored - lock_name: Name identifier for the lock - - Returns: - True if the lock is held by a running process, False otherwise - """ - _, pid_file_path = get_lock_file_paths(base_dir, lock_name) - - try: - if pid_file_path.exists(): - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - if is_process_running(pid): - return True - except (IOError, OSError): - pass - - return False - - -def get_lock_holder_pid(base_dir: Path, lock_name: str) -> Optional[int]: - """ - Get the PID of the process currently holding the lock. - - Args: - base_dir: Base directory where lock files are stored - lock_name: Name identifier for the lock - - Returns: - PID of the lock holder if the lock is held by a running process, None otherwise - """ - _, pid_file_path = get_lock_file_paths(base_dir, lock_name) - try: - if pid_file_path.exists(): - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - if is_process_running(pid): - return pid - except (IOError, OSError): - pass - return None - - -def cleanup_stale_lock(base_dir: Path, lock_name: str) -> bool: - """ - Clean up a stale lock (lock files exist but process is not running). 
- - Args: - base_dir: Base directory where lock files are stored - lock_name: Name identifier for the lock - - Returns: - True if stale lock was cleaned up, False if no cleanup was needed - """ - lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) - - # Check if PID file exists but process is not running - if pid_file_path.exists(): - try: - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - if not is_process_running(pid): - # Process is not running, clean up stale files - release_singleton_lock(base_dir, lock_name) - return True - except (IOError, OSError): - # If we can't read the PID file, clean it up - release_singleton_lock(base_dir, lock_name) - return True - - return False diff --git a/tests/test_eval_watcher.py b/tests/test_eval_watcher.py deleted file mode 100644 index ec45424e..00000000 --- a/tests/test_eval_watcher.py +++ /dev/null @@ -1,488 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for the evaluation watcher functionality. - -This module tests: -1. Singleton behavior - ensuring only one watcher can run at a time -2. 
Process termination detection - ensuring evaluations are updated to stopped when processes die -""" - -import os -import signal -import subprocess -import sys -import tempfile -import time -from pathlib import Path -from typing import Optional -from unittest.mock import patch - -import pytest - -from eval_protocol.dataset_logger import default_logger -from eval_protocol.logging_utils import get_logger -from eval_protocol.models import EvalMetadata, EvaluationRow, Message -from eval_protocol.pytest.eval_watcher import ( - ensure_singleton_watcher, - get_watcher_pid, - is_watcher_running, - stop_watcher, -) -from eval_protocol.singleton_lock import ( - acquire_singleton_lock, - is_process_running, - release_singleton_lock, -) - -# Initialize logger -logger = get_logger("test_eval_watcher") - - -class TestEvalWatcherSingleton: - """Test that the evaluation watcher behaves as a singleton.""" - - def test_singleton_behavior(self): - """Test that only one watcher can run at a time.""" - # stop any existing watcher - stop_watcher() - - # Check if watcher is already running (likely due to evaluation_test.py import) - initial_running = is_watcher_running() - - assert not initial_running, "No watcher should be running" - - # Try to start a new watcher - result = ensure_singleton_watcher(check_interval=1.0) - - assert isinstance(result, int), "Should start watcher when none is running" - assert is_watcher_running(), "Watcher should be running" - current_pid = get_watcher_pid() - assert current_pid is not None, "Should get PID of running watcher" - - def test_singleton_lock_cleanup(self): - """Test that singleton lock is properly cleaned up when watcher stops.""" - - ensure_singleton_watcher(check_interval=1.0) - - assert is_watcher_running(), "Watcher should be running" - - # Get current PID - original_pid = get_watcher_pid() - assert original_pid is not None - - # Stop the watcher using SIGKILL (since SIGTERM is ignored) - try: - os.kill(original_pid, signal.SIGABRT) - 
logger.info(f"šŸ” Sent SIGKILL to evaluation watcher process {original_pid}") - except OSError as e: - logger.error(f"āŒ Failed to stop evaluation watcher process {original_pid}: {e}") - pytest.skip("Could not kill watcher process") - - # Wait longer for cleanup - the watcher process needs time to exit - max_wait = 10.0 - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - if not is_watcher_running(): - break - time.sleep(wait_interval) - waited += wait_interval - - # Verify lock is released - assert not is_watcher_running(), "Watcher should no longer be running" - pid = get_watcher_pid() - assert pid is None, "Should not have a PID after stopping" - - def test_multiple_start_stop_cycles(self): - """Test multiple start/stop cycles work correctly.""" - for i in range(2): # Reduced cycles to avoid interfering with other tests - ensure_singleton_watcher(check_interval=1.0) - # Get current PID - current_pid = get_watcher_pid() - assert current_pid is not None - - # Stop watcher using SIGKILL (since SIGTERM is ignored) - try: - os.kill(current_pid, signal.SIGKILL) - logger.info(f"šŸ” Sent SIGKILL to evaluation watcher process {current_pid}") - except OSError as e: - logger.error(f"āŒ Failed to stop evaluation watcher process {current_pid}: {e}") - pytest.skip("Could not kill watcher process") - - # Wait longer for cleanup - SIGKILL should be immediate but give some time - max_wait = 15.0 - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - if not is_watcher_running(): - break - time.sleep(wait_interval) - waited += wait_interval - - assert not is_watcher_running(), f"Watcher should not be running on cycle {i}" - - def test_watcher_pid_consistency(self): - """Test that watcher PID is consistent and valid.""" - # Ensure watcher is running - ensure_singleton_watcher(check_interval=1.0) - - # Get PID multiple times - pid1 = get_watcher_pid() - pid2 = get_watcher_pid() - - assert pid1 is not None - assert pid2 is not None - assert pid1 == 
pid2, "PID should be consistent" - assert is_process_running(pid1), "PID should correspond to a running process" - - -class TestEvalWatcherProcessTermination: - """Test that the evaluation watcher detects terminated processes and updates evaluations.""" - - def setup_method(self): - """Set up test environment.""" - # Create a temporary directory for test data - self.temp_dir = tempfile.mkdtemp() - self.original_datasets_dir = os.environ.get("EVAL_PROTOCOL_DATASETS_DIR") - os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.temp_dir - - def teardown_method(self): - """Clean up after each test.""" - # Restore original environment - if self.original_datasets_dir: - os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.original_datasets_dir - else: - os.environ.pop("EVAL_PROTOCOL_DATASETS_DIR", None) - - # Clean up temporary directory - import shutil - - shutil.rmtree(self.temp_dir, ignore_errors=True) - - def create_running_evaluation_row(self, pid: int) -> EvaluationRow: - """Create an evaluation row with 'running' status and specified PID.""" - from eval_protocol.models import InputMetadata - - row = EvaluationRow( - messages=[Message(role="user", content="Test message")], - input_metadata=InputMetadata(row_id=f"test_row_{pid}"), - eval_metadata=EvalMetadata( - name="test_evaluation", status="running", num_runs=1, aggregation_method="mean" - ), - pid=pid, - ) - - # Log the row - default_logger.log(row) - return row - - def test_detects_terminated_process(self): - """Test that watcher detects when a process terminates and updates evaluation.""" - # Ensure watcher is running - ensure_singleton_watcher(check_interval=0.5) - - # Give the watcher time to fully start - time.sleep(1.0) - - # Create a short-lived process and get its PID - process = subprocess.Popen( - [sys.executable, "-c", "import time; time.sleep(0.1)"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - pid = process.pid - - # Create evaluation row with running status for this PID - row = 
self.create_running_evaluation_row(pid) - - # Wait for process to terminate - process.wait() - - # Wait for watcher to detect the terminated process - max_wait = 15.0 # Increased wait time - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - # Read the evaluation row - rows = default_logger.read() - test_row = None - for r in rows: - if r.input_metadata.row_id == row.input_metadata.row_id: - test_row = r - break - - if test_row and test_row.eval_metadata and test_row.eval_metadata.status == "stopped": - break - - time.sleep(wait_interval) - waited += wait_interval - - # Verify the evaluation was updated - assert test_row is not None, "Should find the test row" - assert test_row.eval_metadata is not None, "Should have eval metadata" - assert test_row.eval_metadata.status == "stopped", "Status should be updated to stopped" - assert test_row.eval_metadata.passed is False, "Should be marked as not passed" - - # Verify error information is set - assert test_row.evaluation_result is not None, "Should have evaluation result" - assert test_row.evaluation_result.error is not None, "Should have error message" - assert "terminated" in test_row.evaluation_result.error.lower(), "Error should mention termination" - - def test_detects_multiple_terminated_processes(self): - """Test that watcher detects multiple terminated processes.""" - # Ensure watcher is running - ensure_singleton_watcher(check_interval=0.5) - - # Create multiple short-lived processes - processes = [] - pids = [] - rows = [] - - for i in range(3): - process = subprocess.Popen( - [sys.executable, "-c", f"import time; time.sleep({0.1 + i * 0.1})"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - pid = process.pid - pids.append(pid) - processes.append(process) - - # Create evaluation row for this PID - row = self.create_running_evaluation_row(pid) - rows.append(row) - - # Wait for all processes to terminate - for process in processes: - process.wait() - - # Wait for watcher to detect 
all terminated processes - max_wait = 15.0 # 15 seconds max wait - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - # Read all evaluation rows - all_rows = default_logger.read() - stopped_count = 0 - - for row in rows: - for r in all_rows: - if r.input_metadata.row_id == row.input_metadata.row_id: - if r.eval_metadata and r.eval_metadata.status == "stopped": - stopped_count += 1 - break - - if stopped_count == len(rows): - break - - time.sleep(wait_interval) - waited += wait_interval - - # Verify all evaluations were updated - assert stopped_count == len(rows), f"Expected {len(rows)} stopped evaluations, got {stopped_count}" - - # Verify each row was properly updated - all_rows = default_logger.read() - for original_row in rows: - for r in all_rows: - if r.input_metadata.row_id == original_row.input_metadata.row_id: - assert r.eval_metadata is not None - assert r.eval_metadata.status == "stopped" - assert r.eval_metadata.passed is False - assert r.evaluation_result is not None - assert r.evaluation_result.error is not None - break - - def test_ignores_running_processes(self): - """Test that watcher doesn't update evaluations for running processes.""" - # Ensure watcher is running - if not is_watcher_running(): - ensure_singleton_watcher(check_interval=0.5) - - # Create a long-running process - process = subprocess.Popen( - [sys.executable, "-c", "import time; time.sleep(30)"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) - pid = process.pid - - # Create evaluation row with running status for this PID - row = self.create_running_evaluation_row(pid) - - # Wait a bit for watcher to run - time.sleep(2.0) - - # Verify the evaluation is still running - rows = default_logger.read() - test_row = None - for r in rows: - if r.input_metadata.row_id == row.input_metadata.row_id: - test_row = r - break - - assert test_row is not None, "Should find the test row" - assert test_row.eval_metadata is not None, "Should have eval metadata" - assert 
test_row.eval_metadata.status == "running", "Status should still be running" - - # Clean up - process.terminate() - process.wait() - - def test_handles_none_pid(self): - """Test that watcher handles evaluation rows with None PID.""" - # Ensure watcher is running - ensure_singleton_watcher(check_interval=0.5) - - # Create evaluation row with None PID - row = self.create_running_evaluation_row(None) - - # Wait for watcher to process the row - max_wait = 5.0 - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - rows = default_logger.read() - test_row = None - for r in rows: - if r.input_metadata.row_id == row.input_metadata.row_id: - test_row = r - break - - if test_row and test_row.eval_metadata and test_row.eval_metadata.status == "stopped": - break - - time.sleep(wait_interval) - waited += wait_interval - - # Verify the evaluation was updated - assert test_row is not None, "Should find the test row" - assert test_row.eval_metadata is not None, "Should have eval metadata" - assert test_row.eval_metadata.status == "stopped", "Status should be updated to stopped" - - -class TestEvalWatcherIntegration: - """Integration tests for the evaluation watcher.""" - - def setup_method(self): - """Set up test environment.""" - # Create a temporary directory for test data - self.temp_dir = tempfile.mkdtemp() - self.original_datasets_dir = os.environ.get("EVAL_PROTOCOL_DATASETS_DIR") - os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.temp_dir - - def teardown_method(self): - """Clean up after each test.""" - # Restore original environment - if self.original_datasets_dir: - os.environ["EVAL_PROTOCOL_DATASETS_DIR"] = self.original_datasets_dir - else: - os.environ.pop("EVAL_PROTOCOL_DATASETS_DIR", None) - - # Clean up temporary directory - import shutil - - shutil.rmtree(self.temp_dir, ignore_errors=True) - - def test_watcher_survives_parent_process_termination(self): - """Test that watcher survives when parent process is killed (simulated).""" - # Ensure watcher is 
running - ensure_singleton_watcher(check_interval=1.0) - - original_pid = get_watcher_pid() - assert original_pid is not None - - # Simulate parent process termination by directly killing the watcher process - # and then checking if it restarts (in a real scenario, the watcher would be - # in a separate session and survive parent termination) - os.kill(original_pid, signal.SIGKILL) - - # Wait for the process to be killed - max_wait = 10.0 - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - if not is_watcher_running(): - break - time.sleep(wait_interval) - waited += wait_interval - - # The watcher should no longer be running - assert not is_watcher_running() - - # We can start a new watcher - result = ensure_singleton_watcher(check_interval=1.0) - assert isinstance(result, int) - new_pid = get_watcher_pid() - assert new_pid is not None - assert new_pid != original_pid - - def test_watcher_handles_signal_gracefully(self): - """Test that watcher handles termination signals gracefully.""" - # Ensure watcher is running - if not is_watcher_running(): - ensure_singleton_watcher(check_interval=1.0) - - pid = get_watcher_pid() - assert pid is not None - - # Send SIGKILL to the watcher (SIGTERM is ignored) - os.kill(pid, signal.SIGKILL) - - # Wait for the process to be killed - max_wait = 10.0 - wait_interval = 0.5 - waited = 0.0 - - while waited < max_wait: - if not is_watcher_running(): - break - time.sleep(wait_interval) - waited += wait_interval - - # Verify watcher has stopped - assert not is_watcher_running() - assert get_watcher_pid() is None - - def test_concurrent_watcher_startup(self): - """Test that concurrent attempts to start watchers are handled correctly.""" - import queue - import threading - - # stop any existing watcher - stop_watcher() - - results = queue.Queue() - - def start_watcher(): - try: - result = ensure_singleton_watcher(check_interval=1.0) - results.put(result) - except Exception as e: - results.put(e) - - # Start multiple threads 
trying to start watchers simultaneously - threads = [] - for _ in range(3): - thread = threading.Thread(target=start_watcher) - threads.append(thread) - thread.start() - - # Wait for all threads to complete - for thread in threads: - thread.join() - - # Check results - success_count = 0 - while not results.empty(): - result = results.get() - if is_process_running(result): - success_count += 1 - - # Only one should succeed (or none if already running) - assert success_count == 1, f"Expected 1 successful start, got {success_count}" - assert is_watcher_running(), "Watcher should be running" diff --git a/tests/utils/test_singleton_lock.py b/tests/utils/test_singleton_lock.py deleted file mode 100644 index e31dd146..00000000 --- a/tests/utils/test_singleton_lock.py +++ /dev/null @@ -1,259 +0,0 @@ -""" -Tests for the singleton lock functionality. - -This module tests the file-based singleton lock mechanism that ensures only one -instance of a process can run at a time across the system. -""" - -import os -import tempfile -import time -from pathlib import Path -from unittest.mock import patch - -import pytest - -from eval_protocol.utils.singleton_lock import ( - acquire_singleton_lock, - cleanup_stale_lock, - get_lock_file_paths, - get_lock_holder_pid, - is_lock_held, - is_process_running, - release_singleton_lock, -) - - -class TestSingletonLock: - """Test cases for singleton lock functionality.""" - - @pytest.fixture - def temp_dir(self): - """Create a temporary directory for testing.""" - with tempfile.TemporaryDirectory() as temp_dir: - yield Path(temp_dir) - - @pytest.fixture - def lock_name(self): - """Test lock name.""" - return "test_lock" - - def test_get_lock_file_paths(self, temp_dir, lock_name): - """Test that lock file paths are generated correctly.""" - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - assert lock_file_path == temp_dir / f"{lock_name}.lock" - assert pid_file_path == temp_dir / f"{lock_name}.pid" - - def 
test_is_process_running_current_pid(self): - """Test that current process is detected as running.""" - current_pid = os.getpid() - assert is_process_running(current_pid) is True - - def test_is_process_running_invalid_pid(self): - """Test that invalid PID is detected as not running.""" - # Use a very high PID that shouldn't exist - invalid_pid = 999999 - assert is_process_running(invalid_pid) is False - - def test_acquire_singleton_lock_success(self, temp_dir, lock_name): - """Test successful lock acquisition.""" - result = acquire_singleton_lock(temp_dir, lock_name) - assert result is None # Successfully acquired lock - - # Verify lock files were created - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - assert lock_file_path.exists() - assert pid_file_path.exists() - - # Verify PID file contains current PID - with open(pid_file_path, "r") as f: - stored_pid = int(f.read().strip()) - assert stored_pid == os.getpid() - - def test_acquire_singleton_lock_already_held(self, temp_dir, lock_name): - """Test that lock acquisition fails when lock is already held.""" - # First process acquires the lock - result1 = acquire_singleton_lock(temp_dir, lock_name) - assert result1 is None - - # Second process tries to acquire the lock - # Since the lock is already held by the current process, it should return the current PID - result2 = acquire_singleton_lock(temp_dir, lock_name) - assert result2 == os.getpid() # Should return current holder's PID - - def test_acquire_singleton_lock_stale_lock_cleanup(self, temp_dir, lock_name): - """Test that stale locks are cleaned up during acquisition.""" - # Create a stale lock file with a non-existent PID - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - # Create PID file with non-existent PID - with open(pid_file_path, "w") as f: - f.write("999999") # Non-existent PID - - # Try to acquire lock - should clean up stale lock and succeed - result = acquire_singleton_lock(temp_dir, 
lock_name) - assert result is None # Should succeed after cleaning up stale lock - - def test_release_singleton_lock(self, temp_dir, lock_name): - """Test lock release functionality.""" - # Acquire the lock first - acquire_singleton_lock(temp_dir, lock_name) - - # Verify lock files exist - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - assert lock_file_path.exists() - assert pid_file_path.exists() - - # Release the lock - release_singleton_lock(temp_dir, lock_name) - - # Verify lock files were removed - assert not lock_file_path.exists() - assert not pid_file_path.exists() - - def test_release_singleton_lock_no_files(self, temp_dir, lock_name): - """Test that release doesn't fail when lock files don't exist.""" - # Release lock without acquiring it first - release_singleton_lock(temp_dir, lock_name) - # Should not raise any exceptions - - def test_is_lock_held_true(self, temp_dir, lock_name): - """Test that lock is detected as held when it exists and process is running.""" - # Acquire the lock - acquire_singleton_lock(temp_dir, lock_name) - - # Check if lock is held - assert is_lock_held(temp_dir, lock_name) is True - - def test_is_lock_held_false_no_lock(self, temp_dir, lock_name): - """Test that lock is detected as not held when no lock files exist.""" - assert is_lock_held(temp_dir, lock_name) is False - - def test_is_lock_held_false_stale_lock(self, temp_dir, lock_name): - """Test that stale lock is detected as not held.""" - # Create a stale lock file with a non-existent PID - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - with open(pid_file_path, "w") as f: - f.write("999999") # Non-existent PID - - assert is_lock_held(temp_dir, lock_name) is False - - def test_get_lock_holder_pid_success(self, temp_dir, lock_name): - """Test getting PID of lock holder.""" - # Acquire the lock - acquire_singleton_lock(temp_dir, lock_name) - - # Get holder PID - holder_pid = get_lock_holder_pid(temp_dir, lock_name) - 
assert holder_pid == os.getpid() - - def test_get_lock_holder_pid_no_lock(self, temp_dir, lock_name): - """Test getting PID when no lock exists.""" - holder_pid = get_lock_holder_pid(temp_dir, lock_name) - assert holder_pid is None - - def test_get_lock_holder_pid_stale_lock(self, temp_dir, lock_name): - """Test getting PID when lock is stale.""" - # Create a stale lock file with a non-existent PID - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - with open(pid_file_path, "w") as f: - f.write("999999") # Non-existent PID - - holder_pid = get_lock_holder_pid(temp_dir, lock_name) - assert holder_pid is None - - def test_cleanup_stale_lock_success(self, temp_dir, lock_name): - """Test successful cleanup of stale lock.""" - # Create a stale lock file with a non-existent PID - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - with open(pid_file_path, "w") as f: - f.write("999999") # Non-existent PID - - # Clean up stale lock - result = cleanup_stale_lock(temp_dir, lock_name) - assert result is True - - # Verify lock files were removed - assert not lock_file_path.exists() - assert not pid_file_path.exists() - - def test_cleanup_stale_lock_no_cleanup_needed(self, temp_dir, lock_name): - """Test cleanup when no stale lock exists.""" - result = cleanup_stale_lock(temp_dir, lock_name) - assert result is False - - def test_cleanup_stale_lock_active_lock(self, temp_dir, lock_name): - """Test cleanup when lock is actively held.""" - # Acquire the lock - acquire_singleton_lock(temp_dir, lock_name) - - # Try to clean up active lock - result = cleanup_stale_lock(temp_dir, lock_name) - assert result is False - - # Verify lock files still exist - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - assert lock_file_path.exists() - assert pid_file_path.exists() - - def test_concurrent_lock_acquisition_race_condition(self, temp_dir, lock_name): - """Test race condition handling in concurrent lock 
acquisition.""" - # This test simulates a race condition by creating the PID file - # after the first process checks but before it creates its own - - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - # Create PID file as if another process got there first - with open(pid_file_path, "w") as f: - f.write("999999") - - # Try to acquire lock - should detect the existing PID file and return that PID - # Note: The function automatically cleans up stale locks, so it will succeed - result = acquire_singleton_lock(temp_dir, lock_name) - assert result is None # Should succeed after cleaning up stale lock - - def test_atomic_file_creation(self, temp_dir, lock_name): - """Test that PID file creation is atomic.""" - lock_file_path, pid_file_path = get_lock_file_paths(temp_dir, lock_name) - - # Acquire lock - acquire_singleton_lock(temp_dir, lock_name) - - # Verify no temporary file exists - temp_pid_file = pid_file_path.with_suffix(".tmp") - assert not temp_pid_file.exists() - - # Verify final PID file exists and contains correct PID - assert pid_file_path.exists() - with open(pid_file_path, "r") as f: - stored_pid = int(f.read().strip()) - assert stored_pid == os.getpid() - - def test_multiple_lock_names_independence(self, temp_dir): - """Test that different lock names are independent.""" - lock_name1 = "lock1" - lock_name2 = "lock2" - - # Acquire first lock - result1 = acquire_singleton_lock(temp_dir, lock_name1) - assert result1 is None - - # Acquire second lock (should succeed since different name) - result2 = acquire_singleton_lock(temp_dir, lock_name2) - assert result2 is None - - # Verify both locks are held - assert is_lock_held(temp_dir, lock_name1) is True - assert is_lock_held(temp_dir, lock_name2) is True - - # Verify lock files exist for both - lock_file1, pid_file1 = get_lock_file_paths(temp_dir, lock_name1) - lock_file2, pid_file2 = get_lock_file_paths(temp_dir, lock_name2) - assert lock_file1.exists() - assert pid_file1.exists() - 
assert lock_file2.exists() - assert pid_file2.exists() diff --git a/tests/utils/test_singleton_lock_multiprocessing.py b/tests/utils/test_singleton_lock_multiprocessing.py deleted file mode 100644 index 963dbc01..00000000 --- a/tests/utils/test_singleton_lock_multiprocessing.py +++ /dev/null @@ -1,606 +0,0 @@ -""" -Multiprocessing tests for the singleton lock functionality. - -This module tests the file-based singleton lock mechanism using actual -multiprocessing to simulate real concurrent scenarios. -""" - -import multiprocessing -import os -import tempfile -import time -from pathlib import Path -from typing import Optional - -import pytest - -from eval_protocol.utils.singleton_lock import ( - acquire_singleton_lock, - is_lock_held, - release_singleton_lock, -) - - -def worker_process_acquire_lock(base_dir: Path, lock_name: str, result_queue: multiprocessing.Queue, worker_id: int): - """Worker process that tries to acquire a lock.""" - try: - print(f"Worker {worker_id} (PID: {os.getpid()}) attempting to acquire lock") - result = acquire_singleton_lock(base_dir, lock_name) - result_queue.put((worker_id, result)) - print(f"Worker {worker_id} (PID: {os.getpid()}) got result: {result}") - - # If we got the lock, hold it for a bit then release - if result is None: - print(f"Worker {worker_id} (PID: {os.getpid()}) acquired lock, holding for 2 seconds") - time.sleep(2) - release_singleton_lock(base_dir, lock_name) - print(f"Worker {worker_id} (PID: {os.getpid()}) released lock") - except Exception as e: - print(f"Worker {worker_id} (PID: {os.getpid()}) got exception: {e}") - result_queue.put((worker_id, f"ERROR: {e}")) - - -def worker_process_check_lock(base_dir: Path, lock_name: str, result_queue: multiprocessing.Queue, worker_id: int): - """Worker process that checks if a lock is held.""" - try: - print(f"Worker {worker_id} (PID: {os.getpid()}) checking if lock is held") - is_held = is_lock_held(base_dir, lock_name) - result_queue.put((worker_id, is_held)) - 
print(f"Worker {worker_id} (PID: {os.getpid()}) lock held: {is_held}") - except Exception as e: - print(f"Worker {worker_id} (PID: {os.getpid()}) got exception: {e}") - result_queue.put((worker_id, f"ERROR: {e}")) - - -def worker_process_hold_lock( - base_dir: Path, lock_name: str, result_queue: multiprocessing.Queue, worker_id: int, hold_time: float = 5.0 -): - """Worker process that acquires and holds a lock for a specified time.""" - try: - print(f"Worker {worker_id} (PID: {os.getpid()}) attempting to acquire lock") - result = acquire_singleton_lock(base_dir, lock_name) - - if result is None: - print(f"Worker {worker_id} (PID: {os.getpid()}) acquired lock, holding for {hold_time} seconds") - result_queue.put((worker_id, "ACQUIRED")) - time.sleep(hold_time) - release_singleton_lock(base_dir, lock_name) - print(f"Worker {worker_id} (PID: {os.getpid()}) released lock") - result_queue.put((worker_id, "RELEASED")) - else: - print(f"Worker {worker_id} (PID: {os.getpid()}) failed to acquire lock, holder PID: {result}") - result_queue.put((worker_id, f"FAILED: {result}")) - except Exception as e: - print(f"Worker {worker_id} (PID: {os.getpid()}) got exception: {e}") - result_queue.put((worker_id, f"ERROR: {e}")) - - -class TestSingletonLockMultiprocessing: - """Test cases for singleton lock functionality using multiprocessing.""" - - @pytest.fixture - def temp_dir(self): - """Create a temporary directory for testing.""" - with tempfile.TemporaryDirectory() as temp_dir: - yield Path(temp_dir) - - @pytest.fixture - def lock_name(self): - """Test lock name.""" - return "test_multiprocessing_lock" - - def test_multiprocessing_lock_acquisition_sequential(self, temp_dir, lock_name): - """Test that only one process can acquire the lock at a time.""" - # Use multiprocessing.Manager() for better compatibility - manager = multiprocessing.Manager() - result_queue = manager.Queue() - - # Start multiple processes trying to acquire the lock - processes = [] - num_workers = 3 - - for 
i in range(num_workers): - p = multiprocessing.Process( - target=worker_process_acquire_lock, - args=(temp_dir, lock_name, result_queue, i), - daemon=False, # Don't use daemon processes - ) - processes.append(p) - p.start() - time.sleep(0.1) # Small delay to create race condition - - # Wait for all processes to complete - for p in processes: - p.join(timeout=10) - if p.is_alive(): - p.terminate() - p.join() - - # Collect results - results = [] - while not result_queue.empty(): - results.append(result_queue.get()) - - # Sort by worker ID for consistent ordering - results.sort(key=lambda x: x[0]) - - print(f"Results: {results}") - - # Verify that exactly one process acquired the lock (got None) - acquired_results = [r for r in results if r[1] is None] - assert len(acquired_results) == 1, f"Expected exactly one process to acquire lock, got {len(acquired_results)}" - - # Verify that other processes got the PID of the lock holder - acquired_worker_id = acquired_results[0][0] - for worker_id, result in results: - if worker_id != acquired_worker_id: - assert isinstance(result, int), f"Expected PID, got {result}" - assert result > 0, f"Expected positive PID, got {result}" - - def test_multiprocessing_lock_holder_detection(self, temp_dir, lock_name): - """Test that processes can detect when a lock is held.""" - manager = multiprocessing.Manager() - result_queue = manager.Queue() - - # Start a process that holds the lock - holder_process = multiprocessing.Process( - target=worker_process_hold_lock, - args=(temp_dir, lock_name, result_queue, 0, 3.0), # Hold for 3 seconds - daemon=False, - ) - holder_process.start() - - # Wait a moment for the holder to acquire the lock - time.sleep(0.5) - - # Start multiple processes checking if the lock is held - checker_processes = [] - num_checkers = 3 - - for i in range(num_checkers): - p = multiprocessing.Process( - target=worker_process_check_lock, - args=(temp_dir, lock_name, result_queue, i + 100), # Use different IDs - daemon=False, 
- ) - checker_processes.append(p) - p.start() - - # Wait for all processes to complete - holder_process.join(timeout=10) - for p in checker_processes: - p.join(timeout=5) - - # Collect results - results = [] - while not result_queue.empty(): - results.append(result_queue.get()) - - print(f"Results: {results}") - - # Verify that the holder process acquired the lock - holder_results = [r for r in results if r[0] == 0] - assert len(holder_results) >= 1, "Holder process should have reported acquiring the lock" - - # Verify that checker processes detected the lock as held - checker_results = [r for r in results if r[0] >= 100] - for worker_id, is_held in checker_results: - assert is_held is True, f"Checker {worker_id} should have detected lock as held" - - def test_multiprocessing_lock_cleanup_after_process_termination(self, temp_dir, lock_name): - """Test that locks are properly cleaned up when processes terminate.""" - manager = multiprocessing.Manager() - result_queue = manager.Queue() - - # Start a process that holds the lock - holder_process = multiprocessing.Process( - target=worker_process_hold_lock, - args=(temp_dir, lock_name, result_queue, 0, 10.0), # Hold for 10 seconds - daemon=False, - ) - holder_process.start() - - # Wait for the holder to acquire the lock and check results - time.sleep(1.0) - - # Check if the process actually acquired the lock - results = [] - while not result_queue.empty(): - results.append(result_queue.get()) - - # Look for the ACQUIRED message - acquired_results = [r for r in results if r[1] == "ACQUIRED"] - if not acquired_results: - # If no acquisition happened, this test is not meaningful - holder_process.terminate() - holder_process.join(timeout=5) - pytest.skip("Process did not acquire lock, skipping cleanup test") - - # Verify the lock is held - assert is_lock_held(temp_dir, lock_name) is True - - # Terminate the holder process - holder_process.terminate() - holder_process.join(timeout=5) - - # Wait a moment for cleanup - 
time.sleep(0.5) - - # Verify the lock is no longer held - assert is_lock_held(temp_dir, lock_name) is False - - # Try to acquire the lock - should succeed - result = acquire_singleton_lock(temp_dir, lock_name) - assert result is None, "Should be able to acquire lock after process termination" - - # Clean up - release_singleton_lock(temp_dir, lock_name) - - def test_multiprocessing_daemon_vs_non_daemon(self, temp_dir, lock_name): - """Test lock behavior with daemon vs non-daemon processes.""" - manager = multiprocessing.Manager() - result_queue = manager.Queue() - - # Test with daemon=True - print("Testing with daemon=True") - daemon_process = multiprocessing.Process( - target=worker_process_hold_lock, args=(temp_dir, lock_name, result_queue, 0, 2.0), daemon=True - ) - daemon_process.start() - time.sleep(0.5) - - # Check if lock is held - is_held_daemon = is_lock_held(temp_dir, lock_name) - print(f"Lock held with daemon process: {is_held_daemon}") - - daemon_process.join(timeout=5) - - # Test with daemon=False - print("Testing with daemon=False") - non_daemon_process = multiprocessing.Process( - target=worker_process_hold_lock, args=(temp_dir, lock_name, result_queue, 1, 2.0), daemon=False - ) - non_daemon_process.start() - time.sleep(0.5) - - # Check if lock is held - is_held_non_daemon = is_lock_held(temp_dir, lock_name) - print(f"Lock held with non-daemon process: {is_held_non_daemon}") - - non_daemon_process.join(timeout=5) - - # Both should work the same way for lock acquisition - assert ( - is_held_daemon == is_held_non_daemon - ), "Lock behavior should be the same for daemon and non-daemon processes" - - def test_multiprocessing_concurrent_acquisition_race_condition(self, temp_dir, lock_name): - """Test race condition handling with multiple processes trying to acquire simultaneously.""" - manager = multiprocessing.Manager() - result_queue = manager.Queue() - - # Start multiple processes simultaneously - processes = [] - num_workers = 5 - - # Start all 
processes at nearly the same time - for i in range(num_workers): - p = multiprocessing.Process( - target=worker_process_acquire_lock, args=(temp_dir, lock_name, result_queue, i), daemon=False - ) - processes.append(p) - - # Start all processes with minimal delay - for p in processes: - p.start() - - # Wait for all processes to complete - for p in processes: - p.join(timeout=10) - if p.is_alive(): - p.terminate() - p.join() - - # Collect results - results = [] - while not result_queue.empty(): - results.append(result_queue.get()) - - print(f"Race condition test results: {results}") - - # Verify that exactly one process acquired the lock - acquired_results = [r for r in results if r[1] is None] - assert ( - len(acquired_results) == 1 - ), f"Expected exactly one process to acquire lock in race condition, got {len(acquired_results)}" - - # Verify that other processes got valid PIDs - acquired_worker_id = acquired_results[0][0] - for worker_id, result in results: - if worker_id != acquired_worker_id: - assert isinstance(result, int), f"Expected PID, got {result}" - assert result > 0, f"Expected positive PID, got {result}" - - def test_multiprocessing_lock_independence(self, temp_dir): - """Test that different lock names are independent across processes.""" - lock_name1 = "lock1" - lock_name2 = "lock2" - - manager = multiprocessing.Manager() - result_queue = manager.Queue() - - # Start processes trying to acquire different locks - process1 = multiprocessing.Process( - target=worker_process_acquire_lock, args=(temp_dir, lock_name1, result_queue, 1), daemon=False - ) - process2 = multiprocessing.Process( - target=worker_process_acquire_lock, args=(temp_dir, lock_name2, result_queue, 2), daemon=False - ) - - process1.start() - process2.start() - - process1.join(timeout=5) - process2.join(timeout=5) - - # Collect results - results = [] - while not result_queue.empty(): - results.append(result_queue.get()) - - print(f"Lock independence test results: {results}") - - # Both 
processes should have acquired their respective locks - assert len(results) == 2, "Expected results from both processes" - for worker_id, result in results: - assert result is None, f"Process {worker_id} should have acquired its lock" - - def test_daemon_off_process_survives_parent_termination(self, temp_dir, lock_name): - """Test that a daemon=Off process continues to run and hold the lock when parent is killed.""" - import signal - import subprocess - import sys - - # Create a script that will be run as a separate process - script_content = f''' -import multiprocessing -import os -import sys -import time -from pathlib import Path -from typing import Optional, Tuple - -# Copy the singleton lock functions directly to avoid import issues -def get_lock_file_paths(base_dir: Path, lock_name: str) -> Tuple[Path, Path]: - lock_file_path = base_dir / f"{{lock_name}}.lock" - pid_file_path = base_dir / f"{{lock_name}}.pid" - return lock_file_path, pid_file_path - -def is_process_running(pid: int) -> bool: - try: - os.kill(pid, 0) - return True - except OSError: - return False - -def acquire_singleton_lock(base_dir: Path, lock_name: str) -> Optional[int]: - lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) - - # First, check if PID file exists and contains a running process - if pid_file_path.exists(): - try: - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - # Check if the process is still running - try: - os.kill(pid, 0) - # Process is running, we can't acquire the lock - return pid - except OSError: - # Process is not running, clean up stale files - pass - except (IOError, OSError): - pass - - # Try to create the PID file atomically - temp_pid_file = None - try: - # Use atomic file creation - temp_pid_file = pid_file_path.with_suffix(".tmp") - with open(temp_pid_file, "w") as temp_file: - temp_file.write(str(os.getpid())) - temp_file.flush() - os.fsync(temp_file.fileno()) - 
- # Atomically move the temp file to the final location - temp_pid_file.rename(pid_file_path) - - # Create the lock file to indicate we have the lock - with open(lock_file_path, "w") as lock_file: - lock_file.write(str(os.getpid())) - lock_file.flush() - os.fsync(lock_file.fileno()) - - return None # Successfully acquired lock - - except (IOError, OSError) as e: - # Failed to acquire lock - try: - if temp_pid_file and temp_pid_file.exists(): - temp_pid_file.unlink() - except (IOError, OSError): - pass - - # Check if someone else got the lock - if pid_file_path.exists(): - try: - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - return int(content) - except (IOError, OSError): - pass - - return 999999 # Dummy PID to indicate lock is held - -def release_singleton_lock(base_dir: Path, lock_name: str) -> None: - lock_file_path, pid_file_path = get_lock_file_paths(base_dir, lock_name) - try: - if pid_file_path.exists(): - pid_file_path.unlink() - if lock_file_path.exists(): - lock_file_path.unlink() - except (IOError, OSError): - pass - -def is_lock_held(base_dir: Path, lock_name: str) -> bool: - _, pid_file_path = get_lock_file_paths(base_dir, lock_name) - - try: - if pid_file_path.exists(): - with open(pid_file_path, "r") as pid_file: - content = pid_file.read().strip() - if content.isdigit(): - pid = int(content) - if is_process_running(pid): - return True - except (IOError, OSError): - pass - - return False - -def child_process_holder(base_dir, lock_name, pid_file): - """Child process that acquires and holds a lock.""" - try: - # Write our PID to a file so parent can read it - with open(pid_file, 'w') as f: - f.write(str(os.getpid())) - - result = acquire_singleton_lock(Path(base_dir), lock_name) - - if result is None: - # Keep the lock held by sleeping in a loop - while True: - time.sleep(1) - # Verify we still hold the lock - if not is_lock_held(Path(base_dir), lock_name): - break - else: - sys.exit(1) - 
except Exception as e: - sys.exit(1) - -if __name__ == "__main__": - base_dir = "{temp_dir}" - lock_name = "{lock_name}" - pid_file = "{temp_dir}/child_pid.txt" - - # Start child process with daemon=False - child = multiprocessing.Process( - target=child_process_holder, - args=(base_dir, lock_name, pid_file), - daemon=False - ) - child.start() - - # Wait for child to start and acquire lock - time.sleep(2) - - # Verify child is still running - if not child.is_alive(): - sys.exit(1) - - # Write parent PID to file - with open("{temp_dir}/parent_pid.txt", 'w') as f: - f.write(str(os.getpid())) - - # Sleep indefinitely - parent will be killed by test - while True: - time.sleep(1) -''' - - # Write the script to a temporary file - script_path = temp_dir / "test_daemon_off_script.py" - with open(script_path, "w") as f: - f.write(script_content) - - # Start the script as a separate process using uvx to ensure correct environment - process = subprocess.Popen( - ["uvx", "python", str(script_path)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - - # Wait for the script to start and child process to acquire lock - time.sleep(3) - - # Read the PIDs - parent_pid_file = temp_dir / "parent_pid.txt" - child_pid_file = temp_dir / "child_pid.txt" - - # Wait for PID files to be created - for _ in range(10): - if parent_pid_file.exists() and child_pid_file.exists(): - break - time.sleep(0.5) - else: - process.terminate() - process.wait(timeout=5) - pytest.fail("PID files were not created in time") - - with open(parent_pid_file, "r") as f: - parent_pid = int(f.read().strip()) - with open(child_pid_file, "r") as f: - child_pid = int(f.read().strip()) - - print(f"Parent PID: {parent_pid}, Child PID: {child_pid}") - - # Verify the lock is held by the child process - assert is_lock_held(temp_dir, lock_name) is True, "Lock should be held by child process" - - # Try to acquire the lock - should fail and return child's PID - result = acquire_singleton_lock(temp_dir, lock_name) 
- assert result == child_pid, f"Should get child PID {child_pid}, got {result}" - - # Kill the parent process - print(f"Killing parent process {parent_pid}") - os.kill(parent_pid, signal.SIGTERM) - - # Wait a moment for the parent to terminate - time.sleep(2) - - # Verify the child process is still running - try: - # Check if child process is still alive - os.kill(child_pid, 0) # This will raise OSError if process doesn't exist - child_still_alive = True - except OSError: - child_still_alive = False - - assert child_still_alive, "Child process should still be alive after parent termination" - - # Verify the lock is still held by the child process - assert is_lock_held(temp_dir, lock_name) is True, "Lock should still be held by child process" - - # Try to acquire the lock again - should still fail and return child's PID - result = acquire_singleton_lock(temp_dir, lock_name) - assert result == child_pid, f"Should still get child PID {child_pid}, got {result}" - - # Clean up by killing the child process - print(f"Killing child process {child_pid}") - os.kill(child_pid, signal.SIGTERM) - - # Wait for child to terminate - time.sleep(2) - - # Verify the lock is no longer held - assert is_lock_held(temp_dir, lock_name) is False, "Lock should be released after child termination" - - # Clean up the subprocess - process.terminate() - process.wait(timeout=5) - - -if __name__ == "__main__": - # Run the tests directly - pytest.main([__file__, "-v"]) From 7cd374afdb5292816137b566566038a2ff429d6e Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 7 Aug 2025 16:53:09 -0700 Subject: [PATCH 31/31] remove ignore test that doesn't exist --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 217e51b3..d0b21795 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,7 +92,6 @@ jobs: --ignore=tests/pytest/test_frozen_lake.py \ --ignore=tests/pytest/test_lunar_lander.py \ 
--ignore=tests/pytest/test_tau_bench_airline.py \ - --ignore=tests/test_eval_watcher.py \ --cov=eval_protocol --cov-append --cov-report=xml --cov-report=term-missing -v --durations=10 - name: Store coverage file