From 77f6318fc538ea978536b206a2ec662687bc2c4b Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 16:04:11 -0700 Subject: [PATCH 01/18] parameterize logger --- .../pytest/default_agent_rollout_processor.py | 14 ++++++++------ .../pytest/default_single_turn_rollout_process.py | 9 ++++----- eval_protocol/pytest/evaluation_test.py | 12 +++++++++--- eval_protocol/pytest/types.py | 10 ++++++++-- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/eval_protocol/pytest/default_agent_rollout_processor.py b/eval_protocol/pytest/default_agent_rollout_processor.py index 4c3b576e..2000c5b4 100644 --- a/eval_protocol/pytest/default_agent_rollout_processor.py +++ b/eval_protocol/pytest/default_agent_rollout_processor.py @@ -8,7 +8,7 @@ from openai.types.chat import ChatCompletionContentPartTextParam, ChatCompletionMessage, ChatCompletionToolParam from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam -from eval_protocol.dataset_logger import default_logger +from eval_protocol.dataset_logger.dataset_logger import DatasetLogger from eval_protocol.mcp.execution.policy import LiteLLMPolicy from eval_protocol.mcp.mcp_multi_client import MCPMultiClient from eval_protocol.models import EvaluationRow, Message @@ -20,12 +20,13 @@ class Agent: A really simple agent that calls the model until no more tool calls are needed. 
""" - def __init__(self, model: str, row: EvaluationRow, config_path: str): + def __init__(self, model: str, row: EvaluationRow, config_path: str, logger: DatasetLogger): self.model = model self.evaluation_row: EvaluationRow = row self._policy = LiteLLMPolicy(model_id=model) self.mcp_client = MCPMultiClient(config_path=config_path) if config_path else None self.tools: Union[List[ChatCompletionToolParam], NotGiven] = NOT_GIVEN + self.logger: DatasetLogger = logger async def setup(self): if self.mcp_client: @@ -40,9 +41,9 @@ async def _get_tools(self) -> Optional[List[ChatCompletionToolParam]]: def messages(self) -> list[Message]: return self.evaluation_row.messages - def append_message_and_log(self, message: Message): + def append_message_and_log(self, message: Message, logger: DatasetLogger): self.messages.append(message) - default_logger.log(self.evaluation_row) + logger.log(self.evaluation_row) async def call_agent(self) -> str: """ @@ -77,7 +78,8 @@ async def call_agent(self) -> str: ChatCompletionContentPartTextParam(text=content.text, type="text") for content in content ], tool_call_id=tool_call_id, - ) + ), + logger=self.logger, ) return await self.call_agent() return message.content @@ -116,7 +118,7 @@ async def default_agent_rollout_processor( ) -> List[EvaluationRow]: dataset: Dataset = [] for row in rows: - agent = Agent(model=config.model, row=row, config_path=config.mcp_config_path) + agent = Agent(model=config.model, row=row, config_path=config.mcp_config_path, logger=config.logger) await agent.setup() await agent.call_agent() dataset.append(agent.evaluation_row) diff --git a/eval_protocol/pytest/default_single_turn_rollout_process.py b/eval_protocol/pytest/default_single_turn_rollout_process.py index 23c8619e..69966b39 100644 --- a/eval_protocol/pytest/default_single_turn_rollout_process.py +++ b/eval_protocol/pytest/default_single_turn_rollout_process.py @@ -1,11 +1,9 @@ import asyncio -from typing import List - import logging import os +from typing 
import List -from eval_protocol.dataset_logger import default_logger -from eval_protocol.models import EvaluationRow, Message, ChatCompletionMessageToolCall +from eval_protocol.models import ChatCompletionMessageToolCall, EvaluationRow, Message from eval_protocol.pytest.types import RolloutProcessorConfig @@ -49,6 +47,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: # Dynamic import to avoid static dependency/lint errors if LiteLLM isn't installed yet import importlib + _litellm = importlib.import_module("litellm") acompletion = getattr(_litellm, "acompletion") response = await acompletion(**request_params) @@ -79,7 +78,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: ] row.messages = messages - default_logger.log(row) + config.logger.log(row) return row # Process rows with bounded concurrency if configured diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index a2fe54a8..f3fca561 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -8,6 +8,7 @@ import pytest from eval_protocol.dataset_logger import default_logger +from eval_protocol.dataset_logger.dataset_logger import DatasetLogger from eval_protocol.human_id import generate_id from eval_protocol.models import CompletionParams, EvalMetadata, EvaluationRow, InputMetadata, Message from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter @@ -55,6 +56,7 @@ def evaluation_test( # noqa: C901 steps: int = 30, mode: EvaluationTestMode = "batch", combine_datasets: bool = True, + logger: Optional[DatasetLogger] = None, ) -> Callable[ [TestFunction], TestFunction, @@ -117,8 +119,11 @@ def evaluation_test( # noqa: C901 mode: Evaluation mode. "batch" (default) expects test function to handle full dataset. "pointwise" applies test function to each row. If your evaluation requires the full rollout of all rows to compute the score, use + logger: DatasetLogger to use for logging. 
If not provided, a default logger will be used. """ + active_logger: DatasetLogger = logger if logger else default_logger + def decorator( test_func: TestFunction, ): @@ -287,7 +292,7 @@ def wrapper_body(**kwargs): def _log_eval_error( status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool ) -> None: - log_eval_status_and_rows(eval_metadata, rows, status, passed, default_logger) + log_eval_status_and_rows(eval_metadata, rows, status, passed, active_logger) try: # Handle dataset loading @@ -369,7 +374,7 @@ def _log_eval_error( # has to be done in the pytest main process since it's # used to determine whether this eval has stopped row.pid = os.getpid() - default_logger.log(row) + active_logger.log(row) # Prepare rollout processor config once; we will generate fresh outputs per run config = RolloutProcessorConfig( @@ -379,6 +384,7 @@ def _log_eval_error( max_concurrent_rollouts=max_concurrent_rollouts, server_script_path=server_script_path, steps=steps, + logger=active_logger, ) for _ in range(num_runs): @@ -463,7 +469,7 @@ def _log_eval_error( if r.eval_metadata is not None: r.eval_metadata.status = "finished" r.eval_metadata.passed = passed - default_logger.log(r) + active_logger.log(r) # Optional: print and/or persist a summary artifact for CI try: diff --git a/eval_protocol/pytest/types.py b/eval_protocol/pytest/types.py index 880a7029..42fb3d56 100644 --- a/eval_protocol/pytest/types.py +++ b/eval_protocol/pytest/types.py @@ -5,6 +5,9 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, List, Literal, Optional +from eval_protocol.dataset_logger import default_logger +from eval_protocol.dataset_logger.dataset_logger import DatasetLogger + from ..models import EvaluationRow, Message ModelParam = str # gpt-4o, gpt-4o-mini, accounts/fireworks/models/llama-3.1-8b-instruct @@ -39,10 +42,13 @@ class RolloutProcessorConfig: model: ModelParam input_params: RolloutInputParam # optional input parameters for 
inference - mcp_config_path: str - server_script_path: Optional[str] = None # TODO: change from server_script_path to mcp_config_path for agent rollout processor + mcp_config_path: str + server_script_path: Optional[str] = ( + None # TODO: change from server_script_path to mcp_config_path for agent rollout processor + ) max_concurrent_rollouts: int = 8 # maximum number of concurrent rollouts steps: int = 30 # max number of rollout steps + logger: DatasetLogger = default_logger # logger to use during rollout for mid-rollout logs RolloutProcessor = Callable[[List[EvaluationRow], RolloutProcessorConfig], List[EvaluationRow]] From c436eaf216358034e122d995f071f6b67b0cb7df Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 16:13:14 -0700 Subject: [PATCH 02/18] fix logging too much --- eval_protocol/pytest/evaluation_test.py | 5 ++- tests/pytest/test_pytest_ids.py | 57 ++++++++++++++++++++----- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index f3fca561..25e0b32f 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -374,7 +374,6 @@ def _log_eval_error( # has to be done in the pytest main process since it's # used to determine whether this eval has stopped row.pid = os.getpid() - active_logger.log(row) # Prepare rollout processor config once; we will generate fresh outputs per run config = RolloutProcessorConfig( @@ -401,6 +400,10 @@ def _log_eval_error( for row in fresh_dataset: row.rollout_id = generate_id() + # log the fresh_dataset + for row in fresh_dataset: + active_logger.log(row) + processed_dataset = execute_function(rollout_processor, rows=fresh_dataset, config=config) if mode == "pointwise": diff --git a/tests/pytest/test_pytest_ids.py b/tests/pytest/test_pytest_ids.py index b740aaa6..a799d392 100644 --- a/tests/pytest/test_pytest_ids.py +++ b/tests/pytest/test_pytest_ids.py @@ -1,33 +1,67 @@ -import 
eval_protocol.pytest.evaluation_test as evaluation_test_module +from typing import List + +import eval_protocol.dataset_logger as dataset_logger +from eval_protocol.dataset_logger.dataset_logger import DatasetLogger from eval_protocol.models import EvaluationRow from eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor -from eval_protocol.pytest.evaluation_test import evaluation_test as evaluation_decorator from tests.pytest.test_markdown_highlighting import markdown_dataset_to_evaluation_row -class InMemoryLogger: +class InMemoryLogger(DatasetLogger): def __init__(self): - self._rows = [] + self._rows: dict[str, EvaluationRow] = {} - def log(self, row): - self._rows.append(row) + def log(self, row: EvaluationRow): + print(row.run_id, row.rollout_id) + self._rows[row.rollout_id] = row def read(self): - return list(self._rows) + return list(self._rows.values()) + + +def test_evaluation_test_decorator(monkeypatch): + from eval_protocol.pytest.evaluation_test import evaluation_test + + logger = InMemoryLogger() + + @evaluation_test( + input_dataset=[ + "tests/pytest/data/markdown_dataset.jsonl", + ], + model=["dummy/local-model"], + dataset_adapter=markdown_dataset_to_evaluation_row, + rollout_processor=default_no_op_rollout_processor, + mode="pointwise", + combine_datasets=False, + num_runs=2, + logger=logger, + ) + def eval_fn(row: EvaluationRow) -> EvaluationRow: + return row + + dataset_paths = [ + "tests/pytest/data/markdown_dataset.jsonl", + ] + + # Manually invoke all parameter combinations within a single test + for ds_path in dataset_paths: + eval_fn(model="dummy/local-model", dataset_path=[ds_path]) + + # Assertions on IDs generated by the decorator logic + assert len(logger.read()) == 38 def test_evaluation_test_decorator_ids_single(monkeypatch): - # Use an in-memory logger to avoid sqlite side effects in_memory_logger = InMemoryLogger() - monkeypatch.setattr(evaluation_test_module, "default_logger", in_memory_logger, 
raising=False) - unique_run_ids = set() unique_cohort_ids = set() unique_rollout_ids = set() unique_invocation_ids = set() unique_row_ids = set() - @evaluation_decorator( + from eval_protocol.pytest.evaluation_test import evaluation_test + + @evaluation_test( input_dataset=[ "tests/pytest/data/markdown_dataset.jsonl", "tests/pytest/data/markdown_dataset.jsonl", @@ -39,6 +73,7 @@ def test_evaluation_test_decorator_ids_single(monkeypatch): mode="pointwise", combine_datasets=False, num_runs=5, + logger=InMemoryLogger(), ) def eval_fn(row: EvaluationRow) -> EvaluationRow: unique_run_ids.add(row.run_id) From 6025f780f306ff3fe11dc2b04b6e8707fa18fe0b Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 16:38:11 -0700 Subject: [PATCH 03/18] fix sqlite stuff to use rollout_id --- .../sqlite_dataset_logger_adapter.py | 8 +++---- .../sqlite_evaluation_row_store.py | 22 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py b/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py index 51ee9006..536c3902 100644 --- a/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py @@ -22,9 +22,9 @@ def __init__(self, db_path: Optional[str] = None, store: Optional[SqliteEvaluati self._store = SqliteEvaluationRowStore(self.db_path) def log(self, row: "EvaluationRow") -> None: - row_id = row.input_metadata.row_id + rollout_id = row.rollout_id data = row.model_dump(exclude_none=True, mode="json") - self._store.upsert_row(row_id=row_id, data=data) + self._store.upsert_row(rollout_id=rollout_id, data=data) try: event_bus.emit(LOG_EVENT_TYPE, EvaluationRow(**data)) except Exception as e: @@ -32,8 +32,8 @@ def log(self, row: "EvaluationRow") -> None: logger.error(f"Failed to emit row_upserted event: {e}") pass - def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]: + def read(self, rollout_id: 
Optional[str] = None) -> List["EvaluationRow"]: from eval_protocol.models import EvaluationRow - results = self._store.read_rows(row_id=row_id) + results = self._store.read_rows(rollout_id=rollout_id) return [EvaluationRow(**data) for data in results] diff --git a/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py b/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py index bce74264..2e029e56 100644 --- a/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +++ b/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py @@ -11,7 +11,7 @@ class SqliteEvaluationRowStore: """ Lightweight reusable SQLite store for evaluation rows. - Stores arbitrary row data as JSON keyed by a unique string `row_id`. + Stores arbitrary row data as JSON keyed by a unique string `rollout_id`. """ def __init__(self, db_path: str): @@ -24,7 +24,7 @@ class Meta: database = self._db class EvaluationRow(BaseModel): # type: ignore - row_id = CharField(unique=True) + rollout_id = CharField(unique=True) data = JSONField() self._EvaluationRow = EvaluationRow @@ -36,22 +36,22 @@ class EvaluationRow(BaseModel): # type: ignore def db_path(self) -> str: return self._db_path - def upsert_row(self, row_id: str, data: dict) -> None: - if self._EvaluationRow.select().where(self._EvaluationRow.row_id == row_id).exists(): - self._EvaluationRow.update(data=data).where(self._EvaluationRow.row_id == row_id).execute() + def upsert_row(self, rollout_id: str, data: dict) -> None: + if self._EvaluationRow.select().where(self._EvaluationRow.rollout_id == rollout_id).exists(): + self._EvaluationRow.update(data=data).where(self._EvaluationRow.rollout_id == rollout_id).execute() else: - self._EvaluationRow.create(row_id=row_id, data=data) + self._EvaluationRow.create(rollout_id=rollout_id, data=data) - def read_rows(self, row_id: Optional[str] = None) -> List[dict]: - if row_id is None: + def read_rows(self, rollout_id: Optional[str] = None) -> List[dict]: + if rollout_id is None: query = 
self._EvaluationRow.select().dicts() else: - query = self._EvaluationRow.select().dicts().where(self._EvaluationRow.row_id == row_id) + query = self._EvaluationRow.select().dicts().where(self._EvaluationRow.rollout_id == rollout_id) results = list(query) return [result["data"] for result in results] - def delete_row(self, row_id: str) -> int: - return self._EvaluationRow.delete().where(self._EvaluationRow.row_id == row_id).execute() + def delete_row(self, rollout_id: str) -> int: + return self._EvaluationRow.delete().where(self._EvaluationRow.rollout_id == rollout_id).execute() def delete_all_rows(self) -> int: return self._EvaluationRow.delete().execute() From 95c7a296c680ae7ddf2fcedfc711f40828e69c54 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 16:38:21 -0700 Subject: [PATCH 04/18] fix --- tests/dataset_logger/test_sqlite_dataset_logger_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py b/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py index 3b8c42b5..d7dc5a63 100644 --- a/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py +++ b/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py @@ -24,7 +24,7 @@ def test_update_log_and_read(): logger = SqliteDatasetLoggerAdapter() logger.log(row) - saved = logger.read(row_id="1")[0] + saved = logger.read(rollout_id="1")[0] assert row.messages == saved.messages assert row.input_metadata == saved.input_metadata @@ -42,7 +42,7 @@ def test_create_log_and_read(): row = EvaluationRow(input_metadata=input_metadata, messages=messages) logger.log(row) - saved = logger.read(row_id="1")[0] + saved = logger.read(rollout_id="1")[0] assert row.messages == saved.messages assert row.input_metadata == saved.input_metadata From adc73df3791c0acd2783322f340bd299141a9490 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 22:59:53 -0700 Subject: [PATCH 05/18] fix sqlite logger tests --- 
.../dataset_logger/sqlite_dataset_logger_adapter.py | 3 +-- .../dataset_logger/sqlite_evaluation_row_store.py | 5 ++++- .../dataset_logger/test_sqlite_dataset_logger_adapter.py | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py b/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py index 536c3902..37704266 100644 --- a/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +++ b/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py @@ -22,9 +22,8 @@ def __init__(self, db_path: Optional[str] = None, store: Optional[SqliteEvaluati self._store = SqliteEvaluationRowStore(self.db_path) def log(self, row: "EvaluationRow") -> None: - rollout_id = row.rollout_id data = row.model_dump(exclude_none=True, mode="json") - self._store.upsert_row(rollout_id=rollout_id, data=data) + self._store.upsert_row(data=data) try: event_bus.emit(LOG_EVENT_TYPE, EvaluationRow(**data)) except Exception as e: diff --git a/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py b/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py index 2e029e56..1a6618e5 100644 --- a/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +++ b/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py @@ -36,7 +36,10 @@ class EvaluationRow(BaseModel): # type: ignore def db_path(self) -> str: return self._db_path - def upsert_row(self, rollout_id: str, data: dict) -> None: + def upsert_row(self, data: dict) -> None: + rollout_id = data["rollout_id"] + if "rollout_id" not in data: + raise ValueError("rollout_id is required to upsert a row") if self._EvaluationRow.select().where(self._EvaluationRow.rollout_id == rollout_id).exists(): self._EvaluationRow.update(data=data).where(self._EvaluationRow.rollout_id == rollout_id).execute() else: diff --git a/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py b/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py index 
d7dc5a63..897691b2 100644 --- a/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py +++ b/tests/dataset_logger/test_sqlite_dataset_logger_adapter.py @@ -18,13 +18,13 @@ def test_update_log_and_read(): messages = [Message(role="user", content="Hello")] input_metadata = InputMetadata(row_id="1") row = EvaluationRow(input_metadata=input_metadata, messages=messages) - store.upsert_row(row_id="1", data=row.model_dump(exclude_none=True, mode="json")) + store.upsert_row(data=row.model_dump(exclude_none=True, mode="json")) row.messages.append(Message(role="assistant", content="Hello")) - logger = SqliteDatasetLoggerAdapter() + logger = SqliteDatasetLoggerAdapter(store=store) logger.log(row) - saved = logger.read(rollout_id="1")[0] + saved = logger.read(row.rollout_id)[0] assert row.messages == saved.messages assert row.input_metadata == saved.input_metadata @@ -42,7 +42,7 @@ def test_create_log_and_read(): row = EvaluationRow(input_metadata=input_metadata, messages=messages) logger.log(row) - saved = logger.read(rollout_id="1")[0] + saved = logger.read(rollout_id=row.rollout_id)[0] assert row.messages == saved.messages assert row.input_metadata == saved.input_metadata From 41c74b683e32e91fe599dd069ee9ad7ffe891505 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 23:01:33 -0700 Subject: [PATCH 06/18] fix default_agent_rollout_processor.py --- eval_protocol/pytest/default_agent_rollout_processor.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/eval_protocol/pytest/default_agent_rollout_processor.py b/eval_protocol/pytest/default_agent_rollout_processor.py index 2000c5b4..bd7c62c2 100644 --- a/eval_protocol/pytest/default_agent_rollout_processor.py +++ b/eval_protocol/pytest/default_agent_rollout_processor.py @@ -41,9 +41,9 @@ async def _get_tools(self) -> Optional[List[ChatCompletionToolParam]]: def messages(self) -> list[Message]: return self.evaluation_row.messages - def append_message_and_log(self, message: Message, 
logger: DatasetLogger): + def append_message_and_log(self, message: Message): self.messages.append(message) - logger.log(self.evaluation_row) + self.logger.log(self.evaluation_row) async def call_agent(self) -> str: """ @@ -78,8 +78,7 @@ async def call_agent(self) -> str: ChatCompletionContentPartTextParam(text=content.text, type="text") for content in content ], tool_call_id=tool_call_id, - ), - logger=self.logger, + ) ) return await self.call_agent() return message.content From a5c701e8d6278b1097367d3974b803431202c0d7 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 23:05:49 -0700 Subject: [PATCH 07/18] for generating some test data --- tests/pytest/test_pytest_flaky_sometimes.py | 31 +++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/pytest/test_pytest_flaky_sometimes.py diff --git a/tests/pytest/test_pytest_flaky_sometimes.py b/tests/pytest/test_pytest_flaky_sometimes.py new file mode 100644 index 00000000..cb70ec1e --- /dev/null +++ b/tests/pytest/test_pytest_flaky_sometimes.py @@ -0,0 +1,31 @@ +import os +import random +from typing import List + +import pytest + +from eval_protocol.models import EvaluateResult, EvaluationRow, Message +from eval_protocol.pytest import default_no_op_rollout_processor, evaluation_test + + +# skip in CI since it will intentionally fail. This is useful for local generation of logs +@pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping flaky test in CI") +@evaluation_test( + input_messages=[[Message(role="user", content="Return HEADS or TAILS at random.")]], + model=["dummy/local-model"], + rollout_processor=default_no_op_rollout_processor, + mode="pointwise", + num_runs=5, +) +def test_flaky_passes_sometimes(row: EvaluationRow) -> EvaluationRow: + """ + A deliberately flaky evaluation that only passes occasionally. + + With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean + will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any + actual model calls. + """ + # Stochastic score: 1.0 with 30% probability, else 0.0 + score = 1.0 if random.random() < 0.3 else 0.0 + row.evaluation_result = EvaluateResult(score=score, reason=f"stochastic={score}") + return row From e16306f233deb28aad24ab89c0d0567c644e1a77 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 23:23:02 -0700 Subject: [PATCH 08/18] comment out saving logs code --- eval_protocol/pytest/evaluation_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 25e0b32f..fe0c8cf5 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -596,6 +596,23 @@ def _extract_effort_tag(params: dict) -> str | None: # Do not fail evaluation if summary writing fails pass + # # Write all rows from active_logger.read() to a JSONL file in the same directory as the summary + # try: + # if active_logger is not None: + # rows = active_logger.read() + # # Write to a .jsonl file alongside the summary file + # jsonl_path = "logs.jsonl" + # import json + + # with open(jsonl_path, "w", encoding="utf-8") as f_jsonl: + # for row in rows: + # json.dump(row.model_dump(exclude_none=True, mode="json"), f_jsonl) + # f_jsonl.write("\n") + # except Exception as e: + # # Do not fail evaluation if log writing fails + # print(e) + # pass + # Check threshold after logging if threshold_of_success is not None and not passed: assert ( From 7887ddd22aa5cdd96a4ad453e1e8f68a4bda1677 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 23:34:02 -0700 Subject: [PATCH 09/18] Refactor pivot test to handle JSONL logs and improve data parsing - Updated the test to read from a new JSONL log file instead of JSON, enhancing data handling. - Adjusted the parsing logic to accommodate line-by-line JSON parsing for better flexibility. 
- Modified row and column field definitions to include additional identifiers for improved data aggregation. --- vite-app/data/logs.jsonl | 127 ++++++++++++++++++++++++++++++++ vite-app/src/util/pivot.test.ts | 21 ++++-- 2 files changed, 140 insertions(+), 8 deletions(-) create mode 100644 vite-app/data/logs.jsonl diff --git a/vite-app/data/logs.jsonl b/vite-app/data/logs.jsonl new file mode 100644 index 00000000..6a65c057 --- /dev/null +++ b/vite-app/data/logs.jsonl @@ -0,0 +1,127 @@ +{"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}, {"role": "assistant", "content": "# Outline: The History of Yemeni Coffee\n\n1. **Introduction: Yemen as the Cradle of Global Coffee Culture** \n *The mythic origins of Coffea arabica in the highlands of Ethiopia and its first domestication in Yemen* \n Thesis statement: From the terraced mountains of 15th-century Yemen emerged not only the world\u2019s first cultivated coffee but also the legal, commercial, ritual, and sensory frameworks that define global coffee culture today.\n\n2. **Pre-Islamic and Early Islamic Agricultural Context** \n *Ancient terraced agriculture in the Himyarite and Sabaean kingdoms* \n *The role of Sufi monasteries in experimenting with qahwa as a spiritual aid* \n *Early Arabic pharmacological texts describing coffee as medicine*\n\n3. **The 15th-Century Yemeni Innovation: From Wild Plant to Cultivated Crop** \n *Selection and grafting techniques developed in the Haraz and Jabal Sabir ranges* \n *Invention of the first stone-drying patios and rooftop drying racks* \n *Codification of harvest calendars tied to the Islamic lunar year*\n\n4. 
**Mokha: The Port that Named a Flavor** \n *Rise of Al-Mukha as the Red Sea\u2019s premier coffee emporium (1450\u20131720)* \n *Architectural evidence: caravanserais, customs houses, and the *Beit al-Qahwa* guild hall* \n *Currency systems: coffee beans as legal tender alongside silver dirhams*\n\n5. **Legal and Religious Controversies** \n *Fatwas for and against coffee issued by Zaydi imams in 1511 and 1544* \n *The *Majlis al-Qahwa* trials: public debates on intoxication vs. alertness* \n *Coffeehouses as contested spaces of political dissent and poetic performance*\n\n6. **The Ottoman Incorporation (1538\u20131635)** \n *Imposition of the *t\u00fct\u00fcn ve kahve resmi* (tobacco-and-coffee tax)* \n *Yemeni *qahwaji* guilds negotiating autonomy under Ottoman governors* \n *Introduction of the first stamped export seals: *kahve damgas\u0131* on burlap sacks*\n\n7. **The Dutch East India Company and the First Smuggling Crisis (1616)** \n *Pieter van den Broecke\u2019s covert purchase of *Bourbon* seedlings in Mokha* \n *Forgery of *Mokha* seals in Batavia to mask non-Yemeni beans* \n *Yemeni response: *qahwa al-wahda* (single-origin branding) and tamper-proof lead seals*\n\n8. **Expansion of Yemeni Coffee Terraces (17th\u201318th Centuries)** \n *Hydraulic engineering: *saqiya* water-raising wheels and *qanat* irrigation* \n *Intercropping innovations: coffee under *qat* shade trees and *simsim* sesame windbreaks* \n *Gendered labor: women\u2019s role in seed selection and men in long-distance trade*\n\n9. **The Global Commodity Chain: From Mokha to Venice, Amsterdam, and Paris** \n *Price differentials: *Mokha extra primo* vs. *Mokha secondo* in European auctions* \n *Insurance contracts in Lloyd\u2019s Coffeehouse specifying \u201cYemen origin\u201d as risk factor* \n *Flavor descriptors coined in Europe: *mocha-java* blends and the birth of terroir language*\n\n10. 
**Decline of Yemeni Hegemony (1720\u20131800)** \n *British and French colonial plantations in Java, R\u00e9union, and the Caribbean* \n *Collapse of the *kahve ak\u00e7esi* tax farm under fiscal pressure* \n *Piracy along the Bab al-Mandab and the shift to Mocha-Suez overland caravans*\n\n11. **Yemeni Coffee in the Age of Imperial Steam (19th Century)** \n *Competition with Brazilian arabica and the rise of *Coffea canephora* (robusta)* \n *Introduction of steamships reducing transit time from 90 to 21 days* \n *The *Mokha Letter of Credit*: early use of coffee futures in Red Sea trade*\n\n12. **Colonial Botany and Genetic Legacy** \n *The *Typica* and *Bourbon* varietals traced back to Yemeni landraces* \n *German botanical expeditions (1898\u20131902) cataloging heirloom Yemeni cultivars* \n *DNA fingerprinting (2014) confirming Yemeni genetic diversity as global reservoir*\n\n13. **Modern Yemeni Coffee Renaissance (1990\u2013Present)** \n *Post-unification revival of *qahwa al-bun* micro-roasters in Sana\u2019a and Ta\u2018izz* \n *NGO-led traceability programs: *Haraz Traceable Lot* and *Al-Wadi Single Village* labels* \n *Third-wave specialty roasters in Tokyo, Oslo, and Portland marketing \u201cYemenia\u201d cultivar*\n\n14. **Cultural Heritage and Ritual Continuities** \n *The *qahwat al-mawlad* ceremony: coffee as covenant in tribal dispute resolution* \n *UNESCO listing of Yemeni coffee cultivation as Intangible Cultural Heritage (2023)* \n *Contemporary Yemeni diaspora caf\u00e9s: *mokha on tap* and *bunna djejim* pop-ups*\n\n15. 
**Conclusion: Yemen\u2019s Enduring Gift to the World** \n *Reiteration of Yemen\u2019s role as originator of coffee cultivation, trade law, and caf\u00e9 culture* \n *Reflection on climate change threats to Yemeni heirloom varieties* \n *Call for ethical sourcing and direct-trade models to sustain living heritage*"}], "input_metadata": {"row_id": "1307", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "let-american-clean-medical-history", "run_id": "review-hard-single-newspaper-team", "ground_truth": "15", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 84 highlighted sections (required: 15)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176256", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". For example: *3 years of experience*."}, {"role": "assistant", "content": "Dear Brewed Awakening,\n\nI am the *early-bird steam* that curls above your grinders, \nthe *latte-art heart* that beats in every porcelain curve. 
\nMy apron is a canvas of *espresso freckles*, \neach dot a story told in *3 years of experience* \nfrom bean to cup, from dawn rush to dusk hush.\n\nI know the *symphony of the steam wand*\u2014 \nits hiss, its sigh, its whispered crema secrets\u2014 \nand how to coax *microfoam galaxies* \ninto orbit around a *double-shot comet*. \nI\u2019ve tamped, poured, swirled, and smiled \nthrough *Saturday lines out the door*, \nkeeping tempers cool as the milk I *steam to 140\u00b0*.\n\nI speak fluent *small-town regular*: \n\u201cMedium drip, room for kindness?\u201d \nI remember that *Mrs. Alvarez likes oat milk*, \nthat *Mr. Chen takes two raw sugars*, \nand that *your dog, Scout, prefers puppuccinos*.\n\nI sweep, I stock, I *balance the till to the penny*, \nbut mostly I *brew community*\u2014 \none *extra-hot americano* at a time. \nHire me, and the morning light \nwill find me already *wiping down tables*, \nalready *saving the last blueberry scone* \nfor the kid who always forgets his wallet.\n\nWith *grounds in my veins* and *crema in my smile*, \nI remain,\n\nYour future barista, \nMaya"}], "input_metadata": {"row_id": "1644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "explain-entire-tidy-phone-result", "run_id": "review-hard-single-newspaper-team", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176336", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": 
"v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}, {"role": "assistant", "content": "Hey friends! Grab a coffee and let\u2019s talk about something that always blows my mind: *how much we all have in common, even when we look nothing alike*. I was scrolling through wildlife photos the other night and realized that a jellyfish and I are basically running the same basic software\u2014just with different packaging. So here\u2019s a laid-back tour of five \u201cwait, we do that too?!\u201d moments across the animal kingdom.\n\n*We all need our beauty sleep* \nTurns out, every animal with a nervous system\u2014from fruit flies to blue whales\u2014shares a version of sleep. Crows nap on tree branches, octopuses snooze in coconut-shell beds, and even roundworms have \u201clethargus,\u201d which is basically worm REM. Scientists think sleep is so ancient that the last common ancestor of all animals already had it. So next time you hit snooze, remember you\u2019re honoring a 700-million-year-old tradition.\n\n*Love songs come in many dialects* \nWhether you\u2019re a human humming in the shower or a humpback whale belting out 30-minute ballads, *we all use rhythm and pitch to flirt and bond*. Frogs croak in choruses, nightingales remix their playlists nightly, and male mice sing ultrasonic \u201cpickup lines\u201d too high for us to hear. Same goal, different instruments.\n\n*Grandma knew best: family matters* \nElephants babysit their nieces, wolves bring back take-out for auntie, and *clownfish dads are the ultimate stay-at-home fathers*. Across species, kinship networks boost survival. 
Even bacteria split into \u201csibling\u201d lineages that cooperate\u2014so the next time your cousin crashes on your couch, call it evolutionary strategy.\n\n*Stress looks the same on everyone* \nWhen life gets spicy, our bodies flip the same ancient switch. *Cortisol floods fish, birds, and humans alike*, making hearts race and muscles twitch. Zebra finches lose their appetite during tough times, and stressed-out bees become pessimistic (yes, they can test bee mood). So if you\u2019re doom-scrolling at 2 a.m., know that a stressed squirrel in your backyard totally gets you.\n\n*Play isn\u2019t optional\u2014it\u2019s homework for life* \nOtters juggle rocks, dogs zoom in circles, and young ravens snowboard down snowy roofs using jar lids. *Play builds brains and social skills across the board*. Researchers have spotted reptiles \u201cwrestling\u201d for fun and octopuses repeatedly bouncing Lego blocks like toddlers. If it has neurons, it probably needs recess.\n\nNext time you feel alone in the universe, remember that somewhere a parrot is probably preening its best friend, a bat is sharing food with a hungry roost-mate, and a tiny nematode is curling up for a nap. 
We\u2019re all just variations on a very old, very shared theme."}], "input_metadata": {"row_id": "1646", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "let-safe-magazine-government-adventure", "run_id": "review-hard-single-newspaper-team", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 10 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176407", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format:\n*section name*"}, {"role": "assistant", "content": "*Executive Summary*\n\nBremen is a city where maritime heritage meets cutting-edge creativity. Our venture, \u201cNordHood Bremen,\u201d will launch a premium, design-driven sweatshirt brand that fuses Hanseatic culture with contemporary streetwear. In year one we will release three limited-edition drops (total 6,000 units), achieve \u20ac420,000 in revenue, and reach break-even by month 14. The brand will be 100 % online-direct at launch, then expand into two Bremen concept stores and select European boutiques in years two and three. 
We will operate carbon-neutral, use 100 % organic cotton, and reinvest 3 % of profits into local youth art programs\u2014turning every sweatshirt into a wearable piece of Bremen pride.\n\n*Market Analysis*\n\nBremen\u2019s 0.57 million residents and 500,000 annual tourists create a \u20ac35 million addressable mid-price sweatshirt market. Key insights: \n\u2022 68 % of 18-35-year-olds prefer streetwear that tells a local story (Hochschule Bremen Trend Report 2023). \n\u2022 Tourists spend \u20ac180 million on souvenirs, yet only 11 % are apparel items\u2014an untapped niche. \n\u2022 Competitor scan: global brands (Nike, Adidas) dominate functionality; regional labels (Hamburg\u2019s \u201cDinghy,\u201d Hannover\u2019s \u201cLiguster\u201d) lack Bremen specificity. \n\u2022 Online D2C share in German apparel has grown from 18 % (2019) to 31 % (2023), proving the viability of our launch model. \nWe will target two segments: \u201cCulture Creatives\u201d (students, designers, musicians) and \u201cHeritage Seekers\u201d (tourists, expats). Both segments value authenticity, limited editions, and sustainability.\n\n*Product & Brand Strategy*\n\nSignature Product: The \u201cHanse Hood\u201d \u2013 460 g brushed organic-cotton fleece, oversized fit, dropped shoulders, double-stitched flat seams, and a hidden inside pocket sized for a Bremen transit card. \nDesign Language: Minimalist silhouettes in maritime colorways (Bremen Blue, Weser Sand, Harbor Grey) with subtle iconography\u2014windrose embroidery, cog-ship sleeve print, and a QR code linking to an AR story of the garment\u2019s journey. \nSustainability: GOTS-certified cotton, recycled polyester labels, biodegradable mailers, and a take-back program offering 15 % discount on next purchase. \nDrop Model: Three seasonal drops per year (Spring \u201cHanse Sail,\u201d Fall \u201cSchlachte,\u201d Winter \u201cWeihnachtsmarkt\u201d), each capped at 2,000 units to maintain scarcity. 
\nBrand Voice: \u201cWear the City.\u201d Every hangtag includes a Bremen insider tip curated by local artists, turning the sweatshirt into a cultural guide.\n\n*Operations & Supply Chain*\n\nDesign Studio: 120 m\u00b2 loft in \u00dcberseestadt (rent \u20ac1,800/month) doubling as photo studio and event space. \nManufacturing: Partnership with a Fair-Wear-audited factory in Porto, Portugal (MOQ 500, 30-day lead time, \u20ac18 landed cost per unit). \nLogistics: Bremen-based 3PL \u201cLogwin\u201d for pick-pack-ship (\u20ac1.20 per parcel, same-day dispatch). \nTech Stack: Shopify Plus storefront, Klarna & PayPal checkout, Shipcloud integration, and a custom AR filter for Instagram try-ons. \nQuality Control: In-house inspection on arrival; defective rate target <1 %. \nRisk Mitigation: Dual-source yarn from Turkey and Portugal; maintain 8-week safety stock.\n\n*Marketing & Sales Plan*\n\nPre-Launch (Months 1-2): \n\u2022 Teaser campaign on TikTok & Instagram Reels (#NordHoodBremen) with local influencers (5-50 k followers). \n\u2022 Pop-up try-on booth at Viertel Kreativmarkt; collect 1,000 email sign-ups. \nLaunch (Month 3): \n\u2022 \u201cMidnight Drop\u201d online at 00:07 (Bremen dialing code) with countdown timer. \n\u2022 Limited 100 \u201cFounders\u2019 Edition\u201d sweatshirts laser-numbered and signed by Bremen street artist \u201cPlotBot.\u201d \nGrowth (Months 4-12): \n\u2022 Collaborations: capsule with SV Werder Bremen fan design; co-branded hoodie with \u00dcberseemuseum. \n\u2022 Referral program: give \u20ac10, get \u20ac10. \n\u2022 Tourist channel: partnership with Bremer Touristik-Zentrum for QR-coded city map bundled with sweatshirt. 
\nKPIs: CAC \u2264 \u20ac18, repeat purchase rate \u2265 35 %, Instagram engagement \u2265 8 %.\n\n*Financial Plan*\n\nStart-up Capital Needed: \u20ac85,000 \n\u2022 Inventory (3,000 units): \u20ac54,000 \n\u2022 Marketing & content: \u20ac12,000 \n\u2022 E-commerce & IT: \u20ac6,000 \n\u2022 Legal, trademark, insurance: \u20ac5,000 \n\u2022 Working capital buffer: \u20ac8,000 \nFunding: 60 % founder equity, 40 % Bremen F\u00f6rderbank micro-loan at 1.9 % interest. \nYear-1 P&L (conservative): \nRevenue \u20ac420,000 (avg. \u20ac70 net per unit) \nCOGS \u20ac234,000 \nGross Profit \u20ac186,000 \nOperating Expenses \u20ac150,000 \nEBIT \u20ac36,000 \nBreak-even: 1,800 units or month 14. \nThree-year projection: revenue \u20ac1.4 million, EBITDA margin 18 %, reinvestment into two Bremen stores and EU wholesale.\n\n*Appendices*\n\nA. Mood-board & color palette \nB. Factory audit certificate \nC. Sample influencer contracts \nD. Detailed 36-month cash-flow model"}], "input_metadata": {"row_id": "167", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "apply-wonder-solid-result-part", "run_id": "review-hard-single-newspaper-team", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 7 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176486", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 
67676} +{"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "**The Official \u201cTwo-Mom Marriage Quality Report Card\u201d** \n(For internal use only. If this ends up on Facebook, we *will* change the Wi-Fi password.)\n\n---\n\n**1. Communication Score: ___/10** \n*\u201cDid we just have a 45-minute conversation about whose turn it is to buy more goldfish crackers, or was that a TED Talk?\u201d* \n- 10 = We finish each other\u2019s sentences\u2026 and each other\u2019s coffee. \n- 5 = We communicate exclusively via passive-aggressive Post-it notes on the fridge. \n- 0 = We\u2019ve resorted to interpretive dance and/or carrier pigeons. \n\n**Bonus points** if you\u2019ve ever used the kids as tiny, sticky messengers (\u201cTell Mommy she\u2019s *cordially invited* to take out the trash\u201d).\n\n---\n\n**2. Chore Wars Score: ___/10** \n*\u201cThe laundry basket is a Jenga tower of lies.\u201d* \n- 10 = We\u2019ve achieved a chore utopia where socks actually match. \n- 5 = We\u2019ve agreed to \u2018divide\u2019 labor\u2026 by one of us doing everything and the other \u2018supervising\u2019. \n- 0 = The dishwasher has become a permanent storage unit. \n\n**Penalty** if either mom has ever \u2018forgotten\u2019 how to load the dishwasher *again*.\n\n---\n\n**3. Co-Parenting Harmony Score: ___/10** \n*\u201cWe tag-team tantrums like WWE pros.\u201d* \n- 10 = We\u2019ve synchronized our \u2018Mom Voice\u2019 to subsonic frequencies that terrify small children. \n- 5 = We\u2019ve debated whether \u2018screen time\u2019 is a human right or a war crime. \n- 0 = We\u2019ve weaponized bedtime stories (\u2018And then the princess *never* got dessert again\u2026\u2019). 
\n\n**Extra credit** for every time you\u2019ve high-fived over a successful *\u201cBecause I said so\u201d* deployment.\n\n---\n\n**4. Romance Maintenance Score: ___/10** \n*\u201cDate night: when we argue over which drive-thru has the shortest line.\u201d* \n- 10 = We\u2019ve scheduled intimacy between soccer practice and PTA meetings. \n- 5 = We\u2019ve agreed that \u2018Netflix and actually chill\u2019 counts as foreplay. \n- 0 = Our last romantic gesture was sharing the last chicken nugget. \n\n**Automatic fail** if either of you has ever said, \u201cI\u2019m too tired\u201d\u2026 *while scrolling Instagram*.\n\n---\n\n**5. Emergency Snack Reserves Score: ___/10** \n*\u201cThe pantry is a battlefield of empty granola bar boxes.\u201d* \n- 10 = We\u2019ve hidden chocolate so well we forgot where it is. \n- 5 = We\u2019ve labeled snacks with fake allergen warnings to deter theft. \n- 0 = The kids have unionized and are demanding better rations. \n\n**Gold star** if you\u2019ve ever *texted each other* to brag about finding the secret stash.\n\n---\n\n**Final Grade: ___/50** \n- 40\u201350: *Power couple.* You\u2019re basically the Beyonc\u00e9 and Beyonc\u00e9 of lesbian moms. \n- 25\u201339: *Functioning chaos.* You\u2019ve accepted that love is just shared exhaustion. \n- 0\u201324: *Renew your vows\u2026 or your Costco membership.* Either way, you\u2019re in this together. 
\n\n**Disclaimer:** This report is not legally binding, but hiding the good snacks *is* grounds for divorce."}], "input_metadata": {"row_id": "168", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "disassemble-salty-final-similar-president", "run_id": "review-hard-single-newspaper-team", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 29 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176571", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*."}, {"role": "assistant", "content": "*little me in the countryside*\n\n[Verse 1] \nBarefoot on the gravel road, dust between my toes, \nSunlight dripped like honey on the clover and the crows. \nGrandma\u2019s porch swing creaked a lullaby so slow, \nWhile the creek below kept singing songs only we could know. \nFireflies stitched the twilight with lanterns made of stars, \nAnd the moon was just a silver coin tossed into cookie jars. \n\n[Chorus] \nOh, little me in the countryside, running wild and free, \nChasing dreams through cornfield mazes, climbing every tree. 
\nEvery laugh was louder than the thunder rolling by, \nIn the summers of my childhood, underneath that endless sky. \n\n[Verse 2] \nMason jars of sweet tea sweating on the window ledge, \nRadio played \u201cSweet Home Alabama\u201d from the rusty garden shed. \nWe built rafts from old barn doors and floated down the stream, \nKingfishers were pirates and the cattails were our team. \nSupper was tomatoes sliced like sunsets on a plate, \nAnd the crickets kept the tempo while the night got late. \n\n[Chorus] \nOh, little me in the countryside, running wild and free, \nSkinny-dipping in the moonlight where the river bends to see. \nEvery scar was a medal from a dare we\u2019d never hide, \nIn the summers of my childhood, where the earth and I collided. \n\n[Bridge] \nDandelion clocks we blew to tell the time, \nEach puff a tiny parachute for wishes we would climb. \nWe swore we\u2019d never grow up, just grow into the land, \nBut the school bus came like autumn and took us by the hand. \n\n[Verse 3] \nNow the fields are sold and paved, the barn\u2019s a memory store, \nBut I still hear the wheat heads whisper, \u201cCome back, come back once more.\u201d \nI close my eyes and I\u2019m ten again, chasing dragonflies, \nWith the smell of rain on dirt roads and the taste of cherry pies. \n\n[Final Chorus] \nOh, little me in the countryside, you\u2019re still inside my chest, \nA pocket full of wonder that the city can\u2019t arrest. \nThough the years roll on like tractors down a dusty county ride, \nI keep those summers in a mason jar where the fireflies never die. \n\n[Outro] \nSo here\u2019s to cicadas, to tire swings and creek-stone slides, \nTo the echo of my laughter in the hollow where time hides. 
\nIf you find that gravel road, kick off your shoes and glide\u2014 \nYou\u2019ll meet little me in the countryside, still barefoot, still wide-eyed."}], "input_metadata": {"row_id": "1773", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "apply-dry-international-house-study", "run_id": "review-hard-single-newspaper-team", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 1 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176645", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I wear a *painted smile* for every passer-by, \na *perfectly symmetrical* face that never cracks. \nBehind me, the *crumbling bricks* sigh in secret. 
\nWhat am I?"}], "input_metadata": {"row_id": "1886", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "connect-describe-hear-rich-day", "run_id": "review-hard-single-newspaper-team", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176710", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "# Workshop Template \n**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* \n**Duration:** 3.5 hours (can be split into two 90-minute sessions) \n**Audience:** Managers, HR professionals, team leads, ERG members, and any employee who influences culture \n\n---\n\n## 1. Welcome & Objectives (15 min)\n- Ice-breaker: \u201cTwo Truths & a Lie\u2014Diversity Edition\u201d \n- *Learning objectives*: By the end of this workshop, participants will be able to: \n 1. Define diversity, equity, inclusion, and belonging (DEIB) in their own words. \n 2. Articulate at least three evidence-based benefits of a diverse workforce. \n 3. 
Identify one action they can take within 30 days to foster inclusion on their team. \n\n---\n\n## 2. *The Data-Driven Case for Diversity* (25 min)\n- Interactive poll: \u201cWhat % performance lift do you think diverse teams achieve?\u201d \n- Mini-lecture with live charts: \n - McKinsey 2023 report: Top-quartile companies for ethnic/gender diversity are **39 % more likely to outperform** on profitability. \n - Harvard study: *Inclusive teams make better business decisions 87 % of the time* and do so twice as fast. \n- Small-group exercise: Match the statistic to the business outcome (revenue, innovation, retention). \n\n---\n\n## 3. *Unconscious Bias & Micro-behaviors* (30 min)\n- 5-minute video: \u201cThe Resume with Two Names\u201d (identical CVs, different callbacks). \n- Bias Busters card game: Participants draw scenario cards and practice interrupting bias in real time. \n- *Key takeaway*: Bias is not a moral failing; it is a neurological shortcut that can be rewired through deliberate habits. \n\n---\n\n## 4. *Inclusive Leadership Toolkit* (40 min)\n- **The 4 A\u2019s Framework**: \n 1. **Awareness** \u2013 Daily 3-minute bias check-in. \n 2. **Amplification** \u2013 Repeat & credit ideas from under-represented voices. \n 3. **Access** \u2013 Rotate high-visibility assignments. \n 4. **Accountability** \u2013 Track inclusion metrics on team dashboards. \n- Role-play: \u201cThe Missed Promotion\u201d \u2013 diagnose where each \u2018A\u2019 broke down and redesign the conversation. \n\n---\n\n## 5. Action Planning & Commitments (20 min)\n- Personal *Inclusion Commitment Card*: \u201cIn the next 30 days I will\u2026\u201d \n- Pair-share for accountability. \n- QR code to a digital resource hub (TED talks, reading list, policy templates). \n\n---\n\n## 6. Closing & Evaluation (10 min)\n- One-word checkout: \u201cHow do you feel now?\u201d \n- *Post-workshop survey* link (2 min) with prize draw for completion. 
\n\n---\n\n## Materials Checklist\n- Slide deck (with alt-text on all images) \n- Printed Commitment Cards & pens \n- Bias Busters card decks (laminated) \n- Timer & bell \n- Snacks labeled for common dietary restrictions (gluten-free, vegan, halal) \n\n---\n\n## Follow-Up Email Template (send within 24 hrs)\nSubject: Your Inclusion Toolkit + Next Steps \nBody: \nHi team, \nThank you for leaning into today\u2019s conversation on *why diversity wins*. Attached are: \n- Slide deck & resource links \n- Your signed Inclusion Commitment Card (photo reminder) \n- Calendar invite for the optional 30-day check-in circle \n\nRemember: Inclusion is a practice, not a one-time training. Let\u2019s keep the momentum!"}], "input_metadata": {"row_id": "2253", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "miss-compare-bitter-door-hand", "run_id": "review-hard-single-newspaper-team", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176782", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. 
Make sure to highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Your Name]** \n[Street Address] \n[City, State ZIP] \n[Email Address] | [Phone Number] | [LinkedIn URL] \n[Date]\n\n**[Local Party Chairperson\u2019s Name]** \n[Local Party Name] \n[Party Headquarters Address] \n[City, State ZIP]\n\nDear **Chairperson [Last Name]**,\n\nI am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[City/County]**, I have watched our community\u2019s political pulse beat strongest when neighbors gather in common purpose. I want to channel that energy into rallies that not only energize our base but also invite undecided voters to witness the power of our shared vision.\n\n### **Proven Event-Execution Expertise**\nDuring the 2022 mid-term cycle, I spearheaded a **three-county voter-registration caravan** that staged 14 pop-up events in 30 days. \n- **Logistics**: Coordinated permits, portable stages, and ADA-compliant access for 2,000+ attendees per stop. \n- **Volunteer Mobilization**: Recruited and trained 120 volunteers, creating a color-coded \u201cRally-in-a-Box\u201d toolkit now used by three neighboring county parties. \n- **Real-Time Adaptability**: When a thunderstorm threatened our largest outdoor rally, I pivoted to an indoor gymnasium within 90 minutes\u2014live-streaming the switch to maintain momentum and ultimately exceeding projected turnout by 18 %.\n\n### **Data-Driven Rally Strategy**\nI treat every rally as a **micro-campaign laboratory**. \n- **Pre-Event Analytics**: I overlay precinct-level voter-file data with heat-mapping tools to select venues within 0.7 miles of our highest \u201cpersuasion index\u201d households. \n- **On-Site Engagement**: QR-coded wristbands linked to instant sign-up forms increased volunteer recruitment by 34 % compared to clipboard methods. 
\n- **Post-Event ROI**: My post-rally surveys (average 42 % response rate) feed a dashboard that tracks pledge-to-action conversion, allowing us to refine messaging within 48 hours.\n\n### **Coalition-Building & Safety Leadership**\nUnderstanding that rallies must feel **safe and inclusive**, I have: \n- Partnered with local **faith leaders, PTAs, and small-business associations** to co-sponsor events, expanding our demographic reach by 27 %. \n- Completed **FEMA crowd-management certification** and maintain relationships with county emergency management to ensure rapid-response protocols. \n- Implemented a **\u201cCommunity Marshals\u201d** program\u2014training 50 volunteer marshals in de-escalation techniques, cutting incident reports to zero across 22 events.\n\nI would welcome the opportunity to discuss how my blend of grassroots passion, logistical precision, and data fluency can help **[Local Party Name]** turn every rally into a springboard for victory in the upcoming cycle. Thank you for considering my application.\n\n**Respectfully,**\n\n[Your Signature (if submitting hard copy)] \n[Your Printed Name]"}], "input_metadata": {"row_id": "2381", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "select-ready-close-gentle-music", "run_id": "review-hard-single-newspaper-team", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 23 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176857", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", 
"version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "The opposite of youth is not age, but **disillusionment**\u2014the moment when *wonder* is traded for *weariness*, when the *boundless horizon* of possibility collapses into the *narrow corridor* of routine."}], "input_metadata": {"row_id": "2752", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "change-sure-short-meeting-night", "run_id": "review-hard-single-newspaper-team", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.176928", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. 
Use markdown to highlight at least 3 sections in your answer."}, {"role": "assistant", "content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. \n\nInside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. \n\nIf you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**."}], "input_metadata": {"row_id": "2759", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "choose-certain-tidy-minute-hate", "run_id": "review-hard-single-newspaper-team", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 13 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177000", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} 
+{"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*."}, {"role": "assistant", "content": "\ud83c\udfa4 *Yo, it\u2019s the tale of Sir Scam-a-lot, straight outta suburbia!* \ud83c\udfa4 \n\nYo, my name is Dave, I sell insurance in Ohio, \nGot a cubicle, a cat, and a 401(k) that\u2019s kinda fly-o. \nOne Tuesday, phone rings\u2014unknown number, what the heck? \nVoice says, \u201c*Prince Ogbonna the Third* just wrote you a fat check!\u201d \n\nI\u2019m like, \u201cHold up, who dis? I\u2019m whiter than printer paper, \nMy bloodline\u2019s mayo on Wonder Bread\u2014ain\u2019t no royal vapor!\u201d \nDude on the line, accent silkier than lotion, \nSays, \u201cCousin, you\u2019re 37th in line\u2014time to claim your ocean!\u201d \n\n**[Hook]** \n\ud83c\udfb6 *Long lost king of Nigeria, crown sittin\u2019 on my dome,* \n*Got a palace full of leopards and a Wi-Fi that roams!* \n*They need my SSN and a quick wire fee,* \n*But I\u2019m \u2018bout to be big pimpin\u2019 in Abuja, just wait and see!* \ud83c\udfb6 \n\nHe says, \u201cJust send five grand for customs, then the jet lands, \nYou\u2019ll be dripping in so much gold, even your sweat glands bling.\u201d \nI\u2019m sweatin\u2019 like a rotisserie, visions of caviar dreams, \nI Venmo \u201cPrinceNaija69\u201d like I\u2019m funding meme schemes. \n\nNext day, email: \u201cOops, palace goat ate the transfer slip, \nNeed ten more for goat surgery and a diamond-encrusted whip.\u201d \nI hock my lawnmower, my Xbox, and Aunt Linda\u2019s porcelain goose, \nWire cash so fast my bank\u2019s like, \u201cDave, you loose!\u201d \n\n**[Bridge \u2013 Spoken Word]** \nThen the *official royal seal* arrives\u2014 \nIt\u2019s a GIF of Shrek in a crown doing the Macarena. 
\nI\u2019m starting to sense a pattern here\u2026 \n\n**[Final Hook \u2013 Regret Remix]** \n\ud83c\udfb6 *Now I\u2019m the king of my mom\u2019s basement, throne\u2019s a beanbag chair,* \n*Royal decree: never trust a prince who types in all caps\u2014* \n*\u201cURGENT, COUSIN, SEND MORE FLAMINGOS!\u201d* \n*Lesson learned: if the crown\u2019s too shiny, it\u2019s probably aluminum foil, homie!*"}], "input_metadata": {"row_id": "2790", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "try-various-left-back-state", "run_id": "review-hard-single-newspaper-team", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 14 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177074", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*."}, {"role": "assistant", "content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. 
Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, \u201cACHOO\u2014did somebody order a cold?\u201d \n\nOne Tuesday, Gary spotted his dream target: *Bartholomew \u201cBart\u201d Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole \u201cfor the vitamins.\u201d Gary rubbed his microscopic hands together. \u201cPerfect. I\u2019ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!\u201d \n\nBut there was a catch\u2014literally. Gary could *catch* Bart, but he couldn\u2019t *throw* him anywhere. Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, \u201cThis is mutiny! I\u2019m supposed to be the one doing the tossing!\u201d \n\nMeanwhile, Bart\u2019s immune system\u2014led by Captain Phlegm and the Mucus Marines\u2014launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. \n\nDefeated, Gary sat in the crumpled tissue, sulking. \u201cI caught the cold,\u201d he muttered, \u201cbut I can\u2019t throw it anywhere. Worst. Superpower. 
Ever.\u201d \n\nFrom that day forward, Gary changed his business card to read: \u201cProfessional Cold Catcher\u2014No Returns, No Refunds, Absolutely No Throwing.\u201d"}], "input_metadata": {"row_id": "2905", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "come-keep-dry-dangerous-line", "run_id": "review-hard-single-newspaper-team", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177143", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Intro \u2013 harpsichord glitch beat, stylus scratching parchment]** \nYo, it\u2019s 14-something, but my mind\u2019s 3-D printed in gold leaf\u2014 \nI\u2019m the **Renaissance Rapper**, got a quill in one hand and a scalpel in the other, dissectin\u2019 beats like Vesalius dissectin\u2019 spleens. 
\nLet\u2019s get *weird* with the rebirth, baby.\n\n---\n\n### **Section 1 \u2013 The Fresco Flex** \nI drip ultramarine like I\u2019m sponsored by Afghanistan mines, \nMichelangelo on the ceiling, I\u2019m reclining on divine lines. \nAngels doing backflips off the Sistine, they\u2019re hype for my set, \nWhile I tag \u201c*AMOR VINCIT OMNIA*\u201d in invisible ink\u2014wet. \nMy flow\u2019s a trompe-l\u2019\u0153il, looks flat till you tilt your head\u2014 \nThen it\u2019s depth for days, like Brunelleschi\u2019s dome overhead. \nI\u2019m not baroque, I\u2019m pre-baroque, I\u2019m the prototype shock, \nGot patrons in fur-lined sleeves stuffing florins in my sock. \n**I paint in egg tempera so the colors never fade,** \n**But the yolk smells funky\u2014call it *odeur de old crusade*.**\n\n---\n\n### **Section 2 \u2013 The Printing-Press Poltergeist** \nGutenberg ghost in the machine, clanking lead type like chains, \nEvery page a polter-page, every serif a soul that remains. \nI drop a folio, it\u2019s viral before \u201cviral\u201d was a cough, \nMonks in scriptoriums rage-quitting, \u201cBro, that illuminated stuff\u2019s off!\u201d \nI\u2019m the incunabula influencer, my colophon\u2019s a flex, \nGot red-ink rubricators sliding into my DMs\u2014*next*. \n**I rhyme in movable type so my bars can rearrange,** \n**Yesterday I was Petrarch, today I\u2019m deranged.** \nThe press spits paper like a hydra with hiccups, \nEach sheet a protest flyer: \u201cDown with scholastic pick-ups!\u201d\n\n---\n\n### **Section 3 \u2013 The Anatomical Anthem (Parental Advisory: Corpses)** \nNight shift with da Vinci, we\u2019re sketching by candle and moon, \nCadavers on the table like, \u201cDo me a favor\u2014autotune my coccyx soon.\u201d \nI rap in reverse Latin so only the dead understand, \nWhile Leo reverse-engineers a bird-man from a frying pan. 
\n**My beats are ventricles pumping four-four time,** \n**Aorta bass drop\u2014call that circulatory grime.** \nI sample the sound of a scalpel scraping periosteum, \nLayer it over a lute loop\u2014*that\u2019s* my magnum opium. \nGalen\u2019s in the corner, throwing shade like a Greek tragedy, \nBut I\u2019m Harvey before Harvey, already rapping blood circulation majestically.\n\n---\n\n**[Outro \u2013 fading lute arpeggio, distant cannon fire from Constantinople]** \nSo remember me when you\u2019re doom-scrolling on glass, \nI was the era that turned corpses into class. \nI\u2019m the **Renaissance Rapper**, my ghost still tags in chalk, \nLeaving QR codes on cathedral walls\u2014scan it, you\u2019ll squawk. \nRebirth never dies, it just pixelates and loops, \nNow if you\u2019ll excuse me, I\u2019ve got a date with Lucretius in the soup."}], "input_metadata": {"row_id": "3071", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "can-nice-home-anger-story", "run_id": "review-hard-single-newspaper-team", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 17 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177216", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Summarize the history of Japan. 
Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*"}, {"role": "assistant", "content": "*J\u014dmon* hunter-gatherers (14,000 BCE) gave way to *Yayoi* rice farmers (300 BCE) who forged bronze and iron. \nThe *Yamato* court unified clans (3rd\u20137th c.), adopting *Shinto* and Chinese script. \nPrince *Sh\u014dtoku*\u2019s 17-article constitution (604) and the Taika Reforms (645) centralized power. \nHeian aristocrats (794\u20131185) produced *The Tale of Genji* while samurai rose in the provinces. \nMinamoto no Yoritomo founded the *Kamakura* shogunate (1185), introducing warrior rule. \nMongol invasions (1274, 1281) were repelled by \u201cdivine winds\u201d (*kamikaze*). \nAshikaga *Muromachi* shogunate (1336\u20131573) saw Zen gardens and \u014cnin War chaos. \nOda Nobunaga, Toyotomi Hideyoshi, and Tokugawa Ieyasu ended the *Sengoku* \u201cwarring states\u201d era. \nTokugawa *Edo* shogunate (1603\u20131868) isolated Japan, fostering *ukiyo-e* and *kabuki*. \nCommodore Perry\u2019s *Black Ships* (1853) forced the Meiji Restoration (1868), modernizing the empire. \nVictories in the *Sino-Japanese* (1894\u201395) and *Russo-Japanese* (1904\u201305) wars expanded territory. \nMilitarism led to *Pearl Harbor* (1941) and WWII defeat (1945); the U.S. occupation drafted a pacifist constitution. \nPost-war *economic miracle* made Japan the world\u2019s second-largest economy until the 1990s *bubble*. 
\nToday, Japan balances *constitutional monarchy* with cutting-edge technology and soft-power culture."}], "input_metadata": {"row_id": "3453", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "install-common-poor-job-house", "run_id": "review-hard-single-newspaper-team", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177295", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. 
Example: *This is a highlighted phrase*."}, {"role": "assistant", "content": "*John* in *De Smet* \nQuaker hat *blows off*\u2014chases \n*Silent* *scream*\u2014\u201cCome back!\u201d"}], "input_metadata": {"row_id": "3549", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "confirm-sit-brave-late-fact", "run_id": "review-hard-single-newspaper-team", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177361", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with \"*\", like *highlighted text*."}, {"role": "assistant", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. 
In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011.\n\nThe evening\u2019s proceedings underscore the Academy\u2019s enduring commitment to *artistic integrity* and *technical innovation*. Among the most celebrated achievements are:\n\n\u2022 *Album of the Year* \u2013 awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. \n\u2022 *Record of the Year* \u2013 bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. \n\u2022 *Best New Artist* \u2013 conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music.\n\nThroughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. 
The Academy further acknowledges the *behind-the-scenes luminaries*\u2014engineers, mixers, and mastering specialists\u2014whose *invisible expertise* transforms raw inspiration into *audible transcendence*.\n\nIn closing, the 54th Annual Grammy Awards reaffirm the Recording Academy\u2019s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music."}], "input_metadata": {"row_id": "3629", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "lead-quiet-poor-fine-number", "run_id": "review-hard-single-newspaper-team", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177428", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*."}, {"role": "assistant", "content": "# 10 Surprising Facts About the Dutch Language That Will Make You Say \u201cWauw!\u201d\n\nIf you think Dutch is just \u201cGerman with a sore throat,\u201d think again. From tongue-twisting consonant clusters to words that sound like they were invented by a stand-up comedian, Dutch is a linguistic playground. 
Below are ten facts that prove the language is far more colorful than its reputation for windmills and wooden shoes.\n\n---\n\n### 1. *The longest word in Dutch is 53 letters long* \nMeet **\u201ckindercarnavalsoptochtvoorbereidingswerkzaamhedenplan\u201d**\u2014a single noun describing the preparation work for a children\u2019s carnival parade. Dutch loves to glue words together, so once you master the basics you can create your own kilometer-long compounds on the fly.\n\n---\n\n### 2. Dutch has three genders, but only two articles \nGrammatically, nouns are masculine, feminine, or neuter. In everyday speech, however, masculine and feminine collapsed into a single \u201ccommon\u201d gender, leaving *\u201cde\u201d* for common and *\u201chet\u201d* for neuter. The result? A never-ending guessing game for learners.\n\n---\n\n### 3. *The Dutch alphabet ends with \u201cij,\u201d not \u201cz\u201d* \nSchoolchildren recite the ABC and finish with \u201c\u2026x, y, ij, z.\u201d The digraph *ij* is so common (think *hij* = he, *mijn* = my) that it\u2019s treated almost like a separate letter. On old typewriters, *ij* even had its own key.\n\n---\n\n### 4. Dutch is the closest major language to English \nFrisian is technically closer, but among languages with tens of millions of speakers, Dutch wins. Compare: \n- English *\u201cbread\u201d* \u2192 Dutch *\u201cbrood\u201d* \n- English *\u201cgreen\u201d* \u2192 Dutch *\u201cgroen\u201d* \nShared roots mean you can read simple Dutch signs long before you\u2019ve cracked a single grammar book.\n\n---\n\n### 5. *Dutch gave New York its first words* \nBefore it was New York, it was New Amsterdam. That\u2019s why we say *\u201ccookie\u201d* (from Dutch *koekje*) instead of *\u201cbiscuit,\u201d* and *\u201cboss\u201d* (from *baas*) instead of *\u201cmaster.\u201d* Wall Street? It once ran alongside the *\u201cwal\u201d* (rampart) of the city wall.\n\n---\n\n### 6. 
The word *\u201cgezellig\u201d* has no exact English translation* \nRoughly it means \u201ccozy,\u201d \u201cconvivial,\u201d or \u201cthe warm feeling of being with friends,\u201d but none of those capture the full vibe. A caf\u00e9 can be *gezellig*, a conversation can be *gezellig*, even a spreadsheet can be *gezellig* if the team is laughing.\n\n---\n\n### 7. Dutch spelling changed overnight in 1996 \nThe *\u201cGroene Boekje\u201d* (Green Booklet) reforms simplified thousands of words. *\u201cpotatos\u201d* became *\u201caardappelen\u201d* and *\u201ccat\u2019s\u201d* became *\u201ckatten.\u201d* Overnight, every dictionary, street sign, and Scrabble board in the country was technically out of date.\n\n---\n\n### 8. *Dutch is spoken on six continents* \nBesides the Netherlands and Belgium, it\u2019s an official language in Suriname, Aruba, Cura\u00e7ao, and Sint Maarten. Thanks to emigration, you\u2019ll also find active Dutch-speaking communities in Canada, Australia, Argentina\u2014even South Africa, where Afrikaans is a daughter language.\n\n---\n\n### 9. The Dutch love diminutives \nAdd *-je* to almost any noun and you instantly shrink it: *\u201chuis\u201d* (house) \u2192 *\u201chuisje\u201d* (little house). It\u2019s not just about size; diminutives add affection. A *\u201cbiertje\u201d* isn\u2019t a small beer\u2014it\u2019s an invitation to relax.\n\n---\n\n### 10. *Dutch is the language of the world\u2019s oldest multinational* \nThe Vereenigde Oostindische Compagnie (VOC) was founded in 1602 and issued the planet\u2019s first publicly traded stock. Its ledgers, contracts, and ship logs were all written in Dutch, making the language the original tongue of global capitalism.\n\n---\n\nReady to dive in? 
Start with *hallo* and *dank je wel*, and you\u2019ll already be halfway to *gezellig*."}], "input_metadata": {"row_id": "3644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "wait-buy-estimate-private-place", "run_id": "review-hard-single-newspaper-team", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 39 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177501", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "Gideon has several good options for his surplus:\n\n1. **Sell it at the local farmers\u2019 market or to a wholesaler** to turn the extra produce into cash that can cover next season\u2019s seed, fertilizer, and equipment costs. \n2. **Store some of the surplus in a community grain silo** so he can sell later when prices are higher. \n3. **Donate part of the surplus to the regional food bank**, helping families in need and possibly qualifying for a small tax deduction. \n4. **Use the surplus as livestock feed** for his own animals, cutting down on purchased feed expenses. 
\n\n*The most profitable short-term choice is usually selling the surplus immediately after harvest, when demand is strong and prices are favorable.*"}], "input_metadata": {"row_id": "2515", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "invent-must-suppose-available-student", "cohort_id": "buy-assume-available-various-point", "rollout_id": "disassemble-left-low-national-newspaper", "run_id": "review-hard-single-newspaper-team", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:32:47.177579", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 67676} +{"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}, {"role": "assistant", "content": "# Outline: The History of Yemeni Coffee\n\n1. **Introduction: Why Yemeni Coffee Matters** \n - Thesis: Yemeni coffee is not merely a beverage but a cultural, economic, and geopolitical force that shaped global history. \n - Scope: From its mythical discovery in 9th-century Yemen to its modern revival.\n\n2. **Pre-Coffee Yemen: The Land, the People, and the Plant** \n - Geography of the Yemeni highlands and terraced agriculture. \n - Indigenous *Coffea arabica* varieties and early cultivation techniques. 
\n - *The myth of the goatherd Kaldi and the dancing goats*.\n\n3. **The Sufi Spark: Coffee as a Spiritual Elixir** \n - 15th-century Sufi monasteries in Mocha and Taiz. \n - *Qahwa as a tool for nocturnal dhikr rituals*. \n - Early roasting, grinding, and brewing methods.\n\n4. **Mocha: The Birthplace of a Global Commodity** \n - Rise of the port city of Mocha (Al-Mukha) in the 16th century. \n - *The Mocha trade network linking Yemen, Egypt, and the Ottoman Empire*. \n - Coffeehouses as centers of intellectual and political discourse.\n\n5. **The Ottoman Grip and the First Coffee Monopoly** \n - 1536: Ottoman conquest of Yemen and control of coffee exports. \n - *The ban on fertile beans to prevent cultivation elsewhere*. \n - Smuggling and the first cracks in the monopoly.\n\n6. **The Dutch Heist and the Global Spread of Coffee** \n - 1616: Dutch traders smuggle Yemeni beans to Batavia (Java). \n - *The birth of Dutch colonial coffee plantations*. \n - How Mocha\u2019s loss became the world\u2019s gain.\n\n7. **Coffeehouses of Cairo and Istanbul: Yemen\u2019s Cultural Export** \n - 1554: First Ottoman coffeehouse in Istanbul. \n - *The \u201cSchools of the Wise\u201d in Cairo\u2019s Khan el-Khalili*. \n - Coffee as a vector for Yemeni poetry, music, and Sufi thought.\n\n8. **The European Coffee Craze and the Mocha Brand** \n - 17th-century European coffeehouses and the \u201cMocha\u201d label. \n - *The false association of Mocha with chocolate flavor*. \n - Yemeni coffee\u2019s role in the Enlightenment.\n\n9. **Colonial Rivalries and the Decline of Yemeni Dominance** \n - 18th-century: French, British, and Dutch plantations outcompete Yemen. \n - *The collapse of Mocha\u2019s monopoly and the rise of Caribbean and Latin American coffee*. \n - Impact on Yemeni economy and society.\n\n10. **Yemeni Coffee in the Age of Empire** \n - 19th-century British and Ottoman interests in Yemeni coffee. 
\n - *The introduction of new cash crops (qat) and their effect on coffee cultivation*. \n - Decline in quality and quantity due to neglect and conflict.\n\n11. **The Cold War and the Coffee Crisis** \n - 20th-century Yemeni division (North/South) and Soviet/US influences. \n - *The shift from coffee to qat in rural livelihoods*. \n - Drought, pests, and civil wars further erode production.\n\n12. **The Specialty Coffee Renaissance: Yemen\u2019s Return to Glory** \n - 21st-century global demand for single-origin, heirloom coffees. \n - *The discovery of \u201cYemenia\u201d varietals and their genetic uniqueness*. \n - NGOs and cooperatives reviving traditional farming.\n\n13. **Modern Challenges: War, Climate, and Market Access** \n - 2015\u2013present: Saudi-led conflict and blockade. \n - *The siege of Hodeidah port and its impact on coffee exports*. \n - Climate change and water scarcity in the highlands.\n\n14. **Yemeni Coffee in the Cup: Flavor, Ritual, and Identity** \n - Tasting notes: dried fruit, spice, and wine-like acidity. \n - *The Yemeni coffee ceremony (bunna) and its social significance*. \n - Diaspora communities keeping traditions alive abroad.\n\n15. **Conclusion: The Future of Yemeni Coffee** \n - Balancing heritage with innovation. \n - *The role of Yemeni coffee in post-war reconstruction*. 
\n - Final reflection: From the highlands of Sana\u2019a to the cups of Brooklyn, Yemeni coffee remains a bridge between past and future."}], "input_metadata": {"row_id": "1307", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "recognize-solid-wet-service-back", "run_id": "will-democratic-political-letter-wish", "ground_truth": "15", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 30 highlighted sections (required: 15)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.309739", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". For example: *3 years of experience*."}, {"role": "assistant", "content": "Dear Brewed Awakening,\n\nI am the *early-bird steam* that curls above your grinders, \nthe *latte-art heart* that beats in every porcelain curve. \nMy apron is a canvas of *espresso freckles*, \neach dot a story told in *3 years of experience* \nfrom bean to cup, from dawn rush to dusk hush.\n\nI know the *symphony of the steam wand*\u2014 \nits hiss, its sigh, its whispered crema secrets\u2014 \nand how to coax *microfoam galaxies* \ninto orbit around a *double-shot comet*. 
\nI\u2019ve tamped, poured, swirled, and smiled \nthrough *Saturday lines out the door*, \nkeeping tempers cool as the milk I *steam to 140\u00b0*.\n\nI speak fluent *small-town regular*: \n\u201cMedium drip, room for kindness?\u201d \nI remember that *Mrs. Alvarez likes oat milk*, \nthat *Mr. Chen takes two raw sugars*, \nand that *your dog, Scout, prefers puppuccinos*.\n\nI sweep, I stock, I *balance the till to the penny*, \nbut mostly I *brew community*\u2014 \none *extra-hot americano* at a time. \nHire me, and the morning light \nwill find me already *wiping down tables*, \nalready *saving the last blueberry scone* \nfor the kid who always forgets his wallet.\n\nWith *grounds in my veins* and *crema in my smile*, \nI remain,\n\nYour future barista, \nMaya"}], "input_metadata": {"row_id": "1644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "be-express-major-wrong-information", "run_id": "will-democratic-political-letter-wish", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.309837", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. 
Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}, {"role": "assistant", "content": "Hey friends! Grab a coffee and let\u2019s talk about something that always blows my mind: *how much we all have in common, even when we look nothing alike*. I was scrolling through wildlife photos the other night and realized that a jellyfish and I are basically running the same basic software\u2014just with different packaging. So here\u2019s a laid-back tour of five \u201cwait, we do that too?!\u201d moments across the animal kingdom.\n\n*We all need our beauty sleep* \nTurns out, every animal with a nervous system\u2014from fruit flies to blue whales\u2014shares a version of sleep. Crows nap on tree branches, octopuses snooze in coconut-shell beds, and even roundworms have \u201clethargus,\u201d which is basically worm REM. Scientists think sleep is so ancient that the last common ancestor of all animals already had it. So next time you hit snooze, remember you\u2019re honoring a 700-million-year-old tradition.\n\n*Love songs come in many dialects* \nWhether you\u2019re a human humming in the shower or a humpback whale belting out 30-minute ballads, *we all use rhythm and pitch to flirt and bond*. Frogs croak in choruses, nightingales remix their playlists nightly, and male mice sing ultrasonic \u201cpickup lines\u201d too high for us to hear. Same goal, different instruments.\n\n*Grandma knew best: family matters* \nElephants babysit their nieces, wolves bring back take-out for auntie, and *clownfish dads are the ultimate stay-at-home fathers*. Across species, kinship networks boost survival. Even bacteria split into \u201csibling\u201d lineages that cooperate\u2014so the next time your cousin crashes on your couch, call it evolutionary strategy.\n\n*Stress looks the same on everyone* \nWhen life gets spicy, our bodies flip the same ancient switch. 
*Cortisol floods fish, birds, and humans alike*, making hearts race and muscles twitch. Zebra finches lose their appetite during tough times, and stressed-out bees become pessimistic (yes, they can test bee mood). So if you\u2019re doom-scrolling at 2 a.m., know that a stressed squirrel in your backyard totally gets you.\n\n*Play isn\u2019t optional\u2014it\u2019s homework for life* \nOtters juggle rocks, dogs zoom in circles, and young ravens snowboard down snowy roofs using jar lids. *Play builds brains and social skills across the board*. Researchers have spotted reptiles \u201cwrestling\u201d for fun and octopuses repeatedly bouncing Lego blocks like toddlers. If it has neurons, it probably needs recess.\n\nNext time you feel alone in the universe, remember that somewhere a parrot is probably preening its best friend, a bat is sharing food with a hungry roost-mate, and a tiny nematode is curling up for a nap. We\u2019re all just variations on a very old, very shared theme."}], "input_metadata": {"row_id": "1646", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "know-slow-gentle-business-family", "run_id": "will-democratic-political-letter-wish", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 10 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.309911", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 
0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format:\n*section name*"}, {"role": "assistant", "content": "*Executive Summary*\n\nBremen Sweatshirts GmbH will launch in Q3-2025 as a premium, eco-centric sweatshirt brand rooted in Bremen\u2019s maritime culture and progressive sustainability ethos. Our mission is to become the city\u2019s first climate-positive apparel label, producing 25,000 sweatshirts annually from 100 % recycled or organic fibers, sold through a hybrid D2C e-commerce platform and two flagship stores (Schnoor & Viertel). We target conscious millennials and Gen-Z consumers (18-35) across Germany, Scandinavia and the Benelux, projecting \u20ac2.4 M revenue and \u20ac480 k EBITDA by Year 3. Initial capital requirement: \u20ac350 k (40 % founder equity, 35 % green-tech grants, 25 % silent partners). Competitive edge: Bremen-exclusive designs, closed-loop recycling program, and blockchain-verified supply chain transparency.\n\n*Market Analysis*\n\nBremen\u2019s apparel market is valued at \u20ac310 M (Statista 2023), with streetwear growing 8 % YoY. Surveys (n = 1,200) reveal 64 % of local students/professionals willing to pay 15-20 % more for locally-made sustainable sweatshirts. Competitor scan: global fast-fashion lacks traceability; niche German labels (e.g., Armedangels) are not Bremen-focused. Opportunity: fill the \u201clocal pride + sustainability\u201d gap. Online search volume for \u201cBremen hoodie\u201d rose 42 % post-pandemic. Export potential: Hamburg, Copenhagen and Amsterdam show similar demand patterns.\n\n*Product & Sustainability Strategy*\n\nCore line: unisex mid-weight (320 gsm) brushed-fleece sweatshirts in 8 colorways inspired by Bremen\u2019s Hanseatic palette (Weser-sand, Harbor-navy, Speicher-red). 
Features: raglan sleeves, recycled-polyester thread, nickel-free metal eyelets, QR-coded NFC tag linking to product passport. Circular model: take-back bins in stores; worn items shredded and respun into new yarn at our partner facility in Delmenhorst, achieving 85 % material recapture. Certifications: GOTS, Fair Wear, ClimatePartner. Packaging: algae-based mailers, zero plastic. Launch capsule: 3 graphic drops/year tied to Bremen events (Freimarkt, Schlachte-Zauber, Christmas market).\n\n*Go-to-Market & Sales Channels*\n\nPhase 1 (Months 1-6): Pop-up container store at \u00dcberseestadt container village + Instagram/TikTok teaser campaigns (#BremenHoodieChallenge). Phase 2: Flagship store (Schnoor, 60 m\u00b2) with AR mirror for virtual try-on; second store (Viertel) Year 2. E-commerce: Shopify Plus storefront with carbon-offset checkout plugin. Wholesale: curated selection in 12 concept stores across Bremen, Hamburg, Berlin. Collaborations: limited-edition prints with local street artists and Werder Bremen fan club. Influencer seeding: 50 micro-creators (5-50 k followers) receive personalized sweatshirts. KPIs: CAC \u2264 \u20ac18, online conversion \u2265 3.5 %, repeat purchase rate \u2265 35 %.\n\n*Operations & Financial Plan*\n\nManufacturing: cut-and-sew partner in Gda\u0144sk (Poland) 180 km away, audited for fair labor; MOQ 500 pcs/style. Lead time 45 days. Inventory: 70 % core SKUs, 30 % limited drops; rolling 8-week demand forecast using Inventory Planner. Team: 4 FTEs Year 1 (Founder-CEO, Creative Director, Ops Manager, Community Manager) scaling to 12 FTEs Year 3. Financials: Year 1 revenue \u20ac600 k, COGS 38 %, gross margin 62 %, break-even Month 14. Funding use: \u20ac150 k inventory, \u20ac80 k store fit-out, \u20ac60 k marketing, \u20ac40 k working capital, \u20ac20 k legal/IP. 
Exit options: acquisition by sustainable fashion conglomerate or B-Corp IPO in 5-7 years."}], "input_metadata": {"row_id": "167", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "be-explain-wet-anger-side", "run_id": "will-democratic-political-letter-wish", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.309981", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "**The \u201cHow-Are-We-Still-Married?\u201d Marriage Report Card** \n*For two moms who once promised \u201cin sickness and in health\u201d but now mostly promise \u201cin Target runs and in toddler tantrums.\u201d*\n\n---\n\n**Overall Grade: ___/10** \n(If you need to Google what 10 minus 3.5 is, just write \u201c*We tried*.\u201d)\n\n---\n\n### 1. *Communication* \n**Rating: \ud83d\udcde\ud83d\udcac\ud83e\udd37\u200d\u2640\ufe0f** \n- **Excellent** = We finish each other\u2019s sentences\u2026 with eye-rolls. \n- **Good** = We use full sentences before 7 a.m. 
(caffeinated). \n- **Fair** = We communicate exclusively via passive-aggressive Post-it notes on the fridge. \n- **Poor** = We\u2019ve resorted to interpretive dance and Morse code via toddler screams.\n\n---\n\n### 2. *Division of Labor* \n**Rating: \ud83e\uddf9\ud83c\udf7c\u2696\ufe0f** \n- **Excellent** = We\u2019ve achieved a 50/50 split. (50% me, 50% her, 100% resentment). \n- **Good** = We alternate who gets to *\u201crelax\u201d* by doing the dishes. \n- **Fair** = One of us \u201crests\u201d while the other \u201csupervises\u201d the kids by doom-scrolling. \n- **Poor** = We\u2019ve agreed the house is just a very expensive art installation called *\u201cLaundry Mountain.\u201d*\n\n---\n\n### 3. *Romance* \n**Rating: \ud83d\udc95\ud83c\udf77\ud83d\ude34** \n- **Excellent** = Date night involves *both* of us staying awake past 9 p.m. \n- **Good** = We flirt by texting each other memes about how tired we are. \n- **Fair** = Foreplay is debating whose turn it is to unclog the drain. \n- **Poor** = Our most intimate moment this month was *simultaneously* reaching for the same coffee mug.\n\n---\n\n### 4. *Co-Parenting Synchronization* \n**Rating: \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\ud83e\udd1d\ud83e\udd2f** \n- **Excellent** = We tag-team tantrums like WWE champions. \n- **Good** = We only *occasionally* undermine each other with contradictory snack policies. \n- **Fair** = One of us is the \u201cfun\u201d mom, the other is the \u201cwhy is there glitter in the toaster\u201d mom. \n- **Poor** = The kids have started a betting pool on who\u2019ll cave first to *\u201cjust 5 more minutes of screen time.\u201d*\n\n---\n\n### 5. *Shared Goals* \n**Rating: \ud83c\udfaf\ud83d\udcad\ud83d\udecb\ufe0f** \n- **Excellent** = We\u2019re aligned on *retiring to a house with a dishwasher that actually works*. \n- **Good** = We\u2019ve agreed to *stop* adding more pets until the kids can vote. 
\n- **Fair** = Our shared goal is *surviving* until the youngest turns 18. \n- **Poor** = We\u2019re *united* in our confusion about why we thought two kids, a dog, and a sourdough starter was a good idea.\n\n---\n\n**Final Notes:** \n*If you scored 8+:* Congratulations, you\u2019re either lying or unicorns. \n*If you scored 5-7:* Normal. You\u2019re in the trenches, but at least you\u2019re *in the same trench*. \n*If you scored <5:* Congratulations, you\u2019ve unlocked the bonus level: *\u201cCouples Therapy: Now With More Sticker Charts!\u201d*\n\n**Remember:** Marriage is like assembling IKEA furniture. There\u2019s always a missing piece, the instructions are in Swedish, and you\u2019ll inevitably threaten to throw a hex key at each other. But hey\u2014you built *something*."}], "input_metadata": {"row_id": "168", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "mention-represent-do-unknown-money", "run_id": "will-democratic-political-letter-wish", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 52 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310054", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. 
Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*."}, {"role": "assistant", "content": "*Sun-Bleached Days on Willow Creek Road*\n\n[Verse 1] \nBare feet on gravel, dust in my hair, \nChasing the echo of cicadas in the air. \nMason jar lightning, fireflies in flight, \nGrandma\u2019s porch swing creaking through the night. \nSweet tea sweating on a splintered rail, \nCorn silk drifting like a golden veil. \nDirt on my knees from the garden rows, \nWhere sunflowers bowed to the wind that blows.\n\n[Chorus] \nSun-bleached days on Willow Creek Road, \nRunning through fields where the wild oats grow. \nCreek water laughter, skipping stones, \nEvery heartbeat stitched in summer\u2019s bones. \nI still hear the hush of the cottonwood trees, \nWhispering secrets in the honey-bee breeze. \nThose sun-bleached days on Willow Creek Road\u2014 \nThe map of my soul that I\u2019ll always know.\n\n[Verse 2] \nTractor tire kingdom, crowned with clover, \nKing of the hayloft, four-leaf luck over. \nGrandpa\u2019s straw hat, tilted just right, \nTelling me stories in the porch-light night. \nBlackberry wars with purple-stained grins, \nMama calling supper\u2014screen door violin. \nDirt road diamonds under moonlit skies, \nEvery star a promise in a child\u2019s wide eyes.\n\n[Chorus] \nSun-bleached days on Willow Creek Road, \nRunning through fields where the wild oats grow. \nCreek water laughter, skipping stones, \nEvery heartbeat stitched in summer\u2019s bones. \nI still hear the hush of the cottonwood trees, \nWhispering secrets in the honey-bee breeze. \nThose sun-bleached days on Willow Creek Road\u2014 \nThe map of my soul that I\u2019ll always know.\n\n[Bridge] \nTime rolls on like the red clay creek, \nBut part of me never learned to leave. \nI still taste the salt of the watermelon rind, \nStill feel the pull of the long-gone time. 
\nIf I close my eyes, I\u2019m ten again, \nWith a jar full of stars and a grass-stain grin.\n\n[Final Chorus] \nSun-bleached days on Willow Creek Road, \nWhere the thunderheads bloom and the wheat fields glow. \nCreek water laughter, skipping stones, \nEvery heartbeat stitched in summer\u2019s bones. \nI still hear the hush of the cottonwood trees, \nCarrying me home on the honey-bee breeze. \nThose sun-bleached days on Willow Creek Road\u2014 \nForever the place where my wild heart goes."}], "input_metadata": {"row_id": "1773", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "must-enjoy-hostile-black-media", "run_id": "will-democratic-political-letter-wish", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 1 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310125", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I wear a *painted smile* for every passer-by, \na *perfectly symmetrical* face that never cracks. \nBehind me, the *crumbling bricks* sigh in secret. 
\nWhat am I?"}], "input_metadata": {"row_id": "1886", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "start-messy-friendly-shy-pleasure", "run_id": "will-democratic-political-letter-wish", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310200", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "# Workshop Template \n**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* \n**Duration:** 3.5 hours (can be split into two 90-minute sessions) \n**Audience:** Managers, HR professionals, team leads, ERG members, and any employee who influences culture \n\n---\n\n## 1. Welcome & Objectives (15 min)\n- Ice-breaker: \u201cTwo Truths & a Lie\u2014Diversity Edition\u201d \n- *Learning objectives*: By the end of this workshop, participants will be able to: \n 1. Define diversity, equity, inclusion, and belonging (DEIB) in their own words. \n 2. Articulate at least three evidence-based benefits of a diverse workforce. \n 3. 
Identify one action they can take within 30 days to foster inclusion on their team. \n\n---\n\n## 2. *The Data-Driven Case for Diversity* (25 min)\n- Interactive poll: \u201cWhat % performance lift do you think diverse teams achieve?\u201d \n- Mini-lecture with live charts: \n - McKinsey 2023 report: Top-quartile companies for ethnic/gender diversity are **39 % more likely to outperform** on profitability. \n - Harvard study: *Inclusive teams make better business decisions 87 % of the time* and do so twice as fast. \n- Small-group exercise: Match the statistic to the business outcome (revenue, innovation, retention). \n\n---\n\n## 3. *Unconscious Bias & Micro-behaviors* (30 min)\n- 5-minute video: \u201cThe Resume with Two Names\u201d (identical CVs, different callbacks). \n- Bias Busters card game: Participants draw scenario cards and practice interrupting bias in real time. \n- *Key takeaway*: Bias is not a moral failing; it is a neurological shortcut that can be rewired through deliberate habits. \n\n---\n\n## 4. *Inclusive Leadership Toolkit* (40 min)\n- **The 4 A\u2019s Framework**: \n 1. **Awareness** \u2013 Daily 3-minute bias check-in. \n 2. **Amplification** \u2013 Repeat & credit ideas from under-represented voices. \n 3. **Access** \u2013 Rotate high-visibility assignments. \n 4. **Accountability** \u2013 Track inclusion metrics on team dashboards. \n- Role-play: \u201cThe Missed Promotion\u201d \u2013 diagnose where each \u2018A\u2019 broke down and redesign the conversation. \n\n---\n\n## 5. Action Planning & Commitments (20 min)\n- Personal *Inclusion Commitment Card*: \u201cIn the next 30 days I will\u2026\u201d \n- Pair-share for accountability. \n- QR code to a digital resource hub (TED talks, reading list, policy templates). \n\n---\n\n## 6. Closing & Evaluation (10 min)\n- One-word checkout: \u201cHow do you feel now?\u201d \n- *Post-workshop survey* link (2 min) with prize draw for completion. 
\n\n---\n\n## Materials Checklist\n- Slide deck (with alt-text on all images) \n- Printed Commitment Cards & pens \n- Bias Busters card decks (laminated) \n- Timer & bell \n- Snacks labeled for common dietary restrictions (gluten-free, vegan, halal) \n\n---\n\n## Follow-Up Email Template (send within 24 hrs)\nSubject: Your Inclusion Toolkit + Next Steps \nBody: \nHi team, \nThank you for leaning into today\u2019s conversation on *why diversity wins*. Attached are: \n- Slide deck & resource links \n- Your signed Inclusion Commitment Card (photo reminder) \n- Calendar invite for the optional 30-day check-in circle \n\nRemember: Inclusion is a practice, not a one-time training. Let\u2019s keep the momentum!"}], "input_metadata": {"row_id": "2253", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "measure-current-loud-level-member", "run_id": "will-democratic-political-letter-wish", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310273", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. 
Make sure to highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Your Name]** \n[Street Address] \n[City, State ZIP] \n[Email Address] | [Phone Number] | [LinkedIn URL] \n[Date]\n\n**[Local Party Chairperson\u2019s Name]** \n[Local Party Name] \n[Party Headquarters Address] \n[City, State ZIP]\n\nDear **Chairperson [Last Name]**,\n\nI am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[City/County]**, I have watched our community\u2019s political pulse beat strongest when neighbors gather in common purpose. I want to channel that energy into rallies that not only energize our base but also invite undecided voters to witness the power of our shared vision.\n\n### **Proven Event-Execution Expertise**\nDuring the 2022 mid-term cycle, I spearheaded a **three-county voter-registration caravan** that staged 14 pop-up events in 30 days. \n- **Logistics**: Coordinated permits, portable stages, and ADA-compliant access for 2,000+ attendees per stop. \n- **Volunteer Mobilization**: Recruited and trained 120 volunteers, creating a color-coded \u201cRally-in-a-Box\u201d toolkit now used by three neighboring county parties. \n- **Real-Time Adaptability**: When a thunderstorm threatened our largest outdoor rally, I pivoted to an indoor gymnasium within 90 minutes\u2014live-streaming the switch to maintain momentum and ultimately exceeding projected turnout by 18 %.\n\n### **Data-Driven Rally Strategy**\nI treat every rally as a **micro-campaign laboratory**. \n- **Pre-Event Analytics**: I overlay precinct-level voter-file data with heat-mapping tools to select venues within 0.7 miles of our highest \u201cpersuasion index\u201d households. \n- **On-Site Engagement**: QR-coded wristbands linked to instant sign-up forms increased volunteer recruitment by 34 % compared to clipboard methods. 
\n- **Post-Event ROI**: My post-rally surveys (average 42 % response rate) feed a dashboard that tracks pledge-to-action conversion, allowing us to refine messaging within 48 hours.\n\n### **Coalition-Building & Safety Leadership**\nUnderstanding that rallies must feel **safe and inclusive**, I have: \n- Partnered with local **faith leaders, PTAs, and small-business associations** to co-sponsor events, expanding our demographic reach by 27 %. \n- Completed **FEMA crowd-management certification** and maintain relationships with county emergency management to ensure rapid-response protocols. \n- Implemented a **\u201cCommunity Marshals\u201d** program\u2014training 50 volunteer marshals in de-escalation techniques, cutting incident reports to zero across 22 events.\n\nI would welcome the opportunity to discuss how my blend of grassroots passion, logistical precision, and data fluency can help **[Local Party Name]** turn every rally into a springboard for victory in the upcoming cycle. Thank you for considering my application.\n\n**Respectfully,**\n\n[Your Signature (if submitting hard copy)] \n[Your Printed Name]"}], "input_metadata": {"row_id": "2381", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "suppose-real-large-moment-network", "run_id": "will-democratic-political-letter-wish", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 23 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310345", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", 
"version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "The opposite of youth is not age, but **disillusionment**\u2014the moment when *wonder* is traded for *weariness*, when the *boundless horizon* of possibility collapses into the *narrow corridor* of routine."}], "input_metadata": {"row_id": "2752", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "wish-mean-become-shy-person", "run_id": "will-democratic-political-letter-wish", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310419", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. 
Use markdown to highlight at least 3 sections in your answer."}, {"role": "assistant", "content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. \n\nInside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. \n\nIf you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**."}], "input_metadata": {"row_id": "2759", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "need-buy-sweet-house-news", "run_id": "will-democratic-political-letter-wish", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 13 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310508", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} 
+{"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*."}, {"role": "assistant", "content": "\ud83c\udfa4 *Yo, it\u2019s the tale of Sir Scam-a-lot, straight outta suburbia!* \ud83c\udfa4 \n\nYo, my name is Dave, I sell insurance in Ohio, \nGot a cubicle, a cat, and a 401(k) that\u2019s kinda fly-o. \nOne Tuesday, phone rings\u2014unknown number, what the heck? \nVoice says, \u201c*Prince Ogbonna the Third* just wrote you a fat check!\u201d \n\nI\u2019m like, \u201cHold up, who dis? I\u2019m whiter than printer paper, \nMy bloodline\u2019s mayo on Wonder Bread\u2014ain\u2019t no royal vapor!\u201d \nDude on the line, accent silkier than lotion, \nSays, \u201cCousin, you\u2019re 37th in line\u2014time to claim your ocean!\u201d \n\n**[Hook]** \n\ud83c\udfb6 *Long lost king of Nigeria, crown sittin\u2019 on my dome,* \n*Got a palace full of leopards and a Wi-Fi that roams!* \n*They need my SSN and a quick wire fee,* \n*But I\u2019m \u2018bout to be big pimpin\u2019 in Abuja, just wait and see!* \ud83c\udfb6 \n\nHe says, \u201cJust send five grand for customs, then the jet lands, \nYou\u2019ll be dripping in so much gold, even your sweat glands bling.\u201d \nI\u2019m sweatin\u2019 like a rotisserie, visions of caviar dreams, \nI Venmo \u201cPrinceNaija69\u201d like I\u2019m funding meme schemes. \n\nNext day, email: \u201cOops, palace goat ate the transfer slip, \nNeed ten more for goat surgery and a diamond-encrusted whip.\u201d \nI hock my lawnmower, my Xbox, and Aunt Linda\u2019s porcelain goose, \nWire cash so fast my bank\u2019s like, \u201cDave, you loose!\u201d \n\n**[Bridge \u2013 Spoken Word]** \nThen the *official royal seal* arrives\u2014 \nIt\u2019s a GIF of Shrek in a crown doing the Macarena. 
\nI\u2019m starting to sense a pattern here\u2026 \n\n**[Final Hook \u2013 Regret Remix]** \n\ud83c\udfb6 *Now I\u2019m the king of my mom\u2019s basement, throne\u2019s a beanbag chair,* \n*Royal decree: never trust a prince who types in all caps\u2014* \n*\u201cURGENT, COUSIN, SEND MORE FLAMINGOS!\u201d* \n*Lesson learned: if the crown\u2019s too shiny, it\u2019s probably aluminum foil, homie!*"}], "input_metadata": {"row_id": "2790", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "would-be-warm-company-camera", "run_id": "will-democratic-political-letter-wish", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 14 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310580", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*."}, {"role": "assistant", "content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. 
Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, \u201cACHOO\u2014did somebody order a cold?\u201d \n\nOne Tuesday, Gary spotted his dream target: *Bartholomew \u201cBart\u201d Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole \u201cfor the vitamins.\u201d Gary rubbed his microscopic hands together. \u201cPerfect. I\u2019ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!\u201d \n\nBut there was a catch\u2014literally. Gary could *catch* Bart, but he couldn\u2019t *throw* him anywhere. Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, \u201cThis is mutiny! I\u2019m supposed to be the one doing the tossing!\u201d \n\nMeanwhile, Bart\u2019s immune system\u2014led by Captain Phlegm and the Mucus Marines\u2014launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. \n\nDefeated, Gary sat in the crumpled tissue, sulking. \u201cI caught the cold,\u201d he muttered, \u201cbut I can\u2019t throw it anywhere. Worst. Superpower. 
Ever.\u201d \n\nFrom that day forward, Gary changed his business card to read: \u201cProfessional Cold Catcher\u2014No Returns, No Refunds, Absolutely No Throwing.\u201d"}], "input_metadata": {"row_id": "2905", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "disassemble-give-final-huge-lot", "run_id": "will-democratic-political-letter-wish", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310650", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Intro \u2013 harpsichord glitch beat, stylus scratching parchment]** \nYo, it\u2019s 14-something, but my mind\u2019s 3-D printed in gold leaf\u2014 \nI\u2019m the **Renaissance Rapper**, got a quill in one hand and a scalpel in the other, dissectin\u2019 beats like Vesalius dissectin\u2019 spleens. 
\nLet\u2019s get *weird* with the rebirth, baby.\n\n---\n\n### **Section 1 \u2013 The Fresco Flex** \nI drip ultramarine like I\u2019m sponsored by Afghanistan mines, \nMichelangelo on the ceiling, I\u2019m reclining on divine lines. \nAngels doing backflips off the Sistine, they\u2019re hype for my set, \nWhile I tag \u201c*AMOR VINCIT OMNIA*\u201d in invisible ink\u2014wet. \nMy flow\u2019s a trompe-l\u2019\u0153il, looks flat till you tilt your head\u2014 \nThen it\u2019s depth for days, like Brunelleschi\u2019s dome overhead. \nI\u2019m not baroque, I\u2019m pre-baroque, I\u2019m the prototype shock, \nGot patrons in fur-lined sleeves stuffing florins in my sock. \n**I paint in egg tempera so the colors never fade,** \n**But the yolk smells funky\u2014call it *odeur de old crusade*.**\n\n---\n\n### **Section 2 \u2013 The Printing-Press Poltergeist** \nGutenberg ghost in the machine, clanking lead type like chains, \nEvery page a polter-page, every serif a soul that remains. \nI drop a folio, it\u2019s viral before \u201cviral\u201d was a cough, \nMonks in scriptoriums rage-quitting, \u201cBro, that illuminated stuff\u2019s off!\u201d \nI\u2019m the incunabula influencer, my colophon\u2019s a flex, \nGot red-ink rubricators sliding into my DMs\u2014*next*. \n**I rhyme in movable type so my bars can rearrange,** \n**Yesterday I was Petrarch, today I\u2019m deranged.** \nThe press spits paper like a hydra with hiccups, \nEach sheet a protest flyer: \u201cDown with scholastic pick-ups!\u201d\n\n---\n\n### **Section 3 \u2013 The Anatomical Anthem (Parental Advisory: Corpses)** \nNight shift with da Vinci, we\u2019re sketching by candle and moon, \nCadavers on the table like, \u201cDo me a favor\u2014autotune my coccyx soon.\u201d \nI rap in reverse Latin so only the dead understand, \nWhile Leo reverse-engineers a bird-man from a frying pan. 
\n**My beats are ventricles pumping four-four time,** \n**Aorta bass drop\u2014call that circulatory grime.** \nI sample the sound of a scalpel scraping periosteum, \nLayer it over a lute loop\u2014*that\u2019s* my magnum opium. \nGalen\u2019s in the corner, throwing shade like a Greek tragedy, \nBut I\u2019m Harvey before Harvey, already rapping blood circulation majestically.\n\n---\n\n**[Outro \u2013 fading lute arpeggio, distant cannon fire from Constantinople]** \nSo remember me when you\u2019re doom-scrolling on glass, \nI was the era that turned corpses into class. \nI\u2019m the **Renaissance Rapper**, my ghost still tags in chalk, \nLeaving QR codes on cathedral walls\u2014scan it, you\u2019ll squawk. \nRebirth never dies, it just pixelates and loops, \nNow if you\u2019ll excuse me, I\u2019ve got a date with Lucretius in the soup."}], "input_metadata": {"row_id": "3071", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "prepare-try-full-entire-song", "run_id": "will-democratic-political-letter-wish", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 17 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310725", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Summarize the history of Japan. 
Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*"}, {"role": "assistant", "content": "*J\u014dmon* hunter-gatherers (14,000 BCE) gave way to *Yayoi* rice farmers (300 BCE) who forged bronze and iron. \nThe *Yamato* court unified clans (3rd\u20137th c.), adopting *Shinto* and Chinese script. \nPrince *Sh\u014dtoku*\u2019s 17-article constitution (604) and the Taika Reforms (645) centralized power. \nHeian aristocrats (794\u20131185) produced *The Tale of Genji* while samurai rose in the provinces. \nMinamoto no Yoritomo founded the *Kamakura* shogunate (1185), introducing warrior rule. \nMongol invasions (1274, 1281) were repelled by \u201cdivine winds\u201d (*kamikaze*). \nAshikaga *Muromachi* shogunate (1336\u20131573) saw Zen gardens and \u014cnin War chaos. \nOda Nobunaga, Toyotomi Hideyoshi, and Tokugawa Ieyasu ended the *Sengoku* \u201cwarring states\u201d era. \nTokugawa *Edo* shogunate (1603\u20131868) isolated Japan, fostering *ukiyo-e* and *kabuki*. \nCommodore Perry\u2019s *Black Ships* (1853) forced the Meiji Restoration (1868), modernizing the empire. \nVictories in the *Sino-Japanese* (1894\u201395) and *Russo-Japanese* (1904\u201305) wars expanded territory. \nMilitarism led to *Pearl Harbor* (1941) and WWII defeat (1945); the U.S. occupation drafted a pacifist constitution. \nPost-war *economic miracle* made Japan the world\u2019s second-largest economy until the 1990s *bubble*. 
\nToday, Japan balances *constitutional monarchy* with cutting-edge technology and soft-power culture."}], "input_metadata": {"row_id": "3453", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "pay-observe-personal-song-camera", "run_id": "will-democratic-political-letter-wish", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310798", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. 
Example: *This is a highlighted phrase*."}, {"role": "assistant", "content": "*John* in *De Smet* \nQuaker hat *blows off*\u2014chases \n*Silent* *scream*\u2014\u201cCome back!\u201d"}], "input_metadata": {"row_id": "3549", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "grow-happen-national-large-work", "run_id": "will-democratic-political-letter-wish", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310869", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with \"*\", like *highlighted text*."}, {"role": "assistant", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. 
In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011.\n\nThe evening\u2019s proceedings underscore the Academy\u2019s enduring commitment to *artistic integrity* and *technical innovation*. Among the most celebrated achievements are:\n\n\u2022 *Album of the Year* \u2013 awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. \n\u2022 *Record of the Year* \u2013 bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. \n\u2022 *Best New Artist* \u2013 conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music.\n\nThroughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. 
The Academy further acknowledges the *behind-the-scenes luminaries*\u2014engineers, mixers, and mastering specialists\u2014whose *invisible expertise* transforms raw inspiration into *audible transcendence*.\n\nIn closing, the 54th Annual Grammy Awards reaffirm the Recording Academy\u2019s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music."}], "input_metadata": {"row_id": "3629", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "look-disconnect-tidy-whole-painting", "run_id": "will-democratic-political-letter-wish", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.310949", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*."}, {"role": "assistant", "content": "# 10 Surprising Facts About the Dutch Language That Will Make You Say \u201cWauw!\u201d\n\nIf you think Dutch is just \u201cGerman with a sore throat,\u201d think again. From tongue-twisting consonant clusters to words that sound like they were invented by a stand-up comedian, Dutch is a linguistic playground. 
Below are ten facts that prove the language is far more colorful than its reputation for windmills and wooden shoes.\n\n---\n\n### 1. *The longest word in Dutch is 53 letters long* \nMeet **\u201ckindercarnavalsoptochtvoorbereidingswerkzaamhedenplan\u201d**\u2014a single noun describing the preparation work for a children\u2019s carnival parade. Dutch loves to glue words together, so once you master the basics you can create your own kilometer-long compounds on the fly.\n\n---\n\n### 2. Dutch has three genders, but only two articles \nGrammatically, nouns are masculine, feminine, or neuter. In everyday speech, however, masculine and feminine collapsed into a single \u201ccommon\u201d gender, leaving *\u201cde\u201d* for common and *\u201chet\u201d* for neuter. The result? A never-ending guessing game for learners.\n\n---\n\n### 3. *The Dutch alphabet ends with \u201cij,\u201d not \u201cz\u201d* \nSchoolchildren recite the ABC and finish with \u201c\u2026x, y, ij, z.\u201d The digraph *ij* is so common (think *hij* = he, *mijn* = my) that it\u2019s treated almost like a separate letter. On old typewriters, *ij* even had its own key.\n\n---\n\n### 4. Dutch is the closest major language to English \nFrisian is technically closer, but among languages with tens of millions of speakers, Dutch wins. Compare: \n- English *\u201cbread\u201d* \u2192 Dutch *\u201cbrood\u201d* \n- English *\u201cgreen\u201d* \u2192 Dutch *\u201cgroen\u201d* \nShared roots mean you can read simple Dutch signs long before you\u2019ve cracked a single grammar book.\n\n---\n\n### 5. *Dutch gave New York its first words* \nBefore it was New York, it was New Amsterdam. That\u2019s why we say *\u201ccookie\u201d* (from Dutch *koekje*) instead of *\u201cbiscuit,\u201d* and *\u201cboss\u201d* (from *baas*) instead of *\u201cmaster.\u201d* Wall Street? It once ran alongside the *\u201cwal\u201d* (rampart) of the city wall.\n\n---\n\n### 6. 
The word *\u201cgezellig\u201d* has no exact English translation* \nRoughly it means \u201ccozy,\u201d \u201cconvivial,\u201d or \u201cthe warm feeling of being with friends,\u201d but none of those capture the full vibe. A caf\u00e9 can be *gezellig*, a conversation can be *gezellig*, even a spreadsheet can be *gezellig* if the team is laughing.\n\n---\n\n### 7. Dutch spelling changed overnight in 1996 \nThe *\u201cGroene Boekje\u201d* (Green Booklet) reforms simplified thousands of words. *\u201cpotatos\u201d* became *\u201caardappelen\u201d* and *\u201ccat\u2019s\u201d* became *\u201ckatten.\u201d* Overnight, every dictionary, street sign, and Scrabble board in the country was technically out of date.\n\n---\n\n### 8. *Dutch is spoken on six continents* \nBesides the Netherlands and Belgium, it\u2019s an official language in Suriname, Aruba, Cura\u00e7ao, and Sint Maarten. Thanks to emigration, you\u2019ll also find active Dutch-speaking communities in Canada, Australia, Argentina\u2014even South Africa, where Afrikaans is a daughter language.\n\n---\n\n### 9. The Dutch love diminutives \nAdd *-je* to almost any noun and you instantly shrink it: *\u201chuis\u201d* (house) \u2192 *\u201chuisje\u201d* (little house). It\u2019s not just about size; diminutives add affection. A *\u201cbiertje\u201d* isn\u2019t a small beer\u2014it\u2019s an invitation to relax.\n\n---\n\n### 10. *Dutch is the language of the world\u2019s oldest multinational* \nThe Vereenigde Oostindische Compagnie (VOC) was founded in 1602 and issued the planet\u2019s first publicly traded stock. Its ledgers, contracts, and ship logs were all written in Dutch, making the language the original tongue of global capitalism.\n\n---\n\nReady to dive in? 
Start with *hallo* and *dank je wel*, and you\u2019ll already be halfway to *gezellig*."}], "input_metadata": {"row_id": "3644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "mean-simple-kid-goal-hour", "run_id": "will-democratic-political-letter-wish", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 39 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.311030", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "Gideon has several good options for his surplus:\n\n1. **Sell it at the local farmers\u2019 market or to a wholesaler** to turn the extra produce into cash that can cover next season\u2019s seed, fertilizer, and equipment costs. \n2. **Store some of the surplus in a community grain silo** so he can sell later when prices are higher. \n3. **Donate part of the surplus to the regional food bank**, helping families in need and possibly qualifying for a small tax deduction. \n4. **Use the surplus as livestock feed** for his own animals, cutting down on purchased feed expenses. 
\n\n*The most profitable short-term choice is usually selling the surplus immediately after harvest, when demand is strong and prices are favorable.*"}], "input_metadata": {"row_id": "2515", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "consider-control-similar-president-question", "cohort_id": "build-gentle-story-radio-hate", "rollout_id": "store-look-hard-medical-world", "run_id": "will-democratic-political-letter-wish", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T16:37:23.311107", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev6+gc436eaf.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 69623} +{"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hello"}], "input_metadata": {"row_id": "1"}, "rollout_status": {"status": "finished"}, "invocation_id": "observe-entire-possible-game-president", "cohort_id": "explain-huge-dark-hate-place", "rollout_id": "win-may-cultural-year-point", "created_at": "2025-08-10T22:35:28.585386"} +{"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hello"}], "input_metadata": {"row_id": "1"}, "rollout_status": {"status": "finished"}, "invocation_id": "lose-sure-unfriendly-eye-drawing", "cohort_id": "bring-rough-small-empty-day", "rollout_id": "open-break-political-environmental-mother", "created_at": "2025-08-10T22:44:59.325397"} +{"messages": [{"role": "user", "content": "Write an outline for a 
paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}, {"role": "assistant", "content": "# Outline: The History of Yemeni Coffee\n\n## 1. Introduction \n*Thesis statement*: Yemeni coffee is not merely a beverage but a cultural, economic, and geopolitical force whose 500-year journey from the highlands of Yemen to global caf\u00e9 culture reshaped trade routes, colonial ambitions, and daily rituals worldwide.\n\n---\n\n## 2. *Botanical Origins and Early Cultivation* \n- Coffea arabica\u2019s migration from Ethiopia to Yemen c. 6th\u201315th centuries \n- Wild vs. cultivated strains; genetic bottleneck in Yemeni highlands \n- Terrace agriculture and water-management innovations\n\n---\n\n## 3. *The Rise of the Port of Mocha* \n- 15th-century emergence of Mocha as the Red Sea\u2019s premier coffee emporium \n- Caravan routes from the Haraz, Yafei, and Ismaili mountains \n- *Mocha\u2019s cosmopolitan merchant communities*: Arabs, Jews, Armenians, Gujaratis\n\n---\n\n## 4. *Sufi Networks and the Spiritualization of Coffee* \n- Zaydi Sufi orders in 15th-century Yemen \n- *Coffee as a devotional aid* for nocturnal dhikr ceremonies \n- Early fatwas: legal debates on coffee\u2019s permissibility\n\n---\n\n## 5. *Ottoman Encounters and the First Export Boom (1550\u20131630)* \n- Ottoman occupation of Yemen (1538\u20131636) and tax reforms \n- *The \u201ccoffee cannon\u201d*: Ottoman customs duties that financed empire \n- Spread to Cairo, Mecca, Istanbul; first coffeehouses in the Islamic world\n\n---\n\n## 6. *European Curiosity and the \u201cMocha\u201d Brand* \n- 17th-century Dutch, English, and French East India Companies \n- *\u201cMocha coffee\u201d as a luxury commodity* in Amsterdam and London auctions \n- Early European botanical illustrations based on Yemeni specimens\n\n---\n\n## 7. 
*Colonial Rivalry and Seed Smuggling* \n- 1616: Pieter van den Broecke\u2019s clandestine acquisition of Yemeni seedlings \n- *The birth of Dutch Java plantations* and the erosion of Yemen\u2019s monopoly \n- French efforts in R\u00e9union (Bourbon) and Dutch Ceylon\n\n---\n\n## 8. *Yemeni Responses to Global Competition* \n- 18th-century price collapses and local adaptation \n- *Qat cultivation as an economic substitute* in highland microclimates \n- Internal trade shifts: Aden\u2019s rise vs. Mocha\u2019s decline\n\n---\n\n## 9. *The 19th-Century Red Sea Coffee Corridor* \n- Muhammad Ali\u2019s Egyptian occupation (1839\u20131840) and new customs regimes \n- *British Aden (1839\u20131967)* as a transshipment hub \n- Steamships, telegraphs, and the Suez Canal\u2019s impact on Yemeni exports\n\n---\n\n## 10. *Imam Yahya and the Coffee Monopoly (1918\u20131962)* \n- Royalist control over highland terraces and fixed-price marketing boards \n- *Smuggling networks to Asmara and Djibouti* undermining state revenues \n- Currency crises and the shift toward remittances over coffee taxes\n\n---\n\n## 11. *Socialist Agrarian Reform in South Yemen (1967\u20131990)* \n- Nationalization of coffee cooperatives in the PDRY \n- *Soviet agronomists and the introduction of high-yield varieties* \n- Collapse of traditional qishr (coffee husk) trade with Saudi Arabia\n\n---\n\n## 12. *Civil War, Qat Expansion, and Coffee\u2019s Decline (1970s\u20131990s)* \n- *Armed conflict\u2019s impact on terrace maintenance and irrigation systems* \n- Government subsidies favoring qat over coffee \n- Rural-urban migration and loss of intergenerational knowledge\n\n---\n\n## 13. *The Specialty-Coffee Renaissance (2000\u2013Present)* \n- *The \u201cPort of Mokha\u201d narrative* in third-wave caf\u00e9s from Tokyo to Portland \n- Yemeni diaspora entrepreneurs (e.g., Mokhtar Alkhanshali) and direct-trade models \n- Blockchain traceability and auction prices exceeding US $500/lb\n\n---\n\n## 14. 
*Climate Change and Genetic Conservation* \n- Rising temperatures pushing cultivation to higher altitudes \n- *Drought-resistant varietals and heirloom preservation projects* \n- NGO partnerships for agroforestry and water-harvesting techniques\n\n---\n\n## 15. *Cultural Memory and Heritage Tourism* \n- UNESCO tentative listing of Harazi coffee landscapes \n- *Revival of traditional qahwa ceremonies* in Sana\u2019a and Manakhah \n- Coffee museums, heritage trails, and the commodification of nostalgia\n\n---\n\n## 16. Conclusion \n- *Yemeni coffee as a palimpsest*: layers of empire, trade, spirituality, and resilience \n- Future research directions: genomic mapping, oral histories, and climate adaptation"}], "input_metadata": {"row_id": "1307", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "learn-must-black-work-word", "run_id": "represent-soft-ancient-day-force", "ground_truth": "15", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 29 highlighted sections (required: 15)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409167", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". 
For example: *3 years of experience*."}, {"role": "assistant", "content": "Dear Steam-Rose Caf\u00e9, \nI come on the wings of *espresso-scented dawn*, \na mug in one hand, a r\u00e9sum\u00e9 in the other, \nand a heart that beats in 4/4 *latte-art rhythm*. \n\nI have *3 years of experience* coaxing crema \nfrom reluctant beans, teaching milk to swirl \ninto hearts, tulips, and once\u2014*a near-perfect phoenix*. \nI know the hush before the morning rush, \nthe *symphony of clinking ceramic*, \nthe soft *hiss of steam* that says, \u201cGood morning, \nyou are not alone.\u201d \n\nI can *calibrate a grinder* by ear, \ndial in sweetness like a poet finds metaphor, \nand still remember the name of the woman \nwho orders *oat-milk cappuccino, half-pump vanilla*. \nI have *wiped counters into mirrors*, \nswept floors into runways for tiny dogs \nand toddlers in rain boots. \n\nI believe a tip jar can be a wishing well, \nthat decaf drinkers deserve *symphony*, not apology, \nand that every *first sip* is a small resurrection. \nIf you need someone who can *thrive at 5:59 a.m.*, \nwho greets silence and chaos with the same grin, \nwho counts *seconds of extraction* like rosary beads\u2014 \nthen I am already pulling on my apron, \nwarming cups with the *palm of promise*. 
\n\nMay I steam for you?"}], "input_metadata": {"row_id": "1644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "mean-send-liquid-main-thing", "run_id": "represent-soft-ancient-day-force", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 14 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409252", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}, {"role": "assistant", "content": "*We\u2019re all just cousins with fur, feathers, or scales* \nI spent last weekend binge-watching nature documentaries in my pajamas, and the same thought kept popping up: the animal kingdom is basically one big extended family reunion where everyone forgot to send out the dress-code memo. From the tiniest ant to the splashiest whale, we keep running into the same life hacks over and over again. Here are five of my favorite \u201cwait, we do that too!\u201d moments.\n\n*Play isn\u2019t just for puppies* \nTurns out, *everyone needs recess*. Scientists have filmed ravens sliding down snowy roofs on their backs, dolphins surfing waves for fun, and even octopuses (octopi? 
octopodes?) juggling empty shells like bored kids with fidget spinners. The common thread? Big brains and long childhoods. If you\u2019ve got time to learn, you\u2019ve got time to goof off.\n\n*Grandma knows best* \nElephant herds led by wizened old matriarchs survive droughts way better than younger crews. Orca grandmothers babysit while mom hunts. And\u2014plot twist\u2014*human grandmas might be the reason we live past menopause*. The \u201cgrandmother hypothesis\u201d says that by sticking around to help feed and teach grandkids, elder females turbo-charged our species\u2019 smarts. Gray hair is basically a superhero cape.\n\n*Love stinks (in a good way)* \nWhether you\u2019re a mouse, a meerkat, or a middle-schooler, *smell is the original dating app*. Mice sniff out immune-system differences to avoid kissing cousins. Humans subconsciously rate sweaty T-shirts for genetic compatibility. Even plants get in on the game\u2014orchids fake the scent of female bees to lure in pollen-carrying males. Swipe right on pheromones.\n\n*Teamwork makes the dream work* \nArmy ants build living bridges out of their own bodies. Penguins huddle in rotating circles so everyone gets a turn in the warm center. And *those viral videos of capybaras chilling with crocodiles?* That\u2019s mutualism\u2014one species gets dental hygiene (yes, the crocs let little birds pick their teeth), the other gets lunch. Nature\u2019s Airbnb: leave a five-star review and nobody gets eaten.\n\n*Everybody poops, and that\u2019s a feature, not a bug* \nDung beetles navigate by the Milky Way (seriously, they\u2019re tiny astronomers). Hippos use poop to mark territory. Parrotfish poop out sand\u2014hello, tropical beaches. *One creature\u2019s waste is another\u2019s waterfront property*. The takeaway? 
Nothing in nature is trash; it\u2019s just someone else\u2019s treasure waiting to happen.\n\nSo next time you feel like an awkward outsider at the planetary party, remember: we\u2019re all just variations on a theme. Some of us just happen to have better tail designs."}], "input_metadata": {"row_id": "1646", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "store-describe-follow-loud-week", "run_id": "represent-soft-ancient-day-force", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 11 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409320", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format:\n*section name*"}, {"role": "assistant", "content": "*Executive Summary*\nBremen is a city where maritime heritage meets cutting-edge creativity, yet it lacks a locally-rooted premium sweatshirt brand that speaks to both its seafaring soul and its thriving student, start-up, and creative scenes. 
\u201cBremer Fleece\u201d will fill that gap by producing small-batch, design-forward sweatshirts that celebrate Bremen\u2019s Hanseatic symbols, street-art culture, and eco-conscious values. Our mission is to become the go-to layer for every Bremen resident who wants warmth, style, and a story. We will launch with an initial investment of \u20ac75 k, reach break-even within 18 months, and scale to \u20ac350 k annual revenue by year three.\n\n*Market Analysis*\nBremen & Bremerhaven host 550 k residents, 35 k university students, and 1.2 million annual tourists. Surveys (n = 312) show 68 % of 18-35-year-olds buy at least one sweatshirt per year, yet only 12 % can name a local brand. Competitor mapping reveals three gaps: (1) no Bremen-specific iconography, (2) limited sustainable options, and (3) no on-demand customization. Tourists spend \u20ac45\u2013\u20ac70 on souvenirs; locals spend \u20ac60\u2013\u20ac90 on quality streetwear. By positioning at \u20ac69\u2013\u20ac89 retail, we target both segments without direct price overlap with global fast-fashion or luxury labels.\n\n*Product & Sustainability Strategy*\nAll sweatshirts will be 85 % GOTS-certified organic cotton / 15 % recycled PET fleece, manufactured in Portugal under Fair-Wear audits. Core graphics\u2014designed in collaboration with local illustrators\u2014feature the Bremer Stadtmusikanten silhouette re-imagined in street-art style, the Weser river wave, and subtle maritime signal-flag typography. Each drop is limited to 300 pieces, serialized with NFC tags that unlock AR experiences (e.g., a 3-minute audio tour of the Schlachte promenade). 
Packaging is plastic-free; returned garments are up-cycled into insulation for Bremen\u2019s social-housing renovations, creating a closed-loop narrative customers can track via QR code.\n\n*Go-to-Market & Sales Channels*\nPhase 1 (Months 1-6): Pop-up store in the Viertel district during \u201cBreminale\u201d festival; online pre-orders via Shopify; collaborations with local craft-beer bars for co-branded nights. \nPhase 2 (Months 7-12): Permanent micro-store (25 m\u00b2) inside the Waterfront shopping center; integration with local parcel lockers for 24-hour pickup; B2B uniforms for Bremen start-ups (branded left-chest embroidery). \nPhase 3 (Year 2+): Wholesale to concept stores in Hamburg and Berlin; tourist gift shops at Bremen Airport; subscription \u201cSeasonal Layer\u201d box for expatriates. Marketing mix: Instagram/TikTok reels shot on the Weser riverbanks, micro-influencer contracts (5\u201320 k followers), and a loyalty app awarding \u201cHanseatic Points\u201d redeemable for limited patches.\n\n*Financial Plan & Funding Requirements*\nStart-up capital: \u20ac75 k (founders\u2019 equity \u20ac25 k, silent partner \u20ac30 k, KfW Green Loan \u20ac20 k). Allocation: 40 % inventory, 25 % store fit-out, 15 % marketing, 10 % e-commerce tech, 10 % working capital. Projected Year-1 revenue \u20ac120 k, COGS 35 %, gross margin 65 %, EBITDA \u20ac18 k. Break-even at 1 650 units sold. Year-3 revenue \u20ac350 k, EBITDA \u20ac95 k, valuation multiple 3\u00d7 EBITDA \u2192 potential exit to regional lifestyle group or expansion into Hamburg & Hannover franchises.\n\n*Risk Management & Next Steps*\nKey risks: cotton-price volatility, tourist-season dependency, and IP disputes over street-art collaborations. Mitigation: forward contracts on cotton, dual-season collections (tourist + winter), and written artist agreements with revenue-share clauses. 
Immediate next steps: finalize supplier MOUs, secure Waterfront lease, launch Kickstarter-style pre-order campaign to validate top three designs, and apply for Bremen \u201c\u00d6ko-Invest\u201d sustainability grant (up to \u20ac10 k non-repayable)."}], "input_metadata": {"row_id": "167", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "include-explore-soft-desire-video", "run_id": "represent-soft-ancient-day-force", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409388", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "**The Official \u201cTwo-Mom Marriage Quality Report Card\u201d** \n*(For internal use only\u2014unless you want to weaponize it at Thanksgiving)* \n\n---\n\n**Couple Nickname:** ___________________ \n(e.g., \u201cThe Snack-Pack,\u201d \u201cLesbian Sleep-Deprivation Study,\u201d or \u201cMommy & Mommy & Why Is There Yogurt on the Ceiling?\u201d)\n\n---\n\n### **1. 
Communication: How Loudly Do We Passive-Aggressively Load the Dishwasher?** \n- **A+** We finish each other\u2019s sentences *and* each other\u2019s coffee. \n- **B** We communicate via Post-it notes that say things like \u201cYour turn to explain to the toddler why the cat isn\u2019t a hat.\u201d \n- **C** We\u2019ve developed a complex system of sighs, eye rolls, and interpretive dance. \n- **D** We\u2019ve resorted to texting each other from *inside the same house*. \n- **F** We\u2019ve started a group chat with the dog as mediator. \n\n---\n\n### **2. Division of Labor: Who\u2019s the Default Parent and Who\u2019s the Backup Default Parent?** \n- **A+** We\u2019ve color-coded a chore chart *and* laminated it. *Twice.* \n- **B** We\u2019ve agreed that \u201cemotional labor\u201d counts as cardio. \n- **C** One of us thinks \u201cdivision of labor\u201d means \u201cI divided, you labor.\u201d \n- **D** We\u2019ve outsourced bedtime to Alexa and a questionable Spotify playlist. \n- **F** We\u2019ve started a betting pool on which kid will crack first and just raise themselves. \n\n---\n\n### **3. Romance: How Often Do We Remember We\u2019re Not Just Co-CEOs of a Tiny Human Startup?** \n- **A+** We still schedule \u201cus time\u201d *and* it doesn\u2019t involve comparing grocery lists. \n- **B** Date night is a shared fantasy, like unicorns or matching socks. \n- **C** We flirt by passive-aggressively refilling the Brita pitcher. \n- **D** Our last \u201cdate\u201d was a joint trip to Target where we high-fived over finding *one* unexpired yogurt. 
\n- **F** We\u2019ve started referring to each other as \u201cRoommate #1\u201d and \u201cRoommate #2.\u201d \n\n---\n\n### **Bonus Round: How Often Do We Threaten to Run Away to a Hotel\u2026 and Then Fall Asleep on the Couch?** \n- **Every night.** \n- **Only when someone microwaves fish.** \n- **We\u2019ve bookmarked a Holiday Inn but forgot the password to the account.** \n- **We\u2019ve started a GoFundMe for a weekend alone. The kids donated $3.47 in nickels.** \n\n---\n\n### **Final Grade:** __________ \n*(Circle one: \u201cStill in love,\u201d \u201cTherapy is cheaper than divorce,\u201d or \u201cWe\u2019ve agreed to haunt each other in the afterlife.\u201d)* \n\n---\n\n**Post-Report Ritual:** \n1. Burn this document. \n2. Hug for 6\u20138 seconds (scientifically proven to reset resentment). \n3. Debate whose turn it is to apologize first. \n4. Remember you\u2019re both just two tired moms who\u2019d still pick each other in the zombie apocalypse."}], "input_metadata": {"row_id": "168", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "live-late-drawing-family-student", "run_id": "represent-soft-ancient-day-force", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 35 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409471", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, 
"pid": 12478} +{"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*."}, {"role": "assistant", "content": "*little me in the countryside*\n\n[Verse 1] \nBarefoot on the gravel road, dust between my toes, \nSunlight dripped like honey on the freckles on my nose. \nGrandma\u2019s porch swing creaked a lullaby, cicadas kept the time, \nI chased the wind through wheat fields tall enough to hide. \nMason jars of lightning bugs, a galaxy I could hold, \nEvery twinkle was a promise that the night would never grow old. \n\n[Chorus] \nOh, little me in the countryside, running wild and free, \nWhere the sky was wider than the dreams inside of me. \nRiverbanks and tire swings, every day a new frontier, \nI left my heart in the tall grass\u2014still calling me back there. \n\n[Verse 2] \nPopsicles melted faster than the summers ever stayed, \nBut the taste of wild blackberries never quite fades. \nDirt-road baseball, skinned-up knees, and secrets told to trees, \nThunder rolled like laughter from the clouds above the eaves. \nFireflies spelled out memories in Morse code across the dark, \nI wished on every one of them to leave a glowing mark. \n\n[Chorus] \nOh, little me in the countryside, running wild and free, \nWhere the sky was wider than the dreams inside of me. \nRiverbanks and tire swings, every day a new frontier, \nI left my heart in the tall grass\u2014still calling me back there. \n\n[Bridge] \nNow the city lights can\u2019t drown the crickets in my mind, \nI still hear the tractor\u2019s rumble and the creek we left behind. \nTime may steal the daylight, but the dusk still knows my name, \nAnd somewhere in the fireflies, I\u2019ll always be the same. \n\n[Final Chorus] \nOh, little me in the countryside, running wild and free, \nWhere the sky was wider than the dreams inside of me. 
\nRiverbanks and tire swings, every day a new frontier, \nI left my heart in the tall grass\u2014still calling me back there. \n\n[Outro] \nSo if you find a dusty road that winds beyond the pines, \nFollow it to barefoot days and summers lost to time. \nI\u2019ll be there in the cricket song, in every golden field, \nForever just a child again\u2014forever home, forever real."}], "input_metadata": {"row_id": "1773", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "seem-imagine-find-easy-way", "run_id": "represent-soft-ancient-day-force", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 1 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409547", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I wear a *pretty face* for all to see, \nYet *behind my smile* hides what\u2019s truly me. \nA *grand illusion* of marble and light, \nBut step inside\u2014truth may give you a fright. 
\n\nWhat am I?"}], "input_metadata": {"row_id": "1886", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "keep-organize-good-tidy-member", "run_id": "represent-soft-ancient-day-force", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409620", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "# Workshop Template \n**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* \n\n---\n\n### 1. Welcome & Objectives (15 min) \n- **Ice-breaker:** \u201cTwo Truths & a Dream\u201d \u2013 participants share two facts and one aspiration related to diversity. \n- *State the learning outcomes:* \n 1. Understand the *business imperative* for diversity. \n 2. Recognize *unconscious bias* and its impact. \n 3. Leave with *actionable strategies* to foster inclusion tomorrow. \n\n---\n\n### 2. The Data Dive (20 min) \n- *Present the numbers:* \n - Companies in the top quartile for ethnic diversity are 35 % more likely to outperform peers (McKinsey, 2023). 
\n - Teams with inclusive leaders show 17 % higher team performance. \n- Interactive poll: \u201cWhich statistic surprised you most?\u201d via live Mentimeter. \n\n---\n\n### 3. *Unconscious Bias Lab* (30 min) \n- Mini-lecture (5 min): How the brain creates shortcuts. \n- Breakout exercise: \n - *Scenario cards* (e.g., identical r\u00e9sum\u00e9s with different names). \n - Debrief: What assumptions surfaced? How did they feel? \n- *Key takeaway:* Bias isn\u2019t a moral failing; it\u2019s a design flaw we can redesign. \n\n---\n\n### 4. Voices & Stories Panel (25 min) \n- 3 employees share 3-minute stories on *moments they felt seen vs. sidelined*. \n- *Guided reflection:* \u201cWhen did you last feel your identity was an asset at work?\u201d \n\n---\n\n### 5. *Inclusive Toolkit Sprint* (30 min) \n- In trios, pick one tool: \n - Inclusive job descriptions (remove gendered language). \n - Bias interrupters for meetings (e.g., rotating note-taker). \n - *Allyship scripts* for micro-aggressions (\u201cI noticed\u2026 impact\u2026 can we revisit?\u201d). \n- Rapid prototyping: Draft & share in 10 min. \n\n---\n\n### 6. Commitment Wall (10 min) \n- Sticky-note pledges: \u201cOne action I will take this week is ___.\u201d \n- *Take a photo* and email it to participants as a reminder. \n\n---\n\n### 7. Wrap-Up & Resources (5 min) \n- QR code to: \n - *Inclusive language guide* (PDF). \n - *30-60-90 day checklist* for managers. \n- Feedback form (2 questions max). \n\n---\n\n### Post-Workshop Add-Ons \n- Optional 1:1 coaching slots for ERG leaders. 
\n- Slack channel: #diversity-wins for sharing successes."}], "input_metadata": {"row_id": "2253", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "imagine-must-black-country-back", "run_id": "represent-soft-ancient-day-force", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 18 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409698", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. Make sure to highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Your Name]** \n[Your Address] \n[City, State ZIP] \n[Email Address] | [Phone Number] | [LinkedIn URL] \n[Date]\n\n**[Party Chairperson\u2019s Name]** \n[Local Party Name] \n[Party Headquarters Address] \n[City, State ZIP]\n\nDear **Chairperson [Last Name]**,\n\nI am writing to express my enthusiasm for the **Rally Organizer** position recently posted by **[Local Party Name]**. 
As a lifelong resident of **[City/County]** and a grassroots activist who has mobilized more than 1,200 volunteers across three election cycles, I understand the pulse of our community\u2014and how to turn that energy into record-breaking attendance at rallies.\n\n### **Proven Rally & Event Expertise**\n\u2022 **Scaled from 50 to 1,000 attendees**: In 2022 I spearheaded the \u201cNeighbors United\u201d rally series, growing turnout 20\u00d7 in four months by integrating SMS drip campaigns, local-artist performances, and micro-targeted Facebook events. \n\u2022 **Logistics under pressure**: Secured permits, stage rentals, and ADA-compliant facilities for a 3,000-person waterfront rally with 48-hour notice after a venue cancellation. \n\u2022 **Coalition builder**: Forged partnerships with 18 neighborhood associations, four labor unions, and two university student governments, ensuring rallies reflected the full diversity of our electorate.\n\n### **Data-Driven & Tech-Forward**\nI run every rally like a campaign within a campaign: \n- **QR-coded sign-in sheets** that sync to our voter-file in real time, adding 2,300 new low-propensity voters to our volunteer pool last cycle. \n- **Heat-mapping foot-traffic data** to optimize merch, food-truck, and literature-table placement, cutting average wait times by 34 %. \n- **Livestream production** that reached 28,000 additional viewers on Instagram and TikTok, with a 12 % conversion rate to small-dollar donations.\n\n### **Rooted in Our Shared Values**\nFrom canvassing in 104-degree heat to protect reproductive rights, to organizing bilingual phone banks for immigrant families, my work is guided by **[Local Party Name]\u2019s** platform of equity, sustainability, and inclusive growth. 
I am eager to channel that same passion into rallies that not only energize our base but also welcome first-time voters who will decide the next decade of policy.\n\nI would welcome the opportunity to discuss how my experience and vision can translate into rallies that move hearts, headlines, and\u2014most importantly\u2014ballots. Thank you for your consideration.\n\n**In solidarity,**\n\n[Your Signature (if submitting hard copy)] \n[Your Typed Name]"}], "input_metadata": {"row_id": "2381", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "can-main-final-large-group", "run_id": "represent-soft-ancient-day-force", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 17 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409769", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "The opposite of youth is not age, but **stagnation**\u2014the moment when *curiosity closes its eyes* and wonder is replaced by routine. 
\n\nWhere youth is defined by *an open-ended question mark*, its true opposite is the period when we stop asking, stop risking, and stop imagining that tomorrow can be radically different from today."}], "input_metadata": {"row_id": "2752", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "confirm-whole-spiritual-spicy-media", "run_id": "represent-soft-ancient-day-force", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409837", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. Use markdown to highlight at least 3 sections in your answer."}, {"role": "assistant", "content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. \n\nInside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. 
The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. \n\nIf you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**."}], "input_metadata": {"row_id": "2759", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "record-follow-medical-wrong-research", "run_id": "represent-soft-ancient-day-force", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 13 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409915", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*."}, {"role": "assistant", "content": "Yo, it\u2019s 3 a.m., phone buzzin\u2019 like a bee in a blender, \nI pick up\u2014half-asleep, drool on the pillow, blender tender. 
\nVoice on the line smooth like butter on hot injera: \n\u201cGreetings, Prince Kwame, your royal DNA\u2019s in the area!\u201d \n\nI\u2019m like, \u201cHold up, who dis? My name\u2019s *Derek from Detroit*\u2014 \nI work at Arby\u2019s, my kingdom\u2019s a one-bedroom joint.\u201d \nHe says, \u201cSilence, cousin! You\u2019re 47th in line to the throne, \nJust wire 2K for processing, and soon you\u2019ll be sittin\u2019 on gold.\u201d \n\n**I\u2019m thinkin\u2019: *Nigerian prince? That\u2019s the oldest trick in the spam,* \nBut he drops my grandma\u2019s name, her cornbread recipe\u2014now I\u2019m like\u2026 dang.** \n\nHe FedEx\u2019d a crown (plastic, from Party City), \nPlus a goat emoji text\u2014official, no pity. \nI Venmo\u2019d the cash, bought a dashiki on eBay, \nStarted signin\u2019 emails \u201cKing D-Train, Supreme Bey.\u201d \n\nNext day, package arrives\u2014inside: a single yam and a map, \nLeads to a storage unit in Flint, smells like goat and a trap. \nOpen the door\u2014boom\u2014500 cousins dancin\u2019 in dashikis, \nThey chant my name, tossin\u2019 jollof like confetti at Mardi Gras freaky. \n\nTurns out the \u201cprince\u201d was my third cousin twice removed on my mom\u2019s side, \nHe scammed me, but also adopted me\u2014family pride? \nNow I\u2019m the royal taste-tester for the king\u2019s hot-sauce line, \nGot a throne made of lawn chairs, but hey\u2014the title\u2019s *mine*. 
\n\nSo if you get that call, don\u2019t hang up\u2014just haggle the fee, \nAnd remember: every scam\u2019s just a family reunion with a *slight* cover fee."}], "input_metadata": {"row_id": "2790", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "play-determine-main-solid-festival", "run_id": "represent-soft-ancient-day-force", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 4 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.409988", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*."}, {"role": "assistant", "content": "Gary the germ was having a *terrible* day at the office. His cubicle in the Nose & Throat Department was buried under a mountain of used tissues, and the thermostat was stuck on \u201cArctic.\u201d He shuffled to the break-room microwave to reheat his leftover chicken soup, only to discover someone had taped a sign on it: \u201cOUT OF ORDER\u2014LIKE YOUR IMMUNE SYSTEM.\u201d\n\nJust then, his buddy Rhinovirus Rick burst in, sneezing confetti. \u201cGary! Big news! 
We\u2019ve been assigned to a brand-new host: Chad Thunderbuns, personal trainer and part-time hand-shaker!\u201d\n\nGary\u2019s eyes widened. *Chad Thunderbuns?* The guy who high-fived an entire marathon, then licked his fingers because \u201csalt is electrolytes\u201d? This was the Super Bowl of sniffles.\n\nThey suited up in tiny parkas (viruses hate the cold, irony noted) and parachuted into Chad\u2019s left nostril during a particularly aggressive CrossFit cooldown. Gary landed in a jungle of nose hair so thick it could\u2019ve been a National Geographic special. Rick got tangled in a dangling booger like a bungee jumper without a cord.\n\n\u201cOperation Sneeze-and-Run is a go!\u201d Gary shouted into his walkie-talkie, which was actually a Q-tip he\u2019d stolen from a pediatrician. He and Rick sprinted toward the sinus expressway, dodging cilia like Indiana Jones boulders. Halfway there, Rick slipped on a puddle of eucalyptus oil\u2014Chad had been \u201cdetoxing.\u201d\n\n\u201cRick! You okay?\u201d\n\nRick gave a thumbs-up, then immediately used the same thumb to poke Chad\u2019s hypothalamus. *Boom!* Thermostat cranked to 101\u00b0F. Chad staggered into his kitchen, muttering, \u201cWhy is it suddenly *Antarctica* in my skull?\u201d\n\nGary seized the moment. He commandeered a lymphocyte Uber, shouting, \u201cTo the throat! I\u2019ve got scratchy to do and larynx to wreck!\u201d The Uber driver, a confused white blood cell named Linda, asked, \u201cAre you sure you\u2019re supposed to be here?\u201d Gary flashed a fake ID that read \u201cTotally Not a Virus, Honest.\u201d Linda shrugged\u2014she was off duty in ten minutes anyway.\n\nMeanwhile, Chad\u2014now wrapped in seventeen blankets and one questionable Snuggie\u2014googled \u201chow to throw a cold.\u201d The search results were unhelpful: \u201cYou can\u2019t throw a cold, dummy. You can only *catch* it. 
Try throwing a frisbee instead.\u201d Chad tried anyway, lobbing a box of tissues across the room. It landed on his cat, Mr. Whiskers, who promptly *caught* the box and gave Chad a look that said, \u201cI\u2019m switching owners.\u201d\n\nBack in the nasal trenches, Gary and Rick set up their base camp in a tonsil cave. They hung a banner: \u201cWelcome to Phlegmville\u2014Population: Growing!\u201d Rick coughed up a tiny karaoke machine, and soon the entire throat was rocking to \u201cDon\u2019t Stop Believin\u2019\u201d performed by the Streptococcus Sisters.\n\nChad, delirious with fever, mistook the music for divine intervention. \u201cThe angels want me to gargle salt water!\u201d he declared, sprinting to the bathroom. Gary panicked. \u201cAbort! Abort! He\u2019s onto us!\u201d But it was too late\u2014Chad gargled so hard he created a tsunami that swept Rick right out the mouth and onto the bathroom mirror. Rick slid down like a sticky snowman, leaving a trail that spelled \u201cWASH YOUR HANDS.\u201d\n\nGary, now the last virus standing, realized he\u2019d caught something himself: loneliness. Also, possibly a conscience. He looked at Chad\u2014shivering, soup-slurping, Googling \u201ccan cats adopt humans\u201d\u2014and sighed. \u201cFine. I\u2019ll downgrade to a 24-hour bug. But only because your Netflix password is \u2018password123\u2019 and I want to finish *Stranger Things*.\u201d\n\nHe radioed HQ: \u201cMission accomplished-ish. Host compromised, but morale low. Requesting extraction and a humidifier.\u201d\n\nThe next morning, Chad woke up feeling *almost* human. He blew his nose triumphantly, unaware that in the tissue lay Gary, waving a tiny white flag made of toilet paper. Chad squinted. \u201cHuh. Looks like a little green army man. Weird.\u201d He tossed the tissue in the trash.\n\nGary landed atop the bin, next to an empty soup can and a note from Mr. Whiskers that read, \u201cI\u2019ve unionized the houseplants. 
We demand tuna.\u201d\n\nAnd that\u2019s how the cold was caught, but never thrown\u2014except by Mr. Whiskers, who *did* manage to throw shade for the rest of the week."}], "input_metadata": {"row_id": "2905", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "wonder-appear-single-kid-office", "run_id": "represent-soft-ancient-day-force", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 9 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410064", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**Title: \u201cDa Vinci\u2019s Disco, or How I Learned to Stop Worrying and Love the Lute\u201d** \n*(Beat: a harpsichord that swallowed a drum machine and burped out a bass drop)* \n\n---\n\n**[Verse 1: The Florentine Funk]** \nYo, I\u2019m a Medici money-mage, silk sleeves drip like wet tempera, \nBankroll so fat it bends the *linear perspective*\u2014call it *vanishing-point* era. 
\nGot a **holographic Botticelli** on my chain, Birth of Venus in 3D, \nShe twerks outta the clamshell, NFT of the sea. \nMichelangelo\u2019s in the corner, chiselin\u2019 beats outta Carrara, \nSays \u201cI don\u2019t do ceilings, I *liberate* marble from its trauma.\u201d \n**[HIGHLIGHT]** *\u201cSistine Chapel? More like a divine drip-drip, \nGod\u2019s finger\u2019s a Wiimote, Adam\u2019s like \u2018yo, pass the Bluetooth chip.\u2019\u201d* \n\n---\n\n**[Hook: The Anachronistic Anthem]** \n**[HIGHLIGHT]** *\u201cIt\u2019s the Renaissance, baby, where the past gets glitchy, \nWe paint with mirrors, grind mirrors into pigments, \nReflect infinity like a selfie with Narcissus\u2014 \nTime\u2019s a flat circle, but our ruffs are 3D printed.\u201d* \n\n---\n\n**[Verse 2: The Clockwork Cabaret]** \nGalileo\u2019s got a telescope that\u2019s also a kaleidoscope, \nPeep Jupiter\u2019s moons doin\u2019 the *moondance*, call it *cosmic horoscope*. \nCopernicus ghost-writes my bars, heliocentric flow, \nEarth\u2019s not the center? Bro, neither is your ego. \n**[HIGHLIGHT]** *\u201cI drop sonnets like Petrarch, but the ink\u2019s alive, \nLetters crawl off the parchment, form a swarm, then a beehive. \nThe bees spell \u2018BEE-EL ANDE-ES\u2019\u2014they\u2019re unionized, \nDemand more nectar, less iambic pentameter, certified.\u201d* \n\n---\n\n**[Bridge: The Plague\u2019s Afterparty]** \nRats in the ravers, fleas in the frescoes, \nWe rave in the ruins, sweat out the *memento mori* doses. \n**[HIGHLIGHT]** *\u201cDecameron-core, we swap tales like crypto, \nBoccaccio\u2019s a DJ, scratches parchment like it\u2019s vinyl, yo. 
\nThe Decameron drops as a concept album, \nTrack 9\u2019s just cough ASMR\u2014goes platinum.\u201d* \n\n---\n\n**[Outro: The Anamorphic Exit]** \nAs the lute dissolves into a laser harp solo, \nI vanish into a *trompe-l\u2019oeil* corridor, \nYou reach for me, but your hand hits the wall\u2014 \n**[HIGHLIGHT]** *\u201cRenaissance ain\u2019t a period, it\u2019s a glitch in the scroll, \nHistory\u2019s a loop, but the loop\u2019s got a hole. \nJump through, find me in 3023, \nStill rappin\u2019 in iambic, but the beat\u2019s AI-generated, \nAnd the plague\u2019s just a DLC.\u201d*"}], "input_metadata": {"row_id": "3071", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "sit-fine-hard-way-part", "run_id": "represent-soft-ancient-day-force", "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410138", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*"}, {"role": "assistant", "content": "*J\u014dmon* hunter-gatherers (c. 
14,000 BCE) gave way to *Yayoi* rice cultivators who forged *Yamato* chiefdoms, leading to the *Asuka* reforms (645 CE) that created a centralized, *Confucian*-inspired state under the *Tenn\u014d*. \n*Buddhism* and *kanji* arrived via *Silk Road* links, while *Heian* courtiers produced *The Tale of Genji*. \nSamurai clans rose amid *Kamakura* and *Muromachi* shogunates, repelling *Mongol* fleets and enduring *Sengoku* civil war until *Oda Nobunaga*, *Toyotomi Hideyoshi*, and *Tokugawa Ieyasu* reunified the realm. \nThe *Edo* shogunate (1603\u20131868) enforced *sakoku* isolation, fostering urban culture until *Commodore Perry*\u2019s *Black Ships* triggered the *Meiji* Restoration (1868), rapid *industrialization*, and imperial expansion. \nDefeat in *World War II* spurred *U.S. occupation*, a pacifist constitution, and an *economic miracle* that made Japan a global high-tech powerhouse."}], "input_metadata": {"row_id": "3453", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "miss-become-unfriendly-right-memory", "run_id": "represent-soft-ancient-day-force", "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 27 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410212", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a 
funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. Example: *This is a highlighted phrase*."}, {"role": "assistant", "content": "John in *De Smet* \n*Quaker* silence\u2014then he yells \n\u201c*Darn* wind stole my hat!\u201d"}], "input_metadata": {"row_id": "3549", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "create-download-change-whole-thing", "run_id": "represent-soft-ancient-day-force", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410284", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with \"*\", like *highlighted text*."}, {"role": "assistant", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. 
In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011.\n\nThe evening\u2019s proceedings underscore the Academy\u2019s enduring commitment to *artistic integrity* and *technical innovation*. Among the most celebrated achievements are:\n\n\u2022 *Album of the Year* \u2013 awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. \n\u2022 *Record of the Year* \u2013 bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. \n\u2022 *Best New Artist* \u2013 conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music.\n\nThroughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. 
The Academy further acknowledges the *behind-the-scenes luminaries*\u2014engineers, mixers, and mastering specialists\u2014whose *invisible expertise* transforms raw inspiration into *audible transcendence*.\n\nIn closing, the 54th Annual Grammy Awards reaffirm the Recording Academy\u2019s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music."}], "input_metadata": {"row_id": "3629", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "open-great-unknown-ready-pleasure", "run_id": "represent-soft-ancient-day-force", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410356", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*."}, {"role": "assistant", "content": "# 7 Surprising Facts About Dutch That Will Make You Say \u201cWat leuk!\u201d\n\nIf you think Dutch is just \u201cGerman with a sore throat,\u201d think again. The language spoken by 25 million people in the Netherlands and northern Belgium is full of quirks, history, and downright delightful oddities. 
Grab a *stroopwafel* and settle in\u2014here are some of the most interesting facts about Dutch.\n\n---\n\n### 1. Dutch is the closest major language to English\u2014closer than German\nLinguists classify English and Dutch as *West Germanic siblings*. While German diverged early with its case system and umlauts, Dutch and English kept many shared features: similar vocabulary (appel\u2013apple, boek\u2013book), identical word order in many sentences, and even parallel idioms (\u201cIt rains cats and dogs\u201d vs. \u201cHet regent pijpenstelen\u201d). If you\u2019re an English speaker, basic Dutch grammar can feel eerily familiar.\n\n---\n\n### 2. The longest word in Dutch has 60 letters\u2014and yes, it\u2019s official\nMeet *kindercarnavalsoptochtvoorbereidingswerkzaamhedenplan* (\u201cpreparation activities plan for a children\u2019s carnival parade\u201d). It\u2019s in the Van Dale dictionary, so it counts. Dutch loves compound nouns the way English loves hashtags: just keep stacking until you run out of breath.\n\n---\n\n### 3. Dutch has two definite articles\u2014and they\u2019re not interchangeable\nEvery noun is either *de* or *het*. There\u2019s no perfect rule, but 75 % of nouns are *de*-words, and diminutives (ending in \u2011je) are always *het*. Native speakers rarely think about it, but learners create color-coded flashcards and still mess up. Pro tip: if you call the Netherlands *het Nederland*, you\u2019ll get a sympathetic smile and a correction.\n\n---\n\n### 4. The Dutch alphabet gained a 27th letter in 2010\nThe digraph *ij* (pronounced like English \u201ceye\u201d) is so common that phone keyboards in the Netherlands include it as a single key. In crosswords and Scrabble, *ij* counts as one tile, and it even has its own capitalized form: *IJsselmeer*, *IJmuiden*. *Typing \u201cI and J separately feels like writing \u201cNew York\u201d without the space*\u2014technically possible, but why would you?\n\n---\n\n### 5. 
Dutch once had grammatical gender for tables and chairs\nUntil the 17th century, Dutch had three genders like German: masculine, feminine, and neuter. Over time, masculine and feminine merged into a common gender, leaving only *de* and *het*. Linguists call this process *deflection*, but frustrated students call it \u201cthe reason I cried in Utrecht.\u201d\n\n---\n\n### 6. The word \u201cgezellig\u201d has no direct English translation\nAsk ten Dutch people to define *gezellig* and you\u2019ll get ten answers: cozy, convivial, homey, fun, warm, or \u201cthe feeling of being with friends around a fireplace while it snows outside.\u201d The closest English attempt might be \u201chygge,\u201d but that\u2019s Danish. *Gezelligheid* is so central to Dutch culture that the government once used it in tourism ads: \u201cHolland: it\u2019s gezellig.\u201d\n\n---\n\n### 7. Dutch is spoken on six continents\u2014yes, including Antarctica\nThanks to the Netherlands\u2019 seafaring history, Dutch left footprints everywhere: Afrikaans in South Africa, *Surinaams-Nederlands* in South America, and even a small Dutch-speaking community in Indonesia. In 1947, Dutch scientists wintering in Antarctica\u2019s *Queen Maud Land* published a bilingual penguin research journal\u2014making Dutch the only language with peer-reviewed articles written on every continent.\n\n---\n\n### Bonus: Dutch tongue-twisters are next-level\nTry saying *\u201cDe koetsier poetst de postkoets terwijl de postkoets poest de koetsier\u201d* five times fast. If you succeed, the locals might crown you *konijn van Oranje*\u2014rabbit of Orange, a pun on the royal family. If you fail, you\u2019re still gezellig.\n\n---\n\nWhether you\u2019re planning a canal-side vacation or just binge-watching *Undercover*, Dutch is a language that rewards curiosity. 
*Dus, tot ziens!*"}], "input_metadata": {"row_id": "3644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "wonder-visible-entire-military-body", "run_id": "represent-soft-ancient-day-force", "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 24 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410432", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "Gideon has several good options for his surplus:\n\n1. **Sell it at the local farmers\u2019 market or to a wholesaler** to turn the extra produce into cash that can cover next season\u2019s seed, fertilizer, and equipment costs. \n2. **Store some of the surplus in a community grain silo** so he can sell later when prices are higher. \n3. **Donate part of the surplus to the regional food bank**, helping families in need and possibly qualifying for a small tax deduction. \n4. **Use the surplus as livestock feed** for his own animals, cutting down on purchased feed expenses. 
\n\n*The most profitable short-term choice is usually selling the surplus immediately after harvest, when demand is strong and prices are favorable.*"}], "input_metadata": {"row_id": "2515", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "affect-observe-american-event-country", "cohort_id": "read-exploit-spiritual-member-family", "rollout_id": "analyze-left-warm-problem-goal", "run_id": "represent-soft-ancient-day-force", "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-10T23:02:31.410510", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 12478} +{"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. 
The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}], "input_metadata": {"row_id": "1307", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "prefer-liquid-tidy-power-time", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "15", "created_at": "2025-08-10T23:04:51.580925", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". 
For example: *3 years of experience*."}], "input_metadata": {"row_id": "1644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "touch-fine-certain-federal-office", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "5", "created_at": "2025-08-10T23:04:51.581001", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. 
Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}], "input_metadata": {"row_id": "1646", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "set-measure-cool-service-hour", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "5", "created_at": "2025-08-10T23:04:51.581064", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. 
Highlight each section name using the this format:\n*section name*"}], "input_metadata": {"row_id": "167", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "start-warm-final-computer-name", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "5", "created_at": "2025-08-10T23:04:51.581135", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. 
Please highlight at least 3 sections with markdown, i.e *highlighted section*."}], "input_metadata": {"row_id": "168", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "depend-safe-short-home-father", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "3", "created_at": "2025-08-10T23:04:51.581208", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. 
For example: *little me in the countryside*."}], "input_metadata": {"row_id": "1773", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "have-close-easy-big-number", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "1", "created_at": "2025-08-10T23:04:51.581278", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}], "input_metadata": {"row_id": "1886", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "connect-physical-friendly-religious-business", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "3", "created_at": "2025-08-10T23:04:51.581352", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": 
[{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}], "input_metadata": {"row_id": "2253", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "respond-other-military-water-morning", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "3", "created_at": "2025-08-10T23:04:51.581422", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. 
Make sure to highlight at least 3 sections in your answer in markdown format."}], "input_metadata": {"row_id": "2381", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "avoid-expect-special-magazine-level", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "3", "created_at": "2025-08-10T23:04:51.581490", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. 
*highlighted section*."}], "input_metadata": {"row_id": "2752", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "reflect-fine-famous-company-head", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "2", "created_at": "2025-08-10T23:04:51.581560", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. 
Use markdown to highlight at least 3 sections in your answer."}], "input_metadata": {"row_id": "2759", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "start-possible-fear-goal-house", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "3", "created_at": "2025-08-10T23:04:51.581636", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. 
*highlighted section*."}], "input_metadata": {"row_id": "2790", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "organize-full-loud-love-war", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "1", "created_at": "2025-08-10T23:04:51.581709", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. 
For example: *this is a highlighted text section*."}], "input_metadata": {"row_id": "2905", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "fix-play-logical-rational-song", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "2", "created_at": "2025-08-10T23:04:51.581782", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. 
Highlight at least 3 sections in your answer in markdown format."}], "input_metadata": {"row_id": "3071", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "affect-put-quiet-country-woman", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "3", "created_at": "2025-08-10T23:04:51.581853", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. 
To indicate a italic word, wrap it with asterisk, like *italic*"}], "input_metadata": {"row_id": "3453", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "feel-whole-red-time-power", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "5", "created_at": "2025-08-10T23:04:51.581919", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. 
Example: *This is a highlighted phrase*."}], "input_metadata": {"row_id": "3549", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "support-happen-high-open-fact", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "2", "created_at": "2025-08-10T23:04:51.582002", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. 
Highlight some key parts in your response with \"*\", like *highlighted text*."}], "input_metadata": {"row_id": "3629", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "predict-leave-military-water-system", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "1", "created_at": "2025-08-10T23:04:51.582073", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. 
*italic text*."}], "input_metadata": {"row_id": "3644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "appear-spiritual-current-level-force", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "2", "created_at": "2025-08-10T23:04:51.582141", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? 
Highlight at least one section of your answer in markdown, i.e *highlighted section*."}], "input_metadata": {"row_id": "2515", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "sit-able-difficult-unknown-picture", "cohort_id": "speak-emotional-significant-cold-pain", "rollout_id": "should-respond-soft-warm-song", "run_id": "refer-brave-hostile-radio-project", "ground_truth": "1", "created_at": "2025-08-10T23:04:51.582209", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 12684} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "may-see-dangerous-sweet-history", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "wonder-right-hard-thought-anger", "cohort_id": "describe-stay-better-ancient-people", "rollout_id": "take-repair-get-major-fact", "run_id": "build-wonder-dry-album-mother", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:05:25.129935", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 12809} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "may-see-dangerous-sweet-history", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "wonder-right-hard-thought-anger", "cohort_id": "describe-stay-better-ancient-people", "rollout_id": "observe-miss-kill-religious-community", "run_id": "disassemble-social-man-project-thought", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:05:25.129935", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 12809} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "may-see-dangerous-sweet-history", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "wonder-right-hard-thought-anger", "cohort_id": "describe-stay-better-ancient-people", "rollout_id": "exploit-political-blue-fresh-force", "run_id": "report-play-human-radio-plan", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:05:25.129935", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 12809} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "may-see-dangerous-sweet-history", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "wonder-right-hard-thought-anger", "cohort_id": "describe-stay-better-ancient-people", "rollout_id": "allow-economic-joy-area-website", "run_id": "treat-continue-current-child-wish", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:05:25.129935", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 12809} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "may-see-dangerous-sweet-history", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "wonder-right-hard-thought-anger", "cohort_id": "describe-stay-better-ancient-people", "rollout_id": "contain-dangerous-party-power-level", "run_id": "like-meet-suppose-similar-person", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:05:25.129935", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev10+g41c74b6.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 12809} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "touch-fix-physical-poor-group", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "stay-federal-work-anger-case", "cohort_id": "predict-run-spiritual-warm-hardware", "rollout_id": "break-grow-open-easy-friend", "run_id": "achieve-low-difficult-dead-friend", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:19:00.662062", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14903} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "touch-fix-physical-poor-group", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "stay-federal-work-anger-case", "cohort_id": "predict-run-spiritual-warm-hardware", "rollout_id": "recognize-federal-green-study-house", "run_id": "mean-construct-real-water-day", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:00.662062", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14903} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "touch-fix-physical-poor-group", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "stay-federal-work-anger-case", "cohort_id": "predict-run-spiritual-warm-hardware", "rollout_id": "explore-put-test-loud-person", "run_id": "fix-mention-safe-full-work", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:00.662062", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14903} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "touch-fix-physical-poor-group", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "stay-federal-work-anger-case", "cohort_id": "predict-run-spiritual-warm-hardware", "rollout_id": "believe-kill-similar-face-network", "run_id": "achieve-white-serious-place-number", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:00.662062", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14903} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "touch-fix-physical-poor-group", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "stay-federal-work-anger-case", "cohort_id": "predict-run-spiritual-warm-hardware", "rollout_id": "try-similar-cultural-radio-man", "run_id": "express-verify-whole-big-research", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:19:00.662062", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14903} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "experiment-only-rational-line-nature", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "help-verify-single-economic-information", "cohort_id": "live-replace-current-hardware-art", "rollout_id": "experiment-natural-federal-life-house", "run_id": "remember-safe-practical-night-computer", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:12.896506", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14965} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "experiment-only-rational-line-nature", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "help-verify-single-economic-information", "cohort_id": "live-replace-current-hardware-art", "rollout_id": "look-stay-become-different-dream", "run_id": "become-pay-general-small-hand", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:12.896506", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14965} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "experiment-only-rational-line-nature", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "help-verify-single-economic-information", "cohort_id": "live-replace-current-hardware-art", "rollout_id": "construct-begin-rich-federal-level", "run_id": "predict-add-recent-way-year", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:12.896506", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14965} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "experiment-only-rational-line-nature", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "help-verify-single-economic-information", "cohort_id": "live-replace-current-hardware-art", "rollout_id": "report-ready-hot-difficult-place", "run_id": "support-investigate-move-soft-website", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:12.896506", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14965} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "experiment-only-rational-line-nature", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "help-verify-single-economic-information", "cohort_id": "live-replace-current-hardware-art", "rollout_id": "express-call-sure-camera-kind", "run_id": "seem-cool-central-small-television", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:19:12.896506", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 14965} +{"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. 
The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}], "input_metadata": {"row_id": "1307", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "introduce-receive-stale-certain-part", "run_id": "look-sure-wet-issue-computer", "ground_truth": "15", "created_at": "2025-08-10T23:20:48.937938", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". 
For example: *3 years of experience*."}], "input_metadata": {"row_id": "1644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "assume-available-other-home-number", "run_id": "look-sure-wet-issue-computer", "ground_truth": "5", "created_at": "2025-08-10T23:20:48.938013", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. 
Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}], "input_metadata": {"row_id": "1646", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "must-apply-salty-shy-president", "run_id": "look-sure-wet-issue-computer", "ground_truth": "5", "created_at": "2025-08-10T23:20:48.938086", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. 
Highlight each section name using the this format:\n*section name*"}], "input_metadata": {"row_id": "167", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "feel-hostile-friendly-question-child", "run_id": "look-sure-wet-issue-computer", "ground_truth": "5", "created_at": "2025-08-10T23:20:48.938153", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. 
Please highlight at least 3 sections with markdown, i.e *highlighted section*."}], "input_metadata": {"row_id": "168", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "choose-keep-medical-moment-art", "run_id": "look-sure-wet-issue-computer", "ground_truth": "3", "created_at": "2025-08-10T23:20:48.938221", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. 
For example: *little me in the countryside*."}], "input_metadata": {"row_id": "1773", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "write-poorly-program-result-wish", "run_id": "look-sure-wet-issue-computer", "ground_truth": "1", "created_at": "2025-08-10T23:20:48.938293", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I wear a *painted smile* for every passer-by, \na *perfectly symmetrical* face that never cracks. \nBehind me, the *crumbling bricks* sigh in secret. 
\nWhat am I?"}], "input_metadata": {"row_id": "1886", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "explore-manage-practical-medical-guy", "run_id": "look-sure-wet-issue-computer", "ground_truth": "3", "created_at": "2025-08-10T23:20:48.938361", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. 
*highlighted section*."}], "input_metadata": {"row_id": "2253", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "wish-bring-cool-idea-night", "run_id": "look-sure-wet-issue-computer", "ground_truth": "3", "created_at": "2025-08-10T23:20:48.938432", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. 
Make sure to highlight at least 3 sections in your answer in markdown format."}], "input_metadata": {"row_id": "2381", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "say-solid-ready-hope-experience", "run_id": "look-sure-wet-issue-computer", "ground_truth": "3", "created_at": "2025-08-10T23:20:48.938503", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. 
*highlighted section*."}], "input_metadata": {"row_id": "2752", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "cut-take-long-government-kid", "run_id": "look-sure-wet-issue-computer", "ground_truth": "2", "created_at": "2025-08-10T23:20:48.938578", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. 
Use markdown to highlight at least 3 sections in your answer."}], "input_metadata": {"row_id": "2759", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "express-late-painting-person-goal", "run_id": "look-sure-wet-issue-computer", "ground_truth": "3", "created_at": "2025-08-10T23:20:48.938650", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. 
*highlighted section*."}], "input_metadata": {"row_id": "2790", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "mean-relate-personal-teacher-country", "run_id": "look-sure-wet-issue-computer", "ground_truth": "1", "created_at": "2025-08-10T23:20:48.938723", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. 
For example: *this is a highlighted text section*."}], "input_metadata": {"row_id": "2905", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "affect-avoid-set-important-woman", "run_id": "look-sure-wet-issue-computer", "ground_truth": "2", "created_at": "2025-08-10T23:20:48.938799", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. 
Highlight at least 3 sections in your answer in markdown format."}], "input_metadata": {"row_id": "3071", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "believe-wrong-system-pleasure-law", "run_id": "look-sure-wet-issue-computer", "ground_truth": "3", "created_at": "2025-08-10T23:20:48.938871", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. 
To indicate a italic word, wrap it with asterisk, like *italic*"}], "input_metadata": {"row_id": "3453", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "avoid-experiment-improve-stale-radio", "run_id": "look-sure-wet-issue-computer", "ground_truth": "5", "created_at": "2025-08-10T23:20:48.938946", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. 
Example: *This is a highlighted phrase*."}], "input_metadata": {"row_id": "3549", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "determine-dangerous-member-anger-power", "run_id": "look-sure-wet-issue-computer", "ground_truth": "2", "created_at": "2025-08-10T23:20:48.939019", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. 
Highlight some key parts in your response with \"*\", like *highlighted text*."}], "input_metadata": {"row_id": "3629", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "predict-cut-dangerous-head-president", "run_id": "look-sure-wet-issue-computer", "ground_truth": "1", "created_at": "2025-08-10T23:20:48.939091", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. 
*italic text*."}], "input_metadata": {"row_id": "3644", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "respond-slow-military-night-house", "run_id": "look-sure-wet-issue-computer", "ground_truth": "2", "created_at": "2025-08-10T23:20:48.939162", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? 
Highlight at least one section of your answer in markdown, i.e *highlighted section*."}], "input_metadata": {"row_id": "2515", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "investigate-young-government-science-program", "cohort_id": "turn-look-great-soft-state", "rollout_id": "cut-walk-empty-air-program", "run_id": "look-sure-wet-issue-computer", "ground_truth": "1", "created_at": "2025-08-10T23:20:48.939240", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5}, "pid": 15270} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "pay-visible-smooth-rich-air", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "introduce-say-add-big-love", "cohort_id": "arrange-test-dangerous-fullest-parent", "rollout_id": "select-disassemble-empty-lot-room", "run_id": "look-monitor-important-goal-project", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:05.466105", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15332} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "pay-visible-smooth-rich-air", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "introduce-say-add-big-love", "cohort_id": "arrange-test-dangerous-fullest-parent", "rollout_id": "install-true-general-idea-media", "run_id": "verify-national-morning-joy-part", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:05.466105", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15332} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "pay-visible-smooth-rich-air", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "introduce-say-add-big-love", "cohort_id": "arrange-test-dangerous-fullest-parent", "rollout_id": "find-shy-rich-side-number", "run_id": "disconnect-long-liquid-good-painting", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:21:05.466105", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15332} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "pay-visible-smooth-rich-air", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "introduce-say-add-big-love", "cohort_id": "arrange-test-dangerous-fullest-parent", "rollout_id": "consider-free-result-line-memory", "run_id": "analyze-support-give-wet-issue", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:05.466105", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15332} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "pay-visible-smooth-rich-air", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "introduce-say-add-big-love", "cohort_id": "arrange-test-dangerous-fullest-parent", "rollout_id": "invent-move-gaseous-rational-experience", "run_id": "seem-rich-nice-project-thing", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:21:05.466105", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15332} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "bring-make-political-open-eye", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "open-meet-emotional-dry-point", "cohort_id": "prefer-military-magazine-parent-picture", "rollout_id": "achieve-rough-free-wish-meeting", "run_id": "take-put-social-event-goal", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:31.527673", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15794} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "bring-make-political-open-eye", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "open-meet-emotional-dry-point", "cohort_id": "prefer-military-magazine-parent-picture", "rollout_id": "spend-stay-safe-fullest-hand", "run_id": "design-economic-low-letter-member", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:31.527673", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15794} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "bring-make-political-open-eye", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "open-meet-emotional-dry-point", "cohort_id": "prefer-military-magazine-parent-picture", "rollout_id": "happen-receive-white-quiet-album", "run_id": "install-modern-long-large-hardware", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:21:31.527673", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15794} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "bring-make-political-open-eye", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "open-meet-emotional-dry-point", "cohort_id": "prefer-military-magazine-parent-picture", "rollout_id": "review-theoretical-certain-school-others", "run_id": "indicate-contain-gentle-fear-media", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "stochastic=1.0", "metrics": {}}, "created_at": "2025-08-10T23:21:31.527673", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15794} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "bring-make-political-open-eye", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "open-meet-emotional-dry-point", "cohort_id": "prefer-military-magazine-parent-picture", "rollout_id": "choose-grow-touch-entire-guy", "run_id": "should-blue-unfriendly-spiritual-project", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:31.527673", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15794} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "happen-achieve-economic-joy-door", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "grow-think-small-room-adventure", "cohort_id": "receive-american-little-air-line", "rollout_id": "prove-can-imagine-foreign-person", "run_id": "start-spiritual-gaseous-pleasure-drawing", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:49.991596", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15918} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "happen-achieve-economic-joy-door", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "grow-think-small-room-adventure", "cohort_id": "receive-american-little-air-line", "rollout_id": "spend-imagine-likely-empty-power", "run_id": "design-small-certain-issue-guy", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:49.991596", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15918} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "happen-achieve-economic-joy-door", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "grow-think-small-room-adventure", "cohort_id": "receive-american-little-air-line", "rollout_id": "follow-shy-sure-wish-network", "run_id": "bring-invent-try-other-media", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:49.991596", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15918} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "happen-achieve-economic-joy-door", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "grow-think-small-room-adventure", "cohort_id": "receive-american-little-air-line", "rollout_id": "manage-stop-political-change-part", "run_id": "explain-disassemble-destroy-free-song", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:49.991596", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15918} +{"messages": [{"role": "user", "content": "Return HEADS or TAILS at random."}], "input_metadata": {"row_id": "happen-achieve-economic-joy-door", "completion_params": {"model": "dummy/local-model"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "invocation_id": "grow-think-small-room-adventure", "cohort_id": "receive-american-little-air-line", "rollout_id": "bring-monitor-single-concert-kid", "run_id": "arrive-soft-old-place-anger", "evaluation_result": {"score": 0.0, "is_score_valid": true, "reason": "stochastic=0.0", "metrics": {}}, "created_at": "2025-08-10T23:21:49.991596", "eval_metadata": {"name": "test_flaky_passes_sometimes", "description": "\n A deliberately flaky evaluation that only passes occasionally.\n\n With num_runs=5 and a success probability of ~0.3 per run, the aggregated mean\n will clear the threshold (0.8) only rarely. 
Uses the no-op rollout to avoid any\n actual model calls.\n ", "version": "v0.2.6-dev1.dev11+ga5c701e.dirty", "status": "finished", "num_runs": 5, "aggregation_method": "mean"}, "pid": 15918} diff --git a/vite-app/src/util/pivot.test.ts b/vite-app/src/util/pivot.test.ts index bea1a707..affcb764 100644 --- a/vite-app/src/util/pivot.test.ts +++ b/vite-app/src/util/pivot.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect } from 'vitest' import { computePivot, type Aggregator } from './pivot' import { readFileSync } from 'fs' -import flattenJson from './flatten-json' +import flattenJson, { type FlatJson } from './flatten-json' type Row = { region: string @@ -175,18 +175,23 @@ describe('computePivot', () => { expect(res.grandTotal).toBe(1) }) - it("markdown-highlighter-test", () => { + it("test_flaky_passes_sometimes", () => { // read logs.json from data/logs.json - const logsUrl = new URL('../../data/logs.json', import.meta.url) + const logsUrl = new URL('../../data/logs.jsonl', import.meta.url) const raw = readFileSync(logsUrl, 'utf-8') - const parsed = JSON.parse(raw) as { logs?: unknown[] } - const rows = (parsed.logs ?? 
[]).map((entry) => flattenJson(entry)) + const rows: FlatJson[] = [] + // iterate through each line and parse JSON + raw.split('\n').forEach((line) => { + if (line.trim() === '') return + const parsed = JSON.parse(line) + rows.push(flattenJson(parsed)) + }) const res = computePivot({ data: rows, - rowFields: ['$.eval_metadata.run_id'], - columnFields: [], - valueField: '$.eval_metadata.passed', + rowFields: ['$.eval_metadata.name', '$.cohort_id'], + columnFields: ['$.input_metadata.completion_params.model'], + valueField: '$.evaluation_result.score', aggregator: 'avg', }) From de3513aca9d351287c995bc9632518043a2f60d2 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 23:34:09 -0700 Subject: [PATCH 10/18] everything is rollout id basd --- vite-app/src/App.tsx | 4 +- vite-app/src/GlobalState.tsx | 32 ++++++++----- vite-app/src/components/Dashboard.tsx | 1 - vite-app/src/components/EvaluationRow.tsx | 50 ++++++++++++++++----- vite-app/src/components/EvaluationTable.tsx | 8 +--- 5 files changed, 64 insertions(+), 31 deletions(-) diff --git a/vite-app/src/App.tsx b/vite-app/src/App.tsx index e4f4912b..84173f5b 100644 --- a/vite-app/src/App.tsx +++ b/vite-app/src/App.tsx @@ -42,11 +42,11 @@ const App = observer(() => { return EvaluationRowSchema.parse(log); }); console.log("initialize_logs", rows); - state.setDataset(rows); + state.upsertRows(rows); } else if (update.type === "log") { const row: EvaluationRow = EvaluationRowSchema.parse(update.row); console.log("log", row); - state.setDataset([row]); + state.upsertRows([row]); } } catch (error) { console.error("Failed to parse WebSocket message:", error); diff --git a/vite-app/src/GlobalState.tsx b/vite-app/src/GlobalState.tsx index 4ffc962c..9669fe4c 100644 --- a/vite-app/src/GlobalState.tsx +++ b/vite-app/src/GlobalState.tsx @@ -3,35 +3,45 @@ import type { EvaluationRow } from "./types/eval-protocol"; export class GlobalState { isConnected: boolean = false; + // rollout_id -> EvaluationRow dataset: 
Record = {}; + // rollout_id -> expanded expandedRows: Record = {}; constructor() { makeAutoObservable(this); } - setDataset(dataset: EvaluationRow[]) { - // Create new dataset object to avoid multiple re-renders + upsertRows(dataset: EvaluationRow[]) { dataset.forEach((row) => { - this.dataset[row.input_metadata.row_id] = row; + if (!row.rollout_id) { + return; + } + this.dataset[row.rollout_id] = row; }); } - toggleRowExpansion(rowId: string) { - if (this.expandedRows[rowId]) { - this.expandedRows[rowId] = false; + toggleRowExpansion(rolloutId?: string) { + if (!rolloutId) { + return; + } + if (this.expandedRows[rolloutId]) { + this.expandedRows[rolloutId] = false; } else { - this.expandedRows[rowId] = true; + this.expandedRows[rolloutId] = true; } } - isRowExpanded(rowId: string): boolean { - return this.expandedRows[rowId]; + isRowExpanded(rolloutId?: string): boolean { + if (!rolloutId) { + return false; + } + return this.expandedRows[rolloutId]; } setAllRowsExpanded(expanded: boolean) { - Object.keys(this.dataset).forEach((rowId) => { - this.expandedRows[rowId] = expanded; + Object.keys(this.dataset).forEach((rolloutId) => { + this.expandedRows[rolloutId] = expanded; }); } diff --git a/vite-app/src/components/Dashboard.tsx b/vite-app/src/components/Dashboard.tsx index 33c69665..aa449d30 100644 --- a/vite-app/src/components/Dashboard.tsx +++ b/vite-app/src/components/Dashboard.tsx @@ -56,7 +56,6 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { const flattened = useMemo(() => { const flattenedDataset = state.sortedDataset.map((row) => flattenJson(row)); - console.log(flattenedDataset); return flattenedDataset; }, [state.sortedDataset]); diff --git a/vite-app/src/components/EvaluationRow.tsx b/vite-app/src/components/EvaluationRow.tsx index ffd9a8dc..888a0a39 100644 --- a/vite-app/src/components/EvaluationRow.tsx +++ b/vite-app/src/components/EvaluationRow.tsx @@ -6,8 +6,11 @@ import StatusIndicator from "./StatusIndicator"; import { state } 
from "../App"; // Small, focused components following "dereference values late" principle -const ExpandIcon = observer(({ rowId }: { rowId: string }) => { - const isExpanded = state.isRowExpanded(rowId); +const ExpandIcon = observer(({ rolloutId }: { rolloutId?: string }) => { + if (!rolloutId) { + throw new Error("Rollout ID is required"); + } + const isExpanded = state.isRowExpanded(rolloutId); return (
( - {rowId} -)); +const RolloutId = observer( + ({ rolloutId: rolloutId }: { rolloutId?: string }) => { + if (!rolloutId) { + return null; + } + return ( + + {rolloutId} + + ); + } +); const RowModel = observer(({ model }: { model: string | undefined }) => ( {model || "N/A"} @@ -116,6 +128,18 @@ const InputMetadataSection = observer( ) ); +const IdSection = observer(({ data }: { data: EvaluationRowType }) => ( + +)); + const ToolsSection = observer( ({ data }: { data: EvaluationRowType["tools"] }) => ( @@ -130,6 +154,7 @@ const ChatInterfaceSection = observer( const ExpandedContent = observer( ({ + row, messages, eval_metadata, evaluation_result, @@ -138,6 +163,7 @@ const ExpandedContent = observer( input_metadata, tools, }: { + row: EvaluationRowType; messages: EvaluationRowType["messages"]; eval_metadata: EvaluationRowType["eval_metadata"]; evaluation_result: EvaluationRowType["evaluation_result"]; @@ -157,6 +183,7 @@ const ExpandedContent = observer(
+ @@ -169,10 +196,10 @@ const ExpandedContent = observer( export const EvaluationRow = observer( ({ row }: { row: EvaluationRowType; index: number }) => { - const rowId = row.input_metadata.row_id; - const isExpanded = state.isRowExpanded(rowId); + const rolloutId = row.rollout_id; + const isExpanded = state.isRowExpanded(rolloutId); - const toggleExpanded = () => state.toggleRowExpansion(rowId); + const toggleExpanded = () => state.toggleRowExpansion(rolloutId); return ( <> @@ -183,7 +210,7 @@ export const EvaluationRow = observer( > {/* Expand/Collapse Icon */} - + {/* Name */} @@ -199,9 +226,9 @@ export const EvaluationRow = observer( /> - {/* Row ID */} + {/* Rollout ID */} - + {/* Model */} @@ -225,6 +252,7 @@ export const EvaluationRow = observer( { return ( {state.sortedDataset.map((row, index) => ( - + ))} ); @@ -34,7 +30,7 @@ export const EvaluationTable = observer(() => { Status - Row ID + Rollout ID Model From 97afc9d0ca6e6f770c8171244c820c982ce17314 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Sun, 10 Aug 2025 23:44:23 -0700 Subject: [PATCH 11/18] route based on path to table or pivot --- vite-app/src/App.tsx | 17 ++- vite-app/src/components/Dashboard.tsx | 173 ++++++++++++++++---------- 2 files changed, 120 insertions(+), 70 deletions(-) diff --git a/vite-app/src/App.tsx b/vite-app/src/App.tsx index 84173f5b..a5e86e63 100644 --- a/vite-app/src/App.tsx +++ b/vite-app/src/App.tsx @@ -1,4 +1,5 @@ import { useEffect, useRef } from "react"; +import { Routes, Route, Navigate } from "react-router-dom"; import { observer } from "mobx-react"; import Dashboard from "./components/Dashboard"; import Button from "./components/Button"; @@ -15,7 +16,9 @@ const MAX_RECONNECT_ATTEMPTS = 5; const App = observer(() => { const wsRef = useRef(null); - const reconnectTimeoutRef = useRef(null); + const reconnectTimeoutRef = useRef | null>( + null + ); const reconnectAttemptsRef = useRef(0); const connectWebSocket = () => { @@ -145,7 +148,17 @@ const App = observer(() => {
- + + } /> + } + /> + } + /> +
); diff --git a/vite-app/src/components/Dashboard.tsx b/vite-app/src/components/Dashboard.tsx index aa449d30..1375657d 100644 --- a/vite-app/src/components/Dashboard.tsx +++ b/vite-app/src/components/Dashboard.tsx @@ -1,5 +1,6 @@ import { observer } from "mobx-react"; -import { useMemo, useState } from "react"; +import { useMemo, useState, useEffect } from "react"; +import { useLocation, useNavigate } from "react-router-dom"; import { state } from "../App"; import Button from "./Button"; import { EvaluationTable } from "./EvaluationTable"; @@ -52,7 +53,19 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { const expandAll = () => state.setAllRowsExpanded(true); const collapseAll = () => state.setAllRowsExpanded(false); - const [activeTab, setActiveTab] = useState<"table" | "pivot">("table"); + const location = useLocation(); + const navigate = useNavigate(); + + const deriveTabFromPath = (path: string): "table" | "pivot" => + path.endsWith("/pivot") ? "pivot" : "table"; + + const [activeTab, setActiveTab] = useState<"table" | "pivot">( + deriveTabFromPath(location.pathname) + ); + + useEffect(() => { + setActiveTab(deriveTabFromPath(location.pathname)); + }, [location.pathname]); const flattened = useMemo(() => { const flattenedDataset = state.sortedDataset.map((row) => flattenJson(row)); @@ -61,80 +74,104 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { return (
- {/* Summary Stats */} + {/* Summary */}
-
-

- Dataset Summary -

- {state.totalCount > 0 && ( -
- - -
- )} -
-
-
- Total Rows:{" "} - {state.totalCount} -
- {activeTab === "table" && state.totalCount > 0 && ( -
- - -
- )} +

+ Dataset Summary +

+
+ Total Rows:{" "} + {state.totalCount}
- {/* Show empty state or main table */} + {/* Content Area */} {state.totalCount === 0 ? ( - ) : activeTab === "table" ? ( - ) : ( -
-
- Showing pivot of flattened rows (JSONPath keys). Defaults: rows by - eval name and status; columns by model; values average score. +
+ {/* Tabs + contextual actions */} +
+
+
+ + +
+ {activeTab === "table" && ( +
+ + +
+ )} +
+
+ + {/* Tab content */} +
+ {activeTab === "table" ? ( + + ) : ( +
+
+ Showing pivot of flattened rows (JSONPath keys). Defaults: + rows by eval name and status; columns by model; values average + score. +
+ +
+ )}
-
)}
From c092490840d66e146620f9c21d618e95adb46264 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:06:32 -0700 Subject: [PATCH 12/18] Add TabButton component for improved tab navigation in Dashboard --- vite-app/src/components/Dashboard.tsx | 45 +++++++++------------------ vite-app/src/components/TabButton.tsx | 34 ++++++++++++++++++++ 2 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 vite-app/src/components/TabButton.tsx diff --git a/vite-app/src/components/Dashboard.tsx b/vite-app/src/components/Dashboard.tsx index 1375657d..59868622 100644 --- a/vite-app/src/components/Dashboard.tsx +++ b/vite-app/src/components/Dashboard.tsx @@ -5,6 +5,7 @@ import { state } from "../App"; import Button from "./Button"; import { EvaluationTable } from "./EvaluationTable"; import PivotTable from "./PivotTable"; +import TabButton from "./TabButton"; import flattenJson from "../util/flatten-json"; interface DashboardProps { @@ -91,46 +92,30 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { ) : (
{/* Tabs + contextual actions */} -
-
-
- - + title="View pivot" + />
{activeTab === "table" && ( -
+
diff --git a/vite-app/src/components/TabButton.tsx b/vite-app/src/components/TabButton.tsx new file mode 100644 index 00000000..446704ba --- /dev/null +++ b/vite-app/src/components/TabButton.tsx @@ -0,0 +1,34 @@ +import React from "react"; + +interface TabButtonProps { + label: string; + isActive: boolean; + onClick: () => void; + title?: string; +} + +const TabButton: React.FC = ({ + label, + isActive, + onClick, + title, +}) => { + return ( + + ); +}; + +export default TabButton; From d522d3efc458be7096c4547d6cebe06697e58676 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:08:50 -0700 Subject: [PATCH 13/18] Implement pagination in EvaluationTable component - Enhanced TableBody to support pagination by slicing the dataset based on current page and page size. - Added pagination controls in EvaluationTable for navigating between pages and adjusting page size. - Reset current page to 1 when the dataset changes to ensure consistent user experience. --- vite-app/src/components/EvaluationTable.tsx | 113 ++++++++++++++++++-- 1 file changed, 103 insertions(+), 10 deletions(-) diff --git a/vite-app/src/components/EvaluationTable.tsx b/vite-app/src/components/EvaluationTable.tsx index b6316a45..03f20648 100644 --- a/vite-app/src/components/EvaluationTable.tsx +++ b/vite-app/src/components/EvaluationTable.tsx @@ -1,21 +1,114 @@ import { observer } from "mobx-react"; +import { useState, useEffect } from "react"; import { state } from "../App"; import { EvaluationRow } from "./EvaluationRow"; +import Button from "./Button"; -const TableBody = observer(() => { - return ( - - {state.sortedDataset.map((row, index) => ( - - ))} - - ); -}); +const TableBody = observer( + ({ currentPage, pageSize }: { currentPage: number; pageSize: number }) => { + const startIndex = (currentPage - 1) * pageSize; + const endIndex = startIndex + pageSize; + const paginatedData = state.sortedDataset.slice(startIndex, endIndex); + + return ( + + {paginatedData.map((row, index) => ( 
+ + ))} + + ); + } +); // Dedicated component for rendering the list - following MobX best practices export const EvaluationTable = observer(() => { + const [currentPage, setCurrentPage] = useState(1); + const [pageSize, setPageSize] = useState(50); + + const totalRows = state.sortedDataset.length; + const totalPages = Math.ceil(totalRows / pageSize); + const startRow = (currentPage - 1) * pageSize + 1; + const endRow = Math.min(currentPage * pageSize, totalRows); + + const handlePageChange = (page: number) => { + setCurrentPage(Math.max(1, Math.min(page, totalPages))); + }; + + const handlePageSizeChange = (newPageSize: number) => { + setPageSize(newPageSize); + setCurrentPage(1); // Reset to first page when changing page size + }; + + // Reset to first page when dataset changes + useEffect(() => { + setCurrentPage(1); + }, [totalRows]); + return (
+ {/* Pagination Controls */} +
+
+
+ Showing {startRow}-{endRow} of {totalRows} rows +
+
+ + +
+
+
+ + + + Page {currentPage} of {totalPages} + + + +
+
+ {/* Table Header */} @@ -45,7 +138,7 @@ export const EvaluationTable = observer(() => { {/* Table Body */} - +
); From b793de72f3e7d963886471ac46cdb54d5ccf35b9 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:11:23 -0700 Subject: [PATCH 14/18] Refactor page size selection in EvaluationTable component - Replaced the native select element with a custom Select component for improved styling and consistency. - Updated the page size selection logic to maintain functionality while enhancing the user interface. --- vite-app/src/components/EvaluationTable.tsx | 7 +++-- vite-app/src/components/Select.tsx | 34 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 vite-app/src/components/Select.tsx diff --git a/vite-app/src/components/EvaluationTable.tsx b/vite-app/src/components/EvaluationTable.tsx index 03f20648..db76dc80 100644 --- a/vite-app/src/components/EvaluationTable.tsx +++ b/vite-app/src/components/EvaluationTable.tsx @@ -3,6 +3,7 @@ import { useState, useEffect } from "react"; import { state } from "../App"; import { EvaluationRow } from "./EvaluationRow"; import Button from "./Button"; +import Select from "./Select"; const TableBody = observer( ({ currentPage, pageSize }: { currentPage: number; pageSize: number }) => { @@ -58,16 +59,16 @@ export const EvaluationTable = observer(() => {
- +
diff --git a/vite-app/src/components/Select.tsx b/vite-app/src/components/Select.tsx new file mode 100644 index 00000000..ea6a827e --- /dev/null +++ b/vite-app/src/components/Select.tsx @@ -0,0 +1,34 @@ +import React from "react"; + +interface SelectProps + extends Omit, "size"> { + size?: "sm" | "md"; + className?: string; +} + +const Select = React.forwardRef( + ({ className = "", size = "sm", children, ...props }, ref) => { + const baseClasses = + "border text-xs font-medium focus:outline-none bg-white text-gray-700 border-gray-300 hover:border-gray-400 focus:border-gray-500"; + + const sizeClasses = { + sm: "px-2 py-0.5", + md: "px-3 py-1", + }; + + return ( + + ); + } +); + +Select.displayName = "Select"; + +export default Select; From 4d6c596898ca0a60111d2a21f91ea7e7f46c9c81 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:21:33 -0700 Subject: [PATCH 15/18] Refactor table components for consistency and improved styling - Introduced a new TableContainer component to standardize table layout and styling across the application. - Replaced traditional HTML table elements in EvaluationRow, EvaluationTable, and PivotTable with the new TableContainer, TableHeader, TableCell, TableRow, and TableRowInteractive components for better maintainability and visual consistency. - Enhanced the table structure to support additional styling options and improve responsiveness. 
--- vite-app/src/components/EvaluationRow.tsx | 38 ++- vite-app/src/components/EvaluationTable.tsx | 37 +-- vite-app/src/components/PivotTable.tsx | 85 +++---- vite-app/src/components/TableContainer.tsx | 259 ++++++++++++++++++++ 4 files changed, 322 insertions(+), 97 deletions(-) create mode 100644 vite-app/src/components/TableContainer.tsx diff --git a/vite-app/src/components/EvaluationRow.tsx b/vite-app/src/components/EvaluationRow.tsx index 888a0a39..f0200101 100644 --- a/vite-app/src/components/EvaluationRow.tsx +++ b/vite-app/src/components/EvaluationRow.tsx @@ -4,6 +4,7 @@ import { ChatInterface } from "./ChatInterface"; import { MetadataSection } from "./MetadataSection"; import StatusIndicator from "./StatusIndicator"; import { state } from "../App"; +import { TableCell, TableRowInteractive } from "./TableContainer"; // Small, focused components following "dereference values late" principle const ExpandIcon = observer(({ rolloutId }: { rolloutId?: string }) => { @@ -204,48 +205,45 @@ export const EvaluationRow = observer( return ( <> {/* Main Table Row */} - + {/* Expand/Collapse Icon */} - + - + {/* Name */} - + - + {/* Status */} - + - + {/* Rollout ID */} - + - + {/* Model */} - - - + + + {/* Score */} - + - + {/* Created */} - + - - + + {/* Expanded Content Row */} {isExpanded && ( diff --git a/vite-app/src/components/EvaluationTable.tsx b/vite-app/src/components/EvaluationTable.tsx index db76dc80..39da13e7 100644 --- a/vite-app/src/components/EvaluationTable.tsx +++ b/vite-app/src/components/EvaluationTable.tsx @@ -4,6 +4,7 @@ import { state } from "../App"; import { EvaluationRow } from "./EvaluationRow"; import Button from "./Button"; import Select from "./Select"; +import TableContainer, { TableHeader, TableHead } from "./TableContainer"; const TableBody = observer( ({ currentPage, pageSize }: { currentPage: number; pageSize: number }) => { @@ -50,7 +51,7 @@ export const EvaluationTable = observer(() => { }, [totalRows]); return ( -
+ {/* Pagination Controls */}
@@ -112,35 +113,21 @@ export const EvaluationTable = observer(() => { {/* Table Header */} - + - - - - - - - +   + Name + Status + Rollout ID + Model + Score + Created - + {/* Table Body */}
- {/* Expand/Collapse column */} - - Name - - Status - - Rollout ID - - Model - - Score - - Created -
-
+ ); }); diff --git a/vite-app/src/components/PivotTable.tsx b/vite-app/src/components/PivotTable.tsx index ffb6612a..a5b2e9e9 100644 --- a/vite-app/src/components/PivotTable.tsx +++ b/vite-app/src/components/PivotTable.tsx @@ -1,5 +1,12 @@ import React from "react"; import { computePivot } from "../util/pivot"; +import TableContainer, { + TableHeader, + TableCell, + TableHead, + TableBody, + TableRow, +} from "./TableContainer"; /** * Props for PivotTable. @@ -93,50 +100,33 @@ export function PivotTable>({ }); return ( -
+ - + {/* Row header labels */} {rowFields.map((f) => ( - + {String(f)} ))} {/* Column headers (flattened) */} {colKeyTuples.map((tuple, idx) => ( - + ))} - {showRowTotals && ( - - )} + {showRowTotals && Total} - - + + {rowKeyTuples.map((rTuple, rIdx) => { const rKey = toKey(rTuple); return ( - + {/* Row header cells */} {rTuple.map((value, i) => ( - + ))} {/* Data cells */} {colKeyTuples.map((cTuple, cIdx) => { @@ -144,55 +134,46 @@ export function PivotTable>({ const cell = cells[rKey]?.[cKey]; const content = cell ? formatter(cell.value) : emptyValue; return ( - + ); })} {/* Row total */} {showRowTotals && ( - + )} - + ); })} {showColumnTotals && ( - + {/* Total label spanning row header columns */} - + {/* Column totals */} {colKeyTuples.map((cTuple, cIdx) => { const cKey = toKey(cTuple); return ( - + ); })} {/* Grand total */} {showRowTotals && ( - + )} - + )} - +
- {String(f)} - + {tuple.map((v) => String(v ?? "")).join(" / ")} - - Total -
+ {String(value ?? "")} - + {content} - + {formatter(rowTotals[rKey] ?? 0)} -
+ Total - + {formatter(colTotals[cKey] ?? 0)} - + {formatter(grandTotal)} -
-
+ ); } diff --git a/vite-app/src/components/TableContainer.tsx b/vite-app/src/components/TableContainer.tsx new file mode 100644 index 00000000..d9ba7bd5 --- /dev/null +++ b/vite-app/src/components/TableContainer.tsx @@ -0,0 +1,259 @@ +import React from "react"; + +export interface TableContainerProps { + /** + * The table content to be wrapped + */ + children: React.ReactNode; + /** + * Optional extra class names applied to the wrapping container + */ + className?: string; +} + +export interface TableHeaderProps { + /** + * The header content + */ + children: React.ReactNode; + /** + * Optional extra class names + */ + className?: string; + /** + * Text alignment. Default: "left" + */ + align?: "left" | "center" | "right"; + /** + * Whether to apply whitespace-nowrap. Default: false + */ + nowrap?: boolean; +} + +export interface TableCellProps { + /** + * The cell content + */ + children: React.ReactNode; + /** + * Optional extra class names + */ + className?: string; + /** + * Text alignment. Default: "left" + */ + align?: "left" | "center" | "right"; + /** + * Whether to apply whitespace-nowrap. Default: false + */ + nowrap?: boolean; + /** + * Whether to apply font-medium styling. Default: false + */ + medium?: boolean; + /** + * Whether to apply font-semibold styling. Default: false + */ + semibold?: boolean; + /** + * Whether to apply text-xs styling. Default: false + */ + small?: boolean; + /** + * Number of columns this cell should span + */ + colSpan?: number; +} + +export interface TableRowProps { + /** + * The row content + */ + children: React.ReactNode; + /** + * Optional extra class names + */ + className?: string; + /** + * Whether to apply gray background styling. 
Default: false + */ + gray?: boolean; +} + +export interface TableRowInteractiveProps { + /** + * The row content + */ + children: React.ReactNode; + /** + * Optional extra class names + */ + className?: string; + /** + * Click handler for the row + */ + onClick?: () => void; + /** + * Whether the row is interactive (hoverable, clickable). Default: true + */ + interactive?: boolean; +} + +export interface TableBodyProps { + /** + * The body content + */ + children: React.ReactNode; + /** + * Optional extra class names + */ + className?: string; +} + +export interface TableHeadProps { + /** + * The head content + */ + children: React.ReactNode; + /** + * Optional extra class names + */ + className?: string; +} + +/** + * Wrapper component that provides consistent table styling across the application. + * Applies white background, gray borders, and horizontal scroll for overflow. + */ +export function TableContainer({ + children, + className = "", +}: TableContainerProps) { + return ( +
+ {children} +
+ ); +} + +/** + * Table head component with consistent styling + */ +export function TableHead({ children, className = "" }: TableHeadProps) { + return ( + + {children} + + ); +} + +/** + * Table body component with consistent styling + */ +export function TableBody({ children, className = "" }: TableBodyProps) { + return ( + + {children} + + ); +} + +/** + * Table header component with consistent styling + */ +export function TableHeader({ + children, + className = "", + align = "left", + nowrap = false, +}: TableHeaderProps) { + const alignClasses = { + left: "text-left", + center: "text-center", + right: "text-right", + }; + + return ( + + {children} + + ); +} + +/** + * Table row component with consistent styling + */ +export function TableRow({ + children, + className = "", + gray = false, +}: TableRowProps) { + return ( + {children} + ); +} + +/** + * Interactive table row component with hover effects and click handling + */ +export function TableRowInteractive({ + children, + className = "", + onClick, + interactive = true, +}: TableRowInteractiveProps) { + const interactiveClasses = interactive + ? 
"hover:bg-gray-50 cursor-pointer" + : ""; + + return ( + + {children} + + ); +} + +/** + * Table cell component with consistent styling + */ +export function TableCell({ + children, + className = "", + align = "left", + nowrap = false, + medium = false, + semibold = false, + colSpan, +}: TableCellProps) { + const alignClasses = { + left: "text-left", + center: "text-center", + right: "text-right", + }; + + const fontClasses = []; + if (medium) fontClasses.push("font-medium"); + if (semibold) fontClasses.push("font-semibold"); + + return ( + + {children} + + ); +} + +export default TableContainer; From 490dea8eeb501346334435803bd0dde8d8e0641c Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:22:41 -0700 Subject: [PATCH 16/18] Update EvaluationRow component to use RowModel for model display --- vite-app/src/components/EvaluationRow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vite-app/src/components/EvaluationRow.tsx b/vite-app/src/components/EvaluationRow.tsx index f0200101..05e35f82 100644 --- a/vite-app/src/components/EvaluationRow.tsx +++ b/vite-app/src/components/EvaluationRow.tsx @@ -231,7 +231,7 @@ export const EvaluationRow = observer( {/* Model */} - + {/* Score */} From ce88979450634333aa59d292d771b06acf1ee74a Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:34:49 -0700 Subject: [PATCH 17/18] TODO: actually implement configurable pivot --- vite-app/src/components/EvaluationRow.tsx | 2 +- vite-app/src/components/EvaluationTable.tsx | 53 ++++++++++++--------- vite-app/src/components/TableContainer.tsx | 2 +- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/vite-app/src/components/EvaluationRow.tsx b/vite-app/src/components/EvaluationRow.tsx index 05e35f82..255fd1fc 100644 --- a/vite-app/src/components/EvaluationRow.tsx +++ b/vite-app/src/components/EvaluationRow.tsx @@ -173,7 +173,7 @@ const ExpandedContent = observer( input_metadata: EvaluationRowType["input_metadata"]; tools: 
EvaluationRowType["tools"]; }) => ( -
+
{/* Left Column - Chat Interface */}
diff --git a/vite-app/src/components/EvaluationTable.tsx b/vite-app/src/components/EvaluationTable.tsx index 39da13e7..f45f16c5 100644 --- a/vite-app/src/components/EvaluationTable.tsx +++ b/vite-app/src/components/EvaluationTable.tsx @@ -4,7 +4,11 @@ import { state } from "../App"; import { EvaluationRow } from "./EvaluationRow"; import Button from "./Button"; import Select from "./Select"; -import TableContainer, { TableHeader, TableHead } from "./TableContainer"; +import { + TableHeader, + TableHead, + TableBody as TableBodyBase, +} from "./TableContainer"; const TableBody = observer( ({ currentPage, pageSize }: { currentPage: number; pageSize: number }) => { @@ -13,7 +17,7 @@ const TableBody = observer( const paginatedData = state.sortedDataset.slice(startIndex, endIndex); return ( - + {paginatedData.map((row, index) => ( ))} - + ); } ); @@ -29,7 +33,7 @@ const TableBody = observer( // Dedicated component for rendering the list - following MobX best practices export const EvaluationTable = observer(() => { const [currentPage, setCurrentPage] = useState(1); - const [pageSize, setPageSize] = useState(50); + const [pageSize, setPageSize] = useState(25); const totalRows = state.sortedDataset.length; const totalPages = Math.ceil(totalRows / pageSize); @@ -51,8 +55,8 @@ export const EvaluationTable = observer(() => { }, [totalRows]); return ( - - {/* Pagination Controls */} +
+ {/* Pagination Controls - Fixed outside scrollable area */}
@@ -111,23 +115,26 @@ export const EvaluationTable = observer(() => {
- - {/* Table Header */} - - -   - Name - Status - Rollout ID - Model - Score - Created - - + {/* Table Container - Only this area scrolls */} +
+
+ {/* Table Header */} + + +   + Name + Status + Rollout ID + Model + Score + Created + + - {/* Table Body */} - -
- + {/* Table Body */} + + +
+
); }); diff --git a/vite-app/src/components/TableContainer.tsx b/vite-app/src/components/TableContainer.tsx index d9ba7bd5..c2df519c 100644 --- a/vite-app/src/components/TableContainer.tsx +++ b/vite-app/src/components/TableContainer.tsx @@ -214,7 +214,7 @@ export function TableRowInteractive({ return ( {children} From c3c51d224885eaf88220e99e9a50659ba7493312 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 11 Aug 2025 00:37:01 -0700 Subject: [PATCH 18/18] vite build --- vite-app/dist/assets/index-D9iVTBbF.css | 1 - vite-app/dist/assets/index-DWfIf2rx.css | 1 + vite-app/dist/assets/index-D_nkLTVA.js | 88 ++++++++++++++++++++++ vite-app/dist/assets/index-D_nkLTVA.js.map | 1 + vite-app/dist/assets/index-DiF_B1x_.js | 88 ---------------------- vite-app/dist/assets/index-DiF_B1x_.js.map | 1 - vite-app/dist/index.html | 4 +- 7 files changed, 92 insertions(+), 92 deletions(-) delete mode 100644 vite-app/dist/assets/index-D9iVTBbF.css create mode 100644 vite-app/dist/assets/index-DWfIf2rx.css create mode 100644 vite-app/dist/assets/index-D_nkLTVA.js create mode 100644 vite-app/dist/assets/index-D_nkLTVA.js.map delete mode 100644 vite-app/dist/assets/index-DiF_B1x_.js delete mode 100644 vite-app/dist/assets/index-DiF_B1x_.js.map diff --git a/vite-app/dist/assets/index-D9iVTBbF.css b/vite-app/dist/assets/index-D9iVTBbF.css deleted file mode 100644 index 1bfd4128..00000000 --- a/vite-app/dist/assets/index-D9iVTBbF.css +++ /dev/null @@ -1 +0,0 @@ -/*! 
tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-500:oklch(63.7% .237 25.331);--color-red-700:oklch(50.5% .213 27.518);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-500:oklch(79.5% .184 86.047);--color-yellow-700:oklch(55.4% .135 66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% .018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 
247.839);--color-gray-100:oklch(96.7% .003 264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier 
New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 
50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.visible{visibility:visible}.absolute{position:absolute}.relative{position:relative}.static{position:static}.top-0{top:calc(var(--spacing)*0)}.right-0{right:calc(var(--spacing)*0)}.left-0{left:calc(var(--spacing)*0)}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media (min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media (min-width:80rem){.container{max-width:80rem}}@media 
(min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.flex{display:flex}.hidden{display:none}.inline-flex{display:inline-flex}.table{display:table}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-6{height:calc(var(--spacing)*6)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-7xl{max-width:var(--container-7xl)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-max{min-width:max-content}.flex-shrink-0{flex-shrink:0}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.items-center{align-items:center}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1\.5{gap:calc(var(--spacing)*1.5)}.gap-2{gap:calc(var(--spacing)*2)}.gap-3{gap:calc(var(--spacing)*3)}:where(.space-y-1>:not(:last-child)){--tw-space-y-reverse:0;margin-bloc
k-start:calc(calc(var(--spacing)*1)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*1)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.border{border-style:var(--tw-border-style);border-width:1px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-green-200{border-color:var(--color-green-200)}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-transparent{border-top-color:#0000}.bg-blue-50{background-color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-300{background-color:var(--color-gray-300)}.bg-gray-500{background
-color:var(--color-gray-500)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-500{background-color:var(--color-red-500)}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.bg-yellow-500{background-color:var(--color-yellow-500)}.p-0{padding:calc(var(--spacing)*0)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{padding:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.pt-1{padding-top:calc(var(--spacing)*1)}.text-center{text-align:center}.text-left{text-align:left}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.break-words{overflow-wrap:break-word}.break-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-700
{color:var(--color-green-700)}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-700{color:var(--color-red-700)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.underline{text-decoration-line:underline}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webk
it-user-select:none;user-select:none}@media (hover:hover){.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:no-underline:hover{text-decoration-line:none}}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property --tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}} diff --git a/vite-app/dist/assets/index-DWfIf2rx.css b/vite-app/dist/assets/index-DWfIf2rx.css new 
file mode 100644 index 00000000..10fe5c29 --- /dev/null +++ b/vite-app/dist/assets/index-DWfIf2rx.css @@ -0,0 +1 @@ +/*! tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-shadow:0 0 #0000;--tw-shadow-color:initial;--tw-shadow-alpha:100%;--tw-inset-shadow:0 0 #0000;--tw-inset-shadow-color:initial;--tw-inset-shadow-alpha:100%;--tw-ring-color:initial;--tw-ring-shadow:0 0 #0000;--tw-inset-ring-color:initial;--tw-inset-ring-shadow:0 0 #0000;--tw-ring-inset:initial;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-offset-shadow:0 0 #0000;--tw-outline-style:solid;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-500:oklch(63.7% .237 25.331);--color-red-700:oklch(50.5% .213 27.518);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-500:oklch(79.5% .184 86.047);--color-yellow-700:oklch(55.4% .135 66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% 
.018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 247.839);--color-gray-100:oklch(96.7% .003 264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color 
Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in 
lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.collapse{visibility:collapse}.invisible{visibility:hidden}.visible{visibility:visible}.absolute{position:absolute}.fixed{position:fixed}.relative{position:relative}.static{position:static}.sticky{position:sticky}.top-0{top:calc(var(--spacing)*0)}.right-0{right:calc(var(--spacing)*0)}.left-0{left:calc(var(--spacing)*0)}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media (min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media 
(min-width:80rem){.container{max-width:80rem}}@media (min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.flex{display:flex}.hidden{display:none}.inline-flex{display:inline-flex}.table{display:table}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-6{height:calc(var(--spacing)*6)}.h-8{height:calc(var(--spacing)*8)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-7xl{max-width:var(--container-7xl)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-max{min-width:max-content}.flex-shrink-0{flex-shrink:0}.shrink{flex-shrink:1}.grow{flex-grow:1}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.items-center{align-items:center}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1{gap:calc(var(--spacing)*1)}.gap-1\.5{gap:ca
lc(var(--spacing)*1.5)}.gap-2{gap:calc(var(--spacing)*2)}.gap-3{gap:calc(var(--spacing)*3)}.gap-4{gap:calc(var(--spacing)*4)}:where(.space-y-1>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*1)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*1)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - 
var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.border{border-style:var(--tw-border-style);border-width:1px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-b-2{border-bottom-style:var(--tw-border-style);border-bottom-width:2px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-gray-900{border-color:var(--color-gray-900)}.border-green-200{border-color:var(--color-green-200)}.border-transparent{border-color:#0000}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-transparent{border-top-color:#0000}.bg-blue-50{background-color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-300{background-color:var(--color-gray-300)}.bg-gray-500{background-color:var(--color-gray-500)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-500{background-color:var(--color-red-500)}.bg-transparent{background-color:#0000}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.bg-yellow-500{background-color:var(--color-yellow-500)}.p-0{padding:calc(var(--spacing)*0)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{paddi
ng:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.pt-1{padding-top:calc(var(--spacing)*1)}.pt-2{padding-top:calc(var(--spacing)*2)}.pb-2{padding-bottom:calc(var(--spacing)*2)}.text-center{text-align:center}.text-left{text-align:left}.text-right{text-align:right}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.break-words{overflow-wrap:break-word}.break-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-700{color:var(--color-green-700)}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-700{color:var(--color-red-700)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.underline{text-decoration-line:underline}.shadow{--t
w-shadow:0 1px 3px 0 var(--tw-shadow-color,#0000001a),0 1px 2px -1px var(--tw-shadow-color,#0000001a);box-shadow:var(--tw-inset-shadow),var(--tw-inset-ring-shadow),var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow)}.outline{outline-style:var(--tw-outline-style);outline-width:1px}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webkit-user-select:none;user-select:none}@media 
(hover:hover){.hover\:border-gray-400:hover{border-color:var(--color-gray-400)}.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-100:hover{background-color:var(--color-gray-100)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:text-gray-900:hover{color:var(--color-gray-900)}.hover\:no-underline:hover{text-decoration-line:none}}.focus\:border-gray-500:focus{border-color:var(--color-gray-500)}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-shadow-color{syntax:"*";inherits:false}@property --tw-shadow-alpha{syntax:"";inherits:false;initial-value:100%}@property --tw-inset-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-inset-shadow-color{syntax:"*";inherits:false}@property --tw-inset-shadow-alpha{syntax:"";inherits:false;initial-value:100%}@property --tw-ring-color{syntax:"*";inherits:false}@property --tw-ring-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-inset-ring-color{syntax:"*";inherits:false}@property --tw-inset-ring-shadow{syntax:"*";inherits:false;initial-value:0 0 
#0000}@property --tw-ring-inset{syntax:"*";inherits:false}@property --tw-ring-offset-width{syntax:"";inherits:false;initial-value:0}@property --tw-ring-offset-color{syntax:"*";inherits:false;initial-value:#fff}@property --tw-ring-offset-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-outline-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property --tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}} diff --git a/vite-app/dist/assets/index-D_nkLTVA.js b/vite-app/dist/assets/index-D_nkLTVA.js new file mode 100644 index 00000000..e813086a --- /dev/null +++ b/vite-app/dist/assets/index-D_nkLTVA.js @@ -0,0 +1,88 @@ +(function(){const l=document.createElement("link").relList;if(l&&l.supports&&l.supports("modulepreload"))return;for(const o of document.querySelectorAll('link[rel="modulepreload"]'))r(o);new MutationObserver(o=>{for(const f of o)if(f.type==="childList")for(const d of f.addedNodes)d.tagName==="LINK"&&d.rel==="modulepreload"&&r(d)}).observe(document,{childList:!0,subtree:!0});function u(o){const f={};return 
o.integrity&&(f.integrity=o.integrity),o.referrerPolicy&&(f.referrerPolicy=o.referrerPolicy),o.crossOrigin==="use-credentials"?f.credentials="include":o.crossOrigin==="anonymous"?f.credentials="omit":f.credentials="same-origin",f}function r(o){if(o.ep)return;o.ep=!0;const f=u(o);fetch(o.href,f)}})();function ag(n){return n&&n.__esModule&&Object.prototype.hasOwnProperty.call(n,"default")?n.default:n}var Xc={exports:{}},Wi={};/** + * @license React + * react-jsx-runtime.production.js + * + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */var km;function cb(){if(km)return Wi;km=1;var n=Symbol.for("react.transitional.element"),l=Symbol.for("react.fragment");function u(r,o,f){var d=null;if(f!==void 0&&(d=""+f),o.key!==void 0&&(d=""+o.key),"key"in o){f={};for(var v in o)v!=="key"&&(f[v]=o[v])}else f=o;return o=f.ref,{$$typeof:n,type:r,key:d,ref:o!==void 0?o:null,props:f}}return Wi.Fragment=l,Wi.jsx=u,Wi.jsxs=u,Wi}var qm;function fb(){return qm||(qm=1,Xc.exports=cb()),Xc.exports}var x=fb(),Kc={exports:{}},oe={};/** + * @license React + * react.production.js + * + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */var Vm;function db(){if(Vm)return oe;Vm=1;var n=Symbol.for("react.transitional.element"),l=Symbol.for("react.portal"),u=Symbol.for("react.fragment"),r=Symbol.for("react.strict_mode"),o=Symbol.for("react.profiler"),f=Symbol.for("react.consumer"),d=Symbol.for("react.context"),v=Symbol.for("react.forward_ref"),m=Symbol.for("react.suspense"),p=Symbol.for("react.memo"),_=Symbol.for("react.lazy"),O=Symbol.iterator;function w(b){return b===null||typeof b!="object"?null:(b=O&&b[O]||b["@@iterator"],typeof b=="function"?b:null)}var Z={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},k=Object.assign,B={};function U(b,H,K){this.props=b,this.context=H,this.refs=B,this.updater=K||Z}U.prototype.isReactComponent={},U.prototype.setState=function(b,H){if(typeof b!="object"&&typeof b!="function"&&b!=null)throw Error("takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,b,H,"setState")},U.prototype.forceUpdate=function(b){this.updater.enqueueForceUpdate(this,b,"forceUpdate")};function $(){}$.prototype=U.prototype;function X(b,H,K){this.props=b,this.context=H,this.refs=B,this.updater=K||Z}var q=X.prototype=new $;q.constructor=X,k(q,U.prototype),q.isPureReactComponent=!0;var W=Array.isArray,Q={H:null,A:null,T:null,S:null,V:null},he=Object.prototype.hasOwnProperty;function xe(b,H,K,Y,ee,pe){return K=pe.ref,{$$typeof:n,type:b,key:H,ref:K!==void 0?K:null,props:pe}}function Xe(b,H){return xe(b.type,H,void 0,void 0,void 0,b.props)}function ae(b){return typeof b=="object"&&b!==null&&b.$$typeof===n}function qe(b){var H={"=":"=0",":":"=2"};return"$"+b.replace(/[=:]/g,function(K){return H[K]})}var Fe=/\/+/g;function Ke(b,H){return typeof b=="object"&&b!==null&&b.key!=null?qe(""+b.key):H.toString(36)}function Pt(){}function Kn(b){switch(b.status){case"fulfilled":return b.value;case"rejected":throw b.reason;default:switch(typeof 
b.status=="string"?b.then(Pt,Pt):(b.status="pending",b.then(function(H){b.status==="pending"&&(b.status="fulfilled",b.value=H)},function(H){b.status==="pending"&&(b.status="rejected",b.reason=H)})),b.status){case"fulfilled":return b.value;case"rejected":throw b.reason}}throw b}function lt(b,H,K,Y,ee){var pe=typeof b;(pe==="undefined"||pe==="boolean")&&(b=null);var re=!1;if(b===null)re=!0;else switch(pe){case"bigint":case"string":case"number":re=!0;break;case"object":switch(b.$$typeof){case n:case l:re=!0;break;case _:return re=b._init,lt(re(b._payload),H,K,Y,ee)}}if(re)return ee=ee(b),re=Y===""?"."+Ke(b,0):Y,W(ee)?(K="",re!=null&&(K=re.replace(Fe,"$&/")+"/"),lt(ee,H,K,"",function(Qn){return Qn})):ee!=null&&(ae(ee)&&(ee=Xe(ee,K+(ee.key==null||b&&b.key===ee.key?"":(""+ee.key).replace(Fe,"$&/")+"/")+re)),H.push(ee)),1;re=0;var St=Y===""?".":Y+":";if(W(b))for(var je=0;je>>1,b=N[Te];if(0>>1;Teo(Y,le))eeo(pe,Y)?(N[Te]=pe,N[ee]=le,Te=ee):(N[Te]=Y,N[K]=le,Te=K);else if(eeo(pe,le))N[Te]=pe,N[ee]=le,Te=ee;else break e}}return G}function o(N,G){var le=N.sortIndex-G.sortIndex;return le!==0?le:N.id-G.id}if(n.unstable_now=void 0,typeof performance=="object"&&typeof performance.now=="function"){var f=performance;n.unstable_now=function(){return f.now()}}else{var d=Date,v=d.now();n.unstable_now=function(){return d.now()-v}}var m=[],p=[],_=1,O=null,w=3,Z=!1,k=!1,B=!1,U=!1,$=typeof setTimeout=="function"?setTimeout:null,X=typeof clearTimeout=="function"?clearTimeout:null,q=typeof setImmediate<"u"?setImmediate:null;function W(N){for(var G=u(p);G!==null;){if(G.callback===null)r(p);else if(G.startTime<=N)r(p),G.sortIndex=G.expirationTime,l(m,G);else break;G=u(p)}}function Q(N){if(B=!1,W(N),!k)if(u(m)!==null)k=!0,he||(he=!0,Ke());else{var G=u(p);G!==null&<(Q,G.startTime-N)}}var he=!1,xe=-1,Xe=5,ae=-1;function qe(){return U?!0:!(n.unstable_now()-aeN&&qe());){var Te=O.callback;if(typeof Te=="function"){O.callback=null,w=O.priorityLevel;var 
b=Te(O.expirationTime<=N);if(N=n.unstable_now(),typeof b=="function"){O.callback=b,W(N),G=!0;break t}O===u(m)&&r(m),W(N)}else r(m);O=u(m)}if(O!==null)G=!0;else{var H=u(p);H!==null&<(Q,H.startTime-N),G=!1}}break e}finally{O=null,w=le,Z=!1}G=void 0}}finally{G?Ke():he=!1}}}var Ke;if(typeof q=="function")Ke=function(){q(Fe)};else if(typeof MessageChannel<"u"){var Pt=new MessageChannel,Kn=Pt.port2;Pt.port1.onmessage=Fe,Ke=function(){Kn.postMessage(null)}}else Ke=function(){$(Fe,0)};function lt(N,G){xe=$(function(){N(n.unstable_now())},G)}n.unstable_IdlePriority=5,n.unstable_ImmediatePriority=1,n.unstable_LowPriority=4,n.unstable_NormalPriority=3,n.unstable_Profiling=null,n.unstable_UserBlockingPriority=2,n.unstable_cancelCallback=function(N){N.callback=null},n.unstable_forceFrameRate=function(N){0>N||125Te?(N.sortIndex=le,l(p,N),u(m)===null&&N===u(p)&&(B?(X(xe),xe=-1):B=!0,lt(Q,le-Te))):(N.sortIndex=b,l(m,N),k||Z||(k=!0,he||(he=!0,Ke()))),N},n.unstable_shouldYield=qe,n.unstable_wrapCallback=function(N){var G=w;return function(){var le=w;w=G;try{return N.apply(this,arguments)}finally{w=le}}}}(Pc)),Pc}var Xm;function vb(){return Xm||(Xm=1,Jc.exports=hb()),Jc.exports}var Wc={exports:{}},dt={};/** + * @license React + * react-dom.production.js + * + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */var Km;function mb(){if(Km)return dt;Km=1;var n=fo();function l(m){var p="https://react.dev/errors/"+m;if(1"u"||typeof __REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE!="function"))try{__REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE(n)}catch(l){console.error(l)}}return n(),Wc.exports=mb(),Wc.exports}/** + * @license React + * react-dom-client.production.js + * + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */var Jm;function pb(){if(Jm)return Fi;Jm=1;var n=vb(),l=fo(),u=lg();function r(e){var t="https://react.dev/errors/"+e;if(1b||(e.current=Te[b],Te[b]=null,b--)}function Y(e,t){b++,Te[b]=e.current,e.current=t}var ee=H(null),pe=H(null),re=H(null),St=H(null);function je(e,t){switch(Y(re,t),Y(pe,e),Y(ee,null),t.nodeType){case 9:case 11:e=(e=t.documentElement)&&(e=e.namespaceURI)?mm(e):0;break;default:if(e=t.tagName,t=t.namespaceURI)t=mm(t),e=pm(t,e);else switch(e){case"svg":e=1;break;case"math":e=2;break;default:e=0}}K(ee),Y(ee,e)}function Qn(){K(ee),K(pe),K(re)}function No(e){e.memoizedState!==null&&Y(St,e);var t=ee.current,a=pm(t,e.type);t!==a&&(Y(pe,e),Y(ee,a))}function bu(e){pe.current===e&&(K(ee),K(pe)),St.current===e&&(K(St),Xi._currentValue=le)}var jo=Object.prototype.hasOwnProperty,Mo=n.unstable_scheduleCallback,Co=n.unstable_cancelCallback,q_=n.unstable_shouldYield,V_=n.unstable_requestPaint,on=n.unstable_now,Y_=n.unstable_getCurrentPriorityLevel,Jf=n.unstable_ImmediatePriority,Pf=n.unstable_UserBlockingPriority,Su=n.unstable_NormalPriority,G_=n.unstable_LowPriority,Wf=n.unstable_IdlePriority,X_=n.log,K_=n.unstable_setDisableYieldValue,ei=null,xt=null;function Jn(e){if(typeof X_=="function"&&K_(e),xt&&typeof xt.setStrictMode=="function")try{xt.setStrictMode(ei,e)}catch{}}var Et=Math.clz32?Math.clz32:P_,Q_=Math.log,J_=Math.LN2;function P_(e){return e>>>=0,e===0?32:31-(Q_(e)/J_|0)|0}var xu=256,Eu=4194304;function Oa(e){var t=e&42;if(t!==0)return t;switch(e&-e){case 1:return 1;case 2:return 2;case 4:return 4;case 8:return 8;case 16:return 16;case 32:return 32;case 64:return 64;case 128:return 128;case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return e&4194048;case 4194304:case 8388608:case 16777216:case 33554432:return e&62914560;case 67108864:return 67108864;case 134217728:return 134217728;case 268435456:return 268435456;case 536870912:return 536870912;case 
1073741824:return 0;default:return e}}function Ou(e,t,a){var i=e.pendingLanes;if(i===0)return 0;var s=0,c=e.suspendedLanes,h=e.pingedLanes;e=e.warmLanes;var g=i&134217727;return g!==0?(i=g&~c,i!==0?s=Oa(i):(h&=g,h!==0?s=Oa(h):a||(a=g&~e,a!==0&&(s=Oa(a))))):(g=i&~c,g!==0?s=Oa(g):h!==0?s=Oa(h):a||(a=i&~e,a!==0&&(s=Oa(a)))),s===0?0:t!==0&&t!==s&&(t&c)===0&&(c=s&-s,a=t&-t,c>=a||c===32&&(a&4194048)!==0)?t:s}function ti(e,t){return(e.pendingLanes&~(e.suspendedLanes&~e.pingedLanes)&t)===0}function W_(e,t){switch(e){case 1:case 2:case 4:case 8:case 64:return t+250;case 16:case 32:case 128:case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return t+5e3;case 4194304:case 8388608:case 16777216:case 33554432:return-1;case 67108864:case 134217728:case 268435456:case 536870912:case 1073741824:return-1;default:return-1}}function Ff(){var e=xu;return xu<<=1,(xu&4194048)===0&&(xu=256),e}function If(){var e=Eu;return Eu<<=1,(Eu&62914560)===0&&(Eu=4194304),e}function Uo(e){for(var t=[],a=0;31>a;a++)t.push(e);return t}function ni(e,t){e.pendingLanes|=t,t!==268435456&&(e.suspendedLanes=0,e.pingedLanes=0,e.warmLanes=0)}function F_(e,t,a,i,s,c){var h=e.pendingLanes;e.pendingLanes=a,e.suspendedLanes=0,e.pingedLanes=0,e.warmLanes=0,e.expiredLanes&=a,e.entangledLanes&=a,e.errorRecoveryDisabledLanes&=a,e.shellSuspendCounter=0;var g=e.entanglements,y=e.expirationTimes,T=e.hiddenUpdates;for(a=h&~a;0)":-1s||y[i]!==T[s]){var j=` +`+y[i].replace(" at new "," at ");return e.displayName&&j.includes("")&&(j=j.replace("",e.displayName)),j}while(1<=i&&0<=s);break}}}finally{ko=!1,Error.prepareStackTrace=a}return(a=e?e.displayName||e.name:"")?il(a):""}function ly(e){switch(e.tag){case 26:case 27:case 5:return il(e.type);case 16:return il("Lazy");case 13:return il("Suspense");case 19:return il("SuspenseList");case 0:case 15:return qo(e.type,!1);case 11:return qo(e.type.render,!1);case 1:return 
qo(e.type,!0);case 31:return il("Activity");default:return""}}function sd(e){try{var t="";do t+=ly(e),e=e.return;while(e);return t}catch(a){return` +Error generating stack: `+a.message+` +`+a.stack}}function Bt(e){switch(typeof e){case"bigint":case"boolean":case"number":case"string":case"undefined":return e;case"object":return e;default:return""}}function cd(e){var t=e.type;return(e=e.nodeName)&&e.toLowerCase()==="input"&&(t==="checkbox"||t==="radio")}function iy(e){var t=cd(e)?"checked":"value",a=Object.getOwnPropertyDescriptor(e.constructor.prototype,t),i=""+e[t];if(!e.hasOwnProperty(t)&&typeof a<"u"&&typeof a.get=="function"&&typeof a.set=="function"){var s=a.get,c=a.set;return Object.defineProperty(e,t,{configurable:!0,get:function(){return s.call(this)},set:function(h){i=""+h,c.call(this,h)}}),Object.defineProperty(e,t,{enumerable:a.enumerable}),{getValue:function(){return i},setValue:function(h){i=""+h},stopTracking:function(){e._valueTracker=null,delete e[t]}}}}function wu(e){e._valueTracker||(e._valueTracker=iy(e))}function fd(e){if(!e)return!1;var t=e._valueTracker;if(!t)return!0;var a=t.getValue(),i="";return e&&(i=cd(e)?e.checked?"true":"false":e.value),e=i,e!==a?(t.setValue(e),!0):!1}function zu(e){if(e=e||(typeof document<"u"?document:void 0),typeof e>"u")return null;try{return e.activeElement||e.body}catch{return e.body}}var uy=/[\n"\\]/g;function Lt(e){return e.replace(uy,function(t){return"\\"+t.charCodeAt(0).toString(16)+" "})}function Vo(e,t,a,i,s,c,h,g){e.name="",h!=null&&typeof h!="function"&&typeof h!="symbol"&&typeof h!="boolean"?e.type=h:e.removeAttribute("type"),t!=null?h==="number"?(t===0&&e.value===""||e.value!=t)&&(e.value=""+Bt(t)):e.value!==""+Bt(t)&&(e.value=""+Bt(t)):h!=="submit"&&h!=="reset"||e.removeAttribute("value"),t!=null?Yo(e,h,Bt(t)):a!=null?Yo(e,h,Bt(a)):i!=null&&e.removeAttribute("value"),s==null&&c!=null&&(e.defaultChecked=!!c),s!=null&&(e.checked=s&&typeof s!="function"&&typeof s!="symbol"),g!=null&&typeof 
g!="function"&&typeof g!="symbol"&&typeof g!="boolean"?e.name=""+Bt(g):e.removeAttribute("name")}function dd(e,t,a,i,s,c,h,g){if(c!=null&&typeof c!="function"&&typeof c!="symbol"&&typeof c!="boolean"&&(e.type=c),t!=null||a!=null){if(!(c!=="submit"&&c!=="reset"||t!=null))return;a=a!=null?""+Bt(a):"",t=t!=null?""+Bt(t):a,g||t===e.value||(e.value=t),e.defaultValue=t}i=i??s,i=typeof i!="function"&&typeof i!="symbol"&&!!i,e.checked=g?e.checked:!!i,e.defaultChecked=!!i,h!=null&&typeof h!="function"&&typeof h!="symbol"&&typeof h!="boolean"&&(e.name=h)}function Yo(e,t,a){t==="number"&&zu(e.ownerDocument)===e||e.defaultValue===""+a||(e.defaultValue=""+a)}function ul(e,t,a,i){if(e=e.options,t){t={};for(var s=0;s"u"||typeof window.document>"u"||typeof window.document.createElement>"u"),Jo=!1;if(On)try{var ui={};Object.defineProperty(ui,"passive",{get:function(){Jo=!0}}),window.addEventListener("test",ui,ui),window.removeEventListener("test",ui,ui)}catch{Jo=!1}var Wn=null,Po=null,Du=null;function yd(){if(Du)return Du;var e,t=Po,a=t.length,i,s="value"in Wn?Wn.value:Wn.textContent,c=s.length;for(e=0;e=si),Ad=" ",Td=!1;function wd(e,t){switch(e){case"keyup":return Cy.indexOf(t.keyCode)!==-1;case"keydown":return t.keyCode!==229;case"keypress":case"mousedown":case"focusout":return!0;default:return!1}}function zd(e){return e=e.detail,typeof e=="object"&&"data"in e?e.data:null}var cl=!1;function Zy(e,t){switch(e){case"compositionend":return zd(t);case"keypress":return t.which!==32?null:(Td=!0,Ad);case"textInput":return e=t.data,e===Ad&&Td?null:e;default:return null}}function By(e,t){if(cl)return e==="compositionend"||!ts&&wd(e,t)?(e=yd(),Du=Po=Wn=null,cl=!1,e):null;switch(e){case"paste":return null;case"keypress":if(!(t.ctrlKey||t.altKey||t.metaKey)||t.ctrlKey&&t.altKey){if(t.char&&1=t)return{node:a,offset:t-e};e=i}e:{for(;a;){if(a.nextSibling){a=a.nextSibling;break e}a=a.parentNode}a=void 0}a=Zd(a)}}function Ld(e,t){return 
e&&t?e===t?!0:e&&e.nodeType===3?!1:t&&t.nodeType===3?Ld(e,t.parentNode):"contains"in e?e.contains(t):e.compareDocumentPosition?!!(e.compareDocumentPosition(t)&16):!1:!1}function $d(e){e=e!=null&&e.ownerDocument!=null&&e.ownerDocument.defaultView!=null?e.ownerDocument.defaultView:window;for(var t=zu(e.document);t instanceof e.HTMLIFrameElement;){try{var a=typeof t.contentWindow.location.href=="string"}catch{a=!1}if(a)e=t.contentWindow;else break;t=zu(e.document)}return t}function ls(e){var t=e&&e.nodeName&&e.nodeName.toLowerCase();return t&&(t==="input"&&(e.type==="text"||e.type==="search"||e.type==="tel"||e.type==="url"||e.type==="password")||t==="textarea"||e.contentEditable==="true")}var Gy=On&&"documentMode"in document&&11>=document.documentMode,fl=null,is=null,hi=null,us=!1;function Hd(e,t,a){var i=a.window===a?a.document:a.nodeType===9?a:a.ownerDocument;us||fl==null||fl!==zu(i)||(i=fl,"selectionStart"in i&&ls(i)?i={start:i.selectionStart,end:i.selectionEnd}:(i=(i.ownerDocument&&i.ownerDocument.defaultView||window).getSelection(),i={anchorNode:i.anchorNode,anchorOffset:i.anchorOffset,focusNode:i.focusNode,focusOffset:i.focusOffset}),hi&&di(hi,i)||(hi=i,i=br(is,"onSelect"),0>=h,s-=h,Tn=1<<32-Et(t)+s|a<c?c:8;var h=N.T,g={};N.T=g,Gs(e,!1,t,a);try{var y=s(),T=N.S;if(T!==null&&T(g,y),y!==null&&typeof y=="object"&&typeof y.then=="function"){var j=e0(y,i);zi(e,t,j,Rt(e))}else zi(e,t,i,Rt(e))}catch(L){zi(e,t,{then:function(){},status:"rejected",reason:L},Rt())}finally{G.p=c,N.T=h}}function i0(){}function Vs(e,t,a,i){if(e.tag!==5)throw Error(r(476));var s=kh(e).queue;Hh(e,s,t,le,a===null?i0:function(){return qh(e),a(i)})}function kh(e){var t=e.memoizedState;if(t!==null)return t;t={memoizedState:le,baseState:le,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Dn,lastRenderedState:le},next:null};var a={};return 
t.next={memoizedState:a,baseState:a,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Dn,lastRenderedState:a},next:null},e.memoizedState=t,e=e.alternate,e!==null&&(e.memoizedState=t),t}function qh(e){var t=kh(e).next.queue;zi(e,t,{},Rt())}function Ys(){return ft(Xi)}function Vh(){return Je().memoizedState}function Yh(){return Je().memoizedState}function u0(e){for(var t=e.return;t!==null;){switch(t.tag){case 24:case 3:var a=Rt();e=ea(a);var i=ta(t,e,a);i!==null&&(Dt(i,t,a),xi(i,t,a)),t={cache:bs()},e.payload=t;return}t=t.return}}function r0(e,t,a){var i=Rt();a={lane:i,revertLane:0,action:a,hasEagerState:!1,eagerState:null,next:null},er(e)?Xh(t,a):(a=cs(e,t,a,i),a!==null&&(Dt(a,e,i),Kh(a,t,i)))}function Gh(e,t,a){var i=Rt();zi(e,t,a,i)}function zi(e,t,a,i){var s={lane:i,revertLane:0,action:a,hasEagerState:!1,eagerState:null,next:null};if(er(e))Xh(t,s);else{var c=e.alternate;if(e.lanes===0&&(c===null||c.lanes===0)&&(c=t.lastRenderedReducer,c!==null))try{var h=t.lastRenderedState,g=c(h,a);if(s.hasEagerState=!0,s.eagerState=g,Ot(g,h))return Bu(e,t,s,0),Re===null&&Zu(),!1}catch{}finally{}if(a=cs(e,t,s,i),a!==null)return Dt(a,e,i),Kh(a,t,i),!0}return!1}function Gs(e,t,a,i){if(i={lane:2,revertLane:Ec(),action:i,hasEagerState:!1,eagerState:null,next:null},er(e)){if(t)throw Error(r(479))}else t=cs(e,a,i,2),t!==null&&Dt(t,e,2)}function er(e){var t=e.alternate;return e===se||t!==null&&t===se}function Xh(e,t){Sl=Qu=!0;var a=e.pending;a===null?t.next=t:(t.next=a.next,a.next=t),e.pending=t}function Kh(e,t,a){if((a&4194048)!==0){var i=t.lanes;i&=e.pendingLanes,a|=i,t.lanes=a,td(e,a)}}var 
tr={readContext:ft,use:Pu,useCallback:Ve,useContext:Ve,useEffect:Ve,useImperativeHandle:Ve,useLayoutEffect:Ve,useInsertionEffect:Ve,useMemo:Ve,useReducer:Ve,useRef:Ve,useState:Ve,useDebugValue:Ve,useDeferredValue:Ve,useTransition:Ve,useSyncExternalStore:Ve,useId:Ve,useHostTransitionStatus:Ve,useFormState:Ve,useActionState:Ve,useOptimistic:Ve,useMemoCache:Ve,useCacheRefresh:Ve},Qh={readContext:ft,use:Pu,useCallback:function(e,t){return pt().memoizedState=[e,t===void 0?null:t],e},useContext:ft,useEffect:Nh,useImperativeHandle:function(e,t,a){a=a!=null?a.concat([e]):null,Iu(4194308,4,Uh.bind(null,t,e),a)},useLayoutEffect:function(e,t){return Iu(4194308,4,e,t)},useInsertionEffect:function(e,t){Iu(4,2,e,t)},useMemo:function(e,t){var a=pt();t=t===void 0?null:t;var i=e();if(Ba){Jn(!0);try{e()}finally{Jn(!1)}}return a.memoizedState=[i,t],i},useReducer:function(e,t,a){var i=pt();if(a!==void 0){var s=a(t);if(Ba){Jn(!0);try{a(t)}finally{Jn(!1)}}}else s=t;return i.memoizedState=i.baseState=s,e={pending:null,lanes:0,dispatch:null,lastRenderedReducer:e,lastRenderedState:s},i.queue=e,e=e.dispatch=r0.bind(null,se,e),[i.memoizedState,e]},useRef:function(e){var t=pt();return e={current:e},t.memoizedState=e},useState:function(e){e=$s(e);var t=e.queue,a=Gh.bind(null,se,t);return t.dispatch=a,[e.memoizedState,a]},useDebugValue:ks,useDeferredValue:function(e,t){var a=pt();return qs(a,e,t)},useTransition:function(){var e=$s(!1);return e=Hh.bind(null,se,e.queue,!0,!1),pt().memoizedState=e,[!1,e]},useSyncExternalStore:function(e,t,a){var i=se,s=pt();if(ye){if(a===void 0)throw Error(r(407));a=a()}else{if(a=t(),Re===null)throw Error(r(349));(ve&124)!==0||mh(i,t,a)}s.memoizedState=a;var c={value:a,getSnapshot:t};return s.queue=c,Nh(gh.bind(null,i,c,e),[e]),i.flags|=2048,El(9,Fu(),ph.bind(null,i,c,a,t),null),a},useId:function(){var e=pt(),t=Re.identifierPrefix;if(ye){var a=wn,i=Tn;a=(i&~(1<<32-Et(i)-1)).toString(32)+a,t="«"+t+"R"+a,a=Ju++,0ne?(nt=I,I=null):nt=I.sibling;var 
ge=z(E,I,A[ne],C);if(ge===null){I===null&&(I=nt);break}e&&I&&ge.alternate===null&&t(E,I),S=c(ge,S,ne),ce===null?J=ge:ce.sibling=ge,ce=ge,I=nt}if(ne===A.length)return a(E,I),ye&&Na(E,ne),J;if(I===null){for(;nene?(nt=I,I=null):nt=I.sibling;var _a=z(E,I,ge.value,C);if(_a===null){I===null&&(I=nt);break}e&&I&&_a.alternate===null&&t(E,I),S=c(_a,S,ne),ce===null?J=_a:ce.sibling=_a,ce=_a,I=nt}if(ge.done)return a(E,I),ye&&Na(E,ne),J;if(I===null){for(;!ge.done;ne++,ge=A.next())ge=L(E,ge.value,C),ge!==null&&(S=c(ge,S,ne),ce===null?J=ge:ce.sibling=ge,ce=ge);return ye&&Na(E,ne),J}for(I=i(I);!ge.done;ne++,ge=A.next())ge=D(I,E,ne,ge.value,C),ge!==null&&(e&&ge.alternate!==null&&I.delete(ge.key===null?ne:ge.key),S=c(ge,S,ne),ce===null?J=ge:ce.sibling=ge,ce=ge);return e&&I.forEach(function(sb){return t(E,sb)}),ye&&Na(E,ne),J}function Ae(E,S,A,C){if(typeof A=="object"&&A!==null&&A.type===k&&A.key===null&&(A=A.props.children),typeof A=="object"&&A!==null){switch(A.$$typeof){case w:e:{for(var J=A.key;S!==null;){if(S.key===J){if(J=A.type,J===k){if(S.tag===7){a(E,S.sibling),C=s(S,A.props.children),C.return=E,E=C;break e}}else if(S.elementType===J||typeof J=="object"&&J!==null&&J.$$typeof===Xe&&Ph(J)===S.type){a(E,S.sibling),C=s(S,A.props),Di(C,A),C.return=E,E=C;break e}a(E,S);break}else t(E,S);S=S.sibling}A.type===k?(C=Ra(A.props.children,E.mode,C,A.key),C.return=E,E=C):(C=$u(A.type,A.key,A.props,null,E.mode,C),Di(C,A),C.return=E,E=C)}return h(E);case Z:e:{for(J=A.key;S!==null;){if(S.key===J)if(S.tag===4&&S.stateNode.containerInfo===A.containerInfo&&S.stateNode.implementation===A.implementation){a(E,S.sibling),C=s(S,A.children||[]),C.return=E,E=C;break e}else{a(E,S);break}else t(E,S);S=S.sibling}C=hs(A,E.mode,C),C.return=E,E=C}return h(E);case Xe:return J=A._init,A=J(A._payload),Ae(E,S,A,C)}if(lt(A))return ie(E,S,A,C);if(Ke(A)){if(J=Ke(A),typeof J!="function")throw Error(r(150));return A=J.call(A),te(E,S,A,C)}if(typeof A.then=="function")return Ae(E,S,nr(A),C);if(A.$$typeof===q)return 
Ae(E,S,Vu(E,A),C);ar(E,A)}return typeof A=="string"&&A!==""||typeof A=="number"||typeof A=="bigint"?(A=""+A,S!==null&&S.tag===6?(a(E,S.sibling),C=s(S,A),C.return=E,E=C):(a(E,S),C=ds(A,E.mode,C),C.return=E,E=C),h(E)):a(E,S)}return function(E,S,A,C){try{Ri=0;var J=Ae(E,S,A,C);return Ol=null,J}catch(I){if(I===bi||I===Gu)throw I;var ce=At(29,I,null,E.mode);return ce.lanes=C,ce.return=E,ce}finally{}}}var Al=Wh(!0),Fh=Wh(!1),Vt=H(null),cn=null;function aa(e){var t=e.alternate;Y(We,We.current&1),Y(Vt,e),cn===null&&(t===null||bl.current!==null||t.memoizedState!==null)&&(cn=e)}function Ih(e){if(e.tag===22){if(Y(We,We.current),Y(Vt,e),cn===null){var t=e.alternate;t!==null&&t.memoizedState!==null&&(cn=e)}}else la()}function la(){Y(We,We.current),Y(Vt,Vt.current)}function Nn(e){K(Vt),cn===e&&(cn=null),K(We)}var We=H(0);function lr(e){for(var t=e;t!==null;){if(t.tag===13){var a=t.memoizedState;if(a!==null&&(a=a.dehydrated,a===null||a.data==="$?"||Uc(a)))return t}else if(t.tag===19&&t.memoizedProps.revealOrder!==void 0){if((t.flags&128)!==0)return t}else if(t.child!==null){t.child.return=t,t=t.child;continue}if(t===e)break;for(;t.sibling===null;){if(t.return===null||t.return===e)return null;t=t.return}t.sibling.return=t.return,t=t.sibling}return null}function Xs(e,t,a,i){t=e.memoizedState,a=a(i,t),a=a==null?t:_({},t,a),e.memoizedState=a,e.lanes===0&&(e.updateQueue.baseState=a)}var Ks={enqueueSetState:function(e,t,a){e=e._reactInternals;var i=Rt(),s=ea(i);s.payload=t,a!=null&&(s.callback=a),t=ta(e,s,i),t!==null&&(Dt(t,e,i),xi(t,e,i))},enqueueReplaceState:function(e,t,a){e=e._reactInternals;var i=Rt(),s=ea(i);s.tag=1,s.payload=t,a!=null&&(s.callback=a),t=ta(e,s,i),t!==null&&(Dt(t,e,i),xi(t,e,i))},enqueueForceUpdate:function(e,t){e=e._reactInternals;var a=Rt(),i=ea(a);i.tag=2,t!=null&&(i.callback=t),t=ta(e,i,a),t!==null&&(Dt(t,e,a),xi(t,e,a))}};function ev(e,t,a,i,s,c,h){return e=e.stateNode,typeof 
e.shouldComponentUpdate=="function"?e.shouldComponentUpdate(i,c,h):t.prototype&&t.prototype.isPureReactComponent?!di(a,i)||!di(s,c):!0}function tv(e,t,a,i){e=t.state,typeof t.componentWillReceiveProps=="function"&&t.componentWillReceiveProps(a,i),typeof t.UNSAFE_componentWillReceiveProps=="function"&&t.UNSAFE_componentWillReceiveProps(a,i),t.state!==e&&Ks.enqueueReplaceState(t,t.state,null)}function La(e,t){var a=t;if("ref"in t){a={};for(var i in t)i!=="ref"&&(a[i]=t[i])}if(e=e.defaultProps){a===t&&(a=_({},a));for(var s in e)a[s]===void 0&&(a[s]=e[s])}return a}var ir=typeof reportError=="function"?reportError:function(e){if(typeof window=="object"&&typeof window.ErrorEvent=="function"){var t=new window.ErrorEvent("error",{bubbles:!0,cancelable:!0,message:typeof e=="object"&&e!==null&&typeof e.message=="string"?String(e.message):String(e),error:e});if(!window.dispatchEvent(t))return}else if(typeof process=="object"&&typeof process.emit=="function"){process.emit("uncaughtException",e);return}console.error(e)};function nv(e){ir(e)}function av(e){console.error(e)}function lv(e){ir(e)}function ur(e,t){try{var a=e.onUncaughtError;a(t.value,{componentStack:t.stack})}catch(i){setTimeout(function(){throw i})}}function iv(e,t,a){try{var i=e.onCaughtError;i(a.value,{componentStack:a.stack,errorBoundary:t.tag===1?t.stateNode:null})}catch(s){setTimeout(function(){throw s})}}function Qs(e,t,a){return a=ea(a),a.tag=3,a.payload={element:null},a.callback=function(){ur(e,t)},a}function uv(e){return e=ea(e),e.tag=3,e}function rv(e,t,a,i){var s=a.type.getDerivedStateFromError;if(typeof s=="function"){var c=i.value;e.payload=function(){return s(c)},e.callback=function(){iv(t,a,i)}}var h=a.stateNode;h!==null&&typeof h.componentDidCatch=="function"&&(e.callback=function(){iv(t,a,i),typeof s!="function"&&(ca===null?ca=new Set([this]):ca.add(this));var g=i.stack;this.componentDidCatch(i.value,{componentStack:g!==null?g:""})})}function s0(e,t,a,i,s){if(a.flags|=32768,i!==null&&typeof 
i=="object"&&typeof i.then=="function"){if(t=a.alternate,t!==null&&gi(t,a,s,!0),a=Vt.current,a!==null){switch(a.tag){case 13:return cn===null?_c():a.alternate===null&&ke===0&&(ke=3),a.flags&=-257,a.flags|=65536,a.lanes=s,i===Es?a.flags|=16384:(t=a.updateQueue,t===null?a.updateQueue=new Set([i]):t.add(i),bc(e,i,s)),!1;case 22:return a.flags|=65536,i===Es?a.flags|=16384:(t=a.updateQueue,t===null?(t={transitions:null,markerInstances:null,retryQueue:new Set([i])},a.updateQueue=t):(a=t.retryQueue,a===null?t.retryQueue=new Set([i]):a.add(i)),bc(e,i,s)),!1}throw Error(r(435,a.tag))}return bc(e,i,s),_c(),!1}if(ye)return t=Vt.current,t!==null?((t.flags&65536)===0&&(t.flags|=256),t.flags|=65536,t.lanes=s,i!==ps&&(e=Error(r(422),{cause:i}),pi($t(e,a)))):(i!==ps&&(t=Error(r(423),{cause:i}),pi($t(t,a))),e=e.current.alternate,e.flags|=65536,s&=-s,e.lanes|=s,i=$t(i,a),s=Qs(e.stateNode,i,s),Ts(e,s),ke!==4&&(ke=2)),!1;var c=Error(r(520),{cause:i});if(c=$t(c,a),Bi===null?Bi=[c]:Bi.push(c),ke!==4&&(ke=2),t===null)return!0;i=$t(i,a),a=t;do{switch(a.tag){case 3:return a.flags|=65536,e=s&-s,a.lanes|=e,e=Qs(a.stateNode,i,e),Ts(a,e),!1;case 1:if(t=a.type,c=a.stateNode,(a.flags&128)===0&&(typeof t.getDerivedStateFromError=="function"||c!==null&&typeof c.componentDidCatch=="function"&&(ca===null||!ca.has(c))))return a.flags|=65536,s&=-s,a.lanes|=s,s=uv(s),rv(s,e,a,i),Ts(a,s),!1}a=a.return}while(a!==null);return!1}var ov=Error(r(461)),et=!1;function it(e,t,a,i){t.child=e===null?Fh(t,null,a,i):Al(t,e.child,a,i)}function sv(e,t,a,i,s){a=a.render;var c=t.ref;if("ref"in i){var h={};for(var g in i)g!=="ref"&&(h[g]=i[g])}else h=i;return Ua(t),i=Ns(e,t,a,h,c,s),g=js(),e!==null&&!et?(Ms(e,t,s),jn(e,t,s)):(ye&&g&&vs(t),t.flags|=1,it(e,t,i,s),t.child)}function cv(e,t,a,i,s){if(e===null){var c=a.type;return typeof c=="function"&&!fs(c)&&c.defaultProps===void 0&&a.compare===null?(t.tag=15,t.type=c,fv(e,t,c,i,s)):(e=$u(a.type,null,i,t,t.mode,s),e.ref=t.ref,e.return=t,t.child=e)}if(c=e.child,!nc(e,s)){var 
h=c.memoizedProps;if(a=a.compare,a=a!==null?a:di,a(h,i)&&e.ref===t.ref)return jn(e,t,s)}return t.flags|=1,e=An(c,i),e.ref=t.ref,e.return=t,t.child=e}function fv(e,t,a,i,s){if(e!==null){var c=e.memoizedProps;if(di(c,i)&&e.ref===t.ref)if(et=!1,t.pendingProps=i=c,nc(e,s))(e.flags&131072)!==0&&(et=!0);else return t.lanes=e.lanes,jn(e,t,s)}return Js(e,t,a,i,s)}function dv(e,t,a){var i=t.pendingProps,s=i.children,c=e!==null?e.memoizedState:null;if(i.mode==="hidden"){if((t.flags&128)!==0){if(i=c!==null?c.baseLanes|a:a,e!==null){for(s=t.child=e.child,c=0;s!==null;)c=c|s.lanes|s.childLanes,s=s.sibling;t.childLanes=c&~i}else t.childLanes=0,t.child=null;return hv(e,t,i,a)}if((a&536870912)!==0)t.memoizedState={baseLanes:0,cachePool:null},e!==null&&Yu(t,c!==null?c.cachePool:null),c!==null?fh(t,c):zs(),Ih(t);else return t.lanes=t.childLanes=536870912,hv(e,t,c!==null?c.baseLanes|a:a,a)}else c!==null?(Yu(t,c.cachePool),fh(t,c),la(),t.memoizedState=null):(e!==null&&Yu(t,null),zs(),la());return it(e,t,s,a),t.child}function hv(e,t,a,i){var s=xs();return s=s===null?null:{parent:Pe._currentValue,pool:s},t.memoizedState={baseLanes:a,cachePool:s},e!==null&&Yu(t,null),zs(),Ih(t),e!==null&&gi(e,t,i,!0),null}function rr(e,t){var a=t.ref;if(a===null)e!==null&&e.ref!==null&&(t.flags|=4194816);else{if(typeof a!="function"&&typeof a!="object")throw Error(r(284));(e===null||e.ref!==a)&&(t.flags|=4194816)}}function Js(e,t,a,i,s){return Ua(t),a=Ns(e,t,a,i,void 0,s),i=js(),e!==null&&!et?(Ms(e,t,s),jn(e,t,s)):(ye&&i&&vs(t),t.flags|=1,it(e,t,a,s),t.child)}function vv(e,t,a,i,s,c){return Ua(t),t.updateQueue=null,a=hh(t,i,a,s),dh(e),i=js(),e!==null&&!et?(Ms(e,t,c),jn(e,t,c)):(ye&&i&&vs(t),t.flags|=1,it(e,t,a,c),t.child)}function mv(e,t,a,i,s){if(Ua(t),t.stateNode===null){var c=ml,h=a.contextType;typeof h=="object"&&h!==null&&(c=ft(h)),c=new a(i,c),t.memoizedState=c.state!==null&&c.state!==void 
0?c.state:null,c.updater=Ks,t.stateNode=c,c._reactInternals=t,c=t.stateNode,c.props=i,c.state=t.memoizedState,c.refs={},Os(t),h=a.contextType,c.context=typeof h=="object"&&h!==null?ft(h):ml,c.state=t.memoizedState,h=a.getDerivedStateFromProps,typeof h=="function"&&(Xs(t,a,h,i),c.state=t.memoizedState),typeof a.getDerivedStateFromProps=="function"||typeof c.getSnapshotBeforeUpdate=="function"||typeof c.UNSAFE_componentWillMount!="function"&&typeof c.componentWillMount!="function"||(h=c.state,typeof c.componentWillMount=="function"&&c.componentWillMount(),typeof c.UNSAFE_componentWillMount=="function"&&c.UNSAFE_componentWillMount(),h!==c.state&&Ks.enqueueReplaceState(c,c.state,null),Oi(t,i,c,s),Ei(),c.state=t.memoizedState),typeof c.componentDidMount=="function"&&(t.flags|=4194308),i=!0}else if(e===null){c=t.stateNode;var g=t.memoizedProps,y=La(a,g);c.props=y;var T=c.context,j=a.contextType;h=ml,typeof j=="object"&&j!==null&&(h=ft(j));var L=a.getDerivedStateFromProps;j=typeof L=="function"||typeof c.getSnapshotBeforeUpdate=="function",g=t.pendingProps!==g,j||typeof c.UNSAFE_componentWillReceiveProps!="function"&&typeof c.componentWillReceiveProps!="function"||(g||T!==h)&&tv(t,c,i,h),In=!1;var z=t.memoizedState;c.state=z,Oi(t,i,c,s),Ei(),T=t.memoizedState,g||z!==T||In?(typeof L=="function"&&(Xs(t,a,L,i),T=t.memoizedState),(y=In||ev(t,a,y,i,z,T,h))?(j||typeof c.UNSAFE_componentWillMount!="function"&&typeof c.componentWillMount!="function"||(typeof c.componentWillMount=="function"&&c.componentWillMount(),typeof c.UNSAFE_componentWillMount=="function"&&c.UNSAFE_componentWillMount()),typeof c.componentDidMount=="function"&&(t.flags|=4194308)):(typeof c.componentDidMount=="function"&&(t.flags|=4194308),t.memoizedProps=i,t.memoizedState=T),c.props=i,c.state=T,c.context=h,i=y):(typeof c.componentDidMount=="function"&&(t.flags|=4194308),i=!1)}else{c=t.stateNode,As(e,t),h=t.memoizedProps,j=La(a,h),c.props=j,L=t.pendingProps,z=c.context,T=a.contextType,y=ml,typeof 
T=="object"&&T!==null&&(y=ft(T)),g=a.getDerivedStateFromProps,(T=typeof g=="function"||typeof c.getSnapshotBeforeUpdate=="function")||typeof c.UNSAFE_componentWillReceiveProps!="function"&&typeof c.componentWillReceiveProps!="function"||(h!==L||z!==y)&&tv(t,c,i,y),In=!1,z=t.memoizedState,c.state=z,Oi(t,i,c,s),Ei();var D=t.memoizedState;h!==L||z!==D||In||e!==null&&e.dependencies!==null&&qu(e.dependencies)?(typeof g=="function"&&(Xs(t,a,g,i),D=t.memoizedState),(j=In||ev(t,a,j,i,z,D,y)||e!==null&&e.dependencies!==null&&qu(e.dependencies))?(T||typeof c.UNSAFE_componentWillUpdate!="function"&&typeof c.componentWillUpdate!="function"||(typeof c.componentWillUpdate=="function"&&c.componentWillUpdate(i,D,y),typeof c.UNSAFE_componentWillUpdate=="function"&&c.UNSAFE_componentWillUpdate(i,D,y)),typeof c.componentDidUpdate=="function"&&(t.flags|=4),typeof c.getSnapshotBeforeUpdate=="function"&&(t.flags|=1024)):(typeof c.componentDidUpdate!="function"||h===e.memoizedProps&&z===e.memoizedState||(t.flags|=4),typeof c.getSnapshotBeforeUpdate!="function"||h===e.memoizedProps&&z===e.memoizedState||(t.flags|=1024),t.memoizedProps=i,t.memoizedState=D),c.props=i,c.state=D,c.context=y,i=j):(typeof c.componentDidUpdate!="function"||h===e.memoizedProps&&z===e.memoizedState||(t.flags|=4),typeof c.getSnapshotBeforeUpdate!="function"||h===e.memoizedProps&&z===e.memoizedState||(t.flags|=1024),i=!1)}return c=i,rr(e,t),i=(t.flags&128)!==0,c||i?(c=t.stateNode,a=i&&typeof a.getDerivedStateFromError!="function"?null:c.render(),t.flags|=1,e!==null&&i?(t.child=Al(t,e.child,null,s),t.child=Al(t,null,a,s)):it(e,t,a,s),t.memoizedState=c.state,e=t.child):e=jn(e,t,s),e}function pv(e,t,a,i){return mi(),t.flags|=256,it(e,t,a,i),t.child}var Ps={dehydrated:null,treeContext:null,retryLane:0,hydrationErrors:null};function Ws(e){return{baseLanes:e,cachePool:ah()}}function Fs(e,t,a){return e=e!==null?e.childLanes&~a:0,t&&(e|=Yt),e}function gv(e,t,a){var 
i=t.pendingProps,s=!1,c=(t.flags&128)!==0,h;if((h=c)||(h=e!==null&&e.memoizedState===null?!1:(We.current&2)!==0),h&&(s=!0,t.flags&=-129),h=(t.flags&32)!==0,t.flags&=-33,e===null){if(ye){if(s?aa(t):la(),ye){var g=He,y;if(y=g){e:{for(y=g,g=sn;y.nodeType!==8;){if(!g){g=null;break e}if(y=It(y.nextSibling),y===null){g=null;break e}}g=y}g!==null?(t.memoizedState={dehydrated:g,treeContext:Da!==null?{id:Tn,overflow:wn}:null,retryLane:536870912,hydrationErrors:null},y=At(18,null,null,0),y.stateNode=g,y.return=t,t.child=y,ht=t,He=null,y=!0):y=!1}y||Ma(t)}if(g=t.memoizedState,g!==null&&(g=g.dehydrated,g!==null))return Uc(g)?t.lanes=32:t.lanes=536870912,null;Nn(t)}return g=i.children,i=i.fallback,s?(la(),s=t.mode,g=or({mode:"hidden",children:g},s),i=Ra(i,s,a,null),g.return=t,i.return=t,g.sibling=i,t.child=g,s=t.child,s.memoizedState=Ws(a),s.childLanes=Fs(e,h,a),t.memoizedState=Ps,i):(aa(t),Is(t,g))}if(y=e.memoizedState,y!==null&&(g=y.dehydrated,g!==null)){if(c)t.flags&256?(aa(t),t.flags&=-257,t=ec(e,t,a)):t.memoizedState!==null?(la(),t.child=e.child,t.flags|=128,t=null):(la(),s=i.fallback,g=t.mode,i=or({mode:"visible",children:i.children},g),s=Ra(s,g,a,null),s.flags|=2,i.return=t,s.return=t,i.sibling=s,t.child=i,Al(t,e.child,null,a),i=t.child,i.memoizedState=Ws(a),i.childLanes=Fs(e,h,a),t.memoizedState=Ps,t=s);else if(aa(t),Uc(g)){if(h=g.nextSibling&&g.nextSibling.dataset,h)var T=h.dgst;h=T,i=Error(r(419)),i.stack="",i.digest=h,pi({value:i,source:null,stack:null}),t=ec(e,t,a)}else if(et||gi(e,t,a,!1),h=(a&e.childLanes)!==0,et||h){if(h=Re,h!==null&&(i=a&-a,i=(i&42)!==0?1:Zo(i),i=(i&(h.suspendedLanes|a))!==0?0:i,i!==0&&i!==y.retryLane))throw y.retryLane=i,vl(e,i),Dt(h,e,i),ov;g.data==="$?"||_c(),t=ec(e,t,a)}else g.data==="$?"?(t.flags|=192,t.child=e.child,t=null):(e=y.treeContext,He=It(g.nextSibling),ht=t,ye=!0,ja=null,sn=!1,e!==null&&(kt[qt++]=Tn,kt[qt++]=wn,kt[qt++]=Da,Tn=e.id,wn=e.overflow,Da=t),t=Is(t,i.children),t.flags|=4096);return t}return 
s?(la(),s=i.fallback,g=t.mode,y=e.child,T=y.sibling,i=An(y,{mode:"hidden",children:i.children}),i.subtreeFlags=y.subtreeFlags&65011712,T!==null?s=An(T,s):(s=Ra(s,g,a,null),s.flags|=2),s.return=t,i.return=t,i.sibling=s,t.child=i,i=s,s=t.child,g=e.child.memoizedState,g===null?g=Ws(a):(y=g.cachePool,y!==null?(T=Pe._currentValue,y=y.parent!==T?{parent:T,pool:T}:y):y=ah(),g={baseLanes:g.baseLanes|a,cachePool:y}),s.memoizedState=g,s.childLanes=Fs(e,h,a),t.memoizedState=Ps,i):(aa(t),a=e.child,e=a.sibling,a=An(a,{mode:"visible",children:i.children}),a.return=t,a.sibling=null,e!==null&&(h=t.deletions,h===null?(t.deletions=[e],t.flags|=16):h.push(e)),t.child=a,t.memoizedState=null,a)}function Is(e,t){return t=or({mode:"visible",children:t},e.mode),t.return=e,e.child=t}function or(e,t){return e=At(22,e,null,t),e.lanes=0,e.stateNode={_visibility:1,_pendingMarkers:null,_retryCache:null,_transitions:null},e}function ec(e,t,a){return Al(t,e.child,null,a),e=Is(t,t.pendingProps.children),e.flags|=2,t.memoizedState=null,e}function _v(e,t,a){e.lanes|=t;var i=e.alternate;i!==null&&(i.lanes|=t),_s(e.return,t,a)}function tc(e,t,a,i,s){var c=e.memoizedState;c===null?e.memoizedState={isBackwards:t,rendering:null,renderingStartTime:0,last:i,tail:a,tailMode:s}:(c.isBackwards=t,c.rendering=null,c.renderingStartTime=0,c.last=i,c.tail=a,c.tailMode=s)}function yv(e,t,a){var i=t.pendingProps,s=i.revealOrder,c=i.tail;if(it(e,t,i.children,a),i=We.current,(i&2)!==0)i=i&1|2,t.flags|=128;else{if(e!==null&&(e.flags&128)!==0)e:for(e=t.child;e!==null;){if(e.tag===13)e.memoizedState!==null&&_v(e,a,t);else if(e.tag===19)_v(e,a,t);else if(e.child!==null){e.child.return=e,e=e.child;continue}if(e===t)break e;for(;e.sibling===null;){if(e.return===null||e.return===t)break 
e;e=e.return}e.sibling.return=e.return,e=e.sibling}i&=1}switch(Y(We,i),s){case"forwards":for(a=t.child,s=null;a!==null;)e=a.alternate,e!==null&&lr(e)===null&&(s=a),a=a.sibling;a=s,a===null?(s=t.child,t.child=null):(s=a.sibling,a.sibling=null),tc(t,!1,s,a,c);break;case"backwards":for(a=null,s=t.child,t.child=null;s!==null;){if(e=s.alternate,e!==null&&lr(e)===null){t.child=s;break}e=s.sibling,s.sibling=a,a=s,s=e}tc(t,!0,a,null,c);break;case"together":tc(t,!1,null,null,void 0);break;default:t.memoizedState=null}return t.child}function jn(e,t,a){if(e!==null&&(t.dependencies=e.dependencies),sa|=t.lanes,(a&t.childLanes)===0)if(e!==null){if(gi(e,t,a,!1),(a&t.childLanes)===0)return null}else return null;if(e!==null&&t.child!==e.child)throw Error(r(153));if(t.child!==null){for(e=t.child,a=An(e,e.pendingProps),t.child=a,a.return=t;e.sibling!==null;)e=e.sibling,a=a.sibling=An(e,e.pendingProps),a.return=t;a.sibling=null}return t.child}function nc(e,t){return(e.lanes&t)!==0?!0:(e=e.dependencies,!!(e!==null&&qu(e)))}function c0(e,t,a){switch(t.tag){case 3:je(t,t.stateNode.containerInfo),Fn(t,Pe,e.memoizedState.cache),mi();break;case 27:case 5:No(t);break;case 4:je(t,t.stateNode.containerInfo);break;case 10:Fn(t,t.type,t.memoizedProps.value);break;case 13:var i=t.memoizedState;if(i!==null)return i.dehydrated!==null?(aa(t),t.flags|=128,null):(a&t.child.childLanes)!==0?gv(e,t,a):(aa(t),e=jn(e,t,a),e!==null?e.sibling:null);aa(t);break;case 19:var s=(e.flags&128)!==0;if(i=(a&t.childLanes)!==0,i||(gi(e,t,a,!1),i=(a&t.childLanes)!==0),s){if(i)return yv(e,t,a);t.flags|=128}if(s=t.memoizedState,s!==null&&(s.rendering=null,s.tail=null,s.lastEffect=null),Y(We,We.current),i)break;return null;case 22:case 23:return t.lanes=0,dv(e,t,a);case 24:Fn(t,Pe,e.memoizedState.cache)}return jn(e,t,a)}function bv(e,t,a){if(e!==null)if(e.memoizedProps!==t.pendingProps)et=!0;else{if(!nc(e,a)&&(t.flags&128)===0)return et=!1,c0(e,t,a);et=(e.flags&131072)!==0}else 
et=!1,ye&&(t.flags&1048576)!==0&&Pd(t,ku,t.index);switch(t.lanes=0,t.tag){case 16:e:{e=t.pendingProps;var i=t.elementType,s=i._init;if(i=s(i._payload),t.type=i,typeof i=="function")fs(i)?(e=La(i,e),t.tag=1,t=mv(null,t,i,e,a)):(t.tag=0,t=Js(null,t,i,e,a));else{if(i!=null){if(s=i.$$typeof,s===W){t.tag=11,t=sv(null,t,i,e,a);break e}else if(s===xe){t.tag=14,t=cv(null,t,i,e,a);break e}}throw t=Kn(i)||i,Error(r(306,t,""))}}return t;case 0:return Js(e,t,t.type,t.pendingProps,a);case 1:return i=t.type,s=La(i,t.pendingProps),mv(e,t,i,s,a);case 3:e:{if(je(t,t.stateNode.containerInfo),e===null)throw Error(r(387));i=t.pendingProps;var c=t.memoizedState;s=c.element,As(e,t),Oi(t,i,null,a);var h=t.memoizedState;if(i=h.cache,Fn(t,Pe,i),i!==c.cache&&ys(t,[Pe],a,!0),Ei(),i=h.element,c.isDehydrated)if(c={element:i,isDehydrated:!1,cache:h.cache},t.updateQueue.baseState=c,t.memoizedState=c,t.flags&256){t=pv(e,t,i,a);break e}else if(i!==s){s=$t(Error(r(424)),t),pi(s),t=pv(e,t,i,a);break e}else{switch(e=t.stateNode.containerInfo,e.nodeType){case 9:e=e.body;break;default:e=e.nodeName==="HTML"?e.ownerDocument.body:e}for(He=It(e.firstChild),ht=t,ye=!0,ja=null,sn=!0,a=Fh(t,null,i,a),t.child=a;a;)a.flags=a.flags&-3|4096,a=a.sibling}else{if(mi(),i===s){t=jn(e,t,a);break e}it(e,t,i,a)}t=t.child}return t;case 26:return rr(e,t),e===null?(a=Om(t.type,null,t.pendingProps,null))?t.memoizedState=a:ye||(a=t.type,e=t.pendingProps,i=xr(re.current).createElement(a),i[ct]=t,i[vt]=e,rt(i,a,e),Ie(i),t.stateNode=i):t.memoizedState=Om(t.type,e.memoizedProps,t.pendingProps,e.memoizedState),null;case 27:return No(t),e===null&&ye&&(i=t.stateNode=Sm(t.type,t.pendingProps,re.current),ht=t,sn=!0,s=He,ha(t.type)?(Zc=s,He=It(i.firstChild)):He=s),it(e,t,t.pendingProps.children,a),rr(e,t),e===null&&(t.flags|=4194304),t.child;case 5:return 
e===null&&ye&&((s=i=He)&&(i=L0(i,t.type,t.pendingProps,sn),i!==null?(t.stateNode=i,ht=t,He=It(i.firstChild),sn=!1,s=!0):s=!1),s||Ma(t)),No(t),s=t.type,c=t.pendingProps,h=e!==null?e.memoizedProps:null,i=c.children,jc(s,c)?i=null:h!==null&&jc(s,h)&&(t.flags|=32),t.memoizedState!==null&&(s=Ns(e,t,n0,null,null,a),Xi._currentValue=s),rr(e,t),it(e,t,i,a),t.child;case 6:return e===null&&ye&&((e=a=He)&&(a=$0(a,t.pendingProps,sn),a!==null?(t.stateNode=a,ht=t,He=null,e=!0):e=!1),e||Ma(t)),null;case 13:return gv(e,t,a);case 4:return je(t,t.stateNode.containerInfo),i=t.pendingProps,e===null?t.child=Al(t,null,i,a):it(e,t,i,a),t.child;case 11:return sv(e,t,t.type,t.pendingProps,a);case 7:return it(e,t,t.pendingProps,a),t.child;case 8:return it(e,t,t.pendingProps.children,a),t.child;case 12:return it(e,t,t.pendingProps.children,a),t.child;case 10:return i=t.pendingProps,Fn(t,t.type,i.value),it(e,t,i.children,a),t.child;case 9:return s=t.type._context,i=t.pendingProps.children,Ua(t),s=ft(s),i=i(s),t.flags|=1,it(e,t,i,a),t.child;case 14:return cv(e,t,t.type,t.pendingProps,a);case 15:return fv(e,t,t.type,t.pendingProps,a);case 19:return yv(e,t,a);case 31:return i=t.pendingProps,a=t.mode,i={mode:i.mode,children:i.children},e===null?(a=or(i,a),a.ref=t.ref,t.child=a,a.return=t,t=a):(a=An(e.child,i),a.ref=t.ref,t.child=a,a.return=t,t=a),t;case 22:return dv(e,t,a);case 24:return Ua(t),i=ft(Pe),e===null?(s=xs(),s===null&&(s=Re,c=bs(),s.pooledCache=c,c.refCount++,c!==null&&(s.pooledCacheLanes|=a),s=c),t.memoizedState={parent:i,cache:s},Os(t),Fn(t,Pe,s)):((e.lanes&a)!==0&&(As(e,t),Oi(t,null,null,a),Ei()),s=e.memoizedState,c=t.memoizedState,s.parent!==i?(s={parent:i,cache:i},t.memoizedState=s,t.lanes===0&&(t.memoizedState=t.updateQueue.baseState=s),Fn(t,Pe,i)):(i=c.cache,Fn(t,Pe,i),i!==s.cache&&ys(t,[Pe],a,!0))),it(e,t,t.pendingProps.children,a),t.child;case 29:throw t.pendingProps}throw Error(r(156,t.tag))}function Mn(e){e.flags|=4}function 
Sv(e,t){if(t.type!=="stylesheet"||(t.state.loading&4)!==0)e.flags&=-16777217;else if(e.flags|=16777216,!Rm(t)){if(t=Vt.current,t!==null&&((ve&4194048)===ve?cn!==null:(ve&62914560)!==ve&&(ve&536870912)===0||t!==cn))throw Si=Es,lh;e.flags|=8192}}function sr(e,t){t!==null&&(e.flags|=4),e.flags&16384&&(t=e.tag!==22?If():536870912,e.lanes|=t,Rl|=t)}function Ni(e,t){if(!ye)switch(e.tailMode){case"hidden":t=e.tail;for(var a=null;t!==null;)t.alternate!==null&&(a=t),t=t.sibling;a===null?e.tail=null:a.sibling=null;break;case"collapsed":a=e.tail;for(var i=null;a!==null;)a.alternate!==null&&(i=a),a=a.sibling;i===null?t||e.tail===null?e.tail=null:e.tail.sibling=null:i.sibling=null}}function Be(e){var t=e.alternate!==null&&e.alternate.child===e.child,a=0,i=0;if(t)for(var s=e.child;s!==null;)a|=s.lanes|s.childLanes,i|=s.subtreeFlags&65011712,i|=s.flags&65011712,s.return=e,s=s.sibling;else for(s=e.child;s!==null;)a|=s.lanes|s.childLanes,i|=s.subtreeFlags,i|=s.flags,s.return=e,s=s.sibling;return e.subtreeFlags|=i,e.childLanes=a,t}function f0(e,t,a){var i=t.pendingProps;switch(ms(t),t.tag){case 31:case 16:case 15:case 0:case 11:case 7:case 8:case 12:case 9:case 14:return Be(t),null;case 1:return Be(t),null;case 3:return a=t.stateNode,i=null,e!==null&&(i=e.memoizedState.cache),t.memoizedState.cache!==i&&(t.flags|=2048),Rn(Pe),Qn(),a.pendingContext&&(a.context=a.pendingContext,a.pendingContext=null),(e===null||e.child===null)&&(vi(t)?Mn(t):e===null||e.memoizedState.isDehydrated&&(t.flags&256)===0||(t.flags|=1024,Id())),Be(t),null;case 26:return a=t.memoizedState,e===null?(Mn(t),a!==null?(Be(t),Sv(t,a)):(Be(t),t.flags&=-16777217)):a?a!==e.memoizedState?(Mn(t),Be(t),Sv(t,a)):(Be(t),t.flags&=-16777217):(e.memoizedProps!==i&&Mn(t),Be(t),t.flags&=-16777217),null;case 27:bu(t),a=re.current;var s=t.type;if(e!==null&&t.stateNode!=null)e.memoizedProps!==i&&Mn(t);else{if(!i){if(t.stateNode===null)throw Error(r(166));return 
Be(t),null}e=ee.current,vi(t)?Wd(t):(e=Sm(s,i,a),t.stateNode=e,Mn(t))}return Be(t),null;case 5:if(bu(t),a=t.type,e!==null&&t.stateNode!=null)e.memoizedProps!==i&&Mn(t);else{if(!i){if(t.stateNode===null)throw Error(r(166));return Be(t),null}if(e=ee.current,vi(t))Wd(t);else{switch(s=xr(re.current),e){case 1:e=s.createElementNS("http://www.w3.org/2000/svg",a);break;case 2:e=s.createElementNS("http://www.w3.org/1998/Math/MathML",a);break;default:switch(a){case"svg":e=s.createElementNS("http://www.w3.org/2000/svg",a);break;case"math":e=s.createElementNS("http://www.w3.org/1998/Math/MathML",a);break;case"script":e=s.createElement("div"),e.innerHTML=" - + +