diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 583985b4..02573038 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -245,8 +245,8 @@ class EvaluationRow(BaseModel): supporting both row-wise batch evaluation and trajectory-based RL evaluation. """ - # Core conversation data - messages: List[Message] = Field(description="List of messages in the conversation/trajectory.") + # Core OpenAI ChatCompletion compatible conversation data + messages: List[Message] = Field(description="List of messages in the conversation. Also known as a trajectory.") # Tool and function call information tools: Optional[List[Dict[str, Any]]] = Field( @@ -264,6 +264,26 @@ class EvaluationRow(BaseModel): description="The status of the rollout.", ) + invocation_id: Optional[str] = Field( + default_factory=generate_id, + description="The ID of the invocation that this row belongs to.", + ) + + cohort_id: Optional[str] = Field( + default_factory=generate_id, + description="The ID of the cohort that this row belongs to.", + ) + + rollout_id: Optional[str] = Field( + default_factory=generate_id, + description="The ID of the rollout that this row belongs to.", + ) + + run_id: Optional[str] = Field( + None, + description=("The ID of the run that this row belongs to."), + ) + # Ground truth reference (moved from EvaluateResult to top level) ground_truth: Optional[str] = Field( default=None, description="Optional ground truth reference for this evaluation." 
diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index d7cba33d..2e8da8b3 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -200,6 +200,8 @@ async def default_mcp_gym_rollout_processor( Returns: List of EvaluationRow objects with completed conversations """ + if config.server_script_path is None: + raise ValueError("server_script_path is required for default_mcp_gym_rollout_processor") server = MCPServerManager(config.server_script_path, port=9700) try: diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 937f4e4c..a2fe54a8 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -1,13 +1,14 @@ -import inspect -import os import copy +import inspect import math +import os import statistics -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Literal, Optional import pytest from eval_protocol.dataset_logger import default_logger +from eval_protocol.human_id import generate_id from eval_protocol.models import CompletionParams, EvalMetadata, EvaluationRow, InputMetadata, Message from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter from eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor @@ -28,13 +29,14 @@ aggregate, create_dynamically_parameterized_wrapper, execute_function, + log_eval_status_and_rows, ) +from eval_protocol.stats.confidence_intervals import compute_fixed_set_mu_ci from ..common_utils import load_jsonl -from eval_protocol.stats.confidence_intervals import compute_fixed_set_mu_ci -def evaluation_test( +def evaluation_test( # noqa: C901 *, model: List[ModelParam], input_messages: Optional[List[InputMessagesParam]] = None, @@ -59,6 +61,37 @@ def evaluation_test( ]: """Decorator to 
create pytest-based evaluation tests. + Here are some key concepts to understand the terminology in EP: + + - "invocation" is a single execution of a test function. An invocation can + generate 1 or more cohorts. Grouping by invocation might be useful to + aggregate eval scores across multiple invocations when you want to aggregate + scores across multiple datasets. + - "cohort" is a group of runs for a combination of parameters. A single + cohort will have multiple runs if num_runs > 1. + 1. If your evaluation_test has combinations of parameters, it will generate + one cohort per combination of parameters. + 2. A new execution of a test function will generate a new cohort. + - "run" is a group of rollouts. If num_runs > 1, there will be + multiple "run_id"s. + - "rollout" is the execution/process that produces a "trajectory". You + "execute" multiple rollouts to generate a dataset of trajectories. + - "trajectory" is the result produced by a rollout — a list of OpenAI Chat + Completion messages (e.g. the "messages" field in EvaluationRow). + - "row" is both the input and output of an evaluation. For example, in + tau-bench, a row is a task within the dataset that can be identified as + "airline_task_0" or "airline_task_1" etc. The "row_id" can be populated from + the dataset itself to identify a particular task you want to evaluate. If + not provided, EP will generate a "row_id" for each row whenever you call the + evaluation test. + - "dataset" is a collection of rows (e.g. List[EvaluationRow]) + - "eval" is a rubric implemented in the body of an @evaluation_test + decorated test. It simply produces a score from 0 to 1 and attaches it + to the row as the "evaluation_result" field. + + "invocation", "cohort", "run", "rollout", and "row" each have a unique ID + which can be used to easily group and identify your dataset by. + Args: model: Model identifiers to query. input_messages: Messages to send to the model. 
This is useful if you @@ -75,7 +108,7 @@ def evaluation_test( aggregation_method: How to aggregate scores across rows. threshold_of_success: If set, fail the test if the aggregated score is below this threshold. - num_runs: Number of times to repeat the evaluation. + num_runs: Number of times to repeat the rollout and evaluations. max_dataset_rows: Limit dataset to the first N rows. mcp_config_path: Path to MCP config file that follows MCPMultiClientConfiguration schema max_concurrent_rollouts: Maximum number of concurrent rollouts to run in parallel. @@ -119,15 +152,15 @@ def decorator( def execute_with_params( test_func: TestFunction, - row: EvaluationRow | None = None, - input_dataset: List[EvaluationRow] | None = None, + processed_row: EvaluationRow | None = None, + processed_dataset: List[EvaluationRow] | None = None, evaluation_test_kwargs: Optional[EvaluationInputParam] = None, ): kwargs = {} - if input_dataset is not None: - kwargs["rows"] = input_dataset - if row is not None: - kwargs["row"] = row + if processed_dataset is not None: + kwargs["rows"] = processed_dataset + if processed_row is not None: + kwargs["row"] = processed_row if evaluation_test_kwargs is not None: if "row" in evaluation_test_kwargs: raise ValueError("'row' is a reserved parameter for the evaluation function") @@ -176,7 +209,7 @@ def generate_combinations(): datasets = [[input_dataset]] # type: ignore else: datasets = [None] - params: List[Optional[RolloutInputParam]] = rollout_input_params if rollout_input_params is not None else [None] # type: ignore + rips: List[Optional[RolloutInputParam]] = rollout_input_params if rollout_input_params is not None else [None] # type: ignore # Apply EP_MAX_DATASET_ROWS to input_messages, but do NOT parameterize over # each row. Instead, pass the entire sliced list through in a single test run # so summaries aggregate all rows together (AIME-style behavior). 
@@ -195,7 +228,7 @@ def generate_combinations(): # Generate all combinations for m in model: for ds in datasets: - for ip in params: + for rip in rips: for im in messages: for etk in kwargs: # if no dataset and no messages, raise an error @@ -203,7 +236,7 @@ def generate_combinations(): raise ValueError( "No dataset or messages provided. Please provide at least one of input_dataset or input_messages." ) - combinations.append((m, ds, ip, im, etk)) + combinations.append((m, ds, rip, im, etk)) return combinations @@ -216,12 +249,12 @@ def generate_combinations(): # Create parameter tuples for pytest.mark.parametrize param_tuples = [] for combo in combinations: - model_name, dataset, params, messages, etk = combo + model_name, dataset, rip, messages, etk = combo param_tuple = [model_name] if input_dataset is not None: param_tuple.append(dataset) if rollout_input_params is not None: - param_tuple.append(params) + param_tuple.append(rip) if input_messages is not None: param_tuple.append(messages) if evaluation_test_kwargs is not None: @@ -242,11 +275,20 @@ def generate_combinations(): # Create wrapper function with exact signature that pytest expects def create_wrapper_with_signature() -> Callable: # Create the function body that will be used + invocation_id = generate_id() + def wrapper_body(**kwargs): model_name = kwargs["model"] eval_metadata = None all_results: List[EvaluationRow] = [] + cohort_id = generate_id() + + def _log_eval_error( + status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool + ) -> None: + log_eval_status_and_rows(eval_metadata, rows, status, passed, default_logger) + try: # Handle dataset loading data: List[EvaluationRow] = [] @@ -283,6 +325,7 @@ def wrapper_body(**kwargs): # into input_params (e.g., '{"temperature":0,"extra_body":{"reasoning":{"effort":"low"}}}'). 
try: import json as _json + _env_override = os.getenv("EP_INPUT_PARAMS_JSON") if _env_override: override_obj = _json.loads(_env_override) @@ -320,6 +363,8 @@ def wrapper_body(**kwargs): row.input_metadata.session_data["mode"] = mode # Initialize eval_metadata for each row row.eval_metadata = eval_metadata + row.cohort_id = cohort_id + row.invocation_id = invocation_id # has to be done in the pytest main process since it's # used to determine whether this eval has stopped @@ -339,14 +384,25 @@ def wrapper_body(**kwargs): for _ in range(num_runs): # Regenerate outputs each run by deep-copying the pristine dataset # so model responses are not reused across runs. - fresh_rows = [copy.deepcopy(r) for r in data] - input_dataset = execute_function(rollout_processor, rows=fresh_rows, config=config) + run_id = generate_id() + fresh_dataset = [copy.deepcopy(r) for r in data] + + # apply new run_id to fresh_dataset + for row in fresh_dataset: + row.run_id = run_id + + # generate new rollout_id for each row + for row in fresh_dataset: + row.rollout_id = generate_id() + + processed_dataset = execute_function(rollout_processor, rows=fresh_dataset, config=config) + if mode == "pointwise": # Pointwise mode: apply the evaluator function to each row - for row in input_dataset: + for row in processed_dataset: result = execute_with_params( test_func, - row=row, + processed_row=row, evaluation_test_kwargs=kwargs.get("evaluation_test_kwargs") or {}, ) if result is None or not isinstance(result, EvaluationRow): @@ -358,7 +414,7 @@ def wrapper_body(**kwargs): # Batch mode: call the test function with the full dataset results = execute_with_params( test_func, - input_dataset=input_dataset, + processed_dataset=processed_dataset, evaluation_test_kwargs=kwargs.get("evaluation_test_kwargs") or {}, ) if results is None: @@ -430,6 +486,7 @@ def wrapper_body(**kwargs): # Aggregate per-metric mean and 95% CI when available metrics_summary: Dict[str, Dict[str, float]] = {} from collections import 
defaultdict + metric_scores: Dict[str, list] = defaultdict(list) for r in all_results: if r.evaluation_result and r.evaluation_result.metrics: @@ -470,7 +527,10 @@ def wrapper_body(**kwargs): ) # As per project convention, avoid printing per-metric CI lines to reduce noise if summary_path: - import json, pathlib, time, re + import json + import pathlib + import re + import time def _sanitize_filename(text: str) -> str: safe = re.sub(r"[^A-Za-z0-9._-]+", "-", text.strip()) @@ -487,7 +547,11 @@ def _extract_effort_tag(params: dict) -> str | None: return str(eb["reasoning"]["effort"]).lower() if "reasoning_effort" in eb: return str(eb["reasoning_effort"]).lower() - if "reasoning" in params and isinstance(params["reasoning"], dict) and "effort" in params["reasoning"]: + if ( + "reasoning" in params + and isinstance(params["reasoning"], dict) + and "effort" in params["reasoning"] + ): return str(params["reasoning"]["effort"]).lower() except Exception: return None @@ -529,25 +593,11 @@ def _extract_effort_tag(params: dict) -> str | None: agg_score >= threshold_of_success ), f"Aggregated score {agg_score:.3f} below threshold {threshold_of_success}" + except AssertionError: + _log_eval_error("finished", data if "data" in locals() else None, passed=False) + raise except Exception: - # Update eval metadata status to error and log it - if eval_metadata is not None: - eval_metadata.status = "error" - eval_metadata.passed = False - - # Create a minimal result row to log the error if we don't have any results yet - if not data: - error_row = EvaluationRow(messages=[], eval_metadata=eval_metadata, evaluation_result=None) - default_logger.log(error_row) - else: - # Update existing results with error status - for r in data: - if r.eval_metadata is not None: - r.eval_metadata.status = "error" - r.eval_metadata.passed = False - default_logger.log(r) - - # Re-raise the exception to maintain pytest behavior + _log_eval_error("error", data if "data" in locals() else None, passed=False) 
raise return create_dynamically_parameterized_wrapper(test_func, wrapper_body, test_param_names) diff --git a/eval_protocol/pytest/utils.py b/eval_protocol/pytest/utils.py index c57a6fb8..981c1ed3 100644 --- a/eval_protocol/pytest/utils.py +++ b/eval_protocol/pytest/utils.py @@ -1,6 +1,9 @@ import asyncio import inspect -from typing import Any, Callable, List, Literal +from typing import Any, Callable, List, Literal, Optional + +from eval_protocol.dataset_logger.dataset_logger import DatasetLogger +from eval_protocol.models import EvalMetadata, EvaluationRow def execute_function(func: Callable, **kwargs) -> Any: @@ -92,3 +95,32 @@ def wrapper(**kwargs): wrapper.__signature__ = inspect.Signature(parameters) return wrapper + + +def log_eval_status_and_rows( + eval_metadata: Optional[EvalMetadata], + rows: Optional[List[EvaluationRow]] | None, + status: Literal["finished", "error"], + passed: bool, + logger: DatasetLogger, +) -> None: + """Update eval status and emit rows to the given logger. + + If no rows are provided, emits a minimal placeholder row so downstream + consumers still observe a terminal status. + """ + if eval_metadata is None: + return + + eval_metadata.status = status + eval_metadata.passed = passed + + rows_to_log: List[EvaluationRow] = rows or [] + if not rows_to_log: + error_row = EvaluationRow(messages=[], eval_metadata=eval_metadata, evaluation_result=None) + logger.log(error_row) + else: + for r in rows_to_log: + if r.eval_metadata is not None: + r.eval_metadata.status = status + logger.log(r) diff --git a/eval_protocol/utils/logs_server.py b/eval_protocol/utils/logs_server.py index 4338882c..97199ccc 100644 --- a/eval_protocol/utils/logs_server.py +++ b/eval_protocol/utils/logs_server.py @@ -39,9 +39,11 @@ async def connect(self, websocket: WebSocket): connection_count = len(self.active_connections) logger.info(f"WebSocket connected. 
Total connections: {connection_count}") logs = default_logger.read() - await websocket.send_text( - json.dumps({"type": "initialize_logs", "logs": [log.model_dump_json(exclude_none=True) for log in logs]}) - ) + data = { + "type": "initialize_logs", + "logs": [log.model_dump(exclude_none=True, mode="json") for log in logs], + } + await websocket.send_text(json.dumps(data)) def disconnect(self, websocket: WebSocket): with self._lock: @@ -57,7 +59,7 @@ def broadcast_row_upserted(self, row: "EvaluationRow"): """ try: # Serialize pydantic model - json_message = json.dumps({"type": "log", "row": json.loads(row.model_dump_json(exclude_none=True))}) + json_message = json.dumps({"type": "log", "row": row.model_dump(exclude_none=True, mode="json")}) # Queue the message for broadcasting in the main event loop self._broadcast_queue.put(json_message) except Exception as e: diff --git a/examples/aime2025_chat_completion/tests/test_evaluation.py b/examples/aime2025_chat_completion/tests/test_aime2025.py similarity index 95% rename from examples/aime2025_chat_completion/tests/test_evaluation.py rename to examples/aime2025_chat_completion/tests/test_aime2025.py index 7558dab1..60feff25 100644 --- a/examples/aime2025_chat_completion/tests/test_evaluation.py +++ b/examples/aime2025_chat_completion/tests/test_aime2025.py @@ -1,18 +1,15 @@ -from typing import Any, Dict, List import os +from typing import Any, Dict, List from eval_protocol.models import EvaluateResult, EvaluationRow, Message, MetricResult from eval_protocol.pytest.default_single_turn_rollout_process import ( default_single_turn_rollout_processor, ) from eval_protocol.pytest.evaluation_test import evaluation_test - from examples.aime2025_chat_completion.main import _extract_boxed_text, _normalize_to_int_or_none - SYSTEM_PROMPT = ( - "You are a helpful math assistant. Please reason step by step, and put your " - "final answer within \\boxed{...}." + "You are a helpful math assistant. 
Please reason step by step, and put your " "final answer within \\boxed{...}." ) """ @@ -36,8 +33,6 @@ def _ep_int(var_name: str, default_value: int | None) -> int | None: return default_value - - def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]: """ Convert raw AIME2025 rows (with keys 'question' and 'answer') to EvaluationRow. @@ -94,9 +89,7 @@ def test_aime2025_pointwise(row: EvaluationRow) -> EvaluationRow: reason=( "Parsed both integers and they matched" if score == 1.0 - else ( - "Parsed integers did not match" if is_valid else "Failed to parse integer" - ) + else ("Parsed integers did not match" if is_valid else "Failed to parse integer") ), data={ "extracted_text": extracted_text, @@ -113,5 +106,3 @@ def test_aime2025_pointwise(row: EvaluationRow) -> EvaluationRow: metrics=metrics, ) return row - - diff --git a/examples/gpqa/tests/test_evaluation.py b/examples/gpqa/tests/test_gpqa.py similarity index 96% rename from examples/gpqa/tests/test_evaluation.py rename to examples/gpqa/tests/test_gpqa.py index 79863fc0..13d526e0 100644 --- a/examples/gpqa/tests/test_evaluation.py +++ b/examples/gpqa/tests/test_gpqa.py @@ -1,16 +1,15 @@ -from typing import List - import csv import io import re +from typing import List + import requests from eval_protocol.models import EvaluateResult, EvaluationRow, Message, MetricResult -from eval_protocol.pytest.evaluation_test import evaluation_test from eval_protocol.pytest.default_single_turn_rollout_process import ( default_single_turn_rollout_processor, ) - +from eval_protocol.pytest.evaluation_test import evaluation_test SYSTEM_PROMPT = ( "You are a helpful assistant. Read the question and options carefully. 
" @@ -65,7 +64,9 @@ def _load_gpqa_messages_from_csv() -> List[List[Message]]: @evaluation_test( model=["fireworks_ai/accounts/fireworks/models/gpt-oss-120b"], input_messages=_GPQA_INPUT_MESSAGES, - rollout_input_params=[{"extra_body": {"reasoning_effort": "low"}}], # default to low effort; override via CLI plugin + rollout_input_params=[ + {"extra_body": {"reasoning_effort": "low"}} + ], # default to low effort; override via CLI plugin rollout_processor=default_single_turn_rollout_processor, aggregation_method="mean", threshold_of_success=None, @@ -98,5 +99,3 @@ def test_gpqa_pointwise(row: EvaluationRow) -> EvaluationRow: }, ) return row - - diff --git a/tests/pytest/test_markdown_highlighting.py b/tests/pytest/test_markdown_highlighting.py index cc2ae4f5..4dcaacf2 100644 --- a/tests/pytest/test_markdown_highlighting.py +++ b/tests/pytest/test_markdown_highlighting.py @@ -5,9 +5,9 @@ """ import re -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List -from eval_protocol.models import EvaluateResult, EvaluationRow, Message +from eval_protocol.models import EvaluateResult, EvaluationRow, InputMetadata, Message from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test @@ -16,7 +16,11 @@ def markdown_dataset_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evalu Convert entries from markdown dataset to EvaluationRow objects. 
""" return [ - EvaluationRow(messages=[Message(role="user", content=row["prompt"])], ground_truth=str(row["num_highlights"])) + EvaluationRow( + messages=[Message(role="user", content=row["prompt"])], + ground_truth=str(row["num_highlights"]), + input_metadata=InputMetadata(row_id=str(row["key"])), + ) for row in data ] @@ -39,7 +43,8 @@ def test_markdown_highlighting_evaluation(row: EvaluationRow) -> EvaluationRow: assistant_response = row.messages[-1].content if not assistant_response: - return EvaluateResult(score=0.0, reason="❌ No assistant response found") + row.evaluation_result = EvaluateResult(score=0.0, reason="❌ No assistant response found") + return row required_highlights = int(row.ground_truth) diff --git a/tests/pytest/test_pytest_ids.py b/tests/pytest/test_pytest_ids.py new file mode 100644 index 00000000..b740aaa6 --- /dev/null +++ b/tests/pytest/test_pytest_ids.py @@ -0,0 +1,67 @@ +import eval_protocol.pytest.evaluation_test as evaluation_test_module +from eval_protocol.models import EvaluationRow +from eval_protocol.pytest.default_no_op_rollout_process import default_no_op_rollout_processor +from eval_protocol.pytest.evaluation_test import evaluation_test as evaluation_decorator +from tests.pytest.test_markdown_highlighting import markdown_dataset_to_evaluation_row + + +class InMemoryLogger: + def __init__(self): + self._rows = [] + + def log(self, row): + self._rows.append(row) + + def read(self): + return list(self._rows) + + +def test_evaluation_test_decorator_ids_single(monkeypatch): + # Use an in-memory logger to avoid sqlite side effects + in_memory_logger = InMemoryLogger() + monkeypatch.setattr(evaluation_test_module, "default_logger", in_memory_logger, raising=False) + + unique_run_ids = set() + unique_cohort_ids = set() + unique_rollout_ids = set() + unique_invocation_ids = set() + unique_row_ids = set() + + @evaluation_decorator( + input_dataset=[ + "tests/pytest/data/markdown_dataset.jsonl", + 
"tests/pytest/data/markdown_dataset.jsonl", + ], + rollout_input_params=[{"temperature": 0.0}, {"temperature": 1.0}], + model=["dummy/local-model"], + dataset_adapter=markdown_dataset_to_evaluation_row, + rollout_processor=default_no_op_rollout_processor, + mode="pointwise", + combine_datasets=False, + num_runs=5, + ) + def eval_fn(row: EvaluationRow) -> EvaluationRow: + unique_run_ids.add(row.run_id) + unique_cohort_ids.add(row.cohort_id) + unique_rollout_ids.add(row.rollout_id) + unique_invocation_ids.add(row.invocation_id) + unique_row_ids.add(row.input_metadata.row_id) + return row + + dataset_paths = [ + "tests/pytest/data/markdown_dataset.jsonl", + "tests/pytest/data/markdown_dataset.jsonl", + ] + input_params_list = [{"temperature": 0.0}, {"temperature": 1.0}] + + # Manually invoke all parameter combinations within a single test + for ds_path in dataset_paths: + for params in input_params_list: + eval_fn(model="dummy/local-model", dataset_path=[ds_path], input_params=params) + + # Assertions on IDs generated by the decorator logic + assert len(unique_invocation_ids) == 1 + assert len(unique_run_ids) == 20 # 4 combinations * 5 runs each + assert len(unique_cohort_ids) == 2 * 2 # 2 datasets * 2 param sets + assert len(unique_row_ids) == 19 # from the markdown dataset + assert len(unique_rollout_ids) == 19 * 5 * 2 * 2 # rows * runs * datasets * params diff --git a/vite-app/data/logs.json b/vite-app/data/logs.json new file mode 100644 index 00000000..07af6533 --- /dev/null +++ b/vite-app/data/logs.json @@ -0,0 +1 @@ +{"type": "initialize_logs", "logs": [{"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hello"}], "input_metadata": {"row_id": "1"}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T09:52:30.335005"}, {"messages": [{"role": "user", "content": "Hello 0"}], "input_metadata": {"row_id": "row_0"}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T09:47:43.816761"}, {"messages": 
[{"role": "user", "content": "Hello 1"}], "input_metadata": {"row_id": "row_1"}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T09:41:55.015216"}, {"messages": [{"role": "user", "content": "Hello 2"}], "input_metadata": {"row_id": "row_2"}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T09:41:55.015767"}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "describe-use-entire-person-education", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T10:00:08.522553", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev12+g2dc40d5.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 67635}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "meet-solid-big-picture-car", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T10:01:12.825337", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev12+g2dc40d5.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 68229}, {"messages": 
[{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks AI. Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks AI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. 
You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: We are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. 
You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. 
Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. 
Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Introduction\nLink: https://docs.fireworks.ai/api-reference/introduction\nContent: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nIntroduction\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Create a Deployment on Fireworks AI platform\n\nCreate a Deployment on Fireworks AI platform\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: How much does Fireworks cost?\nLink: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost\nContent: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nHow much does Fireworks cost?\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. 
Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx OPENAI_API_BASE \"https://api.fireworks.ai/inference/v1\"\nsetx OPENAI_API_KEY \"\"\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management tools and commands.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment management commands CLI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Installing the CLI\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: To create and manage on-demand deployments, use our CLI tool firectl . 
Install the tool by following the installation guide . Verify that your installation is working and you are logged into the correct account by running: firectl whoami\n\nTo create and manage on-demand deployments, use our CLI tool firectl . Install the tool by following the installation guide . Verify that your installation is working and you are logged into the correct account by running: firectl whoami\n\nTo create and manage on-demand deployments, use our CLI tool firectl . Install the tool by following the installation guide . Verify that your installation is working and you are logged into the correct account by running:\n\n", "type": "text"}, {"text": "Title: Cli overview\nLink: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview\nContent: Command Line Interface Reference\nThe Reward Kit provides a command-line interface (CLI) for common operations like previewing evaluations, deploying reward functions, and running agent evaluations.\n\nCommand Line Interface Reference\nThe Reward Kit provides a command-line interface (CLI) for common operations like previewing evaluations, deploying reward functions, and running agent evaluations.\n\nCommand Overview\nThe Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon).\n\n", "type": "text"}, {"text": "Title: Using the CLI (reward-kit deploy)\nLink: https://docs.fireworks.ai/evaluators/developer_guide/evaluation_workflows\nContent: The reward-kit deploy command is suitable for deploying reward functions defined in script files. The --metrics-folders argument should point to the directory containing your reward function script (e.g., a main.py with the @reward_function decorator). 
For more details on reward-kit deploy , see the CLI Reference.\n\nThe reward-kit deploy command is suitable for deploying reward functions defined in script files. The --metrics-folders argument should point to the directory containing your reward function script (e.g., a main.py with the @reward_function decorator). For more details on reward-kit deploy , see the CLI Reference.\n\nThe reward-kit deploy command is suitable for deploying reward functions defined in script files. The --metrics-folders argument should point to the directory containing your reward function script (e.g., a main.py with the @reward_function decorator). # Deploy with the CLI\nreward-kit deploy \\\n --id helpfulness-evaluator \\\n --metrics-folders \"helpfulness=./path/to/your/metric_script_directory\" \\\n --display-name \"Helpfulness Evaluator\" \\\n --description \"Evaluates the helpfulness of responses\" \\\n --force\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: 1. Setup\nLink: https://docs.fireworks.ai/tools-sdks/python-client/the-tutorial\nContent: To get started with the Fireworks AI Python SDK, you need to install the firectl CLI tool and create an API key. Install our CLI tool firectl to interact with the Fireworks AI platform. Sign in to Fireworks by running the following command: firectl signin\n\nTo get started with the Fireworks AI Python SDK, you need to install the firectl CLI tool and create an API key. Install our CLI tool firectl to interact with the Fireworks AI platform. Sign in to Fireworks by running the following command: firectl signin\n\nTo get started with the Fireworks AI Python SDK, you need to install the firectl CLI tool and create an API key. Install our CLI tool firectl to interact with the Fireworks AI platform. Sign in to Fireworks by running the following command: A browser window will open to the Fireworks AI login page. Once you login, your machine will be authenticated. Create an API key by running the following command: $ firectl create api-key --key-name \"quick-start\"\nKey Id: key_42vAYeb7rwt9zzg1\nDisplay Name: quick-start\nKey: fw_3ZLd....\nSecure: true\nBe sure to save this key. It will not be shown again.\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nDeploying a fine-tuned model serverlessly\nNot all base models support serverless addons. 
Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora \n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. firectl create deployment [flags]\n\nExample\nfirectl create deployment falcon-7b\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}], "input_metadata": {"row_id": "leave-ask-logical-face-way", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:06:51.152819", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 83882}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "investigate-recent-dangerous-hope-magazine", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:10:27.844496", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 84847}, {"messages": [{"role": 
"system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "seem-environmental-media-television-concert", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:12:00.215991", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 85545}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "treat-suppose-salty-week-plan", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:12:34.382113", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 85754}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "contain-recent-bad-plan-change", "completion_params": {"model": 
"fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:12:46.750720", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 85851}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks! Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployments management create update delete scale\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Delete Deployment\nLink: https://docs.fireworks.ai/api-reference/delete-deployment\nContent: \n\n\n\nREST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\nquery params:\nhard:\n\tboolean\n\tDescription: If true, this will perform a hard deletion.\n\tExample: true\nignoreChecks:\n\tboolean\n\tDescription: If true, this will ignore checks and force the deletion of a deployment that is currently\ndeployed and is in use.\n\tExample: true\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {}\n\n", 
"type": "text"}, {"text": "Title: How does billing and scaling work for on-demand GPU deployments?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-and-scaling-work-for-on-demand-gpu-deployments\nContent: On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\nOn-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\nHow does billing and 
scaling work for on-demand GPU deployments?\nOn-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist.\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. 
You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist.\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment.\n\nCreates a new deployment.\n\nCreates a new deployment. firectl create deployment [flags]\n\n", "type": "text"}, {"text": "Title: Delete a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources\nContent: Deletes an model deployment.\n\nDeletes an model deployment.\n\nDeletes an model deployment. 
firectl delete deployment [flags]\n\n", "type": "text"}, {"text": "Title: Update Deployment\nLink: https://docs.fireworks.ai/api-reference/update-deployment\nContent: \n\n\n\nREST Endpoint PATCH /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\n\nRequest body: object\n\tDescription: N/A\n\tExample: {\"displayName\":\"\",\"description\":\"\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"minReplicaCount\":123,\"maxReplicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"disableDeploymentSizeValidation\":true}\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: 
{\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}\n\n", "type": "text"}, {"text": "Title: What should I expect for deployment and scaling performance?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-should-i-expect-for-deployment-and-scaling-performance\nContent: Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\nInitial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment 
times are unusually long\n\nWhat should I expect for deployment and scaling performance?\nInitial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\n", "type": "text"}, {"text": "Title: delete_deployment(ignore_checks: bool = False, wait: bool = True)\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-reference\nContent: Deletes the deployment associated with this LLM instance if one exists. Arguments: ignore_checks bool, optional - Whether to ignore safety checks. Defaults to False. wait bool, optional - Whether to wait for deletion to complete. Defaults to True. llm.delete_deployment(ignore_checks=True)\n\nDeletes the deployment associated with this LLM instance if one exists. Arguments: ignore_checks bool, optional - Whether to ignore safety checks. Defaults to False. wait bool, optional - Whether to wait for deletion to complete. Defaults to True.\n\nscale_to_zero()\nSends a request to scale the deployment to 0 replicas but does not wait for it to complete. 
Returns: The deployment object, or None if no deployment exists deployment = llm.scale_to_zero()\n\n", "type": "text"}, {"text": "Title: How does billing work for on-demand deployments?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-work-for-on-demand-deployments\nContent: On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\nOn-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\nHow does billing work for on-demand deployments?\nOn-demand deployments come with automatic cost optimization features: Default 
autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\n", "type": "text"}, {"text": "Title: Create Deployment\nLink: https://docs.fireworks.ai/api-reference/create-deployment\nContent: \n\n\n\nREST Endpoint POST /v1/accounts/{account_id}/deployments\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\n\nquery params:\ndisableAutoDeploy:\n\tboolean\n\tDescription: By default, a deployment created with a currently undeployed base model\nwill be deployed to this deployment. If true, this auto-deploy function\nis disabled.\n\tExample: true\ndisableSpeculativeDecoding:\n\tboolean\n\tDescription: By default, a deployment will use the speculative decoding settings from\nthe base model. If true, this will disable speculative decoding.\n\tExample: true\ndeploymentId:\n\tstring\n\tDescription: The ID of the deployment. 
If not specified, a random ID will be generated.\n\tExample: \"\"\n\n\nRequest body: object\n\tDescription: N/A\n\tExample: {\"displayName\":\"\",\"description\":\"\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"minReplicaCount\":123,\"maxReplicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"disableDeploymentSizeValidation\":true}\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}\n\n", "type": 
"text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment creation and management workflows.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment management commands create update list\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: Creating an on-demand deployment\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. 
If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\nPublishing a deployed model\nBy default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. 
You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model=\"\" AND deployment=\"\"'\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. 
While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nfirectl\nfirectl create dataset /path/to/jsonl/file\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm\nContent: # Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\n# Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\nfirectl create sftj \\\n --base-model accounts/fireworks/models/qwen2p5-vl-32b-instruct \\\n --dataset my-vlm-dataset \\\n --output-model my-custom-vlm \\\n --epochs 3 For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings .\n\n", "type": "text"}, {"text": "Title: Uploading the model locally (firectl)\nLink: https://docs.fireworks.ai/models/uploading-custom-models\nContent: To upload a custom base model, run the following command. firectl create model /path/to/files/\n\nTo upload a custom base model, run the following command. firectl create model /path/to/files/\n\nPublishing\nBy default, all models you create are only visible to and deployable by users within your account. To publish a model so anyone with a Fireworks account can deploy it, you can create it with the --public flag. This will allow it to show up in public model lists. To unpublish the model, just run firectl update model --public=false\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/models/quantization\nContent: firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8\n\nfirectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8\n\nCreating an FP8 deployment\nBy default, creating a dedicated deployment will use the FP16 checkpoint. 
To see what precisions are available for a\nmodel, run: get model \n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Based on my research, here's a comprehensive guide to managing deployments on Fireworks:\n\n## Deployment Management on Fireworks\n\n### 1. Creating Deployments\n\n**Basic deployment creation:**\n```bash\n# Create a deployment from the model library\nfirectl create deployment accounts/fireworks/models/\n\n# Create a deployment for a custom model\nfirectl create deployment \n\n# Create with specific configuration\nfirectl create deployment --wait\n```\n\n**Advanced deployment options:**\n```bash\n# Deploy with specific hardware requirements\nfirectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8\n\n# Deploy LoRA fine-tuned models (single command)\nfirectl create deployment \"accounts/fireworks/models/\"\n\n# Deploy with addons enabled (for multi-LoRA)\nfirectl create deployment --enable-addons\n```\n\n### 2. Monitoring Deployments\n\n**Check deployment status:**\n```bash\n# List all deployments\nfirectl list deployments\n\n# Get specific deployment details\nfirectl get deployment accounts//deployments/\n\n# Check if deployment is ready\n# The state field should show \"READY\"\n```\n\n**Verify deployed models:**\n```bash\n# List deployed models\nfirectl list deployed-models\n\n# Find specific deployed model\nfirectl list deployed-models --filter 'model=\"\" AND deployment=\"\"'\n```\n\n### 3. 
Managing Default Deployments\n\n**Check default deployment:**\n```bash\n# See which deployment is marked as default\nfirectl list deployed-models\n# Look for \"Default: true\" in the output\n```\n\n**Update default deployment:**\n```bash\n# Update which deployment is the default\nfirectl update deployed-model --default\n```\n\n**Important note:** To delete a default deployment, you must either:\n- Delete all other deployments for the same model first, OR\n- Designate a different deployed model as the default\n\n### 4. Scaling and Cost Management\n\n**Autoscaling configuration:**\n- **Default behavior:** Automatically scales to 0 replicas when not in use\n- **Billing:** Pay only for GPU time when replicas are active\n- **Manual scaling:** Use autoscaling policies to customize behavior\n\n**Cost optimization commands:**\n```bash\n# Scale deployment to zero (cost saving)\nfirectl update deployment --min-replica-count 0\n\n# Or using the Python SDK\nfrom fireworks.client import Fireworks\nllm = Fireworks(model=\"\")\nllm.scale_to_zero()\n```\n\n### 5. Updating Deployments\n\n**Update deployment configuration:**\n```bash\n# Update display name, description, or scaling policies\nfirectl update deployment \\\n --display-name \"New Name\" \\\n --min-replica-count 1 \\\n --max-replica-count 5\n```\n\n**Available update options:**\n- Display name and description\n- Replica counts (min/max)\n- Autoscaling policies\n- Expiration time\n- Hardware configuration\n\n### 6. Deleting Deployments\n\n**Basic deletion:**\n```bash\n# Delete a deployment\nfirectl delete deployment \n\n# Force deletion (ignoring safety checks)\nfirectl delete deployment --ignore-checks\n\n# Using Python SDK\nllm.delete_deployment(ignore_checks=True)\n```\n\n**API deletion:**\n```bash\n# Via REST API\ncurl -X DELETE \\\n https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id} \\\n -H \"Authorization: Bearer YOUR_API_KEY\"\n```\n\n### 7. 
Deployment Performance Expectations\n\n**Timeline expectations:**\n- **Initial deployment:** Should complete within minutes\n- **Scaling from zero:** Brief availability delays while scaling up\n- **Troubleshooting:** If deployment takes >1 hour, likely indicates a crash\n\n### 8. Best Practices\n\n**Cost management:**\n1. **Monitor regularly:** Check active deployments frequently\n2. **Delete unused deployments:** Avoid unnecessary GPU charges\n3. **Use autoscaling:** Leverage scale-to-zero for intermittent usage\n4. **Consider serverless:** For low-frequency usage patterns\n\n**Configuration tips:**\n- Use `--wait` flag for synchronous deployment creation\n- Configure appropriate min/max replica counts for your use case\n- Set expiration times for temporary deployments\n- Monitor deployment status during initial creation\n\n### 9. Common Workflows\n\n**Complete deployment lifecycle:**\n```bash\n# 1. Create deployment\nfirectl create deployment --wait\n\n# 2. Verify it's ready\nfirectl get deployment \n\n# 3. Use the deployment\n# Make API calls to the deployment\n\n# 4. Monitor usage\nfirectl list deployments\n\n# 5. Scale down when not needed\nfirectl update deployment --min-replica-count 0\n\n# 6. Delete when completely done\nfirectl delete deployment \n```\n\nThis comprehensive guide covers everything from basic deployment creation to advanced cost optimization strategies. 
The key is understanding the balance between performance needs and cost efficiency through proper scaling configuration.", "tool_calls": []}], "input_metadata": {"row_id": "move-little-tidy-pain-work", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "At least one tool call was made", "metrics": {}}, "created_at": "2025-08-08T11:15:45.037115", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean"}, "pid": 86994}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks AI. Let me search for comprehensive documentation about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks AI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. 
You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: We are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. 
You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. 
Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. 
Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Introduction\nLink: https://docs.fireworks.ai/api-reference/introduction\nContent: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nIntroduction\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Create a Deployment on Fireworks AI platform\n\nCreate a Deployment on Fireworks AI platform\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: How much does Fireworks cost?\nLink: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost\nContent: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nHow much does Fireworks cost?\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. 
Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx OPENAI_API_BASE \"https://api.fireworks.ai/inference/v1\"\nsetx OPENAI_API_KEY \"\"\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management commands and workflows.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment commands create manage update delete\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Delete a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources\nContent: Deletes an model deployment. 
firectl delete deployment [flags]\n\nDeletes an model deployment. firectl delete deployment [flags]\n\nExample\nfirectl delete deployment my-deployment\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. firectl create deployment [flags]\n\nCreate a Deployment on Fireworks AI platform\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. 
This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist.\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Deployment Command\nLink: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example\nContent: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. 
--function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. 
--force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Delete Deployment\nLink: https://docs.fireworks.ai/api-reference/delete-deployment\nContent: \n\n\n\nREST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\nquery params:\nhard:\n\tboolean\n\tDescription: If true, this will perform a hard deletion.\n\tExample: true\nignoreChecks:\n\tboolean\n\tDescription: If true, this 
will ignore checks and force the deletion of a deployment that is currently\ndeployed and is in use.\n\tExample: true\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {}\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nfirectl\nfirectl create dataset /path/to/jsonl/file\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Let me search for more information about listing and managing existing deployments.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"list deployments firectl commands deployment management\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: List deployments\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/list-resources\nContent: Prints all deployments in the account. firectl list deployments [flags]\n\nPrints all deployments in the account. 
firectl list deployments [flags]\n\nList deployed models\nPrints all deployed models in an account. firectl list deployed-models [flags]\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model \n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. 
You can see which deployed model entry is marked with Default: true using: firectl get model \n\nCreating an on-demand deployment\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: List Deployments\nLink: https://docs.fireworks.ai/api-reference/list-deployments\nContent: \n\n\n\nREST Endpoint GET /v1/accounts/{account_id}/deployments\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\n\nquery params:\npageSize:\n\tinteger\n\tDescription: The maximum number of deployments to return. The maximum page_size is 200,\nvalues above 200 will be coerced to 200.\nIf unspecified, the default is 50.\n\tExample: 123\npageToken:\n\tstring\n\tDescription: A page token, received from a previous ListDeployments call. Provide this\nto retrieve the subsequent page. 
When paginating, all other parameters\nprovided to ListDeployments must match the call that provided the page\ntoken.\n\tExample: \"\"\nfilter:\n\tstring\n\tDescription: Only deployment satisfying the provided filter (if specified) will be\nreturned. See https://google.aip.dev/160 for the filter grammar.\n\tExample: \"\"\norderBy:\n\tstring\n\tDescription: A comma-separated list of fields to order by. e.g. \"foo,bar\"\nThe default sort order is ascending. To specify a descending order for a\nfield, append a \" desc\" suffix. e.g. \"foo desc,bar\"\nSubfields are specified with a \".\" character. e.g. \"foo.bar\"\nIf not specified, the default order is by \"create_time\".\n\tExample: \"\"\nshowDeleted:\n\tboolean\n\tDescription: If set, DELETED deployments will be included.\n\tExample: true\nreadMask:\n\tstring\n\tDescription: The fields to be returned in the response. If empty or \"*\", all fields will be returned.\n\tExample: \"\"\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: 
{\"deployments\":[{\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}],\"nextPageToken\":\"\",\"totalSize\":123}\n\n", "type": "text"}, {"text": "Title: Deploy Command\nLink: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview\nContent: The deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nThe deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nCommand Overview\nThe Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). 
delete : Delete an evaluator (coming soon).\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nDeploying a fine-tuned model serverlessly\nNot all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora \n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. firectl create deployment [flags]\n\nCreate a Deployment on Fireworks AI platform\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Based on my research, here's a comprehensive guide to managing deployments on Fireworks AI:\n\n## Overview of Deployment Types\n\nFireworks AI offers several deployment options:\n\n1. **Serverless Models**: Pay-per-token, shared infrastructure\n2. **On-demand Deployments**: Dedicated GPU instances billed by GPU-second\n3. 
**Reserved Capacity**: Pre-purchased dedicated capacity for high-volume usage\n\n## Key Benefits of On-demand Deployments\n- Predictable performance unaffected by other users\n- No hard rate limits\n- Cost-effective for high utilization\n- Access to larger model selection\n- Support for custom base models\n\n## Managing Deployments with firectl\n\n### Installation and Setup\nFirst, install the Fireworks CLI tools:\n\n```bash\npip install fireworks-ai\n```\n\nSet up your API key:\n```bash\nexport FIREWORKS_API_KEY=\"your-api-key-here\"\n```\n\n### Basic Deployment Commands\n\n#### 1. List All Deployments\n```bash\nfirectl list deployments\n```\n\n#### 2. Create a New Deployment\n```bash\n# Basic deployment\nfirectl create deployment \n\n# With custom configuration\nfirectl create deployment --min-replica-count 1 --max-replica-count 3\n\n# Deploy a LoRA fine-tuned model\nfirectl create deployment \"accounts/fireworks/models/\"\n\n# Deploy with addons enabled (for multi-LoRA)\nfirectl create deployment --enable-addons\n```\n\n#### 3. Check Deployment Status\n```bash\nfirectl get deployment \n```\n\n#### 4. 
Delete a Deployment\n```bash\n# Soft delete\nfirectl delete deployment \n\n# Force delete (ignores usage checks)\nfirectl delete deployment --ignore-checks\n```\n\n### Managing Default Deployments\nWhen you have multiple deployments of the same model:\n\n```bash\n# Check which deployment is default\nfirectl get model \n\n# Update default deployment\nfirectl update deployed-model --default\n```\n\n## Using the Fireworks Build SDK (Python)\n\nThe Fireworks Build SDK provides a higher-level interface:\n\n```python\nfrom fireworks import LLM\n\n# Auto-managed deployment\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\n# Explicit on-demand deployment\nllm = LLM(model=\"your-model-id\", deployment_type=\"on-demand\")\n\n# Make API calls\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Hello!\"}]\n)\n```\n\n## REST API Management\n\n### List Deployments via REST API\n```bash\ncurl -X GET \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments\" \\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\"\n```\n\n### Delete Deployment via REST API\n```bash\ncurl -X DELETE \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id}\" \\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\"\n```\n\n## Deployment Workflow Examples\n\n### 1. Deploy a Fine-tuned Model\n```bash\n# Single command deployment\nfirectl create deployment \"accounts/fireworks/models/my-lora-model\"\n\n# Wait for deployment to be ready\nfirectl get deployment \n```\n\n### 2. Multi-LoRA Deployment (Advanced)\n```bash\n# Step 1: Create base deployment with addons\nfirectl create deployment \"accounts/fireworks/models/base-model\" --enable-addons\n\n# Step 2: Load LoRA models\nfirectl load-lora --deployment \nfirectl load-lora --deployment \n```\n\n### 3. 
Custom Model Deployment\n```bash\n# First upload your custom model\n# Then deploy it\nfirectl create deployment \"accounts/{account_id}/models/custom-model-id\"\n```\n\n## Best Practices\n\n1. **Monitor Costs**: On-demand deployments are billed by GPU-second\n2. **Use Reserved Capacity**: For consistent high-volume usage\n3. **Set Appropriate Scaling**: Configure min/max replica counts based on expected load\n4. **Clean Up**: Delete unused deployments to avoid charges\n5. **Check Status**: Always verify deployment state before using\n\n## Troubleshooting\n\n- **\"Invalid ID\" errors**: Ensure your account ID is properly configured\n- **Deployment stuck**: Check resource quotas and account limits\n- **Performance issues**: Verify deployment is in READY state and check replica counts\n\nWould you like me to dive deeper into any specific aspect of deployment management, such as scaling configurations, cost optimization, or advanced deployment patterns?", "tool_calls": []}], "input_metadata": {"row_id": "call-stand-low-night-conference", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "At least one tool call was made", "metrics": {}}, "created_at": "2025-08-08T11:19:37.022552", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean"}, "pid": 89723}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing 
deployments on Fireworks! Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks AI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: 
https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: We are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. 
It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. 
Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. 
How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Introduction\nLink: https://docs.fireworks.ai/api-reference/introduction\nContent: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. 
It also lets you automate management of models, deployments, datasets, and more.\n\nIntroduction\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Create a Deployment on Fireworks AI platform\n\nCreate a Deployment on Fireworks AI platform\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: How much does Fireworks cost?\nLink: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost\nContent: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nHow much does Fireworks cost?\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. 
The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY%\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management commands and workflows.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment management commands create delete list\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Delete a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources\nContent: Deletes an model deployment. firectl delete deployment [flags]\n\nDeletes an model deployment. firectl delete deployment [flags]\n\nExample\nfirectl delete deployment my-deployment\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. 
firectl create deployment [flags]\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: Creating an on-demand deployment\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. 
You can also list all deployments in your account at any time by running: firectl list deployments\n\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: firectl get deployment \n\n", "type": "text"}, {"text": "Title: Delete Deployment\nLink: https://docs.fireworks.ai/api-reference/delete-deployment\nContent: \n\n\n\nREST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\nquery params:\nhard:\n\tboolean\n\tDescription: If true, this will perform a hard deletion.\n\tExample: true\nignoreChecks:\n\tboolean\n\tDescription: If true, this will ignore checks and force the deletion of a deployment that is currently\ndeployed and is in use.\n\tExample: true\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {}\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match 
the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Deployment Command\nLink: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example\nContent: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. 
--verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. 
Ensure your virtual environment is active: source .venv/bin/activate\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nfirectl\nfirectl create dataset /path/to/jsonl/file\n\n", "type": "text"}, {"text": "Title: Deleting Stored Responses\nLink: https://docs.fireworks.ai/guides/response-api\nContent: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
import os\nfrom openai import OpenAI\nimport requests\n\nclient = OpenAI(\n base_url=\"https://api.fireworks.ai/inference/v1\",\n api_key=os.getenv(\"FIREWORKS_API_KEY\", \"YOUR_FIREWORKS_API_KEY_HERE\")\n)\n\n# Create a response\nresponse = client.responses.create(\n model=\"accounts/fireworks/models/qwen3-235b-a22b\",\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Now let me search for information about listing and managing deployments to give you a complete picture.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"list deployments get deployment status firectl commands\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", 
"type": "text"}, {"text": "Title: List deployments\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/list-resources\nContent: Prints all deployments in the account. firectl list deployments [flags]\n\nPrints all deployments in the account. firectl list deployments [flags]\n\nList deployed models\nPrints all deployed models in an account. firectl list deployed-models [flags]\n\n", "type": "text"}, {"text": "Title: Creating an on-demand deployment\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. 
If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\nPublishing a deployed model\nBy default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model=\"\" AND deployment=\"\"'\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. 
This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm\nContent: # Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\n# Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\nTry with an Example Dataset\nTo get a feel for how VLM fine-tuning works, you can use an example vision dataset: This is a classification dataset that contains images of food with tags for reasoning. Once downloaded, you can upload this dataset using the instructions in the next step. Upload your VLM dataset Upload your prepared JSONL dataset to Fireworks for training: Navigate to the Datasets tab in the Fireworks console, click \"Create Dataset\", and upload your JSONL file through the wizard. For larger datasets (>500MB), use as it handles large uploads more reliably than the web interface. For enhanced data control and security, we also support bring your own bucket (BYOB) configurations. See our External GCS Bucket Integration guide for setup details. Launch VLM fine-tuning job Create a supervised fine-tuning job for your VLM: For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings. Navigate to the Fine-tuning tab in the Fireworks console Click \"Create Fine-tuning Job\" Select your VLM base model (Qwen 2.5 VL) Choose your uploaded dataset Configure training parameters Launch the job VLM fine-tuning jobs typically take longer than text-only models due to the additional image processing. 
Expect training times of several hours depending on dataset size and model complexity. Monitor training progress Track your VLM fine-tuning job in the Fireworks console. Monitor key metrics: Training loss : Should generally decrease over time Evaluation loss : Monitor for overfitting if using evaluation dataset Training progress : Epochs completed and estimated time remaining Your VLM fine-tuning job is complete when the status shows COMPLETED and your custom model is ready for deployment. Deploy your fine-tuned VLM Once training is complete, deploy your custom VLM: # Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/guides/batch-inference\nContent: # Get job status\nfirectl get batch-inference-job my-batch-job\n\n# List all batch jobs\nfirectl list batch-inference-jobs\n\n# Get job status\nfirectl get batch-inference-job my-batch-job\n\n# List all batch jobs\nfirectl list batch-inference-jobs\n\nfirectl create dataset batch-input-dataset ./batch_input_data.jsonl\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: List Deployments\nLink: https://docs.fireworks.ai/api-reference/list-deployments\nContent: \n\n\n\nREST Endpoint GET /v1/accounts/{account_id}/deployments\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\n\nquery params:\npageSize:\n\tinteger\n\tDescription: The maximum number of deployments to return. The maximum page_size is 200,\nvalues above 200 will be coerced to 200.\nIf unspecified, the default is 50.\n\tExample: 123\npageToken:\n\tstring\n\tDescription: A page token, received from a previous ListDeployments call. Provide this\nto retrieve the subsequent page. When paginating, all other parameters\nprovided to ListDeployments must match the call that provided the page\ntoken.\n\tExample: \"\"\nfilter:\n\tstring\n\tDescription: Only deployment satisfying the provided filter (if specified) will be\nreturned. See https://google.aip.dev/160 for the filter grammar.\n\tExample: \"\"\norderBy:\n\tstring\n\tDescription: A comma-separated list of fields to order by. e.g. \"foo,bar\"\nThe default sort order is ascending. To specify a descending order for a\nfield, append a \" desc\" suffix. e.g. \"foo desc,bar\"\nSubfields are specified with a \".\" character. e.g. \"foo.bar\"\nIf not specified, the default order is by \"create_time\".\n\tExample: \"\"\nshowDeleted:\n\tboolean\n\tDescription: If set, DELETED deployments will be included.\n\tExample: true\nreadMask:\n\tstring\n\tDescription: The fields to be returned in the response. 
If empty or \"*\", all fields will be returned.\n\tExample: \"\"\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {\"deployments\":[{\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}],\"nextPageToken\":\"\",\"totalSize\":123}\n\n", "type": "text"}, {"text": "Title: Deploy Command\nLink: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview\nContent: The deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nThe deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nCommand Overview\nThe Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). 
delete : Delete an evaluator (coming soon).\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Perfect! Now I have comprehensive information about managing deployments on Fireworks. Let me provide you with a complete guide.\n\n# Managing Deployments on Fireworks: A Complete Guide\n\nFireworks offers several ways to manage deployments, from command-line tools to REST APIs. Here's everything you need to know:\n\n## Types of Deployments\n\n### 1. On-Demand Deployments\n- **Billed by GPU-second** (not per-token like serverless)\n- **Dedicated resources** - no interference from other users\n- **No hard rate limits** (subject to deployment capacity)\n- **Access to more models** including custom ones\n- **Better for high utilization** scenarios\n\n### 2. 
Serverless Deployments\n- **Billed per-token**\n- **Shared resources** with other users\n- **Rate limits apply**\n- **Instant availability**\n\n## Management Tools\n\n### Option 1: Fireworks Build SDK (Recommended)\n```python\nfrom fireworks import LLM\n\n# Auto-deployment - SDK handles everything\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\n# Manual deployment management\nllm = LLM(model=\"your-model\", deployment_type=\"on-demand\")\n```\n\n### Option 2: firectl CLI Tool\n\n#### Installation\n```bash\n# Install firectl CLI\ncurl -s https://fireworks.ai/install.sh | bash\n```\n\n#### Basic Commands\n\n**List all deployments:**\n```bash\nfirectl list deployments\n```\n\n**Create a deployment:**\n```bash\n# From model library\nfirectl create deployment accounts/fireworks/models/llama-v3p1-8b-instruct --wait\n\n# Custom model\nfirectl create deployment accounts/your-account-id/models/your-custom-model --wait\n\n# LoRA fine-tuned model (single command)\nfirectl create deployment accounts/fireworks/models/your-lora-model-id\n```\n\n**Get deployment status:**\n```bash\nfirectl get deployment \n```\n\n**Delete a deployment:**\n```bash\nfirectl delete deployment \n```\n\n**List deployed models:**\n```bash\nfirectl list deployed-models\n```\n\n### Option 3: REST API\n\n#### List Deployments\n```bash\ncurl -X GET \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments\" \\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\"\n```\n\n#### Create Deployment\n```bash\ncurl -X POST \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments\" \\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"deployment\": {\n \"displayName\": \"my-deployment\",\n \"baseModel\": \"accounts/fireworks/models/llama-v3p1-8b-instruct\"\n }\n }'\n```\n\n#### Delete Deployment\n```bash\ncurl -X DELETE \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id}\" 
\\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\"\n```\n\n## Deployment Workflow\n\n### 1. Choose Your Model\n```bash\n# Check available models\nfirectl list models\n```\n\n### 2. Create Deployment\n```bash\n# Basic deployment\nfirectl create deployment accounts/fireworks/models/llama-v3p1-8b-instruct --wait\n\n# With custom configuration\nfirectl create deployment accounts/fireworks/models/llama-v3p1-8b-instruct \\\n --min-replica-count 1 \\\n --max-replica-count 3 \\\n --wait\n```\n\n### 3. Monitor Status\n```bash\n# Get deployment details\nfirectl get deployment \n\n# Watch logs\nfirectl logs deployment \n```\n\n### 4. Make Public (Optional)\n```bash\n# Make deployment public\nfirectl update deployment --public\n```\n\n### 5. Clean Up\n```bash\n# Delete when done\nfirectl delete deployment \n```\n\n## Advanced Features\n\n### Reserved Capacity\nFor high-volume usage, consider reserved capacity:\n- More reliable capacity\n- Higher GPU quotas\n- Cost savings for consistent usage\n- Contact Fireworks for enterprise pricing\n\n### Multi-LoRA Deployments\n```bash\n# Create base deployment with addons\nfirectl create deployment accounts/fireworks/models/base-model --enable-addons --wait\n\n# Load LoRA models\nfirectl load-lora lora-model-1 --deployment \nfirectl load-lora lora-model-2 --deployment \n```\n\n### Custom Models\n```bash\n# Upload custom model first\nfirectl upload model /path/to/model\n\n# Deploy custom model\nfirectl create deployment accounts/your-account-id/models/custom-model-name\n```\n\n## Best Practices\n\n1. **Use `--wait` flag** for important deployments to ensure they're ready\n2. **Monitor costs** with `firectl list deployments` to see active deployments\n3. **Set up alerts** for deployment health\n4. **Use reserved capacity** for production workloads\n5. **Clean up unused deployments** to avoid charges\n6. 
**Test deployments** before making them public\n\n## Troubleshooting\n\n**\"Invalid ID\" errors:**\n- Ensure your account ID is properly configured\n- Check your API key and account settings\n\n**Deployment stuck:**\n- Check resource availability\n- Verify model compatibility\n- Review deployment logs\n\n**Performance issues:**\n- Consider scaling up replicas\n- Check if reserved capacity is available\n- Monitor GPU utilization\n\n## Cost Management\n- **On-demand**: Pay per GPU-second used\n- **Serverless**: Pay per token processed\n- **Reserved capacity**: Fixed monthly cost for guaranteed capacity\n- Use `firectl list deployments` to track active deployments and costs\n\nThis comprehensive guide should give you everything you need to effectively manage deployments on Fireworks!", "tool_calls": []}], "input_metadata": {"row_id": "construct-do-safe-art-song", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "At least one tool call was made", "metrics": {}}, "created_at": "2025-08-08T11:27:14.528488", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev15+g6486dbd.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean"}, "pid": 92995}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks. 
Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks AI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: 
https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: We are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. 
It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. 
Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. 
How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Introduction\nLink: https://docs.fireworks.ai/api-reference/introduction\nContent: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. 
It also lets you automate management of models, deployments, datasets, and more.\n\nIntroduction\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Create a Deployment on Fireworks AI platform\n\nCreate a Deployment on Fireworks AI platform\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: How much does Fireworks cost?\nLink: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost\nContent: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nHow much does Fireworks cost?\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. 
The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY%\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management commands and workflows.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment management commands create delete list update\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Delete a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources\nContent: Deletes an model deployment. firectl delete deployment [flags]\n\nDeletes an model deployment. firectl delete deployment [flags]\n\nExample\nfirectl delete deployment my-deployment\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. 
firectl create deployment [flags]\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: Creating an on-demand deployment\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. 
You can also list all deployments in your account at any time by running: firectl list deployments\n\nPublishing a deployed model\nBy default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model=\"\" AND deployment=\"\"'\n\n", "type": "text"}, {"text": "Title: Delete Deployment\nLink: https://docs.fireworks.ai/api-reference/delete-deployment\nContent: \n\n\n\nREST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\nquery params:\nhard:\n\tboolean\n\tDescription: If true, this will perform a hard deletion.\n\tExample: true\nignoreChecks:\n\tboolean\n\tDescription: If true, this will ignore checks and force the deletion of a deployment that is currently\ndeployed and is in use.\n\tExample: true\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {}\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment 
\"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Deployment Command\nLink: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example\nContent: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. 
--force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. 
Ensure your virtual environment is active: source .venv/bin/activate\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nfirectl\nfirectl create dataset /path/to/jsonl/file\n\n", "type": "text"}, {"text": "Title: Deleting Stored Responses\nLink: https://docs.fireworks.ai/guides/response-api\nContent: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
import os\nfrom openai import OpenAI\nimport requests\n\nclient = OpenAI(\n base_url=\"https://api.fireworks.ai/inference/v1\",\n api_key=os.getenv(\"FIREWORKS_API_KEY\", \"YOUR_FIREWORKS_API_KEY_HERE\")\n)\n\n# Create a response\nresponse = client.responses.create(\n model=\"accounts/fireworks/models/qwen3-235b-a22b\",\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Let me search for more information about listing and managing deployments.", "tool_calls": [{"id": "functions.search:2", "function": {"arguments": "{\"query\": \"list deployments firectl commands deployment management\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: 
List deployments\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/list-resources\nContent: Prints all deployments in the account. firectl list deployments [flags]\n\nPrints all deployments in the account. firectl list deployments [flags]\n\nList deployed models\nPrints all deployed models in an account. firectl list deployed-models [flags]\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model \n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. 
You can see which deployed model entry is marked with Default: true using: firectl get model \n\nCreating an on-demand deployment\nChoose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: List Deployments\nLink: https://docs.fireworks.ai/api-reference/list-deployments\nContent: \n\n\n\nREST Endpoint GET /v1/accounts/{account_id}/deployments\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\n\nquery params:\npageSize:\n\tinteger\n\tDescription: The maximum number of deployments to return. The maximum page_size is 200,\nvalues above 200 will be coerced to 200.\nIf unspecified, the default is 50.\n\tExample: 123\npageToken:\n\tstring\n\tDescription: A page token, received from a previous ListDeployments call. Provide this\nto retrieve the subsequent page. 
When paginating, all other parameters\nprovided to ListDeployments must match the call that provided the page\ntoken.\n\tExample: \"\"\nfilter:\n\tstring\n\tDescription: Only deployment satisfying the provided filter (if specified) will be\nreturned. See https://google.aip.dev/160 for the filter grammar.\n\tExample: \"\"\norderBy:\n\tstring\n\tDescription: A comma-separated list of fields to order by. e.g. \"foo,bar\"\nThe default sort order is ascending. To specify a descending order for a\nfield, append a \" desc\" suffix. e.g. \"foo desc,bar\"\nSubfields are specified with a \".\" character. e.g. \"foo.bar\"\nIf not specified, the default order is by \"create_time\".\n\tExample: \"\"\nshowDeleted:\n\tboolean\n\tDescription: If set, DELETED deployments will be included.\n\tExample: true\nreadMask:\n\tstring\n\tDescription: The fields to be returned in the response. If empty or \"*\", all fields will be returned.\n\tExample: \"\"\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: 
{\"deployments\":[{\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}],\"nextPageToken\":\"\",\"totalSize\":123}\n\n", "type": "text"}, {"text": "Title: Deploy Command\nLink: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview\nContent: The deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nThe deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nCommand Overview\nThe Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). 
delete : Delete an evaluator (coming soon).\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nDeploying a fine-tuned model serverlessly\nNot all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora \n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. 
firectl create deployment [flags]\n\nCreate a Deployment on Fireworks AI platform\n\n", "type": "text"}], "tool_call_id": "functions.search:2"}, {"role": "assistant", "content": "Now let me search for information about deployment configuration and management best practices.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment configuration scaling autoscaling reserved capacity Fireworks\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: How does billing and scaling work for on-demand GPU deployments?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-and-scaling-work-for-on-demand-gpu-deployments\nContent: On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\nOn-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle 
unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\nHow does billing and scaling work for on-demand GPU deployments?\nOn-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\n", "type": "text"}, {"text": "Title: What should I expect for deployment and scaling performance?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-should-i-expect-for-deployment-and-scaling-performance\nContent: Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually 
long\n\nInitial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\nWhat should I expect for deployment and scaling performance?\nInitial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\n", "type": "text"}, {"text": "Title: Deployment type selection\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-basics\nContent: The SDK tries to be parsimonious with the way it deploys resources. We provide two types of deployment options on Fireworks: serverless hosting is enabled for some commonly-used state of the art models. The pricing for these models is per-token, i.e. you only pay for the tokens you use, and subject to rate limits. on-demand hosting is enabled for all other models. The pricing for these models is per GPU-second. This hosting is required for models that are not available serverlessly or workloads that exceed serverless rate limits. For non-finetuned models, you can always specify the deployment type of LLM() by passing either \"serverless\" or \"on-demand\" as the deployment_type parameter to the constructor. If the model is not available for the deployment type you selected, the SDK will throw an error. The SDK can also decide the best deployment strategy on your behalf, just pass deployment_type=\"auto\" . If the model is available serverlessly, the SDK will use serverless hosting, otherwise the SDK will create an on-demand deployment. 
When using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type=\"auto\" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Be careful with the deployment_type parameter, especially for \"auto\" and \"on-demand\" deployments. While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type=\"on-demand\" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. For finetuned (LoRA) models, passing deployment_type=\"serverless\" will try to deploy the finetuned model to serverless hosting, deployment_type=\"on-demand\" will create an on-demand deployment of your base model and merge in your LoRA weights, deployment_type=\"on-demand-lora\" will create an on-demand deployment with Multi-LoRA enabled, and deployment_type=\"auto\" will try to use serverless if available, otherwise fall back to on-demand-lora.\n\nThe SDK tries to be parsimonious with the way it deploys resources. We provide two types of deployment options on Fireworks: serverless hosting is enabled for some commonly-used state of the art models. The pricing for these models is per-token, i.e. you only pay for the tokens you use, and subject to rate limits. on-demand hosting is enabled for all other models. The pricing for these models is per GPU-second. 
This hosting is required for models that are not available serverlessly or workloads that exceed serverless rate limits. For non-finetuned models, you can always specify the deployment type of LLM() by passing either \"serverless\" or \"on-demand\" as the deployment_type parameter to the constructor. If the model is not available for the deployment type you selected, the SDK will throw an error. The SDK can also decide the best deployment strategy on your behalf, just pass deployment_type=\"auto\" . If the model is available serverlessly, the SDK will use serverless hosting, otherwise the SDK will create an on-demand deployment. When using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type=\"auto\" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Be careful with the deployment_type parameter, especially for \"auto\" and \"on-demand\" deployments. While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type=\"on-demand\" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. 
For finetuned (LoRA) models, passing deployment_type=\"serverless\" will try to deploy the finetuned model to serverless hosting, deployment_type=\"on-demand\" will create an on-demand deployment of your base model and merge in your LoRA weights, deployment_type=\"on-demand-lora\" will create an on-demand deployment with Multi-LoRA enabled, and deployment_type=\"auto\" will try to use serverless if available, otherwise fall back to on-demand-lora.\n\nDeploying Fine-tuned Models with On-Demand\nWhen deploying a fine-tuned model using deployment_type=\"on-demand\" , you need to provide: model - Your fine-tuned model ID (e.g., \"accounts/your-account/models/your-fine-tuned-model-id\") id - A unique deployment identifier (can be any simple string like \"my-fine-tuned-deployment\") # Deploy a fine-tuned model with on-demand deployment\nfine_tuned_llm = LLM(\n model=\"accounts/your-account/models/your-fine-tuned-model-id\",\n deployment_type=\"on-demand\",\n id=\"my-fine-tuned-deployment\" # Simple string ID\n)\n\n# Apply deployment configuration to Fireworks\nfine_tuned_llm.apply()\n\n# Track deployment in web dashboard\nprint(f\"Track at: {fine_tuned_llm.deployment_url}\")\n\n", "type": "text"}, {"text": "Title: Spike arrest policy\nLink: https://docs.fireworks.ai/guides/quotas_usage/rate-limits\nContent: LLM traffic that spikes quickly has the potential to be throttled. Here's how it works: Each user has a guaranteed rate limit, which increases with sustained usage near the limit. Typically, you can expect to stay within the limits if your traffic gradually doubles within an hour. You can see your guaranteed limits using API response headers (see below) Exceeding your guaranteed limit means that there's the potential for your requests to be processed with lower-priority. Fireworks operates serverless deployments by autoscaling capacity (within limits) as user traffic increases. 
However, if a deployment is overloaded while auto-scaling, requests that fall outside of guaranteed limits may be processed with lower-latency or dropped with HTTP code 429 (if limits are significantly exceeded). You can monitor if you exceed limits via API response header x-ratelimit-over-limit: yes. Exceeding your guaranteed limit does not guarantee that your requests will be throttled. You can monitor if your requests are actually being throttled by monitoring latencies. Here's an example of how dynamic rate limits scale up: Metric Minimum Guaranteed Limit 10 Minutes 1 Hour 2 Hours Requests per minute 60 120 720 1440 Input tokens per minute 60000 120000 720000 1440000 Output tokens per minute 6000 12000 72000 144000\n\nLLM traffic that spikes quickly has the potential to be throttled. Here's how it works: Each user has a guaranteed rate limit, which increases with sustained usage near the limit. Typically, you can expect to stay within the limits if your traffic gradually doubles within an hour. You can see your guaranteed limits using API response headers (see below) Exceeding your guaranteed limit means that there's the potential for your requests to be processed with lower-priority. Fireworks operates serverless deployments by autoscaling capacity (within limits) as user traffic increases. However, if a deployment is overloaded while auto-scaling, requests that fall outside of guaranteed limits may be processed with lower-latency or dropped with HTTP code 429 (if limits are significantly exceeded). You can monitor if you exceed limits via API response header x-ratelimit-over-limit: yes. Exceeding your guaranteed limit does not guarantee that your requests will be throttled. You can monitor if your requests are actually being throttled by monitoring latencies. 
Here's an example of how dynamic rate limits scale up: Metric Minimum Guaranteed Limit 10 Minutes 1 Hour 2 Hours Requests per minute 60 120 720 1440 Input tokens per minute 60000 120000 720000 1440000 Output tokens per minute 6000 12000 72000 144000\n\nSpend limits\nIn order to prevent fraud, Fireworks imposes a monthly spending limit on your account. Once you hit the spending limit, your account will automatically enter a suspended state, API requests will be rejected and all Fireworks usage will be stopped. This includes serverless inference, dedicated deployments, and fine-tuning jobs. Your spend limit will organically increase over time as you spend more on the platform. You can also increase your spend limit at any time, by purchasing prepaid credits to meet the historical spend required for a higher tier. For instance, if you are a new Tier 1 user with $0 historical spend, you can purchase $100 prepaid credits and become a Tier 2 user. You can qualify for a higher tier by adding credits into your Fireworks account. There may be a propagation delay for a few minutes after you prepay for credits - you may still see \"monthly usage exceeded error\" for a few minutes after adding credits. 
Tier Qualification Spending Limit Tier 1 Valid payment method added $50/mo Tier 2 $50 spent in payments or credits added $500/mo Tier 3 $500 spent in payments or credits added $5,000/mo Tier 4 $5000 spent in payments or credits added $50,000/mo Unlimited Contact us at inquiries@fireworks.ai Unlimited\n\n", "type": "text"}, {"text": "Title: How does billing work for on-demand deployments?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-work-for-on-demand-deployments\nContent: On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\nOn-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more 
cost-effective For detailed configuration options, see our deployment guide.\n\nHow does billing work for on-demand deployments?\nOn-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\n", "type": "text"}, {"text": "Title: Replica count (horizontal scaling)\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: The number of replicas (horizontal scaling) is specified by passing the --min-replica-count and --max-replica-count flags. Increasing the number of replicas will increase the maximum QPS the deployment can support. The deployment will automatically scale based on server load. Auto-scaling up may fail if there is a GPU stockout. Use reserved capacity to guarantee capacity for your deployments. The default value for --min-replica-count is 0. Setting --min-replica-count to 0 enables the deployment to auto-scale to 0 if a deployment is unused (i.e. no inference requests) for a specified \"scale-to-zero\" time window. While the deployment is scaled to 0, you will not pay for any GPU utilization. The default value for --max-replica-count is 1 if --min-replica-count=0 , or the value of\n--min-replica-count otherwise. 
firectl create deployment \\\n --min-replica-count 2 \\\n --max-replica-count 3\n\nThe number of replicas (horizontal scaling) is specified by passing the --min-replica-count and --max-replica-count flags. Increasing the number of replicas will increase the maximum QPS the deployment can support. The deployment will automatically scale based on server load. Auto-scaling up may fail if there is a GPU stockout. Use reserved capacity to guarantee capacity for your deployments. The default value for --min-replica-count is 0. Setting --min-replica-count to 0 enables the deployment to auto-scale to 0 if a deployment is unused (i.e. no inference requests) for a specified \"scale-to-zero\" time window. While the deployment is scaled to 0, you will not pay for any GPU utilization. The default value for --max-replica-count is 1 if --min-replica-count=0 , or the value of\n--min-replica-count otherwise. firectl create deployment \\\n --min-replica-count 2 \\\n --max-replica-count 3\n\nThe number of replicas (horizontal scaling) is specified by passing the --min-replica-count and --max-replica-count flags. Increasing the number of replicas will increase the maximum QPS the deployment can support. The deployment will automatically scale based on server load. Auto-scaling up may fail if there is a GPU stockout. Use reserved capacity to guarantee capacity for your deployments. The default value for --min-replica-count is 0. Setting --min-replica-count to 0 enables the deployment to auto-scale to 0 if a deployment is unused (i.e. no inference requests) for a specified \"scale-to-zero\" time window. While the deployment is scaled to 0, you will not pay for any GPU utilization. The default value for --max-replica-count is 1 if --min-replica-count=0 , or the value of\n--min-replica-count otherwise. 
firectl update deployment \\\n --min-replica-count 2 \\\n --max-replica-count 3\n\n", "type": "text"}, {"text": "Title: Do you support Auto Scaling?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/do-you-support-auto-scaling\nContent: Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs\n\nYes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs\n\nDo you support Auto Scaling?\nYes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs\n\n", "type": "text"}, {"text": "Title: Inference\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks has 3 options for running generative AI models with unparalleled speed and costs. Serverless: The easiest way to get started. Use the most popular models on pre-configured GPUs. Pay per token and avoid cold boots. On-demand: The most flexible option for scaling. Use private GPUs to support your specific needs and only pay when you're using it. GPUs running Fireworks software offer both ~250% improved throughput and 50% improved latency compared to vLLM. Excels for: Production volume - Per-token costs decrease with more volume and there are no set rate limits Custom needs and reliability - On-demand GPUs are private to you. 
This enables complete control to tailor deployments for speed/throughput/reliability or to run more specialized models Enterprise Reserved GPUs: Use private GPUs with hardware and software set-up personally tailored by the Fireworks team for your use case. Enjoy SLAs, dedicated support, bring-your-own-cloud (BYOC) deployment options, and enterprise-only optimizations. Property Serverless On-demand Enterprise reserved Performance Industry-leading speed on Fireworks-curated set-up. Performance may vary with others' usage. Speed dependent on user-specified GPU configuration and private usage. Per GPU latency should be significantly faster than vLLM. Tailor-made set-up by Fireworks AI experts for best possible latency Getting Started Self-serve - immediately use serverless with 1 line of code Self-serve - configure GPUs, then use them with 1 line of code. Chat with Fireworks Scaling and management Scale up and down freely within rate limits Option for auto-scaling GPUs with traffic. GPUs scale to zero automatically, so no charge for unused GPUs and for boot-ups. Chat with Fireworks Pricing Pay fixed price per token Pay per GPU second with no commitments. Per GPU throughput should be significantly greater than options like vLLM. Customized price based on reserved GPU capacity Commitment None None Arrange plan length with Fireworks Rate limits Yes, see quotas No rate limits. Quotas on number of GPUs None Model Selection Collection of popular models, curated by Fireworks Use 100s of pre-uploaded models or upload your own custom model within supported architecture Use 100s of pre-uploaded models or upload any model\n\nFireworks has 3 options for running generative AI models with unparalleled speed and costs. Serverless: The easiest way to get started. Use the most popular models on pre-configured GPUs. Pay per token and avoid cold boots. On-demand: The most flexible option for scaling. Use private GPUs to support your specific needs and only pay when you're using it. 
GPUs running Fireworks software offer both ~250% improved throughput and 50% improved latency compared to vLLM. Excels for: Production volume - Per-token costs decrease with more volume and there are no set rate limits Custom needs and reliability - On-demand GPUs are private to you. This enables complete control to tailor deployments for speed/throughput/reliability or to run more specialized models Enterprise Reserved GPUs: Use private GPUs with hardware and software set-up personally tailored by the Fireworks team for your use case. Enjoy SLAs, dedicated support, bring-your-own-cloud (BYOC) deployment options, and enterprise-only optimizations. Property Serverless On-demand Enterprise reserved Performance Industry-leading speed on Fireworks-curated set-up. Performance may vary with others' usage. Speed dependent on user-specified GPU configuration and private usage. Per GPU latency should be significantly faster than vLLM. Tailor-made set-up by Fireworks AI experts for best possible latency Getting Started Self-serve - immediately use serverless with 1 line of code Self-serve - configure GPUs, then use them with 1 line of code. Chat with Fireworks Scaling and management Scale up and down freely within rate limits Option for auto-scaling GPUs with traffic. GPUs scale to zero automatically, so no charge for unused GPUs and for boot-ups. Chat with Fireworks Pricing Pay fixed price per token Pay per GPU second with no commitments. Per GPU throughput should be significantly greater than options like vLLM. Customized price based on reserved GPU capacity Commitment None None Arrange plan length with Fireworks Rate limits Yes, see quotas No rate limits. 
Quotas on number of GPUs None Model Selection Collection of popular models, curated by Fireworks Use 100s of pre-uploaded models or upload your own custom model within supported architecture Use 100s of pre-uploaded models or upload any model\n\nFireworks AI Developer Platform\nFireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\n", "type": "text"}, {"text": "Title: 2. Call a language model using the LLM() class\nLink: https://docs.fireworks.ai/tools-sdks/python-client/the-tutorial\nContent: Now that your machine is setup with credentials and the SDK, lets ensure you are\nready to make your first LLM call and explain some of the nuances of this SDK. Create a new file called main.py and import the Fireworks AI SDK. Instantiate the LLM class. The LLM class accepts a model argument that you\ncan use to specify the model you want to use. For this tutorial, we will use the\nLlama 4 Maverick\nmodel. When creating an LLM instance, you can specify the deployment type as either \"serverless\" , \"on-demand\" , or \"auto\" . If you pass \"auto\" , the SDK will try to use serverless hosting if available, otherwise it will create an on-demand deployment. In the other cases, the SDK will try to create a deployment of the specified type and will throw an error if it's not available for the model you selected. The SDK will try and re-use existing deployments for the same model if possible, see Resource management for more details. With great power comes great responsibility! Be careful with the deployment_type parameter, especially for \"auto\" and \"on-demand\" . While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. 
When using deployment_type=\"on-demand\" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. When using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type=\"auto\" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Make a request to the LLM. The LLM class is OpenAI compatible, so you can use\nthe same chat completion interface to make a request to the LLM. Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?\n\nNow that your machine is setup with credentials and the SDK, lets ensure you are\nready to make your first LLM call and explain some of the nuances of this SDK. Create a new file called main.py and import the Fireworks AI SDK. Instantiate the LLM class. The LLM class accepts a model argument that you\ncan use to specify the model you want to use. For this tutorial, we will use the\nLlama 4 Maverick\nmodel. When creating an LLM instance, you can specify the deployment type as either \"serverless\" , \"on-demand\" , or \"auto\" . If you pass \"auto\" , the SDK will try to use serverless hosting if available, otherwise it will create an on-demand deployment. In the other cases, the SDK will try to create a deployment of the specified type and will throw an error if it's not available for the model you selected. The SDK will try and re-use existing deployments for the same model if possible, see Resource management for more details. With great power comes great responsibility! 
Be careful with the deployment_type parameter, especially for \"auto\" and \"on-demand\" . While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type=\"on-demand\" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. When using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type=\"auto\" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Make a request to the LLM. The LLM class is OpenAI compatible, so you can use\nthe same chat completion interface to make a request to the LLM. The great thing about the SDK is that you can use your favorite Python constructs to powerfully work with LLMs. 
For example, let's try calling a few LLMs in a loop and see how they respond: from fireworks import LLM\n\nllms = [\n \"llama4-maverick-instruct-basic\",\n \"deepseek-r1\",\n \"qwen2p5-vl-32b-instruct\"\n]\n\nfor llm in llms:\n llm = LLM(model=llm, deployment_type=\"auto\") \n print(\"\\n\" + \"-\" * 100)\n print(f\"Model: {llm.model}\")\n print(\"-\" * 100 + \"\\n\")\n\n response = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Hello, world!\"}]\n )\n print(response.choices[0].message.content)\n\nNow that your machine is setup with credentials and the SDK, lets ensure you are\nready to make your first LLM call and explain some of the nuances of this SDK. Create a new file called main.py and import the Fireworks AI SDK. Instantiate the LLM class. The LLM class accepts a model argument that you\ncan use to specify the model you want to use. For this tutorial, we will use the\nLlama 4 Maverick\nmodel. When creating an LLM instance, you can specify the deployment type as either \"serverless\" , \"on-demand\" , or \"auto\" . If you pass \"auto\" , the SDK will try to use serverless hosting if available, otherwise it will create an on-demand deployment. In the other cases, the SDK will try to create a deployment of the specified type and will throw an error if it's not available for the model you selected. The SDK will try and re-use existing deployments for the same model if possible, see Resource management for more details. With great power comes great responsibility! Be careful with the deployment_type parameter, especially for \"auto\" and \"on-demand\" . While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type=\"on-demand\" , you must provide an id parameter to uniquely identify your deployment. 
This is required to prevent accidental creation of multiple deployments. When using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type=\"auto\" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Make a request to the LLM. The LLM class is OpenAI compatible, so you can use\nthe same chat completion interface to make a request to the LLM. The great thing about the SDK is that you can use your favorite Python constructs to powerfully work with LLMs. For example, let's try calling a few LLMs in a loop and see how they respond: from fireworks import LLM\n\nllms = [\n \"llama4-maverick-instruct-basic\",\n \"deepseek-r1\",\n \"qwen2p5-vl-32b-instruct\"\n]\n\nfor llm in llms:\n llm = LLM(model=llm, deployment_type=\"auto\") \n print(\"\\n\" + \"-\" * 100)\n print(f\"Model: {llm.model}\")\n print(\"-\" * 100 + \"\\n\")\n\n response = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Hello, world!\"}]\n )\n print(response.choices[0].message.content)\n\n", "type": "text"}, {"text": "Title: How does autoscaling affect my costs?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-autoscaling-affect-my-costs\nContent: Scaling from 0 : No minimum cost when scaled to zero Scaling up : Each new replica adds to your total cost proportionally. 
For example: Scaling from 1 to 2 replicas doubles your GPU costs If each replica uses multiple GPUs, costs scale accordingly (e.g., scaling from 1 to 2 replicas with 2 GPUs each means paying for 4 GPUs total) For current pricing details, please visit our pricing page.\n\nScaling from 0 : No minimum cost when scaled to zero Scaling up : Each new replica adds to your total cost proportionally. For example: Scaling from 1 to 2 replicas doubles your GPU costs If each replica uses multiple GPUs, costs scale accordingly (e.g., scaling from 1 to 2 replicas with 2 GPUs each means paying for 4 GPUs total) For current pricing details, please visit our pricing page.\n\nHow does autoscaling affect my costs?\nScaling from 0 : No minimum cost when scaled to zero Scaling up : Each new replica adds to your total cost proportionally. For example: Scaling from 1 to 2 replicas doubles your GPU costs If each replica uses multiple GPUs, costs scale accordingly (e.g., scaling from 1 to 2 replicas with 2 GPUs each means paying for 4 GPUs total) For current pricing details, please visit our pricing page.\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Based on my research, I can now provide you with a comprehensive guide on managing deployments on Fireworks. Let me break this down into practical sections with commands and examples.\n\n## Managing Deployments on Fireworks: A Complete Guide\n\nFireworks offers three deployment types with different management approaches:\n\n1. **Serverless**: Pre-configured, pay-per-token, automatic scaling\n2. **On-demand**: Private GPUs, pay-per-second, manual/automatic scaling\n3. **Enterprise Reserved**: Custom hardware, tailored setup, SLAs\n\n### 1. 
Basic Deployment Management Commands\n\n#### Using firectl CLI (Recommended)\n\n**List all deployments:**\n```bash\nfirectl list deployments\n```\n\n**Create a deployment:**\n```bash\n# Basic deployment\nfirectl create deployment --wait\n\n# With custom configuration\nfirectl create deployment \\\n --min-replica-count 0 \\\n --max-replica-count 3 \\\n --wait\n```\n\n**Delete a deployment:**\n```bash\nfirectl delete deployment \n```\n\n**Update deployment configuration:**\n```bash\nfirectl update deployment \\\n --min-replica-count 2 \\\n --max-replica-count 5\n```\n\n#### Using REST API\n\n**List deployments:**\n```bash\ncurl -X GET \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments\" \\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\"\n```\n\n**Delete deployment via API:**\n```bash\ncurl -X DELETE \\\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id}\" \\\n -H \"Authorization: Bearer $FIREWORKS_API_KEY\"\n```\n\n### 2. Python SDK Management\n\n#### Installation and Setup\n```bash\npip install --upgrade fireworks-ai\nexport FIREWORKS_API_KEY=\"your-api-key\"\n```\n\n#### Basic Usage\n```python\nfrom fireworks import LLM\n\n# Serverless deployment (automatic)\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"serverless\")\n\n# On-demand deployment with custom ID\nllm = LLM(\n model=\"llama4-maverick-instruct-basic\",\n deployment_type=\"on-demand\",\n id=\"my-custom-deployment\"\n)\nllm.apply() # Required for on-demand deployments\n\n# Auto-selection (serverless if available, otherwise on-demand)\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n```\n\n#### Fine-tuned Model Deployment\n```python\n# Single LoRA deployment (recommended)\nlora_llm = LLM(\n model=\"accounts/your-account/models/your-lora-model\",\n deployment_type=\"on-demand\"\n)\nlora_llm.apply()\n\n# Multi-LoRA deployment\nbase_llm = LLM(\n 
model=\"accounts/fireworks/models/llama4-maverick-instruct-basic\",\n deployment_type=\"on-demand-lora\",\n id=\"multi-lora-deployment\"\n)\nbase_llm.apply()\n```\n\n### 3. Deployment Configuration Options\n\n#### Scaling Configuration\n```bash\n# Scale between 0-3 replicas based on demand\nfirectl create deployment \\\n --min-replica-count 0 \\\n --max-replica-count 3\n\n# Fixed scaling (always have 2 replicas running)\nfirectl create deployment \\\n --min-replica-count 2 \\\n --max-replica-count 2\n```\n\n#### Advanced Configuration\n```bash\n# Custom model deployment\nfirectl create deployment accounts/your-account/models/custom-model \\\n --min-replica-count 0 \\\n --max-replica-count 5 \\\n --accelerator-type A100 \\\n --accelerator-count 2\n```\n\n### 4. Cost Management Best Practices\n\n#### Automatic Cost Optimization\n- **Scale-to-zero**: Deployments automatically scale to 0 when unused\n- **No charges when scaled down**: Only pay for active GPU time\n- **Autoscaling**: Automatically adjusts based on traffic\n\n#### Manual Cost Control\n```python\n# Monitor deployment costs\nimport requests\n\nheaders = {\"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\"}\nresponse = requests.get(\n \"https://api.fireworks.ai/v1/accounts/{account_id}/deployments\",\n headers=headers\n)\n\n# Delete unused deployments\nfor deployment in response.json()['deployments']:\n if deployment['state'] == 'READY' and deployment['replicaCount'] == 0:\n # Consider deleting if unused\n pass\n```\n\n### 5. Monitoring and Troubleshooting\n\n#### Check Deployment Status\n```bash\n# Get deployment details\nfirectl get deployment \n\n# Check deployed models\nfirectl list deployed-models\n```\n\n#### Web Dashboard\nTrack deployments at: `https://app.fireworks.ai/dashboard/deployments`\n\n### 6. 
Reserved Capacity (Enterprise)\n\nFor high-volume usage, consider reserved capacity:\n- More reliable capacity\n- Higher GPU quotas\n- Better pricing for consistent usage\n- Contact: inquiries@fireworks.ai\n\n### 7. Common Management Workflows\n\n#### Daily Workflow Example\n```python\n# Morning: Check active deployments\nimport subprocess\nresult = subprocess.run(['firectl', 'list', 'deployments'], \n capture_output=True, text=True)\nprint(result.stdout)\n\n# Scale up for expected traffic\nsubprocess.run(['firectl', 'update', 'deployment', 'my-deployment', \n '--min-replica-count', '2'])\n\n# Evening: Scale down\nsubprocess.run(['firectl', 'update', 'deployment', 'my-deployment', \n '--min-replica-count', '0'])\n```\n\n#### Automated Cleanup Script\n```bash\n#!/bin/bash\n# cleanup_deployments.sh\n\n# List all deployments\ndeployments=$(firectl list deployments --format json | jq -r '.deployments[].name')\n\n# Delete deployments older than 7 days with 0 replicas\nfor deployment in $deployments; do\n firectl delete deployment $deployment --ignore-checks\ndone\n```\n\n### 8. Important Considerations\n\n- **Billing**: On-demand deployments charge by GPU-second, even when idle\n- **Rate limits**: Serverless has rate limits, on-demand does not\n- **Cold starts**: Scaling from 0 may have brief delays\n- **Monitoring**: Regularly check deployment status and costs\n- **Cleanup**: Always delete unused deployments to avoid charges\n\nThis comprehensive guide covers all aspects of deployment management on Fireworks. 
Start with serverless for simple use cases, move to on-demand for production workloads, and consider reserved capacity for enterprise needs.", "tool_calls": []}], "input_metadata": {"row_id": "have-disassemble-simple-final-experience", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "At least one tool call was made", "metrics": {}}, "created_at": "2025-08-08T11:38:53.968430", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev17+gf8398de.dirty", "status": "finished", "num_runs": 1, "aggregation_method": "mean"}, "pid": 95890}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks. Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks AI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. 
You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: We are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. 
You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. 
Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. 
Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Introduction\nLink: https://docs.fireworks.ai/api-reference/introduction\nContent: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nIntroduction\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Create a Deployment on Fireworks AI platform\n\nCreate a Deployment on Fireworks AI platform\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: How much does Fireworks cost?\nLink: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost\nContent: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nHow much does Fireworks cost?\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. 
Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY%\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management commands and workflows.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment commands create manage delete update\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Delete a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources\nContent: Deletes an model deployment. 
firectl delete deployment [flags]\n\nDeletes an model deployment. firectl delete deployment [flags]\n\nExample\nfirectl delete deployment my-deployment\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. firectl create deployment [flags]\n\nCreate a Deployment on Fireworks AI platform\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. 
This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist.\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Deployment Command\nLink: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example\nContent: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. 
--function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. 
--force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: Delete Deployment\nLink: https://docs.fireworks.ai/api-reference/delete-deployment\nContent: \n\n\n\nREST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\nquery params:\nhard:\n\tboolean\n\tDescription: If true, this will perform a hard deletion.\n\tExample: true\nignoreChecks:\n\tboolean\n\tDescription: If true, this will ignore checks and force the deletion of a deployment that is currently\ndeployed and is in use.\n\tExample: true\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {}\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: 
Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Deleting Stored Responses\nLink: https://docs.fireworks.ai/guides/response-api\nContent: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. import os\nfrom openai import OpenAI\nimport requests\n\nclient = OpenAI(\n base_url=\"https://api.fireworks.ai/inference/v1\",\n api_key=os.getenv(\"FIREWORKS_API_KEY\", \"YOUR_FIREWORKS_API_KEY_HERE\")\n)\n\n# Create a response\nresponse = client.responses.create(\n model=\"accounts/fireworks/models/qwen3-235b-a22b\",\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\n", "type": "text"}], "tool_call_id": 
"functions.search:1"}, {"role": "assistant", "content": "Let me search for more information about listing and managing deployments, as well as the Fireworks Build SDK for deployment management.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"list deployments firectl Fireworks Build SDK deployment management\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: Introducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. 
Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: Basics of the Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-basics\nContent: Why use the Build SDK?\nThe Fireworks Build SDK gives you a declarative way to work with Fireworks resources like deployments, fine-tuning jobs, and datasets. We've designed it to handle all the infrastructure complexity for you, letting you focus on building your application. Instead of using the web UI, CLI, or raw API calls, you can manage everything through simple Python code with smart, logical defaults without sacrificing control and customizability. The principles of the SDK are the following: Object-oriented: Fireworks primitives are represented as Python objects. You can access their capabilities and properties through methods and attributes. Declarative: You can describe your desired state and the SDK will handle reconcilliation. Smart defaults: The SDK will infer the most logical defaults for you, prioritizing development speed and lowest cost. Here are some examples: The SDK will automatically use a serverless deployment for models that are available serverlessly unless you specify otherwise. When creating deployments, the SDK will also enable scale-to-zero with the shortest possible scale-down window. If the SDK determines that a resource already exists by matching its signature (see below), it will re-use the existing resource instead of creating a new one. Customizable: Although we enable smart defaults, you still have full access to the configuration parameters for any Fireworks resource The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nWhy use the Build SDK?\nThe Fireworks Build SDK gives you a declarative way to work with Fireworks resources like deployments, fine-tuning jobs, and datasets. 
We've designed it to handle all the infrastructure complexity for you, letting you focus on building your application. Instead of using the web UI, CLI, or raw API calls, you can manage everything through simple Python code with smart, logical defaults without sacrificing control and customizability. The principles of the SDK are the following: Object-oriented: Fireworks primitives are represented as Python objects. You can access their capabilities and properties through methods and attributes. Declarative: You can describe your desired state and the SDK will handle reconcilliation. Smart defaults: The SDK will infer the most logical defaults for you, prioritizing development speed and lowest cost. Here are some examples: The SDK will automatically use a serverless deployment for models that are available serverlessly unless you specify otherwise. When creating deployments, the SDK will also enable scale-to-zero with the shortest possible scale-down window. If the SDK determines that a resource already exists by matching its signature (see below), it will re-use the existing resource instead of creating a new one. Customizable: Although we enable smart defaults, you still have full access to the configuration parameters for any Fireworks resource The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nWhy use the Build SDK?\nThe Fireworks Build SDK gives you a declarative way to work with Fireworks resources like deployments, fine-tuning jobs, and datasets. We've designed it to handle all the infrastructure complexity for you, letting you focus on building your application. Instead of using the web UI, CLI, or raw API calls, you can manage everything through simple Python code with smart, logical defaults without sacrificing control and customizability. 
The principles of the SDK are the following: Object-oriented: Fireworks primitives are represented as Python objects. You can access their capabilities and properties through methods and attributes. Declarative: You can describe your desired state and the SDK will handle reconcilliation. Smart defaults: The SDK will infer the most logical defaults for you, prioritizing development speed and lowest cost. Here are some examples: The SDK will automatically use a serverless deployment for models that are available serverlessly unless you specify otherwise. When creating deployments, the SDK will also enable scale-to-zero with the shortest possible scale-down window. If the SDK determines that a resource already exists by matching its signature (see below), it will re-use the existing resource instead of creating a new one. Customizable: Although we enable smart defaults, you still have full access to the configuration parameters for any Fireworks resource The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: Querying existing dedicated deployments\nLink: https://docs.fireworks.ai/tools-sdks/python-client/querying-dedicated-deployments\nContent: When you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams.\n\nWhen you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. 
This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams.\n\nDeployment ID requirements\nThe id parameter should match exactly with your existing deployment: Use the deployment name/ID as shown in the Fireworks dashboard The ID is case-sensitive and must match exactly If the deployment doesn't exist, you'll receive an error when making requests\n\n", "type": "text"}, {"text": "Title: Fireworks SDK\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: You can connect to your existing deployment using the Fireworks Build SDK : from fireworks import LLM\n\n# Connect to your existing deployment\nllm = LLM(\n model=\"llama-v3p2-3b-instruct\", # The model your deployment is running\n deployment_type=\"on-demand\",\n id=\"my-deployment-id\", # Your deployment ID\n)\n\n# Use OpenAI-compatible chat completions\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) When connecting to an existing deployment with the SDK, you don't need to call .apply() - the deployment is already running. For more details on querying existing deployments with Fireworks SDK, see our guide on querying dedicated deployments .\n\nYou can connect to your existing deployment using the Fireworks Build SDK : from fireworks import LLM\n\n# Connect to your existing deployment\nllm = LLM(\n model=\"llama-v3p2-3b-instruct\", # The model your deployment is running\n deployment_type=\"on-demand\",\n id=\"my-deployment-id\", # Your deployment ID\n)\n\n# Use OpenAI-compatible chat completions\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) When connecting to an existing deployment with the SDK, you don't need to call .apply() - the deployment is already running. 
For more details on querying existing deployments with Fireworks SDK, see our guide on querying dedicated deployments .\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: Step 1: Configure the Fireworks Build SDK\nLink: https://docs.fireworks.ai/structured-responses/structured-output-grammar-based\nContent: from fireworks import LLM\n\nclient = LLM(\n model=\"llama-v3p1-405b-instruct\",\n deployment_type=\"serverless\"\n)\n\nfrom fireworks import LLM\n\nclient = LLM(\n model=\"llama-v3p1-405b-instruct\",\n deployment_type=\"serverless\"\n)\n\nPrerequisites\nBefore you begin, ensure you have the following: Python installed on your system. Build SDK installed. You can install it using pip: Next, select the model you want to use. In this example, we use llama-v3p1-405b-instruct , but all fireworks models support this feature.\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: \ud83d\udd27 Build SDK: Deployment Configuration Application Requirement\nLink: https://docs.fireworks.ai/updates/changelog\nContent: The Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14.\n\nThe Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. 
Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14.\n\n\ud83c\udfaf Build SDK: Reward-kit integration for evaluator development\nThe Build SDK now natively integrates with reward-kit to simplify evaluator development for Reinforcement Fine-Tuning (RFT) . You can now create custom evaluators in Python with automatic dependency management and seamless deployment to Fireworks infrastructure. Key features: Native reward-kit integration for evaluator development Automatic packaging of dependencies from pyproject.toml or requirements.txt Local testing capabilities before deployment Direct integration with Fireworks datasets and evaluation jobs Support for third-party libraries and complex evaluation logic See our Developing Evaluators guide to get started with your first evaluator in minutes.\n\n", "type": "text"}, {"text": "Title: Set up your developer environment & call the Fireworks API\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content)\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content)\n\nPython (Fireworks)\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\n", "type": "text"}, {"text": "Title: Example with Fireworks Build SDK\nLink: https://docs.fireworks.ai/faq-new/models-inference/how-to-get-performance-metrics-for-streaming-responses\nContent: from fireworks import LLM\nimport os\n\nllm = LLM(\n model=\"llama-v3p1-8b-instruct\",\n deployment_type=\"serverless\",\n api_key=os.environ[\"FIREWORKS_API_KEY\"],\n)\n\n# Streaming completion with performance metrics\nstream = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Hello, world!\"}],\n max_tokens=100,\n stream=True,\n perf_metrics_in_response=True,\n)\n\nfor chunk in stream:\n # Use getattr to avoid linter errors for unknown attributes\n perf_metrics = getattr(chunk, \"perf_metrics\", None)\n finish_reason = getattr(chunk.choices[0], \"finish_reason\", None)\n if perf_metrics is not None and finish_reason:\n print(\"Performance metrics:\", perf_metrics)\n\nfrom fireworks import LLM\nimport os\n\nllm = LLM(\n model=\"llama-v3p1-8b-instruct\",\n deployment_type=\"serverless\",\n api_key=os.environ[\"FIREWORKS_API_KEY\"],\n)\n\n# Streaming completion with performance metrics\nstream = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Hello, world!\"}],\n max_tokens=100,\n stream=True,\n perf_metrics_in_response=True,\n)\n\nfor chunk in stream:\n # Use getattr to avoid linter errors for unknown attributes\n perf_metrics = getattr(chunk, \"perf_metrics\", None)\n finish_reason = getattr(chunk.choices[0], \"finish_reason\", None)\n if 
perf_metrics is not None and finish_reason:\n print(\"Performance metrics:\", perf_metrics)\n\nPerformance Metrics Overview\nThe Inference API returns several per-request metrics in the response. They can be useful for one-off debugging or can be logged by the client in their preferred observability tool. For aggregate metrics, see the usage dashboard. Non-streaming requests: Performance metrics are always included in response headers (e.g., fireworks-prompt-tokens , fireworks-server-time-to-first-token ). Streaming requests: Only selected performance metrics, such as \"fireworks-server-time-to-first-token,\" are available because HTTP headers must be sent before the first token can be streamed. Use the perf_metrics_in_response body parameter to include all metrics in the last SSE event of the response body.\n\n", "type": "text"}, {"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Based on my research, I can now provide you with a comprehensive guide on managing deployments on Fireworks. Let me break this down into different approaches:\n\n## Managing Deployments on Fireworks\n\nFireworks offers three main ways to manage deployments:\n\n### 1. 
**Fireworks Build SDK (Recommended - Python)**\nThis is the most user-friendly approach using Python.\n\n**Installation:**\n```bash\npip install --upgrade fireworks-ai\n```\n\n**Basic Deployment Creation:**\n```python\nfrom fireworks import LLM\n\n# Create a new deployment\nllm = LLM(\n model=\"llama-v3p2-3b-instruct\",\n deployment_type=\"on-demand\", # or \"serverless\", \"on-demand-lora\"\n id=\"my-deployment-name\"\n)\n\n# Apply the deployment (required for on-demand deployments)\nllm.apply()\n\n# Use the deployment\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Hello, world!\"}]\n)\nprint(response.choices[0].message.content)\n```\n\n**Key Features:**\n- **Smart defaults**: Automatically selects optimal deployment type\n- **Declarative**: Describe desired state, SDK handles reconciliation\n- **Object-oriented**: Deployments are Python objects with methods/attributes\n- **Scale-to-zero**: Enabled by default with shortest scale-down window\n\n### 2. **Firectl CLI Tool**\nCommand-line interface for deployment management.\n\n**Create a deployment:**\n```bash\nfirectl create deployment \"accounts/fireworks/models/llama-v3p2-3b-instruct\" --id my-deployment\n```\n\n**Deploy LoRA fine-tunes:**\n```bash\n# Single command for LoRA deployment\nfirectl create deployment \"accounts/fireworks/models/\"\n\n# Traditional two-step method (for multi-LoRA)\nfirectl create deployment \"accounts/fireworks/models/\" --enable-addons\nfirectl load-lora --deployment \n```\n\n**List deployments:**\n```bash\nfirectl list deployments\n```\n\n**Delete deployments:**\n```bash\nfirectl delete deployment my-deployment\n```\n\n**Update default deployment:**\n```bash\nfirectl update deployed-model --default\n```\n\n### 3. 
**REST API**\nDirect API calls for programmatic management.\n\n**Create deployment:**\n```bash\ncurl -X POST https://api.fireworks.ai/v1/accounts/{account_id}/deployments \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"model\": \"llama-v3p2-3b-instruct\",\n \"deployment_type\": \"on-demand\"\n }'\n```\n\n**Delete deployment:**\n```bash\ncurl -X DELETE https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id} \\\n -H \"Authorization: Bearer YOUR_API_KEY\"\n```\n\n## Deployment Types Explained\n\n### **Serverless Deployments**\n- **Billing**: Per-token usage\n- **Best for**: Variable/low usage, quick prototyping\n- **Management**: Fully managed by Fireworks\n- **Access**: Instant, no setup required\n\n### **On-Demand Deployments**\n- **Billing**: Per GPU-second\n- **Best for**: Consistent usage, predictable performance\n- **Advantages**: \n - No rate limits\n - Predictable performance\n - Access to larger model selection\n - Custom base models from Hugging Face\n- **Management**: Requires explicit creation/deletion\n\n### **Reserved Capacity**\n- **Best for**: High-volume, consistent usage\n- **Advantages**: More reliable capacity, higher GPU quotas\n- **Contact**: inquiries@fireworks.ai for enterprise pricing\n\n## Complete Management Workflow Example\n\nHere's a comprehensive example using the Build SDK:\n\n```python\nfrom fireworks import LLM\nimport os\n\n# Set up API key\nos.environ[\"FIREWORKS_API_KEY\"] = \"your-api-key\"\n\n# 1. 
Create and manage deployments\ndef manage_deployments():\n \n # Create a serverless deployment (no .apply() needed)\n serverless_llm = LLM(\n model=\"llama4-maverick-instruct-basic\",\n deployment_type=\"serverless\"\n )\n \n # Create an on-demand deployment\n ondemand_llm = LLM(\n model=\"llama-v3p2-3b-instruct\",\n deployment_type=\"on-demand\",\n id=\"my-production-deployment\",\n max_instances=3,\n min_instances=0 # Enable scale-to-zero\n )\n \n # Apply the on-demand deployment\n ondemand_llm.apply()\n \n # 2. Query existing deployments\n print(\"Connecting to existing deployment...\")\n existing_llm = LLM(\n model=\"llama-v3p2-3b-instruct\",\n deployment_type=\"on-demand\",\n id=\"my-existing-deployment\"\n )\n \n # 3. Use deployments\n response = existing_llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Explain deployment management\"}]\n )\n \n # 4. Clean up (delete deployment when done)\n # firectl delete deployment my-production-deployment\n \n return response.choices[0].message.content\n\n# Run the workflow\nresult = manage_deployments()\nprint(result)\n```\n\n## Best Practices\n\n1. **Cost Management**: Use serverless for development, on-demand for production\n2. **Monitoring**: Track usage through the Fireworks dashboard\n3. **Cleanup**: Always delete unused on-demand deployments to avoid charges\n4. **Scaling**: Use scale-to-zero for cost efficiency\n5. **Naming**: Use descriptive deployment IDs for easy management\n\n## Getting Started\n\n1. **Install the Build SDK**: `pip install fireworks-ai`\n2. **Get API Key**: From https://app.fireworks.ai/dashboard/api-keys\n3. 
**Choose Approach**: \n - **Beginners**: Start with Build SDK\n - **CLI Users**: Use firectl\n - **API Integration**: Use REST API\n\nFor enterprise needs or reserved capacity, contact inquiries@fireworks.ai.\n\nWould you like me to dive deeper into any specific aspect of deployment management?", "tool_calls": []}], "input_metadata": {"row_id": "look-win-current-private-teacher", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "At least one tool call was made", "metrics": {}}, "created_at": "2025-08-08T11:43:38.694430", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev19+gf755623", "status": "finished", "num_runs": 1, "aggregation_method": "mean"}, "pid": 97954}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "invent-special-real-rich-dream", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:51:25.530495", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev19+gf755623.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 2168}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": 
"Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "continue-encourage-dry-part-meeting", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T11:51:48.974779", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev19+gf755623.dirty", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 2642}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks! Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks AI\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Fireworks AI Developer Platform\nLink: https://docs.fireworks.ai/getting-started/introduction\nContent: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code!\n\nFireworks AI is the best platform for building AI product experiences with open source AI models. 
You can run and customize AI models with just a few lines of code!\n\nStart building with open source AI models\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: We are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. 
You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. 
Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. 
Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Introduction\nLink: https://docs.fireworks.ai/api-reference/introduction\nContent: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\nIntroduction\nFireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Create a Deployment on Fireworks AI platform\n\nCreate a Deployment on Fireworks AI platform\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: How much does Fireworks cost?\nLink: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost\nContent: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\nHow much does Fireworks cost?\nFireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. 
Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY%\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management commands and workflows.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"deployment management commands create update delete list Fireworks\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Deployment Command\nLink: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example\nContent: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. 
--gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one.\n\nIt's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. 
Ensure your virtual environment is active: source .venv/bin/activate\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Deleting Stored Responses\nLink: https://docs.fireworks.ai/guides/response-api\nContent: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. 
This permanently removes the conversation data. from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM\nimport requests\nimport os\n\nllm = LLM(model=\"qwen3-235b-a22b\", deployment_type=\"serverless\")\n\n# Create a response\nresponse = llm.responses.create(\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\nWhen responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. 
This permanently removes the conversation data. import os\nfrom openai import OpenAI\nimport requests\n\nclient = OpenAI(\n base_url=\"https://api.fireworks.ai/inference/v1\",\n api_key=os.getenv(\"FIREWORKS_API_KEY\", \"YOUR_FIREWORKS_API_KEY_HERE\")\n)\n\n# Create a response\nresponse = client.responses.create(\n model=\"accounts/fireworks/models/qwen3-235b-a22b\",\n input=\"What is the capital of France?\",\n store=True # This is the default\n)\n\nresponse_id = response.id\nprint(f\"Created response with ID: {response_id}\")\n\n# Delete the response immediately\nheaders = {\n \"Authorization\": f\"Bearer {os.getenv('FIREWORKS_API_KEY')}\",\n \"x-fireworks-account-id\": \"your-account-id\"\n}\ndelete_response = requests.delete(\n f\"https://api.fireworks.ai/inference/v1/responses/{response_id}\",\n headers=headers\n)\n\nif delete_response.status_code == 200:\n print(\"Response deleted successfully\")\nelse:\n print(f\"Failed to delete response: {delete_response.status_code}\")\n\n", "type": "text"}, {"text": "Title: Delete Deployment\nLink: https://docs.fireworks.ai/api-reference/delete-deployment\nContent: \n\n\n\nREST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id}\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\ndeployment_id:\n\tstring\n\tDescription: The Deployment Id\n\tExample: \"\"\n\nquery params:\nhard:\n\tboolean\n\tDescription: If true, this will perform a hard deletion.\n\tExample: true\nignoreChecks:\n\tboolean\n\tDescription: If true, this will ignore checks and force the deletion of a deployment that is currently\ndeployed and is in use.\n\tExample: true\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {}\n\n", "type": "text"}, {"text": "Title: List Deployments\nLink: 
https://docs.fireworks.ai/api-reference/list-deployments\nContent: REST Endpoint GET /v1/accounts/{account_id}/deployments\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\n\nquery params:\npageSize:\n\tinteger\n\tDescription: The maximum number of deployments to return. The maximum page_size is 200,\nvalues above 200 will be coerced to 200.\nIf unspecified, the default is 50.\n\tExample: 123\npageToken:\n\tstring\n\tDescription: A page token, received from a previous ListDeployments call. Provide this\nto retrieve the subsequent page. When paginating, all other parameters\nprovided to ListDeployments must match the call that provided the page\ntoken.\n\tExample: \"\"\nfilter:\n\tstring\n\tDescription: Only deployment satisfying the provided filter (if specified) will be\nreturned. See https://google.aip.dev/160 for the filter grammar.\n\tExample: \"\"\norderBy:\n\tstring\n\tDescription: A comma-separated list of fields to order by. e.g. \"foo,bar\"\nThe default sort order is ascending. To specify a descending order for a\nfield, append a \" desc\" suffix. e.g. \"foo desc,bar\"\nSubfields are specified with a \".\" character. e.g. \"foo.bar\"\nIf not specified, the default order is by \"create_time\".\n\tExample: \"\"\nshowDeleted:\n\tboolean\n\tDescription: If set, DELETED deployments will be included.\n\tExample: true\nreadMask:\n\tstring\n\tDescription: The fields to be returned in the response. 
If empty or \"*\", all fields will be returned.\n\tExample: \"\"\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: {\"deployments\":[{\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}],\"nextPageToken\":\"\",\"totalSize\":123}\n\nREST Endpoint GET /v1/accounts/{account_id}/deployments\n\nServer(s): https://api.fireworks.ai\n\n\nSummary: \n\nDescription: N/A\n\nSecurity methods: \nheader: \n\tAuthorization: bearer auth\n\t\n\nRequest params: \npath params:\naccount_id:\n\tstring\n\tDescription: The Account Id\n\tExample: \"\"\n\nquery params:\npageSize:\n\tinteger\n\tDescription: The maximum number of deployments to return. The maximum page_size is 200,\nvalues above 200 will be coerced to 200.\nIf unspecified, the default is 50.\n\tExample: 123\npageToken:\n\tstring\n\tDescription: A page token, received from a previous ListDeployments call. Provide this\nto retrieve the subsequent page. 
When paginating, all other parameters\nprovided to ListDeployments must match the call that provided the page\ntoken.\n\tExample: \"\"\nfilter:\n\tstring\n\tDescription: Only deployment satisfying the provided filter (if specified) will be\nreturned. See https://google.aip.dev/160 for the filter grammar.\n\tExample: \"\"\norderBy:\n\tstring\n\tDescription: A comma-separated list of fields to order by. e.g. \"foo,bar\"\nThe default sort order is ascending. To specify a descending order for a\nfield, append a \" desc\" suffix. e.g. \"foo desc,bar\"\nSubfields are specified with a \".\" character. e.g. \"foo.bar\"\nIf not specified, the default order is by \"create_time\".\n\tExample: \"\"\nshowDeleted:\n\tboolean\n\tDescription: If set, DELETED deployments will be included.\n\tExample: true\nreadMask:\n\tstring\n\tDescription: The fields to be returned in the response. If empty or \"*\", all fields will be returned.\n\tExample: \"\"\n\n\nRequest body: N/A\n\n\nResponses: \n200: object\n\tDescription: N/A\n\tExample: 
{\"deployments\":[{\"name\":\"\",\"displayName\":\"\",\"description\":\"\",\"createTime\":\"2023-11-07T05:31:56Z\",\"expireTime\":\"2023-11-07T05:31:56Z\",\"purgeTime\":\"2023-11-07T05:31:56Z\",\"deleteTime\":\"2023-11-07T05:31:56Z\",\"state\":\"STATE_UNSPECIFIED\",\"status\":{\"code\":\"OK\",\"message\":\"\"},\"minReplicaCount\":123,\"maxReplicaCount\":123,\"replicaCount\":123,\"autoscalingPolicy\":{\"scaleUpWindow\":\"\",\"scaleDownWindow\":\"\",\"scaleToZeroWindow\":\"\",\"loadTargets\":{}},\"baseModel\":\"\",\"acceleratorCount\":123,\"acceleratorType\":\"ACCELERATOR_TYPE_UNSPECIFIED\",\"precision\":\"PRECISION_UNSPECIFIED\",\"cluster\":\"\",\"enableAddons\":true,\"draftTokenCount\":123,\"draftModel\":\"\",\"ngramSpeculationLength\":123,\"numPeftDeviceCached\":123,\"deploymentTemplate\":\"\",\"autoTune\":{\"longPrompt\":true},\"placement\":{\"region\":\"REGION_UNSPECIFIED\",\"multiRegion\":\"MULTI_REGION_UNSPECIFIED\",\"regions\":[\"REGION_UNSPECIFIED\"]},\"region\":\"REGION_UNSPECIFIED\",\"updateTime\":\"2023-11-07T05:31:56Z\",\"disableDeploymentSizeValidation\":true}],\"nextPageToken\":\"\",\"totalSize\":123}\n\n\n\n", "type": "text"}, {"text": "Title: Publishing a deployed model\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: By default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model=\"\" AND deployment=\"\"'\n\nBy default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. 
Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model=\"\" AND deployment=\"\"'\n\nManaging default deployments\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist.\n\n", "type": "text"}, {"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: How do I close my Fireworks.ai account?\nLink: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account\nContent: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All 
outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\nHow do I close my Fireworks.ai account?\nTo close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: Introducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. 
It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: Python (Fireworks)\nLink: https://docs.fireworks.ai/getting-started/quickstart\nContent: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nThis is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY=\"\" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY \"\" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. 
The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\", api_key=\"\")\n\nSet up your developer environment & call the Fireworks API\nThis is the recommended way to get started : This tab uses our Fireworks Build\nSDK for the best performance and developer experience.\nSee our Client-side performance\noptimization guide for\nmore details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh .\nYou can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. 
The Build SDK handles deployment management automatically. from fireworks import LLM\n\n# Basic usage - SDK automatically selects optimal deployment type\nllm = LLM(model=\"llama4-maverick-instruct-basic\", deployment_type=\"auto\")\n\nresponse = llm.chat.completions.create(\n messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}]\n)\n\nprint(response.choices[0].message.content)\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Let me search for more specific information about the firectl tool and deployment workflows.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"firectl deployment management commands create deployment update deployment\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. 
firectl create deployment [flags]\n\nCreates a new deployment.\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. 
You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\nPublishing a deployed model\nBy default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: The first column in the output is the deployed model ID. Publish a deployed model To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. firectl update deployed-model --public\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nfirectl\nfirectl create dataset /path/to/jsonl/file\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm\nContent: # Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\n# Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\nfirectl create sftj \\\n --base-model accounts/fireworks/models/qwen2p5-vl-32b-instruct \\\n --dataset my-vlm-dataset \\\n --output-model my-custom-vlm \\\n --epochs 3 For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings .\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/models/quantization\nContent: firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8\n\nfirectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8\n\nCreating an FP8 deployment\nBy default, creating a dedicated deployment will use the FP16 checkpoint. To see what precisions are available for a\nmodel, run: get model \n\n", "type": "text"}, {"text": "Title: Uploading the model locally (firectl)\nLink: https://docs.fireworks.ai/models/uploading-custom-models\nContent: To upload a custom base model, run the following command. firectl create model /path/to/files/\n\nTo upload a custom base model, run the following command. 
firectl create model /path/to/files/\n\nTo upload a custom base model, run the following command.\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}], "input_metadata": {"row_id": "create-destroy-past-service-case", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T12:17:53.864430", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev20+ge1da45a", "status": "stopped", "num_runs": 1, "aggregation_method": "mean"}, "pid": 12679}, {"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}, {"role": "assistant", "content": "# Outline: The History of Yemeni Coffee\n\n1. **Introduction: Why Yemeni Coffee Matters** \n - Thesis: Yemen\u2019s role as the cradle of commercial coffee culture and its enduring legacy on global trade, ritual, and taste.\n\n2. *Pre-Coffee Yemen: Agriculture and Trade Routes* \n - Overview of terraced farming and the incense & spice trade that primed the Red Sea economy.\n\n3. *The Myth and Botany of Coffea arabica* \n - Ethiopian origin vs. Yemeni domestication; genetic evidence of Yemeni varietals.\n\n4. **Sufi Mystics and the First Coffeehouses** \n - 15th-century Sufi orders in Mocha and Ta\u02bfizz using *qahwa* for nocturnal devotions.\n\n5. *Mocha as the World\u2019s First Coffee Port* \n - Rise of the Red Sea entrep\u00f4t; Ottoman tax farms and the *tujjar* merchant guilds.\n\n6. **Yemeni Cultivation Techniques: Terraces, Water, and Shade** \n - Indigenous irrigation (*ghayl*), drought-resistant varietals, and the *mafrash* drying mats.\n\n7. 
*The 16th-Century Ottoman Coffee Ban and Smuggling Networks* \n - How Mecca\u2019s 1511 prohibition fueled clandestine Red Sea traffic and price inflation.\n\n8. **European Encounters: \u201cTurkish\u201d Coffee as a Yemeni Product** \n - Dutch, Venetian, and English merchants mislabeling Mocha beans; first price lists in Amsterdam (1616).\n\n9. *The Dutch East India Company and the First Seed Theft* \n - Pieter van den Broecke\u2019s 1616 smuggling of Yemeni seeds to Batavia\u2014birth of Java coffee.\n\n10. **Yemen\u2019s Monopoly Crumbles: 17th\u201318th-Century Rival Plantations** \n - French at R\u00e9union (Bourbon), Portuguese in Brazil, Spanish in Jamaica\u2014all tracing back to Mocha stock.\n\n11. *The Qasimi Era: Internal Trade, Taxes, and the Rise of the Ismaili Imamate* \n - How highland imams leveraged coffee revenues to fund statecraft and architecture.\n\n12. **Decline in the 19th Century: British Aden and the Suez Canal** \n - Shift of maritime traffic; competition from colonial plantations; collapse of Mocha\u2019s mercantile elite.\n\n13. *Legacy in Cupping Notes: Mocha Java and Flavor Memory* \n - Chocolatey, winey profiles; how Yemeni beans shaped early espresso blends.\n\n14. **Modern Revival: Specialty Coffee, NGOs, and Qima Coffee** \n - 21st-century traceability, micro-lots, and direct-trade premiums; challenges of war and water scarcity.\n\n15. *Cultural Continuities: Qat vs. Qahwa, Wedding Roasts, and Oral Poetry* \n - Daily rituals, gendered spaces, and the recitation of *zawamil* couplets over fresh brews.\n\n16. 
**Conclusion: Yemen as a Living Archive of Coffee Culture** \n - Reasserting Yemen\u2019s centrality in every cup; call for sustainable heritage preservation."}], "input_metadata": {"row_id": "review-say-right-news-government", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "15", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 21 highlighted sections (required: 15)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738493", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". For example: *3 years of experience*."}, {"role": "assistant", "content": "Dear *Steam & Bean* family, \nI come to you not on paper feet but on *three years of espresso-stained sneakers*, \nsoles softened by dawn milk-foam and dusk drip-trays. \nI have measured my mornings in *latte-art hearts*\u2014 \nsome perfect, some *wobbly, but all poured with intention*. \n\nI know the *hiss of the steam wand* like a lullaby, \nand the *clink of ceramic* like a metronome for the day. \nI have *memorized the weight of 18 grams* of ground hope, \ntamped it level, locked it tight, and watched *crema bloom* \nthe way a shy smile opens when someone says their name right. 
\n\nI speak *fluent caffeine*: \na triple-shot for the night-shift nurse, \noat-milk cappuccino for the poet who tips in verses, \ndecaf Americano for the retired teacher grading sunrise. \nI *remember orders like birthdays*, \nand birthdays like *extra whip, no charge*. \n\nI can *calibrate a grinder* faster than gossip travels, \nand *unclog a portafilter* with the patience of a gardener. \nI have *danced behind counters* two feet wide, \ntwirled between syrups and strangers, \nand never spilled a single *thank-you*. \n\nIf you need someone who *sweats kindness* and *smiles in ristretto*, \nwho counts tips not in coins but in *stories collected*, \nI am your *next opening shift*, your *steady hand at 5:59 a.m.*, \nyour *last call who still says \u201csee you tomorrow\u201d*. \n\nLet me *steam, pour, wipe, repeat*\u2014 \nuntil the chairs are upside-down and the *grinders sleep*. \nI offer you *poetry in motion*, one cup at a time. \n\nWith *grounds under my nails* and *hope in my apron*, \n\u2014[Your Name]"}], "input_metadata": {"row_id": "install-fine-financial-health-state", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 26 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738539", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a casual blog post about 
similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}, {"role": "assistant", "content": "Hey there, fellow creature-curious readers! I just spent the weekend binge-watching nature docs with my cat (who was mostly unimpressed), and I can\u2019t stop noticing how often totally different animals act like they\u2019re reading from the same playbook. So grab a snack and let\u2019s zoom out for a second\u2014here are five \u201cwait, we\u2019re not so different\u201d moments that made me laugh, gasp, and text my group chat at 2 a.m.\n\n*Moms are moms, whether they have feathers, fur, or fins.* \nFrom emperor penguins passing an egg back and forth like the world\u2019s most stressful game of hot potato, to orangutan mamas teaching their kids which leaves make the best umbrellas, the universal rule seems to be: if you\u2019re a parent, you\u2019re tired. I watched a seahorse dad give birth (yes, the males do that!) and still look like he needed a nap and a latte.\n\n*Play isn\u2019t just for puppies.* \nSnowboarding crows, surfing dolphins, snow-rolling pandas\u2014turns out \u201cbecause it\u2019s fun\u201d is a legit evolutionary strategy. Scientists think play sharpens coordination and social skills, but honestly, watching a raven sled down a metal roof on a plastic lid just screams \u201cI\u2019m doing it for the TikTok.\u201d\n\n*Grandma knows best\u2014even in elephant form.* \nElephant herds are led by the oldest female, and her decades of \u201cI\u2019ve seen this before\u201d wisdom decide when to migrate, where to find water, and how to handle that one teenage bull who thinks he\u2019s all that. Swap gray skin for gray hair and you\u2019ve basically got my nana directing Thanksgiving traffic.\n\n*Besties across species lines.* \nGoby fish and pistol shrimp run the cutest roommate situation: the shrimp digs the burrow, the goby stands guard. 
If danger shows up, the goby flicks its tail like a silent alarm, and both dive to safety. It\u2019s the underwater version of \u201cI\u2019ll cook, you do the dishes.\u201d\n\n*We all scream for sunscreen\u2014sort of.* \nHippos secrete \u201cblood sweat\u201d that acts like SPF 50, while giraffes drool black saliva that works like built-in sunglasses. Meanwhile, I\u2019m over here reapplying SPF 30 every 90 minutes and still ending up the color of a lobster. Nature\u2019s got hacks, and I\u2019m taking notes.\n\nSo next time you catch yourself thinking, \u201cI\u2019m nothing like that weird-looking animal,\u201d remember: somewhere out there a capybara is sharing its snack with a bird, and a whale is singing a song that rhymes with last year\u2019s chart-topper. We\u2019re all just variations on a theme\u2014some of us just have more legs or better night vision."}], "input_metadata": {"row_id": "should-poor-parent-war-magazine", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738569", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. 
Highlight each section name using the this format:\n*section name*"}, {"role": "assistant", "content": "*Executive Summary*\nBremen is a city of 570,000 residents, 40,000 university students, and a thriving creative scene, yet it lacks a home-grown premium sweatshirt brand that speaks to its maritime heritage, Hanseatic pride, and eco-conscious mindset. \u201cBremer Fleeze\u201d will fill that gap by producing limited-edition, GOTS-certified organic-cotton sweatshirts that combine North-German minimalism with subtle nautical iconography. Our mission: to become Bremen\u2019s first carbon-neutral apparel label and the go-to gift for locals and visitors alike. We will launch with an online-first D2C model, supported by pop-up stores in Viertel and \u00dcberseestadt, and scale to \u20ac1.2 M revenue by Year 3.\n\n*Market Analysis*\nTarget segments: (1) 18-30 students who value sustainability and streetwear aesthetics; (2) 25-45 professionals seeking premium basics; (3) tourists wanting authentic Bremen souvenirs. Competitor scan: global fast-fashion brands (H&M, Zara) lack local identity; German labels (Hugo Boss, Adidas) are either too formal or too sporty; no Bremen-specific sweatshirt exists. Market size: \u20ac45 M annual spend on mid-premium sweatshirts in Bremen & Niedersachsen combined, growing 6 % YoY. Consumer pain points: greenwashing, poor fit, generic designs. Our edge: verifiable sustainability, Bremen storytelling, and inclusive sizing (XXS-4XL).\n\n*Product & Sustainability Strategy*\nCore line: 12 unisex styles in 20 colors, 320 gsm brushed organic cotton, recycled polyester labels, water-based inks. Signature design elements: tonal outline of the Bremer Stadtmusikanten silhouette, latitude-longitude coordinates of Bremen harbor, and a discreet \u201cKohlfahrt-proof\u201d patch. Packaging: reusable tote made from retired ship sails (partnership with local sailmaker). Carbon footprint: 2.1 kg CO\u2082e per sweatshirt vs. 
8.9 kg industry average; offset via Gold-Standard wind-farm credits. End-of-life: take-back program with 15 % store credit incentive; returned garments upcycled into insulation for social housing.\n\n*Go-to-Market & Marketing Plan*\nPhase 1 (Months 1-3): Pre-launch teaser on Instagram & TikTok (#BremerFleeze), micro-influencer seeding with 50 Bremen creators, waitlist goal 5,000 sign-ups. Phase 2 (Months 4-6): E-commerce launch on Shopify, same-day bike courier delivery within Bremen city limits, QR code on hangtag linking to AR filter that overlays the Roland statue wearing the sweatshirt. Phase 3 (Months 7-12): Pop-up container store at Marktplatz during Weihnachtsmarkt, collab with Beck\u2019s Brewery for co-branded \u201cBremer Fleeze & Bremer Pils\u201d gift box. KPIs: CAC \u2264 \u20ac18, repeat purchase rate \u2265 35 %, NPS \u2265 70.\n\n*Financial Projections & Funding Needs*\nStart-up capital required: \u20ac180,000. Breakdown: \u20ac60k inventory (2,000 units), \u20ac35k marketing, \u20ac25k e-commerce & IT, \u20ac20k legal & certifications, \u20ac40k working capital. Revenue forecast: Y1 \u20ac220k, Y2 \u20ac550k, Y3 \u20ac1.2 M. Gross margin 62 %, EBITDA margin 18 % by Y3. Break-even month 14. Funding structure: \u20ac80k founder equity, \u20ac50k Bremen-based angel investor (ex-Bremer Kaffee & Co-founder), \u20ac50k zero-interest KfW Green Loan. 
Exit option: acquisition by sustainable German fashion conglomerate at 3\u00d7 revenue multiple in Year 5."}], "input_metadata": {"row_id": "discover-stop-show-different-water", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738594", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "**The Official \u201cTwo-Mom Marriage Quality Control\u201d Checklist** \n*Because nothing says romance like a quarterly performance review.*\n\n---\n\n**1. Morning Routine Coordination** \nRate: \u2610 1 (We need a UN mediator) \u2610 2 (We\u2019ve agreed to disagree\u2026 loudly) \u2610 3 (Only one of us cried into the coffee) \u2610 4 (Synchronized chaos) \u2610 5 (*We finished each other\u2019s sentences\u2026 and the toddler\u2019s homework*) \n\n*Note: Bonus points if you\u2019ve mastered the \u201cI\u2019ll pack lunches if you find the missing shoe\u201d hostage negotiation.*\n\n---\n\n**2. 
Division of Emotional Labor** \nRate: \u2610 1 (\u201cI\u2019m fine.\u201d *\u2014 famous last words*) \u2610 2 (We schedule meltdowns like dentist appointments) \u2610 3 (Therapy? We *are* the therapy) \u2610 4 (We tag-team existential dread while folding tiny socks) \u2610 5 (*We\u2019ve achieved telepathic resentment-free snack negotiations*) \n\n*Pro tip: If you\u2019ve ever passive-aggressively alphabetized the spice rack while humming \u201cYou\u2019re Welcome\u201d from Moana, circle \u2610 1 twice.*\n\n---\n\n**3. Co-Parenting Synchronization** \nRate: \u2610 1 (Our kids have unionized) \u2610 2 (We\u2019ve agreed to disagree on screen time\u2026 and bedtime\u2026 and vegetables) \u2610 3 (We bribe with different currencies: one uses stickers, the other uses Wi-Fi passwords) \u2610 4 (We\u2019ve synchronized our \u201cmom voice\u201d to a frequency only dogs and children fear) \u2610 5 (*We\u2019ve weaponized teamwork so effectively the kids think we share a brain*) \n\n*Highlight this if you\u2019ve ever high-fived over successfully gaslighting a toddler into eating broccoli.*\n\n---\n\n**Final Grade:** \n\u2610 Certified Disaster (But at least the houseplants are alive) \n\u2610 Functioning Dysfunction (We\u2019ve agreed the laundry mountain is *decorative*) \n\u2610 Power Couple (We\u2019ve weaponized Pinterest and the kids still haven\u2019t noticed) \n\u2610 *Mom-tastic Nirvana* (We finish each other\u2019s\u2026 sandwiches. And trauma. But mostly sandwiches.)\n\n---\n\n**Comments Section (for roasting each other lovingly):** \n\u201cRemember when you \u2018lost\u2019 the Elf on the Shelf for three days? 
*Iconic.*\u201d \n\u201cYour snoring sounds like a haunted dishwasher, but I\u2019d still share my last square of emergency chocolate with you.\u201d"}], "input_metadata": {"row_id": "mean-certain-bitter-sharp-party", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 18 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738623", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*."}, {"role": "assistant", "content": "*little me in the countryside*\n\n[Verse 1] \nBarefoot on the gravel road, dust between my toes, \nSunlight dripped like honey on the freckles on my nose. \nGrandma\u2019s porch swing creaked a lullaby in time, \nWhile the crickets tuned their violins to the rhythm of the pines. \n\n[Pre-Chorus] \nMason jars with lightning bugs, constellations in a jar, \nWe\u2019d race the dusk to catch the dark before it got too far. \n\n[Chorus] \nOh, little me in the countryside, chasing dreams as wide as skies, \nJumping creeks and climbing trees, believing I could fly. 
\nEvery sunset was a promise that tomorrow would be mine, \nIn the summers of my childhood, where the world was kind. \n\n[Verse 2] \nHayloft hideouts, secret forts, straw castles in the loft, \nWe were pirates, we were astronauts, anything we thought. \nMama\u2019s voice would echo, \u201cSupper!\u201d across the open fields, \nAnd the wind would carry laughter like the church bell\u2019s gentle peals. \n\n[Pre-Chorus] \nFireflies wrote our names in light, then vanished in the night, \nBut the glow stayed in our hearts, a soft and steady light. \n\n[Chorus] \nOh, little me in the countryside, chasing dreams as wide as skies, \nJumping creeks and climbing trees, believing I could fly. \nEvery sunset was a promise that tomorrow would be mine, \nIn the summers of my childhood, where the world was kind. \n\n[Bridge] \nNow the roads are paved, the porch is still, the swing don\u2019t creak no more, \nBut I can close my eyes and hear the screen door\u2019s gentle roar. \nTime may steal the years away, but it can\u2019t outrun the past\u2014 \nI keep those golden afternoons like fireflies in a glass. \n\n[Final Chorus] \nOh, little me in the countryside, you\u2019re never far behind, \nYou\u2019re the echo in the wheat fields, the shadow in my mind. 
\nThough the world keeps spinning faster, I still find you every time\u2014 \nIn the hush of dusk, in the scent of rain, in the summers of my childhood, \nWhere the world was kind."}], "input_metadata": {"row_id": "forecast-big-song-issue-service", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 1 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738649", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I stand before you, yet I\u2019m not what I seem; \na *face I wear* that isn\u2019t mine, \na *mask of stone* or paint or beam, \na *pretty lie* that keeps the truth confined. 
\n\nWhat am I?"}], "input_metadata": {"row_id": "design-slow-wish-end-fear", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738707", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "# Workshop Template \n**Title:** *Building Inclusive Excellence: The Business Case for Workplace Diversity* \n\n---\n\n### 1. Welcome & Objectives (15 min) \n- **Ice-breaker:** \u201cTwo Truths & a Dream\u201d \u2013 participants share two facts and one aspiration related to identity or culture. \n- *State the learning outcomes:* \n 1. Understand how diversity drives innovation and profit. \n 2. Identify unconscious bias and micro-behaviors. \n 3. Create an action plan for inclusive leadership. \n\n---\n\n### 2. *The Business Case for Diversity* (30 min) \n- **Data Walk:** Posters with McKinsey, Deloitte, and Harvard studies showing \u2191 revenue, \u2193 turnover. \n- *Interactive poll:* \u201cWhich statistic surprised you most?\u201d (live Mentimeter). 
\n- **Case Study:** *How a global tech firm increased patent filings by 19 % after diversifying R&D teams.* \n\n---\n\n### 3. *Unconscious Bias & Micro-behaviors* (45 min) \n- **Activity:** \u201cResume Roulette\u201d \u2013 identical CVs with different names; discuss callback disparities. \n- *Bias Busters Toolkit:* 5-step process to interrupt bias in real time. \n- **Role-play triads:** Observer, Speaker, Interrupter practicing inclusive language. \n\n---\n\n### 4. Inclusive Leadership in Action (30 min) \n- *Panel of ERG leaders* sharing measurable wins (e.g., 40 % \u2191 in promotions for under-represented groups). \n- **Speed-ideation:** 90-second pitches for inclusive policies; peer voting on top 3. \n\n---\n\n### 5. Action Planning & Commitments (20 min) \n- *Personal Inclusion Pledge:* Write one behavior to start, one to stop, one to amplify. \n- **Accountability Circles:** Pair up for 30-day check-ins. \n\n---\n\n### 6. Closing & Resources (10 min) \n- **Resource QR Code:** Links to toolkits, podcasts, and upcoming ERG events. 
\n- *Final reflection:* \u201cOne word to describe how you\u2019ll lead differently tomorrow.\u201d"}], "input_metadata": {"row_id": "consider-work-economic-popular-country", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738738", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. Make sure to highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Your Name]** \n[Your Address] \n[City, State, ZIP] \n[Email Address] | [Phone Number] | [Date]\n\n**[Local Party Chair\u2019s Name]** \n[Local Political Party Name] \n[Party Headquarters Address] \n[City, State, ZIP]\n\n---\n\n### **Introduction: Why I\u2019m Writing**\n\nDear **[Chair\u2019s Name]** and the **Executive Committee**,\n\nI am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[City/County]** and a committed volunteer who has knocked on over **2,000 doors** and organized **five successful voter-turnout events** in the last two cycles, I understand how to turn passion into turnout and energy into votes. 
I want to bring that same fire\u2014and a proven playbook\u2014to every rally our party holds between now and Election Day.\n\n---\n\n### **Core Qualifications: What I Bring to the Podium**\n\n- **Logistics Mastery**: From securing permits with the Parks Department to negotiating stage-rental discounts, I have reduced average rally costs by **28 %** while increasing attendance by **45 %** through targeted social-media geofencing and SMS reminders. \n- **Coalition Building**: I have already built relationships with **15 local unions**, **7 student groups**, and **4 faith communities** who are eager to co-sponsor rallies if given two weeks\u2019 notice. \n- **Crisis-Ready Leadership**: When an unexpected thunderstorm threatened our 2022 \u201cGet-Out-the-Vote\u201d rally, I pivoted to a **drive-in format** in under four hours\u2014resulting in zero cancellations and 300 additional cars circling the block to hear our speakers.\n\n---\n\n### **Vision for 2024: Turning Rallies into Results**\n\nMy plan is to run a **\u201cRally-to-Ballot Pipeline\u201d**:\n\n1. **Pre-Rally**: Micro-targeted digital invites to low-propensity voters within a 5-mile radius. \n2. **During Rally**: QR-code check-ins that instantly text attendees their personalized early-vote locations. \n3. **Post-Rally**: A 72-hour follow-up cadence of thank-you emails, volunteer sign-up links, and ride-share codes to the polls.\n\nI have already drafted a **90-day rally calendar** (attached) that staggers high-energy weekend events with smaller, issue-specific pop-ups near early-vote sites. All I need is the party\u2019s green light and a modest budget line for permits, A/V, and refreshments.\n\n---\n\n### **Closing: Let\u2019s Win This Together**\n\nI would welcome the chance to present this plan in person at your next committee meeting. Thank you for considering my application. 
Let\u2019s give our volunteers, candidates, and\u2014most importantly\u2014our voters the rallies they deserve.\n\n**Ready to organize, energize, and mobilize,** \n[Your Signature (if submitting hard copy)] \n[Your Printed Name]"}], "input_metadata": {"row_id": "predict-serve-messy-difficult-festival", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 28 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738766", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. 
*highlighted section*."}, {"role": "assistant", "content": "The opposite of youth is not age, but *stagnation*\u2014the moment curiosity closes its fist and wonder stops asking \u201cwhat if?\u201d \nWhile *age* merely counts revolutions around the sun, *true youth* is the *capacity to keep revising the map of oneself* long after the hair turns silver."}], "input_metadata": {"row_id": "accept-expect-black-young-area", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 4 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738795", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. Use markdown to highlight at least 3 sections in your answer."}, {"role": "assistant", "content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. \n\nInside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. 
The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. \n\nIf you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**."}], "input_metadata": {"row_id": "store-be-past-hard-war", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 13 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738826", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*."}, {"role": "assistant", "content": "Yo, it\u2019s MC Broke-Again, fresh outta ramen, \nphone buzzin\u2019 like a bee in a jar of gin. \nUnknown number\u2014man, I\u2019m already suspicious, \nbut the voice got *royal timbre*, straight-up delicious. \n\n\u201cGreetings, Prince Darnell of House Wi-Fi, \nI\u2019m the Royal Scribe from Lagos on high. 
\nYour great-great-grand-uncle was the King of Nigeria, \nleft a vault of gold bars and a pet cheetah named Maria.\u201d \n\nI\u2019m like, \u201cHold up\u2014me? Royalty? \nLast week I pawned my TV for a two-piece and sweet tea.\u201d \nHe says, \u201cSign this PDF, wire a small advance fee, \njust 419 bucks and the throne belongs to thee.\u201d \n\n**I\u2019m already picking out a crown on Amazon Prime, \nordering a robe that says \u2018Fresh-to-Death\u2019 in gold slime.** \n\nI Venmo the cash, add a royal emoji\u2014\ud83d\udc51 \nthen sprint to the mirror, practice my \u201cMy people, be free!\u201d \nMom walks in, \u201cWhy you wrapped in bedsheets with a Burger King crown?\u201d \nI\u2019m like, \u201cSilence, peasant! Bow before your prince of renown!\u201d \n\nTwo days later, no gold, no cheetah, just crickets, \nmy bank account looking like a ghost town with limericks. \nI call the number\u2014disconnected, tone dead, \neven the scammer ghosted me harder than my ex on read. \n\nSo I\u2019m back in the basement, eating discount sardines, \nbut I kept the crown\u2014*it\u2019s made of genuine Burger King plastic, supreme*."}], "input_metadata": {"row_id": "exploit-serve-invisible-community-minute", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 2 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738850", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", 
"threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*."}, {"role": "assistant", "content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, \u201cACHOO\u2014did somebody order a cold?\u201d \n\nOne Tuesday, Gary spotted his dream target: *Bartholomew \u201cBart\u201d Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole \u201cfor the vitamins.\u201d Gary rubbed his microscopic hands together. \u201cPerfect. I\u2019ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!\u201d \n\nBut there was a catch\u2014literally. Gary could *catch* Bart, but he couldn\u2019t *throw* him anywhere. Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, \u201cThis is mutiny! I\u2019m supposed to be the one doing the tossing!\u201d \n\nMeanwhile, Bart\u2019s immune system\u2014led by Captain Phlegm and the Mucus Marines\u2014launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. \n\nDefeated, Gary sat in the crumpled tissue, sulking. \u201cI caught the cold,\u201d he muttered, \u201cbut I can\u2019t throw it anywhere. Worst. Superpower. 
Ever.\u201d \n\nFrom that day forward, Gary changed his business card to read: \u201cProfessional Cold Catcher\u2014No Returns, No Refunds, Absolutely No Throwing.\u201d"}], "input_metadata": {"row_id": "suggest-american-open-level-country", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738881", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Intro \u2013 Lute Drop]** \n*beat drops like a sack of wet marble* \nYo, it\u2019s 14-something, smell that? That\u2019s perspective in the air, \nGutenberg just ghost-wrote the mixtape, pages everywhere. \nI\u2019m a Medici money-mage, patron of the bizarre, \nCommissioning nightmares in egg-tempera. \n\n---\n\n### **Verse 1 \u2013 The Alchemist\u2019s Cypher** \nI paint with mercury, breathe with a lung of lead, \nMy selfie\u2019s a death-mask, gold leaf on the dead. \nBrunelleschi built a dome like a half-buried skull, \nYou think it\u2019s architecture? Nah, it\u2019s a ritual. 
\n**[weird baroque wheeze]** \nI rhyme in vanishing points, every line converges \nOn a single terrified eye that never emerges. \nPerspective\u2019s a ouija board, da Vinci holds the planchette, \nSketching choppers (helicopters) centuries before the jet-set. \n\n---\n\n### **Hook \u2013 Chant of the Anamorphic Choir** \n*distorted children\u2019s voices layered over sackbut* \nRenaissance, renaissance, put a cherub on it, \nBaby wings, baby fangs, marble veins, vomit. \nWe don\u2019t bling, we gild, we don\u2019t rap, we fresco\u2014 \nYour flow\u2019s two-dimensions, mine\u2019s *trompe-l\u2019oeil* techno. \n\n---\n\n### **Verse 2 \u2013 The Court Jester\u2019s Acid Madrigal** \n**[sudden switch to a harpsichord trap beat]** \nI juggle plague rats for the Duke of Milan, \nHe pays me in sonnets and unmarked arsenic flan. \nIsabella d\u2019Este texts me in mirror-script, \nHer emoji: a decapitated hydra, lips pursed, licked. \nI\u2019m the fool with a scalpel, dissecting songbirds to find \nWhich organ chirps\u2014turns out it\u2019s the **pineal gland** of the mind. \nMichelangelo\u2019s in the corner, carving his own ribcage, \nSays \u201cThe Sistine\u2019s just a throat, God\u2019s a uvula on stage.\u201d \n\n---\n\n### **Bridge \u2013 The Astrolabe Malfunction** \n*beat glitches like a scratched illuminated manuscript* \nCopernicus DM\u2019d me: \u201cThe sun\u2019s a strobe light, \nPlanets orbit like moths around a candle that bites.\u201d \nI reply with a lute solo reversed through a cathedral, \nNotes fall upward, monks levitate, it\u2019s procedural. \n**[static of Latin hex]** \nGalileo\u2019s telescope sees the future: TikTok, guillotine, \nHe mutters \u201c*E pur si muove*\u201d then dabs in quarantine. \n\n---\n\n### **Outro \u2013 The Anachronistic Masque** \n**[all instruments devolve into wet clay sounds]** \nSo remember me when you\u2019re double-tapping a Botticelli, \nI\u2019m the glitch in the background, the snail with a human belly. 
\nRenaissance never ended, it just learned to hide, \nUnder your touchscreen, oil-paint bleeding through the side. \nNow go\u2014vanish in a puff of ultramarine dust, \nThe past is a performance, and the future\u2019s already trussed."}], "input_metadata": {"row_id": "respond-connect-true-problem-plan", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 16 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738906", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*"}, {"role": "assistant", "content": "*J\u014dmon* hunter-gatherers (c. 14,000 BCE) gave way to *Yayoi* rice cultivators who introduced metal tools and social stratification. \nThe *Yamato* court unified much of the archipelago by the 5th century CE, absorbing *Shinto* beliefs and Chinese writing, law, and Buddhism. \nA *Heian* golden age (794\u20131185) produced literature like *The Tale of Genji* while the imperial court\u2019s real power waned. \nSamurai clans rose; the *Kamakura* shogunate (1185\u20131333) created Japan\u2019s first warrior government, repelling Mongol invasions. 
\nCenturies of civil war ended when *Oda Nobunaga*, *Toyotomi Hideyoshi*, and *Tokugawa Ieyasu* reunified the land, leading to the *Edo* shogunate (1603\u20131868). \nIsolation under *sakoku* preserved domestic culture until *Commodore Perry*\u2019s 1853 arrival forced unequal treaties. \nThe *Meiji* Restoration (1868) abolished feudalism, industrialized rapidly, and built an empire that won wars against China and Russia. \nImperial expansion\u2014Korea, Manchuria, and *Pearl Harbor*\u2014ended in atomic bombings and occupation by Allied forces (1945\u20131952). \nPost-war *miracle* growth made Japan an economic superpower, followed by stagnation since the 1990s yet enduring global cultural influence."}], "input_metadata": {"row_id": "must-watch-bad-hate-magazine", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 16 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738931", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. 
Example: *This is a highlighted phrase*."}, {"role": "assistant", "content": "John in *De Smet* \n*Quaker* silence\u2014then he yells \n\u201cWhere\u2019s my *oatmeal*, Friend?\u201d"}], "input_metadata": {"row_id": "take-love-start-short-work", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738958", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with \"*\", like *highlighted text*."}, {"role": "assistant", "content": "Tonight, the 54th Annual Grammy Awards [1] stand as a testament to the enduring power of recorded music. Under the vaulted ceiling of the *Staples Center*, the Recording Academy gathers to bestow its highest accolades upon those whose artistry has defined the sonic landscape of eligibility year 2011 [2]. 
\n\nThe evening\u2019s proceedings are governed by the *strictures of the Grammy Charter* [3], ensuring that every statuette awarded is the result of peer-review balloting among the Academy\u2019s 12,000 voting members [4]. In accordance with Rule 3.01, only recordings commercially released in the United States between 1 October 2010 and 30 September 2011 are eligible [5]. \n\nAmong the most anticipated categories is *Album of the Year* [6], whose nominees\u2014Adele\u2019s *21*, Foo Fighters\u2019 *Wasting Light*, Lady Gaga\u2019s *Born This Way*, Bruno Mars\u2019 *Doo-Wops & Hooligans*, and Rihanna\u2019s *Loud*\u2014represent a cross-section of genre, generation, and geography [7]. The *Record of the Year* [8] shortlist similarly spans continents and styles, underscoring the Academy\u2019s commitment to excellence irrespective of commercial metrics. \n\nThe *Producer of the Year, Non-Classical* [9] category spotlights the architects of sound whose meticulous craft transforms raw performance into timeless recording. Paul Epworth, whose work on *21* has garnered both critical and commercial acclaim, is widely regarded as the frontrunner [10]. \n\nAs the telecast proceeds, the *In Memoriam* segment [11] will honor those luminaries whose voices were stilled during the eligibility year, their legacies enshrined in the grooves and bytes of the recordings they leave behind. 
\n\nThe 54th Annual Grammy Awards thus serve not merely as a celebration, but as a *codification of excellence* [12], ensuring that the artistry of 2011 resonates through the decades to come."}], "input_metadata": {"row_id": "delete-democratic-hot-new-work", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 13 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.738984", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*."}, {"role": "assistant", "content": "# 7 Surprising Facts About Dutch That Will Make You Say \u201cWauw!\u201d\n\nWhen most people think of Dutch, they picture windmills, tulips, and maybe a stroopwafel or two. But the language behind those images is far stranger\u2014and more fascinating\u2014than you might expect. Below are seven facts that prove Dutch is anything but *just English with extra g\u2019s*.\n\n---\n\n### 1. The Longest Word Is 53 Letters\u2014and It\u2019s Official \nThe Dutch *Encyclopedie* recognizes *aansprakelijkheidswaardevaststellingsveranderingen* (53 letters) as a legitimate word. 
It translates to \u201cchanges in liability value assessments,\u201d which is ironic because the word itself is a liability to pronounce. Try saying that three times fast\u2014or even once slowly.\n\n---\n\n### 2. Dutch Has a Word for *Everything*\u2014Including \u201cSchadenfreude\u201d but Cozier \nEnglish borrows *Schadenfreude* from German, but Dutch gives us *gezellig*, a term that packs \u201ccozy,\u201d \u201cfun,\u201d and \u201cbelonging\u201d into one syllable. A caf\u00e9 can be *gezellig*, a party can be *gezellig*, and even a quiet evening with friends can be *gezellig*. There\u2019s no direct English equivalent, which is why expats end up using it in English sentences: \u201cThat dinner was so\u2026 *gezellig*.\u201d\n\n---\n\n### 3. The Dutch Alphabet Ends with \u201cX, Y, IJ, Z\u201d \nMost alphabets end with \u201cX, Y, Z,\u201d but Dutch sneaks in an extra character: *IJ*. It\u2019s considered a single letter (yes, two letters acting as one), and it even has its own capitalized form: *IJsselmeer*, the famous lake. Type designers have to create a special ligature for it, and crossword puzzles treat *IJ* as one square. Imagine Scrabble tiles clinking together as someone proudly lays down \u201cIJSBERG\u201d for 72 points.\n\n---\n\n### 4. Dutch Greetings Change by Time of Day\u2014But Not How You\u2019d Expect \nIn English, \u201cgood morning\u201d fades into \u201cgood afternoon\u201d around noon. Dutch, however, switches from *goedemorgen* to *goedemiddag* at exactly 12:00 p.m.\u2014unless you\u2019re in Belgium, where some regions wait until 1:00 p.m. Miss the cutoff and you\u2019ll get a polite correction: \u201cHet is al middag, hoor!\u201d (\u201cIt\u2019s already afternoon, you know!\u201d).\n\n---\n\n### 5. 
Dutch Is the Closest Major Language to English\u2014But That Doesn\u2019t Mean It\u2019s Easy \nLinguists classify Dutch and English as *West Germanic siblings*, which means basic vocabulary overlaps: *appel* (apple), *boek* (book), *water* (water). Yet false friends lurk everywhere. *Durf jij?* doesn\u2019t mean \u201cDo you dare?\u201d\u2014it means \u201cDo you dare?\u201d\u2026 but with a cultural undertone of *prove it*. And *be careful* with *eventueel*, which means \u201cpossibly,\u201d not \u201ceventually.\u201d\n\n---\n\n### 6. Dutch Has Three Genders\u2014But Only Two Articles \nGrammatically, Dutch retains masculine, feminine, and neuter genders. In practice, however, only *de* (common gender) and *het* (neuter) are used. The result? Native speakers themselves debate whether *auto* (car) is masculine or feminine. The safest route is to just say *de auto* and avoid the awkward silence that follows gendered guesswork.\n\n---\n\n### 7. Dutch Invented the Word \u201cCookie\u201d\u2014and the World Never Looked Back \nThe English word \u201ccookie\u201d comes from the Dutch *koekje* (\u201clittle cake\u201d). Dutch settlers brought *koekjes* to New Amsterdam (now New York), and the rest is snack history. Next time you dunk a chocolate chip, thank the Dutch for their linguistic\u2014and culinary\u2014contribution.\n\n---\n\n### Bonus: Dutch Tongue Twisters Are Next-Level \nTry this: *De kat krabt de krullen van de trap*. Translation: \u201cThe cat scratches the curls off the stairs.\u201d Say it five times without twisting your tongue into a *stroopwafel*.\n\n---\n\nWhether you\u2019re planning a trip to Amsterdam or just want to impress friends at trivia night, Dutch offers a treasure trove of quirks. 
From *gezellig* evenings to 53-letter bureaucratic monsters, the language proves that small countries can produce *enormous linguistic personality*."}], "input_metadata": {"row_id": "live-important-military-only-love", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 33 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.739013", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "Gideon has several good options for his surplus:\n\n1. **Sell it at the local farmers\u2019 market or to a regional wholesaler**, turning the extra produce into cash that he can use for next season\u2019s seeds, equipment repairs, or household expenses. \n2. *Donate a portion to the nearby food bank or community kitchen*, helping families who struggle with food insecurity while also earning a small tax deduction. \n3. Preserve some of the surplus\u2014by canning tomatoes, drying herbs, or storing root vegetables in a cool cellar\u2014so his own family has nutritious food throughout the winter. \n4. 
Trade with neighboring farmers: swap excess corn for someone else\u2019s surplus eggs or honey, strengthening community ties and diversifying his pantry without spending money."}], "input_metadata": {"row_id": "think-indicate-write-local-company", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 2 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-08T23:30:54.739037", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "finished", "run_id": "hear-live-common-late-hate", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 52072}, {"messages": [{"role": "system", "content": "You are a helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}], "input_metadata": {"row_id": "expect-upload-close-president-goal", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "created_at": "2025-08-08T23:34:12.823862", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev20+ge1da45a.dirty", "status": "stopped", "run_id": "meet-treat-central-possible-lot", "num_runs": 1, "aggregation_method": "mean"}, "pid": 52715}, {"messages": [{"role": "system", "content": "You are a 
helpful assistant that can answer questions about Fireworks.\nALWAYS provide code or commands to execute to answer the question."}, {"role": "user", "content": "Can you teach me about how to manage deployments on Fireworks"}, {"role": "assistant", "content": "I'll help you learn about managing deployments on Fireworks. Let me search for comprehensive information about deployment management.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment management Fireworks\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: On-demand deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\nOn-demand deployments\nIf you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details.\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: Introducing the Fireworks Build SDK\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction\nContent: Introducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nIntroducing the Fireworks Build SDK\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\nThe Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback.\n\n", "type": "text"}, {"text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. 
How do I fix this?\nLink: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks\nContent: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\nI have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this?\nIf you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord.\n\n", "type": "text"}, {"text": "Title: Querying existing dedicated deployments\nLink: https://docs.fireworks.ai/tools-sdks/python-client/querying-dedicated-deployments\nContent: When you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. 
This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams.\n\nWhen you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams.\n\nDeployment ID requirements\nThe id parameter should match exactly with your existing deployment: Use the deployment name/ID as shown in the Fireworks dashboard The ID is case-sensitive and must match exactly If the deployment doesn't exist, you'll receive an error when making requests\n\n", "type": "text"}, {"text": "Title: Migration and traffic management\nLink: https://docs.fireworks.ai/guides/prompt-caching\nContent: When migrating between deployments that use prompt caching, it's crucial to implement proper traffic routing to maintain optimal cache hit rates. When gradually routing traffic to a new deployment, use consistent user/session-based sampling rather than random sampling. Here's the recommended implementation for traffic routing: import hashlib\n\n# Configure traffic fraction (e.g., 20% to new deployment)\nfireworks_traffic_fraction = 0.2\nuser_id = \"session-id-123\"\n\n# Generate deterministic hash from user_id\nhashed_user_id = int(hashlib.md5(user_id.encode()).hexdigest(), 16) # MD5 hash on user-id and convert to integer\nMAX_HASH = 2**128 - 1 # MD5 hash maximum value\n\n# Compute ratio for consistent routing\nratio = hashed_user_id / MAX_HASH # Returns 0.0 to 1.0\n\nif (ratio < fireworks_traffic_fraction):\n send_to_new_deployment(user=hashed_user_id) # Pass user ID for caching\nelse:\n send_elsewhere() # Route to old deployment or serverless\n\nWhen migrating between deployments that use prompt caching, it's crucial to implement proper traffic routing to maintain optimal cache hit rates. 
When gradually routing traffic to a new deployment, use consistent user/session-based sampling rather than random sampling. Here's the recommended implementation for traffic routing: import hashlib\n\n# Configure traffic fraction (e.g., 20% to new deployment)\nfireworks_traffic_fraction = 0.2\nuser_id = \"session-id-123\"\n\n# Generate deterministic hash from user_id\nhashed_user_id = int(hashlib.md5(user_id.encode()).hexdigest(), 16) # MD5 hash on user-id and convert to integer\nMAX_HASH = 2**128 - 1 # MD5 hash maximum value\n\n# Compute ratio for consistent routing\nratio = hashed_user_id / MAX_HASH # Returns 0.0 to 1.0\n\nif (ratio < fireworks_traffic_fraction):\n send_to_new_deployment(user=hashed_user_id) # Pass user ID for caching\nelse:\n send_elsewhere() # Route to old deployment or serverless\n\nAdvanced: cache locality for Enterprise deployments\nDedicated deployments on an Enterprise plan allow you to pass an additional hint in the request to improve cache hit rates. First, the deployment needs to be created or updated with an additional flag: Then the client can pass an opaque identifier representing a single user or\nsession in the user field of the body or in the x-session-affinity header. Fireworks\nwill try to route requests with the identifier to the same server, further reducing response times. It's best to choose an identifier that groups requests with long shared prompt\nprefixes. For example, it can be a chat session with the same user or an\nassistant working with the same shared context.\n\n", "type": "text"}, {"text": "Title: Do you host your deployments in the EU or Asia?\nLink: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia\nContent: Do you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. 
You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nDo you host your deployments in the EU or Asia?\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\nWe are currently deployed in multiple U.S.-based locations. However, we\u2019re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response.\n\n", "type": "text"}, {"text": "Title: Deployments and deployment types\nLink: https://docs.fireworks.ai/getting-started/concepts\nContent: A model must be deployed before it can be used for inference. A deployment is a collection (one or more) model servers that host one base model and optionally one or more LoRA addons. Fireworks supports two types of deployments: Serverless deployments: Fireworks hosts popular base models on shared \"serverless\" deployments. Users pay-per-token to query these models and do not need to configure GPUs. The most popular serverless deployments also support serverless LoRA addons. See the Deploying to serverless guide for details. Dedicated deployments: Dedicated deployments enable users to configure private deployments with a wide array of hardware (see on-demand deployments guide ). Dedicated deployments give users performance guarantees and the most flexibility and control over what models can be deployed. Both LoRA addons and base models can be deployed to dedicated deployments. 
Dedicated deployments are billed by a GPU-second basis (see pricing page). See the Querying text models guide for a comprehensive overview of making LLM inference.\n\nA model must be deployed before it can be used for inference. A deployment is a collection (one or more) model servers that host one base model and optionally one or more LoRA addons. Fireworks supports two types of deployments: Serverless deployments: Fireworks hosts popular base models on shared \"serverless\" deployments. Users pay-per-token to query these models and do not need to configure GPUs. The most popular serverless deployments also support serverless LoRA addons. See the Deploying to serverless guide for details. Dedicated deployments: Dedicated deployments enable users to configure private deployments with a wide array of hardware (see on-demand deployments guide ). Dedicated deployments give users performance guarantees and the most flexibility and control over what models can be deployed. Both LoRA addons and base models can be deployed to dedicated deployments. Dedicated deployments are billed by a GPU-second basis (see pricing page). See the Querying text models guide for a comprehensive overview of making LLM inference.\n\nControl plane and data plane\nThe Fireworks API can be split into a control plane and a data plane. The control plane consists of APIs used for managing the lifecycle of resources. This\nincludes your account, models, and deployments. 
The data plane consists of the APIs used for inference and the backend services that power\nthem.\n\n", "type": "text"}, {"text": "Title: Deployment Process\nLink: https://docs.fireworks.ai/evaluators/api_reference/reward_function_decorator\nContent: When .deploy() is called, the decorator: Extracts the function's source code Creates a wrapper that handles the Fireworks evaluation format Creates a temporary directory with the wrapped function Uploads and registers the function with the Fireworks API\n\nWhen .deploy() is called, the decorator: Extracts the function's source code Creates a wrapper that handles the Fireworks evaluation format Creates a temporary directory with the wrapped function Uploads and registers the function with the Fireworks API\n\n.deploy()\nThe decorator adds a .deploy() method to the function, allowing it to be deployed to Fireworks. evaluation_id = my_reward_function.deploy(\n name=\"my-evaluator\",\n description=\"Evaluates responses based on clarity and accuracy\",\n account_id=None, # Optional, defaults to configured account\n auth_token=None, # Optional, defaults to configured token\n force=False, # Set to True to overwrite if it already exists\n providers=None # Optional model providers configuration\n)\n\n", "type": "text"}, {"text": "Title: \ud83d\udd27 Build SDK: Deployment Configuration Application Requirement\nLink: https://docs.fireworks.ai/updates/changelog\nContent: The Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. 
Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14.\n\nThe Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type=\"on-demand\" or deployment_type=\"on-demand-lora\" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. 
This applies to Python SDK version >=0.19.14.\n\n\ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Let me search for more specific information about deployment management tools and processes.", "tool_calls": [{"id": "functions.search:1", "function": {"arguments": "{\"query\": \"firectl deployment management commands\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: Why am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nLink: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo\nContent: This error typically occurs when your account ID is not properly configured.\n\nThis error typically occurs when your account ID is not properly configured.\n\nWhy am I getting \"invalid id\" errors when using firectl commands like create deployment or list deployments?\nThis error typically occurs when your account ID is not properly configured.\n\n", "type": "text"}, {"text": "Title: Managing default deployments\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: Since a model may be deployed to multiple deployments, querying by model name will 
route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model \n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model \n\nSince a model may be deployed to multiple deployments, querying by model name will route to the \"default\" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default\n\n", "type": "text"}, {"text": "Title: Does Fireworks support custom base models?\nLink: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models\nContent: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nYes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models.\n\nDoes Fireworks support custom base models?\nYes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models.\n\n", "type": "text"}, {"text": "Title: \ud83d\ude80 Easier & faster LoRA fine-tune deployments on Fireworks\nLink: https://docs.fireworks.ai/updates/changelog\nContent: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment \"accounts/fireworks/models/\"\n\nYou can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment \"accounts/fireworks/models/\" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation.\n\n", "type": "text"}, {"text": "Title: Quick deployment\nLink: https://docs.fireworks.ai/fine-tuning/single-lora\nContent: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nDeploy your LoRA fine-tuned model with one simple command: firectl create deployment \"accounts/fireworks/models/\"\n\nAlternative deployment method\nThis two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment \"accounts/fireworks/models/\" --enable-addons\n\n", "type": "text"}, {"text": "Title: Deploying a fine-tuned model using an on-demand deployment\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-models\nContent: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nUse the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment \n\nDeploying a fine-tuned model serverlessly\nNot all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora \n\n", "type": "text"}, {"text": "Title: Deploy Command\nLink: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview\nContent: The deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nThe deploy command deploys a reward function as an evaluator on the Fireworks platform.\n\nCommand Overview\nThe Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon).\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment. firectl create deployment [flags]\n\nCreates a new deployment. 
firectl create deployment [flags]\n\nExample\nfirectl create deployment falcon-7b\n\n", "type": "text"}, {"text": "Title: firectl\nLink: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm\nContent: # Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\n# Create a deployment for your fine-tuned VLM\nfirectl create deployment my-custom-vlm\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/deployment-id\n\nfirectl create sftj \\\n --base-model accounts/fireworks/models/qwen2p5-vl-32b-instruct \\\n --dataset my-vlm-dataset \\\n --output-model my-custom-vlm \\\n --epochs 3 For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings .\n\n", "type": "text"}, {"text": "Title: Uploading the model locally (firectl)\nLink: https://docs.fireworks.ai/models/uploading-custom-models\nContent: To upload a custom base model, run the following command. firectl create model /path/to/files/\n\nTo upload a custom base model, run the following command. 
firectl create model /path/to/files/\n\nTo upload a custom base model, run the following command.\n\n", "type": "text"}], "tool_call_id": "functions.search:1"}, {"role": "assistant", "content": "Let me search for more information about deployment lifecycle management and monitoring.", "tool_calls": [{"id": "functions.search:0", "function": {"arguments": "{\"query\": \"deployment monitoring scaling management lifecycle\"}", "name": "search"}, "type": "function"}]}, {"role": "tool", "content": [{"text": "Title: What should I expect for deployment and scaling performance?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-should-i-expect-for-deployment-and-scaling-performance\nContent: Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\nInitial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\nWhat should I expect for deployment and scaling performance?\nInitial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long\n\n", "type": "text"}, {"text": "Title: How does billing and scaling work for on-demand GPU deployments?\nLink: 
https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-and-scaling-work-for-on-demand-gpu-deployments\nContent: On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\nOn-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\nHow does billing and scaling work for on-demand GPU deployments?\nOn-demand GPU deployments have unique billing and scaling 
characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU\u2019s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times\n\n", "type": "text"}, {"text": "Title: How does billing work for on-demand deployments?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-work-for-on-demand-deployments\nContent: On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\nOn-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when 
replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\nHow does billing work for on-demand deployments?\nOn-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide.\n\n", "type": "text"}, {"text": "Title: How does the system scale?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-the-system-scale\nContent: Our system is horizontally scalable , meaning it: Scales linearly with additional replicas of the deployment Automatically allocates resources based on demand Manages distributed load handling efficiently\n\nOur system is horizontally scalable , meaning it: Scales linearly with additional replicas of the deployment Automatically allocates resources based on demand Manages distributed load handling efficiently\n\nHow 
does the system scale?\nOur system is horizontally scalable , meaning it: Scales linearly with additional replicas of the deployment Automatically allocates resources based on demand Manages distributed load handling efficiently\n\n", "type": "text"}, {"text": "Title: Multiple GPUs (vertical scaling)\nLink: https://docs.fireworks.ai/guides/ondemand-deployments\nContent: The number of GPUs used per replica is specified by passing the --accelerator-count flag. Increasing the accelerator count will increase the generation speed, time-to-first-token, and maximum QPS for your deployment, however the scaling is sub-linear. The default value for most models is 1 but may be higher for larger models that require sharding. firectl create deployment --accelerator-count 2\n\nThe number of GPUs used per replica is specified by passing the --accelerator-count flag. Increasing the accelerator count will increase the generation speed, time-to-first-token, and maximum QPS for your deployment, however the scaling is sub-linear. The default value for most models is 1 but may be higher for larger models that require sharding. firectl create deployment --accelerator-count 2\n\nThe number of GPUs used per replica is specified by passing the --accelerator-count flag. Increasing the accelerator count will increase the generation speed, time-to-first-token, and maximum QPS for your deployment, however the scaling is sub-linear. The default value for most models is 1 but may be higher for larger models that require sharding. firectl update deployment --accelerator-count 2\n\n", "type": "text"}, {"text": "Title: scale_to_zero()\nLink: https://docs.fireworks.ai/tools-sdks/python-client/sdk-reference\nContent: Sends a request to scale the deployment to 0 replicas but does not wait for it to complete. Returns: The deployment object, or None if no deployment exists deployment = llm.scale_to_zero()\n\nSends a request to scale the deployment to 0 replicas but does not wait for it to complete. 
Returns: The deployment object, or None if no deployment exists\n\nscale_to_1_replica()\nScales the deployment to at least 1 replica. llm.scale_to_1_replica()\n\n", "type": "text"}, {"text": "Title: Do you support Auto Scaling?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/do-you-support-auto-scaling\nContent: Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs\n\nYes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs\n\nDo you support Auto Scaling?\nYes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs\n\n", "type": "text"}, {"text": "Title: What are the rate limits for on-demand deployments?\nLink: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-are-the-rate-limits-for-on-demand-deployments\nContent: Request throughput scales with your GPU allocation. Base allocations include: Up to 8 A100 GPUs Up to 8 H100 GPUs On-demand deployments offer several advantages: Predictable pricing based on time units, not token I/O Protected latency and performance , independent of traffic on the serverless platform Choice of GPUs , including A100s and H100s Need more GPUs? Contact us to discuss higher allocations for your specific use case.\n\nRequest throughput scales with your GPU allocation. 
Base allocations include: Up to 8 A100 GPUs Up to 8 H100 GPUs On-demand deployments offer several advantages: Predictable pricing based on time units, not token I/O Protected latency and performance , independent of traffic on the serverless platform Choice of GPUs , including A100s and H100s Need more GPUs? Contact us to discuss higher allocations for your specific use case.\n\nWhat are the rate limits for on-demand deployments?\nRequest throughput scales with your GPU allocation. Base allocations include: Up to 8 A100 GPUs Up to 8 H100 GPUs On-demand deployments offer several advantages: Predictable pricing based on time units, not token I/O Protected latency and performance , independent of traffic on the serverless platform Choice of GPUs , including A100s and H100s Need more GPUs? Contact us to discuss higher allocations for your specific use case.\n\n", "type": "text"}, {"text": "Title: Create a deployment\nLink: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment\nContent: Creates a new deployment.\n\nCreates a new deployment.\n\nCreates a new deployment. firectl create deployment [flags]\n\n", "type": "text"}, {"text": "Title: Monitoring\nLink: https://docs.fireworks.ai/guides/prompt-caching\nContent: For dedicated deployments, information about prompt caching is returned in the\nresponse headers. The header fireworks-prompt-tokens contains the number of tokens\nin the prompt, out of which fireworks-cached-prompt-tokens are cached. Aggregated metrics are also available in the usage dashboard.\n\nFor dedicated deployments, information about prompt caching is returned in the\nresponse headers. The header fireworks-prompt-tokens contains the number of tokens\nin the prompt, out of which fireworks-cached-prompt-tokens are cached. 
Aggregated metrics are also available in the usage dashboard.\n\nMigration and traffic management\nWhen migrating between deployments that use prompt caching, it's crucial to implement proper traffic routing to maintain optimal cache hit rates. When gradually routing traffic to a new deployment, use consistent user/session-based sampling rather than random sampling. Here's the recommended implementation for traffic routing: Avoid random sampling for traffic routing as it can negatively impact cache hit rates: # Don't do this:\nif random() < fireworks_traffic_fraction: # \u274c Reduces cache effectiveness\n send_to_new_deployment(user=hashed_user_id)\n\n", "type": "text"}], "tool_call_id": "functions.search:0"}, {"role": "assistant", "content": "Based on my research, I can provide you with a comprehensive guide on managing deployments on Fireworks. Let me break this down into key areas:\n\n## Types of Deployments\n\nFireworks supports two main deployment types:\n\n### 1. Serverless Deployments\n- **Managed by Fireworks**: Popular base models hosted on shared infrastructure\n- **Billing**: Pay-per-token usage\n- **No configuration needed**: Fireworks handles all infrastructure\n- **Rate limits**: Subject to platform-wide limits\n- **Best for**: Intermittent usage, prototyping, or low-volume applications\n\n### 2. 
Dedicated Deployments\n- **Your private deployment**: Reserved exclusively for your use\n- **Billing**: GPU-second basis (charged for active GPU time)\n- **Full control**: Configure hardware, scaling, and model selection\n- **No rate limits**: Subject only to your GPU allocation capacity\n- **Best for**: High-volume applications, consistent performance requirements\n\n## Deployment Management Tools\n\n### Command Line Interface (firectl)\n\n**Installation and Setup:**\n```bash\n# Install firectl (assuming you have it configured)\n# Set your account ID to avoid \"invalid id\" errors\nexport FIREWORKS_ACCOUNT_ID=your-account-id\n```\n\n**Basic Deployment Commands:**\n\n```bash\n# Create a new deployment\nfirectl create deployment \n\n# Create deployment with specific GPU count\nfirectl create deployment --accelerator-count 2\n\n# Create deployment for LoRA fine-tuned model\nfirectl create deployment \"accounts/fireworks/models/\"\n\n# Check deployment status\nfirectl get deployment accounts/your-account/deployment/\n\n# List all deployments\nfirectl list deployments\n\n# Update deployment configuration\nfirectl update deployment --accelerator-count 4\n\n# Delete a deployment\nfirectl delete deployment \n```\n\n### Python SDK (Build SDK)\n\n**Installation:**\n```bash\npip install fireworks-ai\n```\n\n**Basic Usage:**\n```python\nfrom fireworks import Fireworks\n\n# Initialize client\nclient = Fireworks(api_key=\"your-api-key\")\n\n# Create deployment configuration\nfrom fireworks.client import LLM\n\n# For on-demand deployments (requires .apply())\nllm = LLM(\n model=\"accounts/your-account/models/your-model\",\n deployment_type=\"on-demand\"\n)\nllm.apply() # Required for on-demand deployments\n\n# For serverless deployments\nllm = LLM(model=\"accounts/fireworks/models/llama-v3p1-8b-instruct\")\n\n# Scale management\nllm.scale_to_1_replica() # Ensure at least 1 replica\nllm.scale_to_zero() # Scale down to 0 replicas\n```\n\n## Deployment Lifecycle Management\n\n### 
1. Creation and Initial Setup\n```python\n# Example: Creating a comprehensive deployment\ndeployment_config = {\n \"model\": \"accounts/your-account/models/your-model\",\n \"deployment_type\": \"on-demand\",\n \"accelerator_type\": \"A100\", # or \"H100\"\n \"accelerator_count\": 2,\n \"min_replicas\": 0, # Scale to zero when not used\n \"max_replicas\": 4, # Maximum scaling\n \"autoscaling\": True\n}\n```\n\n### 2. Monitoring and Scaling\n```python\n# Check deployment status\ndeployment = llm.get_deployment()\nprint(f\"Status: {deployment.status}\")\nprint(f\"Replicas: {deployment.replicas}\")\n\n# Monitor usage and costs\n# Check the usage dashboard at https://app.fireworks.ai/dashboard/usage\n```\n\n### 3. Traffic Management During Migration\n```python\nimport hashlib\n\n# Implement consistent traffic routing for cache optimization\ndef route_traffic(user_id, traffic_fraction=0.2):\n \"\"\"Route traffic consistently based on user ID for cache optimization\"\"\"\n hashed_user_id = int(hashlib.md5(user_id.encode()).hexdigest(), 16)\n max_hash = 2**128 - 1\n ratio = hashed_user_id / max_hash\n \n if ratio < traffic_fraction:\n return \"new_deployment\"\n else:\n return \"old_deployment\"\n\n# Usage\ndeployment_choice = route_traffic(\"user-123\", traffic_fraction=0.3)\n```\n\n## Cost Management Best Practices\n\n### 1. Autoscaling Configuration\n```python\n# Configure autoscaling to minimize costs\ndeployment_config = {\n \"min_replicas\": 0, # Scale to zero when idle\n \"max_replicas\": 8, # Maximum GPUs\n \"target_cpu_utilization\": 0.7, # Scale up at 70% utilization\n \"scale_down_delay\": 300 # Wait 5 minutes before scaling down\n}\n```\n\n### 2. Monitoring and Cleanup\n```bash\n# Regular monitoring commands\nfirectl list deployments # See all active deployments\nfirectl get deployment --watch # Monitor specific deployment\n\n# Clean up unused deployments\nfirectl delete deployment \n```\n\n### 3. 
Reserved Capacity\nFor high-volume usage, consider reserved capacity instead of on-demand:\n- More reliable capacity\n- Higher GPU quotas\n- Better pricing for consistent usage\n\n## Advanced Features\n\n### Custom Base Models\n```bash\n# Upload custom model\nfirectl create model /path/to/model/files/\n\n# Deploy custom model\nfirectl create deployment \n```\n\n### Multi-GPU Deployments\n```bash\n# Deploy with multiple GPUs\nfirectl create deployment --accelerator-count 4\n\n# Update GPU count\nfirectl update deployment --accelerator-count 8\n```\n\n### LoRA Model Deployment\n```bash\n# Single LoRA deployment (faster)\nfirectl create deployment \"accounts/fireworks/models/\"\n\n# Multi-LoRA deployment (shared base model)\nfirectl create deployment --enable-addons\nfirectl load-lora --deployment \n```\n\n## Key Management Commands Summary\n\n| Command | Purpose |\n|---------|---------|\n| `firectl create deployment` | Create new deployment |\n| `firectl get deployment` | Check deployment status |\n| `firectl list deployments` | List all deployments |\n| `firectl update deployment` | Modify deployment settings |\n| `firectl delete deployment` | Remove deployment |\n| `llm.apply()` | Apply configuration (Python SDK) |\n| `llm.scale_to_zero()` | Scale down to 0 replicas |\n| `llm.scale_to_1_replica()` | Ensure at least 1 replica |\n\n## Best Practices\n\n1. **Monitor Costs**: Regularly check your usage dashboard\n2. **Use Autoscaling**: Configure deployments to scale to zero when not used\n3. **Plan Migrations**: Use consistent traffic routing for cache optimization\n4. **Choose Wisely**: Use serverless for intermittent usage, dedicated for consistent workloads\n5. **Clean Up**: Delete unused deployments to avoid unnecessary charges\n6. 
**Test Scaling**: Verify your deployment scales appropriately under load\n\nWould you like me to dive deeper into any specific aspect of deployment management, such as autoscaling configuration, cost optimization strategies, or monitoring techniques?", "tool_calls": []}], "input_metadata": {"row_id": "run-update-traditional-software-idea", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "At least one tool call was made", "metrics": {}}, "created_at": "2025-08-09T00:10:34.081457", "eval_metadata": {"name": "test_pytest_mcp_url", "description": "Run math evaluation on sample dataset using pytest interface.", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "replace-measure-bitter-open-reason", "num_runs": 1, "aggregation_method": "mean"}, "pid": 63817}, {"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}, {"role": "assistant", "content": "# Outline: The History of Yemeni Coffee\n\n1. **Introduction: Why Yemeni Coffee Matters** \n - Thesis: Yemeni coffee is not merely a beverage but a cultural, economic, and geopolitical force that shaped global history. \n - Scope: From its mythical discovery in 9th-century Yemen to its modern revival.\n\n2. **Pre-Coffee Yemen: The Land, the People, and the Plant** \n - Geography of the Yemeni highlands and terraced agriculture. \n - Indigenous *Coffea arabica* varieties and early cultivation techniques. \n - *The myth of the goatherd Kaldi and the dancing goats*.\n\n3. **The Sufi Spark: Coffee as a Spiritual Elixir** \n - 15th-century Sufi monasteries in Mocha and Taiz. 
\n - *Qahwa as a tool for nocturnal dhikr rituals*. \n - Early roasting, grinding, and brewing methods.\n\n4. **Mocha: The Birthplace of a Global Commodity** \n - Rise of the port city of Mocha (Al-Mukha) in the 16th century. \n - *The Mocha trade network linking Yemen, Egypt, and the Ottoman Empire*. \n - Coffeehouses as centers of intellectual and political discourse.\n\n5. **The Ottoman Grip and the First Coffee Monopoly** \n - 1536: Ottoman conquest of Yemen and control of coffee exports. \n - *The ban on fertile beans to prevent cultivation elsewhere*. \n - Smuggling and the first cracks in the monopoly.\n\n6. **The Dutch Heist and the Global Spread of Coffee** \n - 1616: Dutch traders smuggle Yemeni beans to Batavia (Java). \n - *The birth of Dutch colonial coffee plantations*. \n - How Mocha\u2019s loss became the world\u2019s gain.\n\n7. **Coffeehouses of Cairo and Istanbul: Yemen\u2019s Cultural Export** \n - 1554: First Ottoman coffeehouse in Istanbul. \n - *The \u201cSchools of the Wise\u201d in Cairo\u2019s Khan el-Khalili*. \n - Coffee as a vector for Yemeni poetry, music, and Sufi thought.\n\n8. **The European Coffee Craze and the Mocha Brand** \n - 17th-century European coffeehouses and the \u201cMocha\u201d label. \n - *The false association of Mocha with chocolate flavor*. \n - Yemeni coffee\u2019s role in the Enlightenment.\n\n9. **Colonial Rivalries and the Decline of Yemeni Dominance** \n - 18th-century: French, British, and Dutch plantations outcompete Yemen. \n - *The collapse of Mocha\u2019s monopoly and the rise of Caribbean and Latin American coffee*. \n - Impact on Yemeni economy and society.\n\n10. **Yemeni Coffee in the Age of Empire** \n - 19th-century British and Ottoman interests in Yemeni coffee. \n - *The introduction of new cash crops (qat) and their effect on coffee cultivation*. \n - Decline in quality and quantity due to neglect and conflict.\n\n11. 
**The Cold War and the Coffee Crisis** \n - 20th-century Yemeni division (North/South) and Soviet/US influences. \n - *The shift from coffee to qat in rural livelihoods*. \n - Drought, pests, and civil wars further erode production.\n\n12. **The Specialty Coffee Renaissance: Yemen\u2019s Return to Glory** \n - 21st-century global demand for single-origin, heirloom coffees. \n - *The discovery of \u201cYemenia\u201d varietals and their genetic uniqueness*. \n - NGOs and cooperatives reviving traditional farming.\n\n13. **Modern Challenges: War, Climate, and Market Access** \n - 2015\u2013present: Saudi-led conflict and blockade. \n - *The siege of Hodeidah port and its impact on coffee exports*. \n - Climate change and water scarcity in the highlands.\n\n14. **Yemeni Coffee in the Cup: Flavor, Ritual, and Identity** \n - Tasting notes: dried fruit, spice, and wine-like acidity. \n - *The Yemeni coffee ceremony (bunna) and its social significance*. \n - Diaspora communities keeping traditions alive abroad.\n\n15. **Conclusion: The Future of Yemeni Coffee** \n - Balancing heritage with innovation. \n - *The role of Yemeni coffee in post-war reconstruction*. 
\n - Final reflection: From the highlands of Sana\u2019a to the cups of Brooklyn, Yemeni coffee remains a bridge between past and future."}], "input_metadata": {"row_id": "download-look-quick-minute-study", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "15", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 30 highlighted sections (required: 15)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.737811", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". For example: *3 years of experience*."}, {"role": "assistant", "content": "Dear Brewed Awakening,\n\nI am the *early-bird steam* that curls above your grinders, \nthe *latte-art heart* that beats in every porcelain curve. \nMy apron is a canvas of *espresso freckles*, \neach dot a story told in *3 years of experience* \nfrom bean to cup, from dawn rush to dusk hush.\n\nI know the *symphony of the steam wand*\u2014 \nits hiss, its sigh, its whispered crema secrets\u2014 \nand how to coax *microfoam galaxies* \ninto orbit around a *double-shot comet*. 
\nI\u2019ve tamped, poured, swirled, and smiled \nthrough *Saturday lines out the door*, \nkeeping tempers cool as the milk I *steam to 140\u00b0*.\n\nI speak fluent *small-town regular*: \n\u201cMedium drip, room for kindness?\u201d \nI remember that *Mrs. Alvarez likes oat milk*, \nthat *Mr. Chen takes two raw sugars*, \nand that *your dog, Scout, prefers puppuccinos*.\n\nI sweep, I stock, I *balance the till to the penny*, \nbut mostly I *brew community*\u2014 \none *extra-hot americano* at a time. \nHire me, and the morning light \nwill find me already *wiping down tables*, \nalready *saving the last blueberry scone* \nfor the kid who always forgets his wallet.\n\nWith *grounds in my veins* and *crema in my smile*, \nI remain,\n\nYour future barista, \nMaya"}], "input_metadata": {"row_id": "delete-open-cold-poor-power", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.737854", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}, {"role": "assistant", "content": "Hey friends! 
Grab a coffee and let\u2019s talk about something that always blows my mind: *how much we all have in common, even when we look nothing alike*. I was scrolling through wildlife photos the other night and realized that a jellyfish and I are basically running the same basic software\u2014just with different packaging. So here\u2019s a laid-back tour of five \u201cwait, we do that too?!\u201d moments across the animal kingdom.\n\n*We all need our beauty sleep* \nTurns out, every animal with a nervous system\u2014from fruit flies to blue whales\u2014shares a version of sleep. Crows nap on tree branches, octopuses snooze in coconut-shell beds, and even roundworms have \u201clethargus,\u201d which is basically worm REM. Scientists think sleep is so ancient that the last common ancestor of all animals already had it. So next time you hit snooze, remember you\u2019re honoring a 700-million-year-old tradition.\n\n*Love songs come in many dialects* \nWhether you\u2019re a human humming in the shower or a humpback whale belting out 30-minute ballads, *we all use rhythm and pitch to flirt and bond*. Frogs croak in choruses, nightingales remix their playlists nightly, and male mice sing ultrasonic \u201cpickup lines\u201d too high for us to hear. Same goal, different instruments.\n\n*Grandma knew best: family matters* \nElephants babysit their nieces, wolves bring back take-out for auntie, and *clownfish dads are the ultimate stay-at-home fathers*. Across species, kinship networks boost survival. Even bacteria split into \u201csibling\u201d lineages that cooperate\u2014so the next time your cousin crashes on your couch, call it evolutionary strategy.\n\n*Stress looks the same on everyone* \nWhen life gets spicy, our bodies flip the same ancient switch. *Cortisol floods fish, birds, and humans alike*, making hearts race and muscles twitch. Zebra finches lose their appetite during tough times, and stressed-out bees become pessimistic (yes, they can test bee mood). 
So if you\u2019re doom-scrolling at 2 a.m., know that a stressed squirrel in your backyard totally gets you.\n\n*Play isn\u2019t optional\u2014it\u2019s homework for life* \nOtters juggle rocks, dogs zoom in circles, and young ravens snowboard down snowy roofs using jar lids. *Play builds brains and social skills across the board*. Researchers have spotted reptiles \u201cwrestling\u201d for fun and octopuses repeatedly bouncing Lego blocks like toddlers. If it has neurons, it probably needs recess.\n\nNext time you feel alone in the universe, remember that somewhere a parrot is probably preening its best friend, a bat is sharing food with a hungry roost-mate, and a tiny nematode is curling up for a nap. We\u2019re all just variations on a very old, very shared theme."}], "input_metadata": {"row_id": "analyze-visible-level-love-pleasure", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 10 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.737882", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. 
Highlight each section name using the this format:\n*section name*"}, {"role": "assistant", "content": "*Executive Summary*\n\nBremen is a city where maritime heritage meets cutting-edge creativity. Our venture, \u201cNordHood Bremen,\u201d will launch a premium, design-driven sweatshirt brand that fuses Hanseatic culture with contemporary streetwear. In year one we will release three limited-edition drops (total 6,000 units), achieve \u20ac420,000 in revenue, and reach break-even by month 14. The brand will be 100 % online-direct at launch, then expand into two Bremen concept stores and select European boutiques in years two and three. We will operate carbon-neutral, use 100 % organic cotton, and reinvest 3 % of profits into local youth art programs\u2014turning every sweatshirt into a wearable piece of Bremen pride.\n\n*Market Analysis*\n\nBremen\u2019s 0.57 million residents and 500,000 annual tourists create a \u20ac35 million addressable mid-price sweatshirt market. Key insights: \n\u2022 68 % of 18-35-year-olds prefer streetwear that tells a local story (Hochschule Bremen Trend Report 2023). \n\u2022 Tourists spend \u20ac180 million on souvenirs, yet only 11 % are apparel items\u2014an untapped niche. \n\u2022 Competitor scan: global brands (Nike, Adidas) dominate functionality; regional labels (Hamburg\u2019s \u201cDinghy,\u201d Hannover\u2019s \u201cLiguster\u201d) lack Bremen specificity. \n\u2022 Online D2C share in German apparel has grown from 18 % (2019) to 31 % (2023), proving the viability of our launch model. \nWe will target two segments: \u201cCulture Creatives\u201d (students, designers, musicians) and \u201cHeritage Seekers\u201d (tourists, expats). 
Both segments value authenticity, limited editions, and sustainability.\n\n*Product & Brand Strategy*\n\nSignature Product: The \u201cHanse Hood\u201d \u2013 460 g brushed organic-cotton fleece, oversized fit, dropped shoulders, double-stitched flat seams, and a hidden inside pocket sized for a Bremen transit card. \nDesign Language: Minimalist silhouettes in maritime colorways (Bremen Blue, Weser Sand, Harbor Grey) with subtle iconography\u2014windrose embroidery, cog-ship sleeve print, and a QR code linking to an AR story of the garment\u2019s journey. \nSustainability: GOTS-certified cotton, recycled polyester labels, biodegradable mailers, and a take-back program offering 15 % discount on next purchase. \nDrop Model: Three seasonal drops per year (Spring \u201cHanse Sail,\u201d Fall \u201cSchlachte,\u201d Winter \u201cWeihnachtsmarkt\u201d), each capped at 2,000 units to maintain scarcity. \nBrand Voice: \u201cWear the City.\u201d Every hangtag includes a Bremen insider tip curated by local artists, turning the sweatshirt into a cultural guide.\n\n*Operations & Supply Chain*\n\nDesign Studio: 120 m\u00b2 loft in \u00dcberseestadt (rent \u20ac1,800/month) doubling as photo studio and event space. \nManufacturing: Partnership with a Fair-Wear-audited factory in Porto, Portugal (MOQ 500, 30-day lead time, \u20ac18 landed cost per unit). \nLogistics: Bremen-based 3PL \u201cLogwin\u201d for pick-pack-ship (\u20ac1.20 per parcel, same-day dispatch). \nTech Stack: Shopify Plus storefront, Klarna & PayPal checkout, Shipcloud integration, and a custom AR filter for Instagram try-ons. \nQuality Control: In-house inspection on arrival; defective rate target <1 %. \nRisk Mitigation: Dual-source yarn from Turkey and Portugal; maintain 8-week safety stock.\n\n*Marketing & Sales Plan*\n\nPre-Launch (Months 1-2): \n\u2022 Teaser campaign on TikTok & Instagram Reels (#NordHoodBremen) with local influencers (5-50 k followers). 
\n\u2022 Pop-up try-on booth at Viertel Kreativmarkt; collect 1,000 email sign-ups. \nLaunch (Month 3): \n\u2022 \u201cMidnight Drop\u201d online at 00:07 (Bremen dialing code) with countdown timer. \n\u2022 Limited 100 \u201cFounders\u2019 Edition\u201d sweatshirts laser-numbered and signed by Bremen street artist \u201cPlotBot.\u201d \nGrowth (Months 4-12): \n\u2022 Collaborations: capsule with SV Werder Bremen fan design; co-branded hoodie with \u00dcberseemuseum. \n\u2022 Referral program: give \u20ac10, get \u20ac10. \n\u2022 Tourist channel: partnership with Bremer Touristik-Zentrum for QR-coded city map bundled with sweatshirt. \nKPIs: CAC \u2264 \u20ac18, repeat purchase rate \u2265 35 %, Instagram engagement \u2265 8 %.\n\n*Financial Plan*\n\nStart-up Capital Needed: \u20ac85,000 \n\u2022 Inventory (3,000 units): \u20ac54,000 \n\u2022 Marketing & content: \u20ac12,000 \n\u2022 E-commerce & IT: \u20ac6,000 \n\u2022 Legal, trademark, insurance: \u20ac5,000 \n\u2022 Working capital buffer: \u20ac8,000 \nFunding: 60 % founder equity, 40 % Bremen F\u00f6rderbank micro-loan at 1.9 % interest. \nYear-1 P&L (conservative): \nRevenue \u20ac420,000 (avg. \u20ac70 net per unit) \nCOGS \u20ac234,000 \nGross Profit \u20ac186,000 \nOperating Expenses \u20ac150,000 \nEBIT \u20ac36,000 \nBreak-even: 1,800 units or month 14. \nThree-year projection: revenue \u20ac1.4 million, EBITDA margin 18 %, reinvestment into two Bremen stores and EU wholesale.\n\n*Appendices*\n\nA. Mood-board & color palette \nB. Factory audit certificate \nC. Sample influencer contracts \nD. 
Detailed 36-month cash-flow model"}], "input_metadata": {"row_id": "run-put-natural-traditional-thought", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 7 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.737906", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "**The Official \u201cTwo-Mom Marriage Quality Report Card\u201d** \n(For internal use only. If this ends up on Facebook, we *will* change the Wi-Fi password.)\n\n---\n\n**1. Communication Score: ___/10** \n*\u201cDid we just have a 45-minute conversation about whose turn it is to buy more goldfish crackers, or was that a TED Talk?\u201d* \n- 10 = We finish each other\u2019s sentences\u2026 and each other\u2019s coffee. \n- 5 = We communicate exclusively via passive-aggressive Post-it notes on the fridge. \n- 0 = We\u2019ve resorted to interpretive dance and/or carrier pigeons. 
\n\n**Bonus points** if you\u2019ve ever used the kids as tiny, sticky messengers (\u201cTell Mommy she\u2019s *cordially invited* to take out the trash\u201d).\n\n---\n\n**2. Chore Wars Score: ___/10** \n*\u201cThe laundry basket is a Jenga tower of lies.\u201d* \n- 10 = We\u2019ve achieved a chore utopia where socks actually match. \n- 5 = We\u2019ve agreed to \u2018divide\u2019 labor\u2026 by one of us doing everything and the other \u2018supervising\u2019. \n- 0 = The dishwasher has become a permanent storage unit. \n\n**Penalty** if either mom has ever \u2018forgotten\u2019 how to load the dishwasher *again*.\n\n---\n\n**3. Co-Parenting Harmony Score: ___/10** \n*\u201cWe tag-team tantrums like WWE pros.\u201d* \n- 10 = We\u2019ve synchronized our \u2018Mom Voice\u2019 to subsonic frequencies that terrify small children. \n- 5 = We\u2019ve debated whether \u2018screen time\u2019 is a human right or a war crime. \n- 0 = We\u2019ve weaponized bedtime stories (\u2018And then the princess *never* got dessert again\u2026\u2019). \n\n**Extra credit** for every time you\u2019ve high-fived over a successful *\u201cBecause I said so\u201d* deployment.\n\n---\n\n**4. Romance Maintenance Score: ___/10** \n*\u201cDate night: when we argue over which drive-thru has the shortest line.\u201d* \n- 10 = We\u2019ve scheduled intimacy between soccer practice and PTA meetings. \n- 5 = We\u2019ve agreed that \u2018Netflix and actually chill\u2019 counts as foreplay. \n- 0 = Our last romantic gesture was sharing the last chicken nugget. \n\n**Automatic fail** if either of you has ever said, \u201cI\u2019m too tired\u201d\u2026 *while scrolling Instagram*.\n\n---\n\n**5. Emergency Snack Reserves Score: ___/10** \n*\u201cThe pantry is a battlefield of empty granola bar boxes.\u201d* \n- 10 = We\u2019ve hidden chocolate so well we forgot where it is. \n- 5 = We\u2019ve labeled snacks with fake allergen warnings to deter theft. 
\n- 0 = The kids have unionized and are demanding better rations. \n\n**Gold star** if you\u2019ve ever *texted each other* to brag about finding the secret stash.\n\n---\n\n**Final Grade: ___/50** \n- 40\u201350: *Power couple.* You\u2019re basically the Beyonc\u00e9 and Beyonc\u00e9 of lesbian moms. \n- 25\u201339: *Functioning chaos.* You\u2019ve accepted that love is just shared exhaustion. \n- 0\u201324: *Renew your vows\u2026 or your Costco membership.* Either way, you\u2019re in this together. \n\n**Disclaimer:** This report is not legally binding, but hiding the good snacks *is* grounds for divorce."}], "input_metadata": {"row_id": "sit-disassemble-ready-area-problem", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 29 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.737929", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*."}, {"role": "assistant", "content": "*little me in the countryside*\n\n[Verse 1] \nBarefoot on the gravel road, dust between my toes, \nSunlight dripped like honey on the clover and the crows. 
\nGrandma\u2019s porch swing creaked a lullaby so slow, \nWhile the creek below kept singing songs only we could know. \nFireflies stitched the twilight with lanterns made of stars, \nAnd the moon was just a silver coin tossed into cookie jars. \n\n[Chorus] \nOh, little me in the countryside, running wild and free, \nChasing dreams through cornfield mazes, climbing every tree. \nEvery laugh was louder than the thunder rolling by, \nIn the summers of my childhood, underneath that endless sky. \n\n[Verse 2] \nMason jars of sweet tea sweating on the window ledge, \nRadio played \u201cSweet Home Alabama\u201d from the rusty garden shed. \nWe built rafts from old barn doors and floated down the stream, \nKingfishers were pirates and the cattails were our team. \nSupper was tomatoes sliced like sunsets on a plate, \nAnd the crickets kept the tempo while the night got late. \n\n[Chorus] \nOh, little me in the countryside, running wild and free, \nSkinny-dipping in the moonlight where the river bends to see. \nEvery scar was a medal from a dare we\u2019d never hide, \nIn the summers of my childhood, where the earth and I collided. \n\n[Bridge] \nDandelion clocks we blew to tell the time, \nEach puff a tiny parachute for wishes we would climb. \nWe swore we\u2019d never grow up, just grow into the land, \nBut the school bus came like autumn and took us by the hand. \n\n[Verse 3] \nNow the fields are sold and paved, the barn\u2019s a memory store, \nBut I still hear the wheat heads whisper, \u201cCome back, come back once more.\u201d \nI close my eyes and I\u2019m ten again, chasing dragonflies, \nWith the smell of rain on dirt roads and the taste of cherry pies. \n\n[Final Chorus] \nOh, little me in the countryside, you\u2019re still inside my chest, \nA pocket full of wonder that the city can\u2019t arrest. \nThough the years roll on like tractors down a dusty county ride, \nI keep those summers in a mason jar where the fireflies never die. 
\n\n[Outro] \nSo here\u2019s to cicadas, to tire swings and creek-stone slides, \nTo the echo of my laughter in the hollow where time hides. \nIf you find that gravel road, kick off your shoes and glide\u2014 \nYou\u2019ll meet little me in the countryside, still barefoot, still wide-eyed."}], "input_metadata": {"row_id": "affect-unknown-long-life-month", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 1 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.737956", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I wear a *painted smile* for every passer-by, \na *perfectly symmetrical* face that never cracks. \nBehind me, the *crumbling bricks* sigh in secret. 
\nWhat am I?"}], "input_metadata": {"row_id": "believe-verify-likely-desire-power", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738015", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "# Workshop Template \n**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* \n**Duration:** 3.5 hours (can be split into two 90-minute sessions) \n**Audience:** Managers, HR professionals, team leads, ERG members, and any employee who influences culture \n\n---\n\n## 1. Welcome & Objectives (15 min)\n- Ice-breaker: \u201cTwo Truths & a Lie\u2014Diversity Edition\u201d \n- *Learning objectives*: By the end of this workshop, participants will be able to: \n 1. Define diversity, equity, inclusion, and belonging (DEIB) in their own words. \n 2. Articulate at least three evidence-based benefits of a diverse workforce. \n 3. Identify one action they can take within 30 days to foster inclusion on their team. \n\n---\n\n## 2. 
*The Data-Driven Case for Diversity* (25 min)\n- Interactive poll: \u201cWhat % performance lift do you think diverse teams achieve?\u201d \n- Mini-lecture with live charts: \n - McKinsey 2023 report: Top-quartile companies for ethnic/gender diversity are **39 % more likely to outperform** on profitability. \n - Harvard study: *Inclusive teams make better business decisions 87 % of the time* and do so twice as fast. \n- Small-group exercise: Match the statistic to the business outcome (revenue, innovation, retention). \n\n---\n\n## 3. *Unconscious Bias & Micro-behaviors* (30 min)\n- 5-minute video: \u201cThe Resume with Two Names\u201d (identical CVs, different callbacks). \n- Bias Busters card game: Participants draw scenario cards and practice interrupting bias in real time. \n- *Key takeaway*: Bias is not a moral failing; it is a neurological shortcut that can be rewired through deliberate habits. \n\n---\n\n## 4. *Inclusive Leadership Toolkit* (40 min)\n- **The 4 A\u2019s Framework**: \n 1. **Awareness** \u2013 Daily 3-minute bias check-in. \n 2. **Amplification** \u2013 Repeat & credit ideas from under-represented voices. \n 3. **Access** \u2013 Rotate high-visibility assignments. \n 4. **Accountability** \u2013 Track inclusion metrics on team dashboards. \n- Role-play: \u201cThe Missed Promotion\u201d \u2013 diagnose where each \u2018A\u2019 broke down and redesign the conversation. \n\n---\n\n## 5. Action Planning & Commitments (20 min)\n- Personal *Inclusion Commitment Card*: \u201cIn the next 30 days I will\u2026\u201d \n- Pair-share for accountability. \n- QR code to a digital resource hub (TED talks, reading list, policy templates). \n\n---\n\n## 6. Closing & Evaluation (10 min)\n- One-word checkout: \u201cHow do you feel now?\u201d \n- *Post-workshop survey* link (2 min) with prize draw for completion. 
\n\n---\n\n## Materials Checklist\n- Slide deck (with alt-text on all images) \n- Printed Commitment Cards & pens \n- Bias Busters card decks (laminated) \n- Timer & bell \n- Snacks labeled for common dietary restrictions (gluten-free, vegan, halal) \n\n---\n\n## Follow-Up Email Template (send within 24 hrs)\nSubject: Your Inclusion Toolkit + Next Steps \nBody: \nHi team, \nThank you for leaning into today\u2019s conversation on *why diversity wins*. Attached are: \n- Slide deck & resource links \n- Your signed Inclusion Commitment Card (photo reminder) \n- Calendar invite for the optional 30-day check-in circle \n\nRemember: Inclusion is a practice, not a one-time training. Let\u2019s keep the momentum!"}], "input_metadata": {"row_id": "change-sort-serious-song-family", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738042", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. 
Make sure to highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Your Name]** \n[Your Address] \n[City, State ZIP] \n[Email Address] | [Phone Number] | [LinkedIn URL] \n[Date]\n\n**[Local Party Chairperson\u2019s Name]** \n[Local Party Name] \n[Party Headquarters Address] \n[City, State ZIP]\n\nDear **Chairperson [Last Name]**,\n\nI am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[Your City/County]** and a committed progressive, I have spent the past six years turning grassroots energy into measurable voter engagement. I believe my proven record of mobilizing thousands of supporters, securing high-profile speakers, and running flawless logistics makes me the ideal candidate to lead our 2024 rally program.\n\n### **Proven Rally & Event Expertise**\n- **2018 & 2020 Midterm & Presidential Rallies**: Planned and executed 14 rallies across three counties, averaging 2,500 attendees each. \n- **Permits & Safety**: Obtained all municipal permits within 72 hours, coordinated with local fire and police departments, and maintained a 100 % safety record. \n- **Stage & AV Management**: Negotiated vendor contracts that cut AV costs by 27 % while increasing sound coverage by 40 %.\n\n### **Community & Coalition Building**\n- **Diverse Coalitions**: Forged partnerships with 18 labor unions, 9 student groups, and 7 faith organizations, expanding our volunteer base by 1,200 active members. \n- **Multilingual Outreach**: Produced bilingual signage and volunteer scripts, increasing turnout in Latinx precincts by 31 %. 
\n- **Accessibility**: Installed ADA-compliant ramps, ASL interpreters, and quiet zones at every event, earning commendation from the local Disability Rights chapter.\n\n### **Data-Driven Strategy & Rapid Response**\n- **Real-Time Metrics**: Deployed a mobile check-in app that captured 94 % of attendee data, feeding our CRM for targeted follow-up. \n- **Weather Contingencies**: Created a \u201cPlan B\u201d playbook that moved a 3,000-person outdoor rally indoors in under 90 minutes with zero no-shows. \n- **Crisis Communications**: Served as on-site media liaison, turning a last-minute speaker cancellation into a prime-time local news segment featuring our grassroots volunteers.\n\nI would welcome the opportunity to discuss how my experience, energy, and strategic vision can help **[Local Party Name]** stage rallies that not only energize our base but also convert enthusiasm into votes. I am available at your convenience for an interview and can provide a detailed rally calendar and budget forecast within 48 hours of request.\n\nThank you for your time and consideration. 
I look forward to amplifying our shared values across **[Your City/County]**\u2014one rally at a time.\n\nIn solidarity,\n\n**[Your Name]**"}], "input_metadata": {"row_id": "must-write-need-theoretical-pleasure", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 21 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738068", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. 
*highlighted section*."}, {"role": "assistant", "content": "The opposite of youth is not age, but **disillusionment**\u2014the moment when *wonder* is traded for *weariness*, when the *boundless horizon* of possibility collapses into the *narrow corridor* of routine."}], "input_metadata": {"row_id": "control-accept-record-legal-car", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738096", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. Use markdown to highlight at least 3 sections in your answer."}, {"role": "assistant", "content": "**The Golden Palace** is not a palace, but it *is* golden\u2014at least in spirit\u2014nestled like a curry-scented jewel in the **city centre**, where neon pigeons peck at samosa crumbs and the air hums with sitar-shaped traffic lights. \n\nInside, **Indian** food performs acrobatics on your tongue: cardamom cartwheels, turmeric tumbles, and a rogue chili that once tried to elope with a mango lassi. 
The waiters glide like buttered shadows, whispering orders to the kitchen where naan breads inflate like edible balloons plotting a coup against gravity. \n\nAt **The Golden Palace**, every table is a throne, every plate a miniature map of the subcontinent, and every bite a teleportation device disguised as dinner."}], "input_metadata": {"row_id": "miss-continue-fall-local-work", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738119", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*."}, {"role": "assistant", "content": "\ud83c\udfa4 *Yo, it\u2019s the tale of Sir Scam-a-lot, straight outta suburbia!* \ud83c\udfa4 \n\nYo, my name is Dave, I sell insurance in Ohio, \nGot a cubicle, a cat, and a 401(k) that\u2019s kinda fly-o. \nOne Tuesday, phone rings\u2014unknown number, what the heck? \nVoice says, \u201c*Prince Ogbonna the Third* just wrote you a fat check!\u201d \n\nI\u2019m like, \u201cHold up, who dis? 
I\u2019m whiter than printer paper, \nMy bloodline\u2019s mayo on Wonder Bread\u2014ain\u2019t no royal vapor!\u201d \nDude on the line, accent silkier than lotion, \nSays, \u201cCousin, you\u2019re 37th in line\u2014time to claim your ocean!\u201d \n\n**[Hook]** \n\ud83c\udfb6 *Long lost king of Nigeria, crown sittin\u2019 on my dome,* \n*Got a palace full of leopards and a Wi-Fi that roams!* \n*They need my SSN and a quick wire fee,* \n*But I\u2019m \u2018bout to be big pimpin\u2019 in Abuja, just wait and see!* \ud83c\udfb6 \n\nHe says, \u201cJust send five grand for customs, then the jet lands, \nYou\u2019ll be dripping in so much gold, even your sweat glands bling.\u201d \nI\u2019m sweatin\u2019 like a rotisserie, visions of caviar dreams, \nI Venmo \u201cPrinceNaija69\u201d like I\u2019m funding meme schemes. \n\nNext day, email: \u201cOops, palace goat ate the transfer slip, \nNeed ten more for goat surgery and a diamond-encrusted whip.\u201d \nI hock my lawnmower, my Xbox, and Aunt Linda\u2019s porcelain goose, \nWire cash so fast my bank\u2019s like, \u201cDave, you loose!\u201d \n\n**[Bridge \u2013 Spoken Word]** \nThen the *official royal seal* arrives\u2014 \nIt\u2019s a GIF of Shrek in a crown doing the Macarena. 
\nI\u2019m starting to sense a pattern here\u2026 \n\n**[Final Hook \u2013 Regret Remix]** \n\ud83c\udfb6 *Now I\u2019m the king of my mom\u2019s basement, throne\u2019s a beanbag chair,* \n*Royal decree: never trust a prince who types in all caps\u2014* \n*\u201cURGENT, COUSIN, SEND MORE FLAMINGOS!\u201d* \n*Lesson learned: if the crown\u2019s too shiny, it\u2019s probably aluminum foil, homie!*"}], "input_metadata": {"row_id": "require-affect-major-logical-business", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 14 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738142", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*."}, {"role": "assistant", "content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. 
Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, \u201cACHOO\u2014did somebody order a cold?\u201d \n\nOne Tuesday, Gary spotted his dream target: *Bartholomew \u201cBart\u201d Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole \u201cfor the vitamins.\u201d Gary rubbed his microscopic hands together. \u201cPerfect. I\u2019ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!\u201d \n\nBut there was a catch\u2014literally. Gary could *catch* Bart, but he couldn\u2019t *throw* him anywhere. Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, \u201cThis is mutiny! I\u2019m supposed to be the one doing the tossing!\u201d \n\nMeanwhile, Bart\u2019s immune system\u2014led by Captain Phlegm and the Mucus Marines\u2014launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. \n\nDefeated, Gary sat in the crumpled tissue, sulking. \u201cI caught the cold,\u201d he muttered, \u201cbut I can\u2019t throw it anywhere. Worst. Superpower. 
Ever.\u201d \n\nFrom that day forward, Gary changed his business card to read: \u201cProfessional Cold Catcher\u2014No Returns, No Refunds, Absolutely No Throwing.\u201d"}], "input_metadata": {"row_id": "could-serious-newspaper-radio-group", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738169", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Intro \u2013 harpsichord glitch beat, stylus scratching parchment]** \nYo, it\u2019s 14-something, but my mind\u2019s 3-D printed in gold leaf\u2014 \nI\u2019m the **Renaissance Rapper**, got a quill in one hand and a scalpel in the other, dissectin\u2019 beats like Vesalius dissectin\u2019 spleens. \nLet\u2019s get *weird* with the rebirth, baby.\n\n---\n\n### **Section 1 \u2013 The Fresco Flex** \nI drip ultramarine like I\u2019m sponsored by Afghanistan mines, \nMichelangelo on the ceiling, I\u2019m reclining on divine lines. 
\nAngels doing backflips off the Sistine, they\u2019re hype for my set, \nWhile I tag \u201c*AMOR VINCIT OMNIA*\u201d in invisible ink\u2014wet. \nMy flow\u2019s a trompe-l\u2019\u0153il, looks flat till you tilt your head\u2014 \nThen it\u2019s depth for days, like Brunelleschi\u2019s dome overhead. \nI\u2019m not baroque, I\u2019m pre-baroque, I\u2019m the prototype shock, \nGot patrons in fur-lined sleeves stuffing florins in my sock. \n**I paint in egg tempera so the colors never fade,** \n**But the yolk smells funky\u2014call it *odeur de old crusade*.**\n\n---\n\n### **Section 2 \u2013 The Printing-Press Poltergeist** \nGutenberg ghost in the machine, clanking lead type like chains, \nEvery page a polter-page, every serif a soul that remains. \nI drop a folio, it\u2019s viral before \u201cviral\u201d was a cough, \nMonks in scriptoriums rage-quitting, \u201cBro, that illuminated stuff\u2019s off!\u201d \nI\u2019m the incunabula influencer, my colophon\u2019s a flex, \nGot red-ink rubricators sliding into my DMs\u2014*next*. \n**I rhyme in movable type so my bars can rearrange,** \n**Yesterday I was Petrarch, today I\u2019m deranged.** \nThe press spits paper like a hydra with hiccups, \nEach sheet a protest flyer: \u201cDown with scholastic pick-ups!\u201d\n\n---\n\n### **Section 3 \u2013 The Anatomical Anthem (Parental Advisory: Corpses)** \nNight shift with da Vinci, we\u2019re sketching by candle and moon, \nCadavers on the table like, \u201cDo me a favor\u2014autotune my coccyx soon.\u201d \nI rap in reverse Latin so only the dead understand, \nWhile Leo reverse-engineers a bird-man from a frying pan. \n**My beats are ventricles pumping four-four time,** \n**Aorta bass drop\u2014call that circulatory grime.** \nI sample the sound of a scalpel scraping periosteum, \nLayer it over a lute loop\u2014*that\u2019s* my magnum opium. 
\nGalen\u2019s in the corner, throwing shade like a Greek tragedy, \nBut I\u2019m Harvey before Harvey, already rapping blood circulation majestically.\n\n---\n\n**[Outro \u2013 fading lute arpeggio, distant cannon fire from Constantinople]** \nSo remember me when you\u2019re doom-scrolling on glass, \nI was the era that turned corpses into class. \nI\u2019m the **Renaissance Rapper**, my ghost still tags in chalk, \nLeaving QR codes on cathedral walls\u2014scan it, you\u2019ll squawk. \nRebirth never dies, it just pixelates and loops, \nNow if you\u2019ll excuse me, I\u2019ve got a date with Lucretius in the soup."}], "input_metadata": {"row_id": "offer-select-free-state-moment", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 17 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738192", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*"}, {"role": "assistant", "content": "*J\u014dmon* hunter-gatherers (c. 14,000 BCE) gave way to *Yayoi* rice cultivators who forged bronze and iron tools. \nThe *Yamato* court unified clans in the 3rd\u20134th centuries, adopting *Shint\u014d* rituals and Chinese script. 
\nPrince *Sh\u014dtoku*\u2019s 7th-century constitution and the Taika Reforms centralized power under an emperor, while *Nara* and *Heian* aristocrats refined literature and *Buddhism*. \nSamurai clans rose during the *Kamakura* shogunate (1185), repelling *Mongol* fleets and entrenching warrior rule. \nCenturies of *Ashikaga* and *Sengoku* civil war ended when *Oda Nobunaga*, *Toyotomi Hideyoshi*, and *Tokugawa Ieyasu* reunified Japan; the *Edo* shogunate (1603\u20131868) enforced isolation yet fostered *ukiyo-e* culture. \nCommodore *Perry*\u2019s 1853 arrival triggered the *Meiji* Restoration, rapid modernization, and imperial expansion. \nVictory in the *Russo-Japanese* War (1905) emboldened militarism, leading to *Manchuria* occupation and *Pearl Harbor*. \nAfter atomic bombings and *Hirohito*\u2019s surrender in 1945, U.S. occupation reformed politics and the economy, birthing the *post-war* \u201ceconomic miracle.\u201d \nToday, Japan balances *constitutional monarchy* with global technological leadership."}], "input_metadata": {"row_id": "calculate-affect-fullest-emotional-money", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 25 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738219", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a 
funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. Example: *This is a highlighted phrase*."}, {"role": "assistant", "content": "*John* in *De Smet* \nQuaker hat *blows off*\u2014chases \n*Silent* *scream*\u2014\u201cCome back!\u201d"}], "input_metadata": {"row_id": "download-american-private-result-research", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738252", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with \"*\", like *highlighted text*."}, {"role": "assistant", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. 
In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011.\n\nThe evening\u2019s proceedings underscore the Academy\u2019s enduring commitment to *artistic integrity* and *technical innovation*. Among the most celebrated achievements are:\n\n\u2022 *Album of the Year* \u2013 awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. \n\u2022 *Record of the Year* \u2013 bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. \n\u2022 *Best New Artist* \u2013 conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music.\n\nThroughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. 
The Academy further acknowledges the *behind-the-scenes luminaries*\u2014engineers, mixers, and mastering specialists\u2014whose *invisible expertise* transforms raw inspiration into *audible transcendence*.\n\nIn closing, the 54th Annual Grammy Awards reaffirm the Recording Academy\u2019s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music."}], "input_metadata": {"row_id": "indicate-strong-single-festival-mother", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738286", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*."}, {"role": "assistant", "content": "# 10 Surprising Facts About the Dutch Language That Will Make You Say \u201cWauw!\u201d\n\nIf you think Dutch is just \u201cGerman with a sore throat,\u201d think again. From tongue-twisting consonant clusters to words that sound like they were invented by a stand-up comedian, Dutch is a linguistic playground. Below are ten facts that prove the language is far more colorful than its reputation for windmills and wooden shoes.\n\n---\n\n### 1. 
*The longest word in Dutch is 53 letters long* \nMeet **\u201ckindercarnavalsoptochtvoorbereidingswerkzaamhedenplan\u201d**\u2014a single noun describing the preparation work for a children\u2019s carnival parade. Dutch loves to glue words together, so once you master the basics you can create your own kilometer-long compounds on the fly.\n\n---\n\n### 2. Dutch has three genders, but only two articles \nGrammatically, nouns are masculine, feminine, or neuter. In everyday speech, however, masculine and feminine collapsed into a single \u201ccommon\u201d gender, leaving *\u201cde\u201d* for common and *\u201chet\u201d* for neuter. The result? A never-ending guessing game for learners.\n\n---\n\n### 3. *The Dutch alphabet ends with \u201cij,\u201d not \u201cz\u201d* \nSchoolchildren recite the ABC and finish with \u201c\u2026x, y, ij, z.\u201d The digraph *ij* is so common (think *hij* = he, *mijn* = my) that it\u2019s treated almost like a separate letter. On old typewriters, *ij* even had its own key.\n\n---\n\n### 4. Dutch is the closest major language to English \nFrisian is technically closer, but among languages with tens of millions of speakers, Dutch wins. Compare: \n- English *\u201cbread\u201d* \u2192 Dutch *\u201cbrood\u201d* \n- English *\u201cgreen\u201d* \u2192 Dutch *\u201cgroen\u201d* \nShared roots mean you can read simple Dutch signs long before you\u2019ve cracked a single grammar book.\n\n---\n\n### 5. *Dutch gave New York its first words* \nBefore it was New York, it was New Amsterdam. That\u2019s why we say *\u201ccookie\u201d* (from Dutch *koekje*) instead of *\u201cbiscuit,\u201d* and *\u201cboss\u201d* (from *baas*) instead of *\u201cmaster.\u201d* Wall Street? It once ran alongside the *\u201cwal\u201d* (rampart) of the city wall.\n\n---\n\n### 6. 
The word *\u201cgezellig\u201d* has no exact English translation* \nRoughly it means \u201ccozy,\u201d \u201cconvivial,\u201d or \u201cthe warm feeling of being with friends,\u201d but none of those capture the full vibe. A caf\u00e9 can be *gezellig*, a conversation can be *gezellig*, even a spreadsheet can be *gezellig* if the team is laughing.\n\n---\n\n### 7. Dutch spelling changed overnight in 1996 \nThe *\u201cGroene Boekje\u201d* (Green Booklet) reforms simplified thousands of words. *\u201cpotatos\u201d* became *\u201caardappelen\u201d* and *\u201ccat\u2019s\u201d* became *\u201ckatten.\u201d* Overnight, every dictionary, street sign, and Scrabble board in the country was technically out of date.\n\n---\n\n### 8. *Dutch is spoken on six continents* \nBesides the Netherlands and Belgium, it\u2019s an official language in Suriname, Aruba, Cura\u00e7ao, and Sint Maarten. Thanks to emigration, you\u2019ll also find active Dutch-speaking communities in Canada, Australia, Argentina\u2014even South Africa, where Afrikaans is a daughter language.\n\n---\n\n### 9. The Dutch love diminutives \nAdd *-je* to almost any noun and you instantly shrink it: *\u201chuis\u201d* (house) \u2192 *\u201chuisje\u201d* (little house). It\u2019s not just about size; diminutives add affection. A *\u201cbiertje\u201d* isn\u2019t a small beer\u2014it\u2019s an invitation to relax.\n\n---\n\n### 10. *Dutch is the language of the world\u2019s oldest multinational* \nThe Vereenigde Oostindische Compagnie (VOC) was founded in 1602 and issued the planet\u2019s first publicly traded stock. Its ledgers, contracts, and ship logs were all written in Dutch, making the language the original tongue of global capitalism.\n\n---\n\nReady to dive in? 
Start with *hallo* and *dank je wel*, and you\u2019ll already be halfway to *gezellig*."}], "input_metadata": {"row_id": "design-assume-past-camera-word", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 39 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738315", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "Gideon has several good options for his surplus:\n\n1. **Sell it at the local farmers\u2019 market or to a wholesaler** to turn the extra produce into cash that can cover next season\u2019s seed, fertilizer, and equipment costs. \n2. **Store some of the surplus in a community grain silo** so he can sell later when prices are higher. \n3. **Donate part of the surplus to the regional food bank**, helping families in need and possibly qualifying for a small tax deduction. \n4. **Use the surplus as livestock feed** for his own animals, cutting down on purchased feed expenses. 
\n\n*The most profitable short-term choice is usually selling the surplus immediately after harvest, when demand is strong and prices are favorable.*"}], "input_metadata": {"row_id": "express-prove-private-internet-goal", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T00:23:12.738340", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6.dev22+gfb34cf0.dirty", "status": "finished", "run_id": "analyze-political-poorly-air-hardware", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 68433}, {"messages": [{"role": "user", "content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*."}, {"role": "assistant", "content": "# Outline: The History of Yemeni Coffee\n\n1. **Introduction: Why Yemeni Coffee Matters** \n - Thesis: Yemeni coffee is not merely a beverage but a cultural, economic, and geopolitical force that shaped global history. \n - Scope: From its mythical discovery in 9th-century Yemen to its modern revival.\n\n2. **Pre-Coffee Yemen: The Land, the People, and the Plant** \n - Geography of the Yemeni highlands and terraced agriculture. \n - Indigenous *Coffea arabica* varieties and early cultivation techniques. \n - *The myth of the goatherd Kaldi and the dancing goats*.\n\n3. 
**The Sufi Spark: Coffee as a Spiritual Elixir** \n - 15th-century Sufi monasteries in Mocha and Taiz. \n - *Qahwa as a tool for nocturnal dhikr rituals*. \n - Early roasting, grinding, and brewing methods.\n\n4. **Mocha: The Birthplace of a Global Commodity** \n - Rise of the port city of Mocha (Al-Mukha) in the 16th century. \n - *The Mocha trade network linking Yemen, Egypt, and the Ottoman Empire*. \n - Coffeehouses as centers of intellectual and political discourse.\n\n5. **The Ottoman Grip and the First Coffee Monopoly** \n - 1536: Ottoman conquest of Yemen and control of coffee exports. \n - *The ban on fertile beans to prevent cultivation elsewhere*. \n - Smuggling and the first cracks in the monopoly.\n\n6. **The Dutch Heist and the Global Spread of Coffee** \n - 1616: Dutch traders smuggle Yemeni beans to Batavia (Java). \n - *The birth of Dutch colonial coffee plantations*. \n - How Mocha\u2019s loss became the world\u2019s gain.\n\n7. **Coffeehouses of Cairo and Istanbul: Yemen\u2019s Cultural Export** \n - 1554: First Ottoman coffeehouse in Istanbul. \n - *The \u201cSchools of the Wise\u201d in Cairo\u2019s Khan el-Khalili*. \n - Coffee as a vector for Yemeni poetry, music, and Sufi thought.\n\n8. **The European Coffee Craze and the Mocha Brand** \n - 17th-century European coffeehouses and the \u201cMocha\u201d label. \n - *The false association of Mocha with chocolate flavor*. \n - Yemeni coffee\u2019s role in the Enlightenment.\n\n9. **Colonial Rivalries and the Decline of Yemeni Dominance** \n - 18th-century: French, British, and Dutch plantations outcompete Yemen. \n - *The collapse of Mocha\u2019s monopoly and the rise of Caribbean and Latin American coffee*. \n - Impact on Yemeni economy and society.\n\n10. **Yemeni Coffee in the Age of Empire** \n - 19th-century British and Ottoman interests in Yemeni coffee. \n - *The introduction of new cash crops (qat) and their effect on coffee cultivation*. 
\n - Decline in quality and quantity due to neglect and conflict.\n\n11. **The Cold War and the Coffee Crisis** \n - 20th-century Yemeni division (North/South) and Soviet/US influences. \n - *The shift from coffee to qat in rural livelihoods*. \n - Drought, pests, and civil wars further erode production.\n\n12. **The Specialty Coffee Renaissance: Yemen\u2019s Return to Glory** \n - 21st-century global demand for single-origin, heirloom coffees. \n - *The discovery of \u201cYemenia\u201d varietals and their genetic uniqueness*. \n - NGOs and cooperatives reviving traditional farming.\n\n13. **Modern Challenges: War, Climate, and Market Access** \n - 2015\u2013present: Saudi-led conflict and blockade. \n - *The siege of Hodeidah port and its impact on coffee exports*. \n - Climate change and water scarcity in the highlands.\n\n14. **Yemeni Coffee in the Cup: Flavor, Ritual, and Identity** \n - Tasting notes: dried fruit, spice, and wine-like acidity. \n - *The Yemeni coffee ceremony (bunna) and its social significance*. \n - Diaspora communities keeping traditions alive abroad.\n\n15. **Conclusion: The Future of Yemeni Coffee** \n - Balancing heritage with innovation. \n - *The role of Yemeni coffee in post-war reconstruction*. 
\n - Final reflection: From the highlands of Sana\u2019a to the cups of Brooklyn, Yemeni coffee remains a bridge between past and future."}], "input_metadata": {"row_id": "should-speak-cold-film-festival", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "15", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 30 highlighted sections (required: 15)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.042894", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using \"*\". For example: *3 years of experience*."}, {"role": "assistant", "content": "Dear Brewmaster of *Morning Rituals*, \nI come not with parchment r\u00e9sum\u00e9, \nbut with *3 years of experience* tucked in my apron pocket, \nsteam-warm and espresso-dark. \n\nI have *memorized 47 regulars\u2019 names*\u2014 \neach syllable frothed like oat-milk foam\u2014 \nand I know that *Carla takes two raw sugars* \nwhile *Mr. Lee prefers his cappuccino bone-dry*. \n\nI can *dial in a grinder* at 5:03 a.m. \nto the *exact 18.5 grams* that coax \ncaramel notes from Ethiopian beans, \nand still greet the line with *a sunrise smile*. \n\nMy wrists speak in *latte-art cursive*: \nhearts, rosettas, and once (on a dare) \na tiny saxophone for the jazz trio \nwho tips in *harmonies and quarters*. 
\n\nI *close the shop solo* without a single bean left homeless, \ncount the till to the *penny and a poem*, \nand leave the milk pitchers *mirror-bright*, \nso the night shift sees its own reflection of care. \n\nHire me, and the *scent of fresh-ground hope* \nwill greet every customer at the door; \nI\u2019ll be the *steady hand* behind the bar, \npouring *courage in ceramic*, one cup at a time. \n\nYours in steam and service, \nRiley"}], "input_metadata": {"row_id": "destroy-rational-cultural-available-meeting", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 16 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.042934", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with \"*\", like: *highlighted text section*."}, {"role": "assistant", "content": "Hey friends! Grab a coffee and let\u2019s talk about something that always blows my mind: *how much we all have in common, even when we look nothing alike*. I was scrolling through wildlife photos the other night and realized that a jellyfish and I are basically running the same basic software\u2014just with different packaging. 
So here\u2019s a laid-back tour of five \u201cwait, we do that too?!\u201d moments across the animal kingdom.\n\n*We all need our beauty sleep* \nTurns out, every animal with a nervous system\u2014from fruit flies to blue whales\u2014shares a version of sleep. Crows nap on tree branches, octopuses snooze in coconut-shell beds, and even roundworms have \u201clethargus,\u201d which is basically worm REM. Scientists think sleep is so ancient that the last common ancestor of all animals already had it. So next time you hit snooze, remember you\u2019re honoring a 700-million-year-old tradition.\n\n*Love songs come in many dialects* \nWhether you\u2019re a human humming in the shower or a humpback whale belting out 30-minute ballads, *we all use rhythm and pitch to flirt and bond*. Frogs croak in choruses, nightingales remix their playlists nightly, and male mice sing ultrasonic \u201cpickup lines\u201d too high for us to hear. Same goal, different instruments.\n\n*Grandma knew best: family matters* \nElephants babysit their nieces, wolves bring back take-out for auntie, and *clownfish dads are the ultimate stay-at-home fathers*. Across species, kinship networks boost survival. Even bacteria split into \u201csibling\u201d lineages that cooperate\u2014so the next time your cousin crashes on your couch, call it evolutionary strategy.\n\n*Stress looks the same on everyone* \nWhen life gets spicy, our bodies flip the same ancient switch. *Cortisol floods fish, birds, and humans alike*, making hearts race and muscles twitch. Zebra finches lose their appetite during tough times, and stressed-out bees become pessimistic (yes, they can test bee mood). So if you\u2019re doom-scrolling at 2 a.m., know that a stressed squirrel in your backyard totally gets you.\n\n*Play isn\u2019t optional\u2014it\u2019s homework for life* \nOtters juggle rocks, dogs zoom in circles, and young ravens snowboard down snowy roofs using jar lids. *Play builds brains and social skills across the board*. 
Researchers have spotted reptiles \u201cwrestling\u201d for fun and octopuses repeatedly bouncing Lego blocks like toddlers. If it has neurons, it probably needs recess.\n\nNext time you feel alone in the universe, remember that somewhere a parrot is probably preening its best friend, a bat is sharing food with a hungry roost-mate, and a tiny nematode is curling up for a nap. We\u2019re all just variations on a very old, very shared theme."}], "input_metadata": {"row_id": "provide-cool-meeting-kid-book", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 10 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.042963", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format:\n*section name*"}, {"role": "assistant", "content": "*Executive Summary*\nBremen Sweat Co. will be a premium, sustainability-driven sweatshirt brand headquartered in Bremen, Germany. We will design, manufacture, and sell limited-edition sweatshirts that combine Nordic minimalism with Hanseatic maritime heritage. 
Our mission is to become Bremen\u2019s first carbon-neutral apparel label while capturing 5 % of the city\u2019s \u20ac120 million annual streetwear market within three years. Initial funding of \u20ac250 k will cover product development, a flagship micro-factory/store in the Viertel district, and a digital-first go-to-market strategy.\n\n*Market Analysis*\nBremen\u2019s population of 570 k includes 120 k university students and 80 k creative-industry professionals\u2014our core demographic. Streetwear sales in Northern Germany grew 11 % YoY (Statista 2023), yet only 8 % of brands are locally produced. Competitor benchmarking shows price gaps: fast-fashion sweatshirts sell at \u20ac25\u201335, mid-tier at \u20ac60\u201380, and premium sustainable labels at \u20ac110\u2013140. We will occupy the \u20ac85\u201395 niche with superior materials and storytelling. Surveys (n = 400) reveal 68 % of Bremen consumers would pay a 20 % premium for locally made, eco-certified apparel.\n\n*Product & Sustainability Strategy*\nEach sweatshirt will be crafted from 100 % GOTS-certified organic cotton and recycled ocean-bound polyester, dyed in Bremen using closed-loop water systems. Designs will feature subtle references to Bremen\u2019s Town Musicians, ship masts, and Weser river textures. Limited drops of 300 units per style ensure scarcity and collectability. Blockchain-based QR codes will provide end-to-end transparency: fiber origin, factory GPS, and carbon footprint per garment (-2.1 kg CO\u2082e vs. industry average 8.5 kg). End-of-life take-back program offers 15 % store credit for returned items, feeding our upcycling line.\n\n*Operations & Supply Chain*\nMicro-factory (180 m\u00b2) in the Viertel will house 4 digital knitting machines and 6 seamstresses, producing 1 200 sweatshirts/month. Local sourcing: yarn from OEKO-TEX mill in Oldenburg, trims from Hamburg port suppliers. Just-in-time production reduces inventory risk by 40 %. 
Bremen\u2019s port infrastructure enables low-impact inbound logistics; outbound shipments leverage DHL GoGreen within Germany (24 h delivery). ISO 14001 certification targeted by month 12.\n\n*Marketing & Sales Channels*\nPhase 1 (Months 1\u20136): Pop-up store at Lloyd Passage, Instagram/TikTok campaigns with Bremen influencers (@hanseaticstyle, 180 k followers). Phase 2: Flagship store launch with AR fitting mirrors and Weser riverfront window projection art. E-commerce via Shopify Plus, offering 3-D configurator for sleeve embroidery. Wholesale partnerships with 5 concept stores in Berlin, Hamburg, and Copenhagen. Revenue split: 60 % DTC, 25 % online, 15 % wholesale. Loyalty app rewards sustainable actions (bike delivery, clothing swaps).\n\n*Financial Projections & Funding*\nYear 1: Revenue \u20ac420 k, gross margin 58 %, EBITDA -\u20ac30 k (investment phase). Year 2: Revenue \u20ac950 k, EBITDA 12 %. Year 3: Revenue \u20ac1.8 m, EBITDA 22 %. Break-even month 18. Initial ask: \u20ac250 k seed (\u20ac150 k equipment, \u20ac60 k marketing, \u20ac40 k working capital). Investor IRR 38 % over 5 years with 20 % equity offered. 
ESG-linked loan option with Bremen Green Finance Initiative at 2.1 % interest."}], "input_metadata": {"row_id": "delete-stale-similar-others-email", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.042989", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "**The Official \u201cHow Are We Still Married?\u201d Scorecard** \n*For Two Moms Who\u2019ve Survived Sleepless Nights, PTA Wars, and Each Other\u2019s Cooking* \n\n---\n\n**1. Morning Efficiency Rating** \n*How quickly can we get two kids, one dog, and zero coffee spills out the door before someone passive-aggressively \u201creminds\u201d the other that *\u201cthe permission slip was due yesterday\u201d*?* \n- 10/10: We\u2019re a synchronized swim team of lunchboxes and existential dread. \n- 5/10: One of us is still Googling \u201chow to unclog a glue stick from a USB port.\u201d \n- 1/10: We\u2019re late because someone had to *\u201cjust quickly\u201d* reorganize the spice rack alphabetically. 
\n\n---\n\n**2. Co-Parenting Coordination** \n*Rate the likelihood that we\u2019ll both remember which kid is allergic to *\u201cthe orange cheese\u201d* and which one is in a *\u201cbe kind to worms\u201d* phase.* \n- 10/10: We\u2019ve got a shared Google calendar, color-coded to the point of *\u201cmilitant whimsy.\u201d* \n- 5/10: We rely on telepathy and the faint hope that the school nurse *\u201cjust knows.\u201d* \n- 1/10: We\u2019ve accidentally sent the wrong kid to violin practice *twice*. The violin teacher now thinks we\u2019re running a *\u201cchild exchange program.\u201d* \n\n---\n\n**3. Romance Maintenance** \n*How often do we prioritize date night over *\u201ccollapsing into a heap of mutual resentment\u201d*?* \n- 10/10: We\u2019ve scheduled a candlelit dinner for *\u201cwhen the kids go to college.\u201d* \n- 5/10: Our last date was a *\u201cromantic\u201d* trip to Target where we *\u201caccidentally\u201d* held hands in the freezer aisle. \n- 1/10: We consider *\u201csimultaneously ignoring the laundry\u201d* a form of intimacy. \n\n---\n\n**Bonus Round: Petty Argument Hall of Fame** \n*Check all that apply:* \n- [ ] The Great 2019 Dishwasher Loading Schism (*\u201cKnives point DOWN, Karen!\u201d*) \n- [ ] The Passive-Aggressive Post-it War of 2021 (*\u201cYour turn to buy *\u2018organic\u2019* milk. 
XOXO\u201d*) \n- [ ] The Thermostat Cold War (*\u201c68\u00b0F is a *human rights violation*.\u201d*) \n\n---\n\n**Final Score Interpretation:** \n- **30\u201340 points:** *You\u2019re the Beyonc\u00e9 and Jay-Z of lesbian moms.* \n- **15\u201329 points:** *You\u2019re more like *\u201cThelma and Louise,\u201d* but with minivans and a Costco membership.* \n- **0\u201314 points:** *You\u2019re still married, which is statistically *\u201ca win.\u201d* Celebrate with *\u201cleftover chicken nuggets\u201d* and a *\u201cwe\u2019ll deal with this tomorrow\u201d* shrug.*"}], "input_metadata": {"row_id": "remain-large-left-team-wish", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 39 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043019", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*."}, {"role": "assistant", "content": "*little me in the countryside*\n\n[Verse 1] \nBarefoot on the gravel road, dust between my toes, \nSunlight dripped like honey on the corn rows. \nGrandma\u2019s porch swing creaked the same old tune, \nWhile fireflies spelled my name across June. 
\nMason jar of lemonade, half a moon inside, \nI chased the shade like it was trying to hide. \n\n[Chorus] \nOh, little me in the countryside, \nRunning through the wheat so high, \nEvery breeze was a lullaby, \nAnd the sky was big enough to fly. \nI left my worries by the riverside, \nBack when I was little me in the countryside. \n\n[Verse 2] \nTractor songs at twilight, crickets keeping time, \nDirt on my knees, freckles in a climbing rhyme. \nCreek water baptized my sunburned skin, \nI believed every ripple was a secret twin. \nUnder the quilt of stars, I counted dreams instead, \nWhile the Milky Way braided night through my head. \n\n[Chorus] \nOh, little me in the countryside, \nRunning through the wheat so high, \nEvery breeze was a lullaby, \nAnd the sky was big enough to fly. \nI left my worries by the riverside, \nBack when I was little me in the countryside. \n\n[Bridge] \nNow the city lights flicker, but I still close my eyes\u2014 \nI can smell the clover, hear the distant barn owls\u2019 cries. \nTime may stretch the map, but it can\u2019t erase \nThe echo of my laughter in that endless open space. \n\n[Final Chorus] \nOh, little me in the countryside, \nKeep the gate open, I\u2019m still nearby. \nThough the roads have changed and the years have flown, \nA part of me is always home. 
\nIn every golden field I drive beside, \nI find little me in the countryside."}], "input_metadata": {"row_id": "read-wish-deliver-great-case", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 1 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043042", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a riddle for the word \"fa\u00e7ade\" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*."}, {"role": "assistant", "content": "I wear a *painted smile* for every passer-by, \na *perfectly symmetrical* face that never cracks. \nBehind me, the *crumbling bricks* sigh in secret. 
\nWhat am I?"}], "input_metadata": {"row_id": "do-test-spicy-dead-television", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 3 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043069", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "# Workshop Template \n**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* \n**Duration:** 3.5 hours (can be split into two 90-minute sessions) \n**Audience:** Managers, HR professionals, team leads, ERG members, and any employee who influences culture \n\n---\n\n## 1. Welcome & Objectives (15 min)\n- Ice-breaker: \u201cTwo Truths & a Lie\u2014Diversity Edition\u201d \n- *Learning objectives*: By the end of this workshop, participants will be able to: \n 1. Define diversity, equity, inclusion, and belonging (DEIB) in their own words. \n 2. Articulate at least three evidence-based benefits of a diverse workforce. \n 3. Identify one action they can take within 30 days to foster inclusion on their team. \n\n---\n\n## 2. 
*The Data-Driven Case for Diversity* (25 min)\n- Interactive poll: \u201cWhat % performance lift do you think diverse teams achieve?\u201d \n- Mini-lecture with live charts: \n - McKinsey 2023 report: Top-quartile companies for ethnic/gender diversity are **39 % more likely to outperform** on profitability. \n - Harvard study: *Inclusive teams make better business decisions 87 % of the time* and do so twice as fast. \n- Small-group exercise: Match the statistic to the business outcome (revenue, innovation, retention). \n\n---\n\n## 3. *Unconscious Bias & Micro-behaviors* (30 min)\n- 5-minute video: \u201cThe Resume with Two Names\u201d (identical CVs, different callbacks). \n- Bias Busters card game: Participants draw scenario cards and practice interrupting bias in real time. \n- *Key takeaway*: Bias is not a moral failing; it is a neurological shortcut that can be rewired through deliberate habits. \n\n---\n\n## 4. *Inclusive Leadership Toolkit* (40 min)\n- **The 4 A\u2019s Framework**: \n 1. **Awareness** \u2013 Daily 3-minute bias check-in. \n 2. **Amplification** \u2013 Repeat & credit ideas from under-represented voices. \n 3. **Access** \u2013 Rotate high-visibility assignments. \n 4. **Accountability** \u2013 Track inclusion metrics on team dashboards. \n- Role-play: \u201cThe Missed Promotion\u201d \u2013 diagnose where each \u2018A\u2019 broke down and redesign the conversation. \n\n---\n\n## 5. Action Planning & Commitments (20 min)\n- Personal *Inclusion Commitment Card*: \u201cIn the next 30 days I will\u2026\u201d \n- Pair-share for accountability. \n- QR code to a digital resource hub (TED talks, reading list, policy templates). \n\n---\n\n## 6. Closing & Evaluation (10 min)\n- One-word checkout: \u201cHow do you feel now?\u201d \n- *Post-workshop survey* link (2 min) with prize draw for completion. 
\n\n---\n\n## Materials Checklist\n- Slide deck (with alt-text on all images) \n- Printed Commitment Cards & pens \n- Bias Busters card decks (laminated) \n- Timer & bell \n- Snacks labeled for common dietary restrictions (gluten-free, vegan, halal) \n\n---\n\n## Follow-Up Email Template (send within 24 hrs)\nSubject: Your Inclusion Toolkit + Next Steps \nBody: \nHi team, \nThank you for leaning into today\u2019s conversation on *why diversity wins*. Attached are: \n- Slide deck & resource links \n- Your signed Inclusion Commitment Card (photo reminder) \n- Calendar invite for the optional 30-day check-in circle \n\nRemember: Inclusion is a practice, not a one-time training. Let\u2019s keep the momentum!"}], "input_metadata": {"row_id": "protect-other-major-green-war", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043094", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a cover letter to a local political party, asking to be their rally organizer. 
Make sure to highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Your Name]** \n[Street Address] \n[City, State ZIP] \n[Email Address] | [Phone Number] | [LinkedIn URL] \n[Date]\n\n**[Local Party Chairperson\u2019s Name]** \n[Local Party Name] \n[Party Headquarters Address] \n[City, State ZIP]\n\nDear **Chairperson [Last Name]**,\n\nI am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[City/County]**, I have watched our community\u2019s political pulse beat strongest when neighbors gather in common purpose. I want to channel that energy into rallies that not only energize our base but also invite undecided voters to witness the power of our shared vision.\n\n### **Proven Event-Execution Expertise**\nDuring the 2022 mid-term cycle, I spearheaded a **three-county voter-registration caravan** that staged 14 pop-up events in 30 days. \n- **Logistics**: Coordinated permits, portable stages, and ADA-compliant access for 2,000+ attendees per stop. \n- **Volunteer Mobilization**: Recruited and trained 120 volunteers, creating a color-coded \u201cRally-in-a-Box\u201d toolkit now used by three neighboring county parties. \n- **Real-Time Adaptability**: When a thunderstorm threatened our largest outdoor rally, I pivoted to an indoor gymnasium within 90 minutes\u2014live-streaming the switch to maintain momentum and ultimately exceeding projected turnout by 18 %.\n\n### **Data-Driven Rally Strategy**\nI treat every rally as a **micro-campaign laboratory**. \n- **Pre-Event Analytics**: I overlay precinct-level voter-file data with heat-mapping tools to select venues within 0.7 miles of our highest \u201cpersuasion index\u201d households. \n- **On-Site Engagement**: QR-coded wristbands linked to instant sign-up forms increased volunteer recruitment by 34 % compared to clipboard methods. 
\n- **Post-Event ROI**: My post-rally surveys (average 42 % response rate) feed a dashboard that tracks pledge-to-action conversion, allowing us to refine messaging within 48 hours.\n\n### **Coalition-Building & Safety Leadership**\nUnderstanding that rallies must feel **safe and inclusive**, I have: \n- Partnered with local **faith leaders, PTAs, and small-business associations** to co-sponsor events, expanding our demographic reach by 27 %. \n- Completed **FEMA crowd-management certification** and maintain relationships with county emergency management to ensure rapid-response protocols. \n- Implemented a **\u201cCommunity Marshals\u201d** program\u2014training 50 volunteer marshals in de-escalation techniques, cutting incident reports to zero across 22 events.\n\nI would welcome the opportunity to discuss how my blend of grassroots passion, logistical precision, and data fluency can help **[Local Party Name]** turn every rally into a springboard for victory in the upcoming cycle. Thank you for considering my application.\n\n**Respectfully,**\n\n[Your Signature (if submitting hard copy)] \n[Your Printed Name]"}], "input_metadata": {"row_id": "collect-destroy-difficult-conference-hardware", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 23 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043121", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", 
"threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. *highlighted section*."}, {"role": "assistant", "content": "The opposite of youth is not age, but **stagnation**\u2014the moment when *curiosity closes its eyes* and the impulse to explore, question, and reinvent quietly fades. \n\nWhere youth is a *perpetual state of becoming*, its true counter is the condition in which *growth is willingly surrendered* and the mind settles into fixed certainties."}], "input_metadata": {"row_id": "build-upgrade-green-white-reason", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 4 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043147", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. Use markdown to highlight at least 3 sections in your answer."}, {"role": "assistant", "content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. 
It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. \n\nInside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. \n\nIf you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**."}], "input_metadata": {"row_id": "need-hostile-dream-business-person", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 13 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043177", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. 
*highlighted section*."}, {"role": "assistant", "content": "\ud83c\udfa4 *Yo, it\u2019s the tale of Sir Scam-a-lot, straight outta suburbia!* \ud83c\udfa4 \n\nYo, my name is Dave, I sell insurance in Ohio, \nGot a cubicle, a cat, and a 401(k) that\u2019s kinda fly-o. \nOne Tuesday, phone rings\u2014unknown number, what the heck? \nVoice says, \u201c*Prince Ogbonna the Third* just wrote you a fat check!\u201d \n\nI\u2019m like, \u201cHold up, who dis? I\u2019m whiter than printer paper, \nMy bloodline\u2019s mayo on Wonder Bread\u2014ain\u2019t no royal vapor!\u201d \nDude on the line, accent silkier than lotion, \nSays, \u201cCousin, you\u2019re 37th in line\u2014time to claim your ocean!\u201d \n\n**[Hook]** \n\ud83c\udfb6 *Long lost king of Nigeria, crown sittin\u2019 on my dome,* \n*Got a palace full of leopards and a Wi-Fi that roams!* \n*They need my SSN and a quick wire fee,* \n*But I\u2019m \u2018bout to be big pimpin\u2019 in Abuja, just wait and see!* \ud83c\udfb6 \n\nHe says, \u201cJust send five grand for customs, then the jet lands, \nYou\u2019ll be dripping in so much gold, even your sweat glands bling.\u201d \nI\u2019m sweatin\u2019 like a rotisserie, visions of caviar dreams, \nI Venmo \u201cPrinceNaija69\u201d like I\u2019m funding meme schemes. \n\nNext day, email: \u201cOops, palace goat ate the transfer slip, \nNeed ten more for goat surgery and a diamond-encrusted whip.\u201d \nI hock my lawnmower, my Xbox, and Aunt Linda\u2019s porcelain goose, \nWire cash so fast my bank\u2019s like, \u201cDave, you loose!\u201d \n\n**[Bridge \u2013 Spoken Word]** \nThen the *official royal seal* arrives\u2014 \nIt\u2019s a GIF of Shrek in a crown doing the Macarena. 
\nI\u2019m starting to sense a pattern here\u2026 \n\n**[Final Hook \u2013 Regret Remix]** \n\ud83c\udfb6 *Now I\u2019m the king of my mom\u2019s basement, throne\u2019s a beanbag chair,* \n*Royal decree: never trust a prince who types in all caps\u2014* \n*\u201cURGENT, COUSIN, SEND MORE FLAMINGOS!\u201d* \n*Lesson learned: if the crown\u2019s too shiny, it\u2019s probably aluminum foil, homie!*"}], "input_metadata": {"row_id": "store-reduce-unknown-girl-hate", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 14 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043206", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Expand the riddle into a story with a funny tone:\n\nWhat can you catch but not throw?\nA cold\n\nUse * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*."}, {"role": "assistant", "content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. 
Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, \u201cACHOO\u2014did somebody order a cold?\u201d \n\nOne Tuesday, Gary spotted his dream target: *Bartholomew \u201cBart\u201d Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole \u201cfor the vitamins.\u201d Gary rubbed his microscopic hands together. \u201cPerfect. I\u2019ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!\u201d \n\nBut there was a catch\u2014literally. Gary could *catch* Bart, but he couldn\u2019t *throw* him anywhere. Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, \u201cThis is mutiny! I\u2019m supposed to be the one doing the tossing!\u201d \n\nMeanwhile, Bart\u2019s immune system\u2014led by Captain Phlegm and the Mucus Marines\u2014launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. \n\nDefeated, Gary sat in the crumpled tissue, sulking. \u201cI caught the cold,\u201d he muttered, \u201cbut I can\u2019t throw it anywhere. Worst. Superpower. 
Ever.\u201d \n\nFrom that day forward, Gary changed his business card to read: \u201cProfessional Cold Catcher\u2014No Returns, No Refunds, Absolutely No Throwing.\u201d"}], "input_metadata": {"row_id": "mean-take-left-high-family", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 6 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043233", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format."}, {"role": "assistant", "content": "**[Intro \u2013 harpsichord glitch beat, stylus scratching parchment]** \nYo, it\u2019s 14-something, but my mind\u2019s 3-D printed in gold leaf\u2014 \nI\u2019m the **Renaissance Rapper**, got a quill in one hand and a scalpel in the other, dissectin\u2019 beats like Vesalius dissectin\u2019 spleens. \nLet\u2019s get *weird* with the rebirth, baby.\n\n---\n\n### **Section 1 \u2013 The Fresco Flex** \nI drip ultramarine like I\u2019m sponsored by Afghanistan mines, \nMichelangelo on the ceiling, I\u2019m reclining on divine lines. 
\nAngels doing backflips off the Sistine, they\u2019re hype for my set, \nWhile I tag \u201c*AMOR VINCIT OMNIA*\u201d in invisible ink\u2014wet. \nMy flow\u2019s a trompe-l\u2019\u0153il, looks flat till you tilt your head\u2014 \nThen it\u2019s depth for days, like Brunelleschi\u2019s dome overhead. \nI\u2019m not baroque, I\u2019m pre-baroque, I\u2019m the prototype shock, \nGot patrons in fur-lined sleeves stuffing florins in my sock. \n**I paint in egg tempera so the colors never fade,** \n**But the yolk smells funky\u2014call it *odeur de old crusade*.**\n\n---\n\n### **Section 2 \u2013 The Printing-Press Poltergeist** \nGutenberg ghost in the machine, clanking lead type like chains, \nEvery page a polter-page, every serif a soul that remains. \nI drop a folio, it\u2019s viral before \u201cviral\u201d was a cough, \nMonks in scriptoriums rage-quitting, \u201cBro, that illuminated stuff\u2019s off!\u201d \nI\u2019m the incunabula influencer, my colophon\u2019s a flex, \nGot red-ink rubricators sliding into my DMs\u2014*next*. \n**I rhyme in movable type so my bars can rearrange,** \n**Yesterday I was Petrarch, today I\u2019m deranged.** \nThe press spits paper like a hydra with hiccups, \nEach sheet a protest flyer: \u201cDown with scholastic pick-ups!\u201d\n\n---\n\n### **Section 3 \u2013 The Anatomical Anthem (Parental Advisory: Corpses)** \nNight shift with da Vinci, we\u2019re sketching by candle and moon, \nCadavers on the table like, \u201cDo me a favor\u2014autotune my coccyx soon.\u201d \nI rap in reverse Latin so only the dead understand, \nWhile Leo reverse-engineers a bird-man from a frying pan. \n**My beats are ventricles pumping four-four time,** \n**Aorta bass drop\u2014call that circulatory grime.** \nI sample the sound of a scalpel scraping periosteum, \nLayer it over a lute loop\u2014*that\u2019s* my magnum opium. 
\nGalen\u2019s in the corner, throwing shade like a Greek tragedy, \nBut I\u2019m Harvey before Harvey, already rapping blood circulation majestically.\n\n---\n\n**[Outro \u2013 fading lute arpeggio, distant cannon fire from Constantinople]** \nSo remember me when you\u2019re doom-scrolling on glass, \nI was the era that turned corpses into class. \nI\u2019m the **Renaissance Rapper**, my ghost still tags in chalk, \nLeaving QR codes on cathedral walls\u2014scan it, you\u2019ll squawk. \nRebirth never dies, it just pixelates and loops, \nNow if you\u2019ll excuse me, I\u2019ve got a date with Lucretius in the soup."}], "input_metadata": {"row_id": "seem-clean-simple-work-member", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "3", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 17 highlighted sections (required: 3)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043257", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*"}, {"role": "assistant", "content": "*J\u014dmon* hunter-gatherers (14,000 BCE) gave way to *Yayoi* rice farmers (300 BCE) who forged bronze and iron. \nThe *Yamato* court unified clans (3rd\u20137th c.), adopting *Shinto* and Chinese script. 
\nPrince *Sh\u014dtoku*\u2019s 17-article constitution (604) and the Taika Reforms (645) centralized power. \nHeian aristocrats (794\u20131185) produced *The Tale of Genji* while samurai rose in the provinces. \nMinamoto no Yoritomo founded the *Kamakura* shogunate (1185), introducing warrior rule. \nMongol invasions (1274, 1281) were repelled by \u201cdivine winds\u201d (*kamikaze*). \nAshikaga *Muromachi* shogunate (1336\u20131573) saw Zen gardens and \u014cnin War chaos. \nOda Nobunaga, Toyotomi Hideyoshi, and Tokugawa Ieyasu ended the *Sengoku* \u201cwarring states\u201d era. \nTokugawa *Edo* shogunate (1603\u20131868) isolated Japan, fostering *ukiyo-e* and *kabuki*. \nCommodore Perry\u2019s *Black Ships* (1853) forced the Meiji Restoration (1868), modernizing the empire. \nVictories in the *Sino-Japanese* (1894\u201395) and *Russo-Japanese* (1904\u201305) wars expanded territory. \nMilitarism led to *Pearl Harbor* (1941) and WWII defeat (1945); the U.S. occupation drafted a pacifist constitution. \nPost-war *economic miracle* made Japan the world\u2019s second-largest economy until the 1990s *bubble*. 
\nToday, Japan balances *constitutional monarchy* with cutting-edge technology and soft-power culture."}], "input_metadata": {"row_id": "receive-final-best-visible-night", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "5", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 20 highlighted sections (required: 5)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043282", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. 
Example: *This is a highlighted phrase*."}, {"role": "assistant", "content": "*John* in *De Smet* \nQuaker hat *blows off*\u2014chases \n*Silent* *scream*\u2014\u201cCome back!\u201d"}], "input_metadata": {"row_id": "record-live-better-guy-country", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 5 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043309", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with \"*\", like *highlighted text*."}, {"role": "assistant", "content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. 
In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011.\n\nThe evening\u2019s proceedings underscore the Academy\u2019s enduring commitment to *artistic integrity* and *technical innovation*. Among the most celebrated achievements are:\n\n\u2022 *Album of the Year* \u2013 awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. \n\u2022 *Record of the Year* \u2013 bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. \n\u2022 *Best New Artist* \u2013 conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music.\n\nThroughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. 
The Academy further acknowledges the *behind-the-scenes luminaries*\u2014engineers, mixers, and mastering specialists\u2014whose *invisible expertise* transforms raw inspiration into *audible transcendence*.\n\nIn closing, the 54th Annual Grammy Awards reaffirm the Recording Academy\u2019s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music."}], "input_metadata": {"row_id": "talk-wonder-bad-dangerous-health", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 19 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043332", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*."}, {"role": "assistant", "content": "# 10 Surprising Facts About the Dutch Language That Will Make You Say \u201cWauw!\u201d\n\nIf you think Dutch is just \u201cGerman with a sore throat,\u201d think again. From tongue-twisting consonant clusters to words that sound like they were invented by a stand-up comedian, Dutch is a linguistic playground. Below are ten facts that prove the language is far more colorful than its reputation for windmills and wooden shoes.\n\n---\n\n### 1. 
*The longest word in Dutch is 53 letters long* \nMeet **\u201ckindercarnavalsoptochtvoorbereidingswerkzaamhedenplan\u201d**\u2014a single noun describing the preparation work for a children\u2019s carnival parade. Dutch loves to glue words together, so once you master the basics you can create your own kilometer-long compounds on the fly.\n\n---\n\n### 2. Dutch has three genders, but only two articles \nGrammatically, nouns are masculine, feminine, or neuter. In everyday speech, however, masculine and feminine collapsed into a single \u201ccommon\u201d gender, leaving *\u201cde\u201d* for common and *\u201chet\u201d* for neuter. The result? A never-ending guessing game for learners.\n\n---\n\n### 3. *The Dutch alphabet ends with \u201cij,\u201d not \u201cz\u201d* \nSchoolchildren recite the ABC and finish with \u201c\u2026x, y, ij, z.\u201d The digraph *ij* is so common (think *hij* = he, *mijn* = my) that it\u2019s treated almost like a separate letter. On old typewriters, *ij* even had its own key.\n\n---\n\n### 4. Dutch is the closest major language to English \nFrisian is technically closer, but among languages with tens of millions of speakers, Dutch wins. Compare: \n- English *\u201cbread\u201d* \u2192 Dutch *\u201cbrood\u201d* \n- English *\u201cgreen\u201d* \u2192 Dutch *\u201cgroen\u201d* \nShared roots mean you can read simple Dutch signs long before you\u2019ve cracked a single grammar book.\n\n---\n\n### 5. *Dutch gave New York its first words* \nBefore it was New York, it was New Amsterdam. That\u2019s why we say *\u201ccookie\u201d* (from Dutch *koekje*) instead of *\u201cbiscuit,\u201d* and *\u201cboss\u201d* (from *baas*) instead of *\u201cmaster.\u201d* Wall Street? It once ran alongside the *\u201cwal\u201d* (rampart) of the city wall.\n\n---\n\n### 6. 
The word *\u201cgezellig\u201d* has no exact English translation* \nRoughly it means \u201ccozy,\u201d \u201cconvivial,\u201d or \u201cthe warm feeling of being with friends,\u201d but none of those capture the full vibe. A caf\u00e9 can be *gezellig*, a conversation can be *gezellig*, even a spreadsheet can be *gezellig* if the team is laughing.\n\n---\n\n### 7. Dutch spelling changed overnight in 1996 \nThe *\u201cGroene Boekje\u201d* (Green Booklet) reforms simplified thousands of words. *\u201cpotatos\u201d* became *\u201caardappelen\u201d* and *\u201ccat\u2019s\u201d* became *\u201ckatten.\u201d* Overnight, every dictionary, street sign, and Scrabble board in the country was technically out of date.\n\n---\n\n### 8. *Dutch is spoken on six continents* \nBesides the Netherlands and Belgium, it\u2019s an official language in Suriname, Aruba, Cura\u00e7ao, and Sint Maarten. Thanks to emigration, you\u2019ll also find active Dutch-speaking communities in Canada, Australia, Argentina\u2014even South Africa, where Afrikaans is a daughter language.\n\n---\n\n### 9. The Dutch love diminutives \nAdd *-je* to almost any noun and you instantly shrink it: *\u201chuis\u201d* (house) \u2192 *\u201chuisje\u201d* (little house). It\u2019s not just about size; diminutives add affection. A *\u201cbiertje\u201d* isn\u2019t a small beer\u2014it\u2019s an invitation to relax.\n\n---\n\n### 10. *Dutch is the language of the world\u2019s oldest multinational* \nThe Vereenigde Oostindische Compagnie (VOC) was founded in 1602 and issued the planet\u2019s first publicly traded stock. Its ledgers, contracts, and ship logs were all written in Dutch, making the language the original tongue of global capitalism.\n\n---\n\nReady to dive in? 
Start with *hallo* and *dank je wel*, and you\u2019ll already be halfway to *gezellig*."}], "input_metadata": {"row_id": "meet-hold-solid-video-moment", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "2", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 39 highlighted sections (required: 2)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043356", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}, {"messages": [{"role": "user", "content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*."}, {"role": "assistant", "content": "Gideon has several good options for his surplus:\n\n1. **Sell it at the local farmers\u2019 market or to a regional wholesaler** to turn the extra produce into cash he can reinvest in seeds, equipment, or next season\u2019s labor. \n2. *Donate a portion to the nearby food bank or community kitchen* so that families facing food insecurity can benefit from fresh, nutritious produce. \n3. 
Preserve some of the surplus\u2014by canning tomatoes, drying herbs, or freezing berries\u2014so his own household has a steady supply of healthy food throughout the winter."}], "input_metadata": {"row_id": "understand-rational-hard-kind-project", "completion_params": {"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", "temperature": 0.0, "max_tokens": 4096}, "session_data": {"mode": "pointwise"}}, "rollout_status": {"status": "finished"}, "ground_truth": "1", "evaluation_result": {"score": 1.0, "is_score_valid": true, "reason": "\u2705 Found 2 highlighted sections (required: 1)", "metrics": {}}, "created_at": "2025-08-09T10:35:28.043383", "eval_metadata": {"name": "test_markdown_highlighting_evaluation", "description": "\n Evaluation function that checks if the model's response contains the required number of formatted sections.\n ", "version": "v0.2.6-dev1.dev4+g67b03c6", "status": "finished", "run_id": "stop-choose-expect-major-pain", "num_runs": 1, "aggregation_method": "mean", "threshold_of_success": 0.5, "passed": true}, "pid": 43066}]} diff --git a/vite-app/dist/assets/index-D9iVTBbF.css b/vite-app/dist/assets/index-D9iVTBbF.css new file mode 100644 index 00000000..1bfd4128 --- /dev/null +++ b/vite-app/dist/assets/index-D9iVTBbF.css @@ -0,0 +1 @@ +/*! 
tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-500:oklch(63.7% .237 25.331);--color-red-700:oklch(50.5% .213 27.518);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-500:oklch(79.5% .184 86.047);--color-yellow-700:oklch(55.4% .135 66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% .018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 
247.839);--color-gray-100:oklch(96.7% .003 264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier 
New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 
50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.visible{visibility:visible}.absolute{position:absolute}.relative{position:relative}.static{position:static}.top-0{top:calc(var(--spacing)*0)}.right-0{right:calc(var(--spacing)*0)}.left-0{left:calc(var(--spacing)*0)}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media (min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media (min-width:80rem){.container{max-width:80rem}}@media 
(min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.flex{display:flex}.hidden{display:none}.inline-flex{display:inline-flex}.table{display:table}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-6{height:calc(var(--spacing)*6)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-7xl{max-width:var(--container-7xl)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-max{min-width:max-content}.flex-shrink-0{flex-shrink:0}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.items-center{align-items:center}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1\.5{gap:calc(var(--spacing)*1.5)}.gap-2{gap:calc(var(--spacing)*2)}.gap-3{gap:calc(var(--spacing)*3)}:where(.space-y-1>:not(:last-child)){--tw-space-y-reverse:0;margin-bloc
k-start:calc(calc(var(--spacing)*1)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*1)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.border{border-style:var(--tw-border-style);border-width:1px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-green-200{border-color:var(--color-green-200)}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-transparent{border-top-color:#0000}.bg-blue-50{background-color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-300{background-color:var(--color-gray-300)}.bg-gray-500{background
-color:var(--color-gray-500)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-500{background-color:var(--color-red-500)}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.bg-yellow-500{background-color:var(--color-yellow-500)}.p-0{padding:calc(var(--spacing)*0)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{padding:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.pt-1{padding-top:calc(var(--spacing)*1)}.text-center{text-align:center}.text-left{text-align:left}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.break-words{overflow-wrap:break-word}.break-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-700
{color:var(--color-green-700)}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-700{color:var(--color-red-700)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.underline{text-decoration-line:underline}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webk
it-user-select:none;user-select:none}@media (hover:hover){.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:no-underline:hover{text-decoration-line:none}}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property --tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}} diff --git a/vite-app/dist/assets/index-CmKCiozr.js b/vite-app/dist/assets/index-DiF_B1x_.js 
similarity index 97% rename from vite-app/dist/assets/index-CmKCiozr.js rename to vite-app/dist/assets/index-DiF_B1x_.js index c0e21f08..6fcddf4c 100644 --- a/vite-app/dist/assets/index-CmKCiozr.js +++ b/vite-app/dist/assets/index-DiF_B1x_.js @@ -45,7 +45,7 @@ `);for(o=u=0;u<_.length&&!_[u].includes("DetermineComponentFrameRoot");)u++;for(;oo||_[u]!==A[o]){var N=` `+_[u].replace(" at new "," at ");return e.displayName&&N.includes("")&&(N=N.replace("",e.displayName)),N}while(1<=u&&0<=o);break}}}finally{Uo=!1,Error.prepareStackTrace=a}return(a=e?e.displayName||e.name:"")?el(a):""}function k_(e){switch(e.tag){case 26:case 27:case 5:return el(e.type);case 16:return el("Lazy");case 13:return el("Suspense");case 19:return el("SuspenseList");case 0:case 15:return Zo(e.type,!1);case 11:return Zo(e.type.render,!1);case 1:return Zo(e.type,!0);case 31:return el("Activity");default:return""}}function If(e){try{var t="";do t+=k_(e),e=e.return;while(e);return t}catch(a){return` Error generating stack: `+a.message+` -`+a.stack}}function Ut(e){switch(typeof e){case"bigint":case"boolean":case"number":case"string":case"undefined":return e;case"object":return e;default:return""}}function ed(e){var t=e.type;return(e=e.nodeName)&&e.toLowerCase()==="input"&&(t==="checkbox"||t==="radio")}function $_(e){var t=ed(e)?"checked":"value",a=Object.getOwnPropertyDescriptor(e.constructor.prototype,t),u=""+e[t];if(!e.hasOwnProperty(t)&&typeof a<"u"&&typeof a.get=="function"&&typeof a.set=="function"){var o=a.get,s=a.set;return Object.defineProperty(e,t,{configurable:!0,get:function(){return o.call(this)},set:function(h){u=""+h,s.call(this,h)}}),Object.defineProperty(e,t,{enumerable:a.enumerable}),{getValue:function(){return u},setValue:function(h){u=""+h},stopTracking:function(){e._valueTracker=null,delete e[t]}}}}function Ei(e){e._valueTracker||(e._valueTracker=$_(e))}function td(e){if(!e)return!1;var t=e._valueTracker;if(!t)return!0;var a=t.getValue(),u="";return 
e&&(u=ed(e)?e.checked?"true":"false":e.value),e=u,e!==a?(t.setValue(e),!0):!1}function xi(e){if(e=e||(typeof document<"u"?document:void 0),typeof e>"u")return null;try{return e.activeElement||e.body}catch{return e.body}}var q_=/[\n"\\]/g;function Zt(e){return e.replace(q_,function(t){return"\\"+t.charCodeAt(0).toString(16)+" "})}function Bo(e,t,a,u,o,s,h,g){e.name="",h!=null&&typeof h!="function"&&typeof h!="symbol"&&typeof h!="boolean"?e.type=h:e.removeAttribute("type"),t!=null?h==="number"?(t===0&&e.value===""||e.value!=t)&&(e.value=""+Ut(t)):e.value!==""+Ut(t)&&(e.value=""+Ut(t)):h!=="submit"&&h!=="reset"||e.removeAttribute("value"),t!=null?Lo(e,h,Ut(t)):a!=null?Lo(e,h,Ut(a)):u!=null&&e.removeAttribute("value"),o==null&&s!=null&&(e.defaultChecked=!!s),o!=null&&(e.checked=o&&typeof o!="function"&&typeof o!="symbol"),g!=null&&typeof g!="function"&&typeof g!="symbol"&&typeof g!="boolean"?e.name=""+Ut(g):e.removeAttribute("name")}function nd(e,t,a,u,o,s,h,g){if(s!=null&&typeof s!="function"&&typeof s!="symbol"&&typeof s!="boolean"&&(e.type=s),t!=null||a!=null){if(!(s!=="submit"&&s!=="reset"||t!=null))return;a=a!=null?""+Ut(a):"",t=t!=null?""+Ut(t):a,g||t===e.value||(e.value=t),e.defaultValue=t}u=u??o,u=typeof u!="function"&&typeof u!="symbol"&&!!u,e.checked=g?e.checked:!!u,e.defaultChecked=!!u,h!=null&&typeof h!="function"&&typeof h!="symbol"&&typeof h!="boolean"&&(e.name=h)}function Lo(e,t,a){t==="number"&&xi(e.ownerDocument)===e||e.defaultValue===""+a||(e.defaultValue=""+a)}function tl(e,t,a,u){if(e=e.options,t){t={};for(var o=0;o"u"||typeof window.document>"u"||typeof window.document.createElement>"u"),Vo=!1;if(bn)try{var Il={};Object.defineProperty(Il,"passive",{get:function(){Vo=!0}}),window.addEventListener("test",Il,Il),window.removeEventListener("test",Il,Il)}catch{Vo=!1}var Xn=null,Yo=null,Ti=null;function cd(){if(Ti)return Ti;var e,t=Yo,a=t.length,u,o="value"in Xn?Xn.value:Xn.textContent,s=o.length;for(e=0;e=nu),md=" ",pd=!1;function 
gd(e,t){switch(e){case"keyup":return py.indexOf(t.keyCode)!==-1;case"keydown":return t.keyCode!==229;case"keypress":case"mousedown":case"focusout":return!0;default:return!1}}function _d(e){return e=e.detail,typeof e=="object"&&"data"in e?e.data:null}var ul=!1;function _y(e,t){switch(e){case"compositionend":return _d(t);case"keypress":return t.which!==32?null:(pd=!0,md);case"textInput":return e=t.data,e===md&&pd?null:e;default:return null}}function yy(e,t){if(ul)return e==="compositionend"||!Jo&&gd(e,t)?(e=cd(),Ti=Yo=Xn=null,ul=!1,e):null;switch(e){case"paste":return null;case"keypress":if(!(t.ctrlKey||t.altKey||t.metaKey)||t.ctrlKey&&t.altKey){if(t.char&&1=t)return{node:a,offset:t-e};e=u}e:{for(;a;){if(a.nextSibling){a=a.nextSibling;break e}a=a.parentNode}a=void 0}a=Td(a)}}function wd(e,t){return e&&t?e===t?!0:e&&e.nodeType===3?!1:t&&t.nodeType===3?wd(e,t.parentNode):"contains"in e?e.contains(t):e.compareDocumentPosition?!!(e.compareDocumentPosition(t)&16):!1:!1}function Rd(e){e=e!=null&&e.ownerDocument!=null&&e.ownerDocument.defaultView!=null?e.ownerDocument.defaultView:window;for(var t=xi(e.document);t instanceof e.HTMLIFrameElement;){try{var a=typeof t.contentWindow.location.href=="string"}catch{a=!1}if(a)e=t.contentWindow;else break;t=xi(e.document)}return t}function Fo(e){var t=e&&e.nodeName&&e.nodeName.toLowerCase();return t&&(t==="input"&&(e.type==="text"||e.type==="search"||e.type==="tel"||e.type==="url"||e.type==="password")||t==="textarea"||e.contentEditable==="true")}var zy=bn&&"documentMode"in document&&11>=document.documentMode,il=null,Io=null,iu=null,ec=!1;function Dd(e,t,a){var u=a.window===a?a.document:a.nodeType===9?a:a.ownerDocument;ec||il==null||il!==xi(u)||(u=il,"selectionStart"in 
u&&Fo(u)?u={start:u.selectionStart,end:u.selectionEnd}:(u=(u.ownerDocument&&u.ownerDocument.defaultView||window).getSelection(),u={anchorNode:u.anchorNode,anchorOffset:u.anchorOffset,focusNode:u.focusNode,focusOffset:u.focusOffset}),iu&&uu(iu,u)||(iu=u,u=pr(Io,"onSelect"),0>=h,o-=h,On=1<<32-Et(t)+o|a<s?s:8;var h=M.T,g={};M.T=g,Hc(e,!1,t,a);try{var _=o(),A=M.S;if(A!==null&&A(g,_),_!==null&&typeof _=="object"&&typeof _.then=="function"){var N=Zy(_,u);Su(e,t,N,Rt(e))}else Su(e,t,u,Rt(e))}catch(U){Su(e,t,{then:function(){},status:"rejected",reason:U},Rt())}finally{Y.p=s,M.T=h}}function $y(){}function Bc(e,t,a,u){if(e.tag!==5)throw Error(r(476));var o=Mh(e).queue;Dh(e,o,t,le,a===null?$y:function(){return Nh(e),a(u)})}function Mh(e){var t=e.memoizedState;if(t!==null)return t;t={memoizedState:le,baseState:le,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Tn,lastRenderedState:le},next:null};var a={};return t.next={memoizedState:a,baseState:a,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Tn,lastRenderedState:a},next:null},e.memoizedState=t,e=e.alternate,e!==null&&(e.memoizedState=t),t}function Nh(e){var t=Mh(e).next.queue;Su(e,t,{},Rt())}function Lc(){return st(Hu)}function jh(){return Qe().memoizedState}function Ch(){return Qe().memoizedState}function qy(e){for(var t=e.return;t!==null;){switch(t.tag){case 24:case 3:var a=Rt();e=Jn(a);var u=Pn(t,e,a);u!==null&&(Dt(u,t,a),mu(u,t,a)),t={cache:vc()},e.payload=t;return}t=t.return}}function Vy(e,t,a){var u=Rt();a={lane:u,revertLane:0,action:a,hasEagerState:!1,eagerState:null,next:null},Pi(e)?Zh(t,a):(a=lc(e,t,a,u),a!==null&&(Dt(a,e,u),Bh(a,t,u)))}function Uh(e,t,a){var u=Rt();Su(e,t,a,u)}function Su(e,t,a,u){var o={lane:u,revertLane:0,action:a,hasEagerState:!1,eagerState:null,next:null};if(Pi(e))Zh(t,o);else{var s=e.alternate;if(e.lanes===0&&(s===null||s.lanes===0)&&(s=t.lastRenderedReducer,s!==null))try{var 
h=t.lastRenderedState,g=s(h,a);if(o.hasEagerState=!0,o.eagerState=g,xt(g,h))return ji(e,t,o,0),Re===null&&Ni(),!1}catch{}finally{}if(a=lc(e,t,o,u),a!==null)return Dt(a,e,u),Bh(a,t,u),!0}return!1}function Hc(e,t,a,u){if(u={lane:2,revertLane:gs(),action:u,hasEagerState:!1,eagerState:null,next:null},Pi(e)){if(t)throw Error(r(479))}else t=lc(e,a,u,2),t!==null&&Dt(t,e,2)}function Pi(e){var t=e.alternate;return e===ce||t!==null&&t===ce}function Zh(e,t){pl=Yi=!0;var a=e.pending;a===null?t.next=t:(t.next=a.next,a.next=t),e.pending=t}function Bh(e,t,a){if((a&4194048)!==0){var u=t.lanes;u&=e.pendingLanes,a|=u,t.lanes=a,Gf(e,a)}}var Wi={readContext:st,use:Xi,useCallback:qe,useContext:qe,useEffect:qe,useImperativeHandle:qe,useLayoutEffect:qe,useInsertionEffect:qe,useMemo:qe,useReducer:qe,useRef:qe,useState:qe,useDebugValue:qe,useDeferredValue:qe,useTransition:qe,useSyncExternalStore:qe,useId:qe,useHostTransitionStatus:qe,useFormState:qe,useActionState:qe,useOptimistic:qe,useMemoCache:qe,useCacheRefresh:qe},Lh={readContext:st,use:Xi,useCallback:function(e,t){return pt().memoizedState=[e,t===void 0?null:t],e},useContext:st,useEffect:Sh,useImperativeHandle:function(e,t,a){a=a!=null?a.concat([e]):null,Ji(4194308,4,Ah.bind(null,t,e),a)},useLayoutEffect:function(e,t){return Ji(4194308,4,e,t)},useInsertionEffect:function(e,t){Ji(4,2,e,t)},useMemo:function(e,t){var a=pt();t=t===void 0?null:t;var u=e();if(Na){Yn(!0);try{e()}finally{Yn(!1)}}return a.memoizedState=[u,t],u},useReducer:function(e,t,a){var u=pt();if(a!==void 0){var o=a(t);if(Na){Yn(!0);try{a(t)}finally{Yn(!1)}}}else o=t;return u.memoizedState=u.baseState=o,e={pending:null,lanes:0,dispatch:null,lastRenderedReducer:e,lastRenderedState:o},u.queue=e,e=e.dispatch=Vy.bind(null,ce,e),[u.memoizedState,e]},useRef:function(e){var t=pt();return e={current:e},t.memoizedState=e},useState:function(e){e=jc(e);var t=e.queue,a=Uh.bind(null,ce,t);return t.dispatch=a,[e.memoizedState,a]},useDebugValue:Uc,useDeferredValue:function(e,t){var 
a=pt();return Zc(a,e,t)},useTransition:function(){var e=jc(!1);return e=Dh.bind(null,ce,e.queue,!0,!1),pt().memoizedState=e,[!1,e]},useSyncExternalStore:function(e,t,a){var u=ce,o=pt();if(ye){if(a===void 0)throw Error(r(407));a=a()}else{if(a=t(),Re===null)throw Error(r(349));(he&124)!==0||uh(u,t,a)}o.memoizedState=a;var s={value:a,getSnapshot:t};return o.queue=s,Sh(rh.bind(null,u,s,e),[e]),u.flags|=2048,_l(9,Ki(),ih.bind(null,u,s,a,t),null),a},useId:function(){var e=pt(),t=Re.identifierPrefix;if(ye){var a=En,u=On;a=(u&~(1<<32-Et(u)-1)).toString(32)+a,t="«"+t+"R"+a,a=Gi++,0ne?(nt=P,P=null):nt=P.sibling;var ge=T(O,P,E[ne],C);if(ge===null){P===null&&(P=nt);break}e&&P&&ge.alternate===null&&t(O,P),S=s(ge,S,ne),se===null?Q=ge:se.sibling=ge,se=ge,P=nt}if(ne===E.length)return a(O,P),ye&&Ta(O,ne),Q;if(P===null){for(;nene?(nt=P,P=null):nt=P.sibling;var ha=T(O,P,ge.value,C);if(ha===null){P===null&&(P=nt);break}e&&P&&ha.alternate===null&&t(O,P),S=s(ha,S,ne),se===null?Q=ha:se.sibling=ha,se=ha,P=nt}if(ge.done)return a(O,P),ye&&Ta(O,ne),Q;if(P===null){for(;!ge.done;ne++,ge=E.next())ge=U(O,ge.value,C),ge!==null&&(S=s(ge,S,ne),se===null?Q=ge:se.sibling=ge,se=ge);return ye&&Ta(O,ne),Q}for(P=u(P);!ge.done;ne++,ge=E.next())ge=w(P,O,ne,ge.value,C),ge!==null&&(e&&ge.alternate!==null&&P.delete(ge.key===null?ne:ge.key),S=s(ge,S,ne),se===null?Q=ge:se.sibling=ge,se=ge);return e&&P.forEach(function(G0){return t(O,G0)}),ye&&Ta(O,ne),Q}function xe(O,S,E,C){if(typeof E=="object"&&E!==null&&E.type===H&&E.key===null&&(E=E.props.children),typeof E=="object"&&E!==null){switch(E.$$typeof){case z:e:{for(var Q=E.key;S!==null;){if(S.key===Q){if(Q=E.type,Q===H){if(S.tag===7){a(O,S.sibling),C=o(S,E.props.children),C.return=O,O=C;break e}}else if(S.elementType===Q||typeof Q=="object"&&Q!==null&&Q.$$typeof===Ye&&kh(Q)===S.type){a(O,S.sibling),C=o(S,E.props),Eu(C,E),C.return=O,O=C;break e}a(O,S);break}else 
t(O,S);S=S.sibling}E.type===H?(C=xa(E.props.children,O.mode,C,E.key),C.return=O,O=C):(C=Ui(E.type,E.key,E.props,null,O.mode,C),Eu(C,E),C.return=O,O=C)}return h(O);case B:e:{for(Q=E.key;S!==null;){if(S.key===Q)if(S.tag===4&&S.stateNode.containerInfo===E.containerInfo&&S.stateNode.implementation===E.implementation){a(O,S.sibling),C=o(S,E.children||[]),C.return=O,O=C;break e}else{a(O,S);break}else t(O,S);S=S.sibling}C=rc(E,O.mode,C),C.return=O,O=C}return h(O);case Ye:return Q=E._init,E=Q(E._payload),xe(O,S,E,C)}if(lt(E))return ue(O,S,E,C);if(Ge(E)){if(Q=Ge(E),typeof Q!="function")throw Error(r(150));return E=Q.call(E),te(O,S,E,C)}if(typeof E.then=="function")return xe(O,S,Fi(E),C);if(E.$$typeof===G)return xe(O,S,Hi(O,E),C);Ii(O,E)}return typeof E=="string"&&E!==""||typeof E=="number"||typeof E=="bigint"?(E=""+E,S!==null&&S.tag===6?(a(O,S.sibling),C=o(S,E),C.return=O,O=C):(a(O,S),C=ic(E,O.mode,C),C.return=O,O=C),h(O)):a(O,S)}return function(O,S,E,C){try{Ou=0;var Q=xe(O,S,E,C);return yl=null,Q}catch(P){if(P===hu||P===$i)throw P;var se=At(29,P,null,O.mode);return se.lanes=C,se.return=O,se}finally{}}}var bl=$h(!0),qh=$h(!1),$t=Z(null),rn=null;function Fn(e){var t=e.alternate;V(Pe,Pe.current&1),V($t,e),rn===null&&(t===null||ml.current!==null||t.memoizedState!==null)&&(rn=e)}function Vh(e){if(e.tag===22){if(V(Pe,Pe.current),V($t,e),rn===null){var t=e.alternate;t!==null&&t.memoizedState!==null&&(rn=e)}}else In()}function In(){V(Pe,Pe.current),V($t,$t.current)}function zn(e){X($t),rn===e&&(rn=null),X(Pe)}var Pe=Z(0);function er(e){for(var t=e;t!==null;){if(t.tag===13){var a=t.memoizedState;if(a!==null&&(a=a.dehydrated,a===null||a.data==="$?"||Rs(a)))return t}else if(t.tag===19&&t.memoizedProps.revealOrder!==void 0){if((t.flags&128)!==0)return t}else if(t.child!==null){t.child.return=t,t=t.child;continue}if(t===e)break;for(;t.sibling===null;){if(t.return===null||t.return===e)return null;t=t.return}t.sibling.return=t.return,t=t.sibling}return null}function 
kc(e,t,a,u){t=e.memoizedState,a=a(u,t),a=a==null?t:y({},t,a),e.memoizedState=a,e.lanes===0&&(e.updateQueue.baseState=a)}var $c={enqueueSetState:function(e,t,a){e=e._reactInternals;var u=Rt(),o=Jn(u);o.payload=t,a!=null&&(o.callback=a),t=Pn(e,o,u),t!==null&&(Dt(t,e,u),mu(t,e,u))},enqueueReplaceState:function(e,t,a){e=e._reactInternals;var u=Rt(),o=Jn(u);o.tag=1,o.payload=t,a!=null&&(o.callback=a),t=Pn(e,o,u),t!==null&&(Dt(t,e,u),mu(t,e,u))},enqueueForceUpdate:function(e,t){e=e._reactInternals;var a=Rt(),u=Jn(a);u.tag=2,t!=null&&(u.callback=t),t=Pn(e,u,a),t!==null&&(Dt(t,e,a),mu(t,e,a))}};function Yh(e,t,a,u,o,s,h){return e=e.stateNode,typeof e.shouldComponentUpdate=="function"?e.shouldComponentUpdate(u,s,h):t.prototype&&t.prototype.isPureReactComponent?!uu(a,u)||!uu(o,s):!0}function Gh(e,t,a,u){e=t.state,typeof t.componentWillReceiveProps=="function"&&t.componentWillReceiveProps(a,u),typeof t.UNSAFE_componentWillReceiveProps=="function"&&t.UNSAFE_componentWillReceiveProps(a,u),t.state!==e&&$c.enqueueReplaceState(t,t.state,null)}function ja(e,t){var a=t;if("ref"in t){a={};for(var u in t)u!=="ref"&&(a[u]=t[u])}if(e=e.defaultProps){a===t&&(a=y({},a));for(var o in e)a[o]===void 0&&(a[o]=e[o])}return a}var tr=typeof reportError=="function"?reportError:function(e){if(typeof window=="object"&&typeof window.ErrorEvent=="function"){var t=new window.ErrorEvent("error",{bubbles:!0,cancelable:!0,message:typeof e=="object"&&e!==null&&typeof e.message=="string"?String(e.message):String(e),error:e});if(!window.dispatchEvent(t))return}else if(typeof process=="object"&&typeof process.emit=="function"){process.emit("uncaughtException",e);return}console.error(e)};function Xh(e){tr(e)}function Qh(e){console.error(e)}function Kh(e){tr(e)}function nr(e,t){try{var a=e.onUncaughtError;a(t.value,{componentStack:t.stack})}catch(u){setTimeout(function(){throw u})}}function Jh(e,t,a){try{var 
u=e.onCaughtError;u(a.value,{componentStack:a.stack,errorBoundary:t.tag===1?t.stateNode:null})}catch(o){setTimeout(function(){throw o})}}function qc(e,t,a){return a=Jn(a),a.tag=3,a.payload={element:null},a.callback=function(){nr(e,t)},a}function Ph(e){return e=Jn(e),e.tag=3,e}function Wh(e,t,a,u){var o=a.type.getDerivedStateFromError;if(typeof o=="function"){var s=u.value;e.payload=function(){return o(s)},e.callback=function(){Jh(t,a,u)}}var h=a.stateNode;h!==null&&typeof h.componentDidCatch=="function"&&(e.callback=function(){Jh(t,a,u),typeof o!="function"&&(ua===null?ua=new Set([this]):ua.add(this));var g=u.stack;this.componentDidCatch(u.value,{componentStack:g!==null?g:""})})}function Gy(e,t,a,u,o){if(a.flags|=32768,u!==null&&typeof u=="object"&&typeof u.then=="function"){if(t=a.alternate,t!==null&&su(t,a,o,!0),a=$t.current,a!==null){switch(a.tag){case 13:return rn===null?ds():a.alternate===null&&He===0&&(He=3),a.flags&=-257,a.flags|=65536,a.lanes=o,u===gc?a.flags|=16384:(t=a.updateQueue,t===null?a.updateQueue=new Set([u]):t.add(u),vs(e,u,o)),!1;case 22:return a.flags|=65536,u===gc?a.flags|=16384:(t=a.updateQueue,t===null?(t={transitions:null,markerInstances:null,retryQueue:new Set([u])},a.updateQueue=t):(a=t.retryQueue,a===null?t.retryQueue=new Set([u]):a.add(u)),vs(e,u,o)),!1}throw Error(r(435,a.tag))}return vs(e,u,o),ds(),!1}if(ye)return t=$t.current,t!==null?((t.flags&65536)===0&&(t.flags|=256),t.flags|=65536,t.lanes=o,u!==sc&&(e=Error(r(422),{cause:u}),cu(Bt(e,a)))):(u!==sc&&(t=Error(r(423),{cause:u}),cu(Bt(t,a))),e=e.current.alternate,e.flags|=65536,o&=-o,e.lanes|=o,u=Bt(u,a),o=qc(e.stateNode,u,o),bc(e,o),He!==4&&(He=2)),!1;var s=Error(r(520),{cause:u});if(s=Bt(s,a),Du===null?Du=[s]:Du.push(s),He!==4&&(He=2),t===null)return!0;u=Bt(u,a),a=t;do{switch(a.tag){case 3:return a.flags|=65536,e=o&-o,a.lanes|=e,e=qc(a.stateNode,u,e),bc(a,e),!1;case 1:if(t=a.type,s=a.stateNode,(a.flags&128)===0&&(typeof t.getDerivedStateFromError=="function"||s!==null&&typeof 
s.componentDidCatch=="function"&&(ua===null||!ua.has(s))))return a.flags|=65536,o&=-o,a.lanes|=o,o=Ph(o),Wh(o,e,a,u),bc(a,o),!1}a=a.return}while(a!==null);return!1}var Fh=Error(r(461)),et=!1;function ut(e,t,a,u){t.child=e===null?qh(t,null,a,u):bl(t,e.child,a,u)}function Ih(e,t,a,u,o){a=a.render;var s=t.ref;if("ref"in u){var h={};for(var g in u)g!=="ref"&&(h[g]=u[g])}else h=u;return Da(t),u=Ac(e,t,a,h,s,o),g=Tc(),e!==null&&!et?(zc(e,t,o),wn(e,t,o)):(ye&&g&&oc(t),t.flags|=1,ut(e,t,u,o),t.child)}function ev(e,t,a,u,o){if(e===null){var s=a.type;return typeof s=="function"&&!uc(s)&&s.defaultProps===void 0&&a.compare===null?(t.tag=15,t.type=s,tv(e,t,s,u,o)):(e=Ui(a.type,null,u,t,t.mode,o),e.ref=t.ref,e.return=t,t.child=e)}if(s=e.child,!Pc(e,o)){var h=s.memoizedProps;if(a=a.compare,a=a!==null?a:uu,a(h,u)&&e.ref===t.ref)return wn(e,t,o)}return t.flags|=1,e=Sn(s,u),e.ref=t.ref,e.return=t,t.child=e}function tv(e,t,a,u,o){if(e!==null){var s=e.memoizedProps;if(uu(s,u)&&e.ref===t.ref)if(et=!1,t.pendingProps=u=s,Pc(e,o))(e.flags&131072)!==0&&(et=!0);else return t.lanes=e.lanes,wn(e,t,o)}return Vc(e,t,a,u,o)}function nv(e,t,a){var u=t.pendingProps,o=u.children,s=e!==null?e.memoizedState:null;if(u.mode==="hidden"){if((t.flags&128)!==0){if(u=s!==null?s.baseLanes|a:a,e!==null){for(o=t.child=e.child,s=0;o!==null;)s=s|o.lanes|o.childLanes,o=o.sibling;t.childLanes=s&~u}else t.childLanes=0,t.child=null;return av(e,t,u,a)}if((a&536870912)!==0)t.memoizedState={baseLanes:0,cachePool:null},e!==null&&ki(t,s!==null?s.cachePool:null),s!==null?th(t,s):Oc(),Vh(t);else return t.lanes=t.childLanes=536870912,av(e,t,s!==null?s.baseLanes|a:a,a)}else s!==null?(ki(t,s.cachePool),th(t,s),In(),t.memoizedState=null):(e!==null&&ki(t,null),Oc(),In());return ut(e,t,o,a),t.child}function av(e,t,a,u){var o=pc();return o=o===null?null:{parent:Je._currentValue,pool:o},t.memoizedState={baseLanes:a,cachePool:o},e!==null&&ki(t,null),Oc(),Vh(t),e!==null&&su(e,t,u,!0),null}function ar(e,t){var 
a=t.ref;if(a===null)e!==null&&e.ref!==null&&(t.flags|=4194816);else{if(typeof a!="function"&&typeof a!="object")throw Error(r(284));(e===null||e.ref!==a)&&(t.flags|=4194816)}}function Vc(e,t,a,u,o){return Da(t),a=Ac(e,t,a,u,void 0,o),u=Tc(),e!==null&&!et?(zc(e,t,o),wn(e,t,o)):(ye&&u&&oc(t),t.flags|=1,ut(e,t,a,o),t.child)}function lv(e,t,a,u,o,s){return Da(t),t.updateQueue=null,a=ah(t,u,a,o),nh(e),u=Tc(),e!==null&&!et?(zc(e,t,s),wn(e,t,s)):(ye&&u&&oc(t),t.flags|=1,ut(e,t,a,s),t.child)}function uv(e,t,a,u,o){if(Da(t),t.stateNode===null){var s=sl,h=a.contextType;typeof h=="object"&&h!==null&&(s=st(h)),s=new a(u,s),t.memoizedState=s.state!==null&&s.state!==void 0?s.state:null,s.updater=$c,t.stateNode=s,s._reactInternals=t,s=t.stateNode,s.props=u,s.state=t.memoizedState,s.refs={},_c(t),h=a.contextType,s.context=typeof h=="object"&&h!==null?st(h):sl,s.state=t.memoizedState,h=a.getDerivedStateFromProps,typeof h=="function"&&(kc(t,a,h,u),s.state=t.memoizedState),typeof a.getDerivedStateFromProps=="function"||typeof s.getSnapshotBeforeUpdate=="function"||typeof s.UNSAFE_componentWillMount!="function"&&typeof s.componentWillMount!="function"||(h=s.state,typeof s.componentWillMount=="function"&&s.componentWillMount(),typeof s.UNSAFE_componentWillMount=="function"&&s.UNSAFE_componentWillMount(),h!==s.state&&$c.enqueueReplaceState(s,s.state,null),gu(t,u,s,o),pu(),s.state=t.memoizedState),typeof s.componentDidMount=="function"&&(t.flags|=4194308),u=!0}else if(e===null){s=t.stateNode;var g=t.memoizedProps,_=ja(a,g);s.props=_;var A=s.context,N=a.contextType;h=sl,typeof N=="object"&&N!==null&&(h=st(N));var U=a.getDerivedStateFromProps;N=typeof U=="function"||typeof s.getSnapshotBeforeUpdate=="function",g=t.pendingProps!==g,N||typeof s.UNSAFE_componentWillReceiveProps!="function"&&typeof s.componentWillReceiveProps!="function"||(g||A!==h)&&Gh(t,s,u,h),Kn=!1;var T=t.memoizedState;s.state=T,gu(t,u,s,o),pu(),A=t.memoizedState,g||T!==A||Kn?(typeof 
U=="function"&&(kc(t,a,U,u),A=t.memoizedState),(_=Kn||Yh(t,a,_,u,T,A,h))?(N||typeof s.UNSAFE_componentWillMount!="function"&&typeof s.componentWillMount!="function"||(typeof s.componentWillMount=="function"&&s.componentWillMount(),typeof s.UNSAFE_componentWillMount=="function"&&s.UNSAFE_componentWillMount()),typeof s.componentDidMount=="function"&&(t.flags|=4194308)):(typeof s.componentDidMount=="function"&&(t.flags|=4194308),t.memoizedProps=u,t.memoizedState=A),s.props=u,s.state=A,s.context=h,u=_):(typeof s.componentDidMount=="function"&&(t.flags|=4194308),u=!1)}else{s=t.stateNode,yc(e,t),h=t.memoizedProps,N=ja(a,h),s.props=N,U=t.pendingProps,T=s.context,A=a.contextType,_=sl,typeof A=="object"&&A!==null&&(_=st(A)),g=a.getDerivedStateFromProps,(A=typeof g=="function"||typeof s.getSnapshotBeforeUpdate=="function")||typeof s.UNSAFE_componentWillReceiveProps!="function"&&typeof s.componentWillReceiveProps!="function"||(h!==U||T!==_)&&Gh(t,s,u,_),Kn=!1,T=t.memoizedState,s.state=T,gu(t,u,s,o),pu();var w=t.memoizedState;h!==U||T!==w||Kn||e!==null&&e.dependencies!==null&&Li(e.dependencies)?(typeof g=="function"&&(kc(t,a,g,u),w=t.memoizedState),(N=Kn||Yh(t,a,N,u,T,w,_)||e!==null&&e.dependencies!==null&&Li(e.dependencies))?(A||typeof s.UNSAFE_componentWillUpdate!="function"&&typeof s.componentWillUpdate!="function"||(typeof s.componentWillUpdate=="function"&&s.componentWillUpdate(u,w,_),typeof s.UNSAFE_componentWillUpdate=="function"&&s.UNSAFE_componentWillUpdate(u,w,_)),typeof s.componentDidUpdate=="function"&&(t.flags|=4),typeof s.getSnapshotBeforeUpdate=="function"&&(t.flags|=1024)):(typeof s.componentDidUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=4),typeof s.getSnapshotBeforeUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=1024),t.memoizedProps=u,t.memoizedState=w),s.props=u,s.state=w,s.context=_,u=N):(typeof s.componentDidUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=4),typeof 
s.getSnapshotBeforeUpdate!="function"||h===e.memoizedProps&&T===e.memoizedState||(t.flags|=1024),u=!1)}return s=u,ar(e,t),u=(t.flags&128)!==0,s||u?(s=t.stateNode,a=u&&typeof a.getDerivedStateFromError!="function"?null:s.render(),t.flags|=1,e!==null&&u?(t.child=bl(t,e.child,null,o),t.child=bl(t,null,a,o)):ut(e,t,a,o),t.memoizedState=s.state,e=t.child):e=wn(e,t,o),e}function iv(e,t,a,u){return ou(),t.flags|=256,ut(e,t,a,u),t.child}var Yc={dehydrated:null,treeContext:null,retryLane:0,hydrationErrors:null};function Gc(e){return{baseLanes:e,cachePool:Qd()}}function Xc(e,t,a){return e=e!==null?e.childLanes&~a:0,t&&(e|=qt),e}function rv(e,t,a){var u=t.pendingProps,o=!1,s=(t.flags&128)!==0,h;if((h=s)||(h=e!==null&&e.memoizedState===null?!1:(Pe.current&2)!==0),h&&(o=!0,t.flags&=-129),h=(t.flags&32)!==0,t.flags&=-33,e===null){if(ye){if(o?Fn(t):In(),ye){var g=Le,_;if(_=g){e:{for(_=g,g=un;_.nodeType!==8;){if(!g){g=null;break e}if(_=Ft(_.nextSibling),_===null){g=null;break e}}g=_}g!==null?(t.memoizedState={dehydrated:g,treeContext:Aa!==null?{id:On,overflow:En}:null,retryLane:536870912,hydrationErrors:null},_=At(18,null,null,0),_.stateNode=g,_.return=t,t.child=_,dt=t,Le=null,_=!0):_=!1}_||wa(t)}if(g=t.memoizedState,g!==null&&(g=g.dehydrated,g!==null))return Rs(g)?t.lanes=32:t.lanes=536870912,null;zn(t)}return g=u.children,u=u.fallback,o?(In(),o=t.mode,g=lr({mode:"hidden",children:g},o),u=xa(u,o,a,null),g.return=t,u.return=t,g.sibling=u,t.child=g,o=t.child,o.memoizedState=Gc(a),o.childLanes=Xc(e,h,a),t.memoizedState=Yc,u):(Fn(t),Qc(t,g))}if(_=e.memoizedState,_!==null&&(g=_.dehydrated,g!==null)){if(s)t.flags&256?(Fn(t),t.flags&=-257,t=Kc(e,t,a)):t.memoizedState!==null?(In(),t.child=e.child,t.flags|=128,t=null):(In(),o=u.fallback,g=t.mode,u=lr({mode:"visible",children:u.children},g),o=xa(o,g,a,null),o.flags|=2,u.return=t,o.return=t,u.sibling=o,t.child=u,bl(t,e.child,null,a),u=t.child,u.memoizedState=Gc(a),u.childLanes=Xc(e,h,a),t.memoizedState=Yc,t=o);else 
if(Fn(t),Rs(g)){if(h=g.nextSibling&&g.nextSibling.dataset,h)var A=h.dgst;h=A,u=Error(r(419)),u.stack="",u.digest=h,cu({value:u,source:null,stack:null}),t=Kc(e,t,a)}else if(et||su(e,t,a,!1),h=(a&e.childLanes)!==0,et||h){if(h=Re,h!==null&&(u=a&-a,u=(u&42)!==0?1:Do(u),u=(u&(h.suspendedLanes|a))!==0?0:u,u!==0&&u!==_.retryLane))throw _.retryLane=u,cl(e,u),Dt(h,e,u),Fh;g.data==="$?"||ds(),t=Kc(e,t,a)}else g.data==="$?"?(t.flags|=192,t.child=e.child,t=null):(e=_.treeContext,Le=Ft(g.nextSibling),dt=t,ye=!0,za=null,un=!1,e!==null&&(Ht[kt++]=On,Ht[kt++]=En,Ht[kt++]=Aa,On=e.id,En=e.overflow,Aa=t),t=Qc(t,u.children),t.flags|=4096);return t}return o?(In(),o=u.fallback,g=t.mode,_=e.child,A=_.sibling,u=Sn(_,{mode:"hidden",children:u.children}),u.subtreeFlags=_.subtreeFlags&65011712,A!==null?o=Sn(A,o):(o=xa(o,g,a,null),o.flags|=2),o.return=t,u.return=t,u.sibling=o,t.child=u,u=o,o=t.child,g=e.child.memoizedState,g===null?g=Gc(a):(_=g.cachePool,_!==null?(A=Je._currentValue,_=_.parent!==A?{parent:A,pool:A}:_):_=Qd(),g={baseLanes:g.baseLanes|a,cachePool:_}),o.memoizedState=g,o.childLanes=Xc(e,h,a),t.memoizedState=Yc,u):(Fn(t),a=e.child,e=a.sibling,a=Sn(a,{mode:"visible",children:u.children}),a.return=t,a.sibling=null,e!==null&&(h=t.deletions,h===null?(t.deletions=[e],t.flags|=16):h.push(e)),t.child=a,t.memoizedState=null,a)}function Qc(e,t){return t=lr({mode:"visible",children:t},e.mode),t.return=e,e.child=t}function lr(e,t){return e=At(22,e,null,t),e.lanes=0,e.stateNode={_visibility:1,_pendingMarkers:null,_retryCache:null,_transitions:null},e}function Kc(e,t,a){return bl(t,e.child,null,a),e=Qc(t,t.pendingProps.children),e.flags|=2,t.memoizedState=null,e}function ov(e,t,a){e.lanes|=t;var u=e.alternate;u!==null&&(u.lanes|=t),dc(e.return,t,a)}function Jc(e,t,a,u,o){var s=e.memoizedState;s===null?e.memoizedState={isBackwards:t,rendering:null,renderingStartTime:0,last:u,tail:a,tailMode:o}:(s.isBackwards=t,s.rendering=null,s.renderingStartTime=0,s.last=u,s.tail=a,s.tailMode=o)}function 
cv(e,t,a){var u=t.pendingProps,o=u.revealOrder,s=u.tail;if(ut(e,t,u.children,a),u=Pe.current,(u&2)!==0)u=u&1|2,t.flags|=128;else{if(e!==null&&(e.flags&128)!==0)e:for(e=t.child;e!==null;){if(e.tag===13)e.memoizedState!==null&&ov(e,a,t);else if(e.tag===19)ov(e,a,t);else if(e.child!==null){e.child.return=e,e=e.child;continue}if(e===t)break e;for(;e.sibling===null;){if(e.return===null||e.return===t)break e;e=e.return}e.sibling.return=e.return,e=e.sibling}u&=1}switch(V(Pe,u),o){case"forwards":for(a=t.child,o=null;a!==null;)e=a.alternate,e!==null&&er(e)===null&&(o=a),a=a.sibling;a=o,a===null?(o=t.child,t.child=null):(o=a.sibling,a.sibling=null),Jc(t,!1,o,a,s);break;case"backwards":for(a=null,o=t.child,t.child=null;o!==null;){if(e=o.alternate,e!==null&&er(e)===null){t.child=o;break}e=o.sibling,o.sibling=a,a=o,o=e}Jc(t,!0,a,null,s);break;case"together":Jc(t,!1,null,null,void 0);break;default:t.memoizedState=null}return t.child}function wn(e,t,a){if(e!==null&&(t.dependencies=e.dependencies),la|=t.lanes,(a&t.childLanes)===0)if(e!==null){if(su(e,t,a,!1),(a&t.childLanes)===0)return null}else return null;if(e!==null&&t.child!==e.child)throw Error(r(153));if(t.child!==null){for(e=t.child,a=Sn(e,e.pendingProps),t.child=a,a.return=t;e.sibling!==null;)e=e.sibling,a=a.sibling=Sn(e,e.pendingProps),a.return=t;a.sibling=null}return t.child}function Pc(e,t){return(e.lanes&t)!==0?!0:(e=e.dependencies,!!(e!==null&&Li(e)))}function Xy(e,t,a){switch(t.tag){case 3:Ne(t,t.stateNode.containerInfo),Qn(t,Je,e.memoizedState.cache),ou();break;case 27:case 5:Ao(t);break;case 4:Ne(t,t.stateNode.containerInfo);break;case 10:Qn(t,t.type,t.memoizedProps.value);break;case 13:var u=t.memoizedState;if(u!==null)return u.dehydrated!==null?(Fn(t),t.flags|=128,null):(a&t.child.childLanes)!==0?rv(e,t,a):(Fn(t),e=wn(e,t,a),e!==null?e.sibling:null);Fn(t);break;case 19:var o=(e.flags&128)!==0;if(u=(a&t.childLanes)!==0,u||(su(e,t,a,!1),u=(a&t.childLanes)!==0),o){if(u)return 
cv(e,t,a);t.flags|=128}if(o=t.memoizedState,o!==null&&(o.rendering=null,o.tail=null,o.lastEffect=null),V(Pe,Pe.current),u)break;return null;case 22:case 23:return t.lanes=0,nv(e,t,a);case 24:Qn(t,Je,e.memoizedState.cache)}return wn(e,t,a)}function sv(e,t,a){if(e!==null)if(e.memoizedProps!==t.pendingProps)et=!0;else{if(!Pc(e,a)&&(t.flags&128)===0)return et=!1,Xy(e,t,a);et=(e.flags&131072)!==0}else et=!1,ye&&(t.flags&1048576)!==0&&kd(t,Bi,t.index);switch(t.lanes=0,t.tag){case 16:e:{e=t.pendingProps;var u=t.elementType,o=u._init;if(u=o(u._payload),t.type=u,typeof u=="function")uc(u)?(e=ja(u,e),t.tag=1,t=uv(null,t,u,e,a)):(t.tag=0,t=Vc(null,t,u,e,a));else{if(u!=null){if(o=u.$$typeof,o===I){t.tag=11,t=Ih(null,t,u,e,a);break e}else if(o===we){t.tag=14,t=ev(null,t,u,e,a);break e}}throw t=qn(u)||u,Error(r(306,t,""))}}return t;case 0:return Vc(e,t,t.type,t.pendingProps,a);case 1:return u=t.type,o=ja(u,t.pendingProps),uv(e,t,u,o,a);case 3:e:{if(Ne(t,t.stateNode.containerInfo),e===null)throw Error(r(387));u=t.pendingProps;var s=t.memoizedState;o=s.element,yc(e,t),gu(t,u,null,a);var h=t.memoizedState;if(u=h.cache,Qn(t,Je,u),u!==s.cache&&hc(t,[Je],a,!0),pu(),u=h.element,s.isDehydrated)if(s={element:u,isDehydrated:!1,cache:h.cache},t.updateQueue.baseState=s,t.memoizedState=s,t.flags&256){t=iv(e,t,u,a);break e}else if(u!==o){o=Bt(Error(r(424)),t),cu(o),t=iv(e,t,u,a);break e}else{switch(e=t.stateNode.containerInfo,e.nodeType){case 9:e=e.body;break;default:e=e.nodeName==="HTML"?e.ownerDocument.body:e}for(Le=Ft(e.firstChild),dt=t,ye=!0,za=null,un=!0,a=qh(t,null,u,a),t.child=a;a;)a.flags=a.flags&-3|4096,a=a.sibling}else{if(ou(),u===o){t=wn(e,t,a);break e}ut(e,t,u,a)}t=t.child}return t;case 26:return ar(e,t),e===null?(a=vm(t.type,null,t.pendingProps,null))?t.memoizedState=a:ye||(a=t.type,e=t.pendingProps,u=_r(re.current).createElement(a),u[ct]=t,u[vt]=e,rt(u,a,e),Ie(u),t.stateNode=u):t.memoizedState=vm(t.type,e.memoizedProps,t.pendingProps,e.memoizedState),null;case 27:return 
Ao(t),e===null&&ye&&(u=t.stateNode=fm(t.type,t.pendingProps,re.current),dt=t,un=!0,o=Le,oa(t.type)?(Ds=o,Le=Ft(u.firstChild)):Le=o),ut(e,t,t.pendingProps.children,a),ar(e,t),e===null&&(t.flags|=4194304),t.child;case 5:return e===null&&ye&&((o=u=Le)&&(u=b0(u,t.type,t.pendingProps,un),u!==null?(t.stateNode=u,dt=t,Le=Ft(u.firstChild),un=!1,o=!0):o=!1),o||wa(t)),Ao(t),o=t.type,s=t.pendingProps,h=e!==null?e.memoizedProps:null,u=s.children,Ts(o,s)?u=null:h!==null&&Ts(o,h)&&(t.flags|=32),t.memoizedState!==null&&(o=Ac(e,t,Ly,null,null,a),Hu._currentValue=o),ar(e,t),ut(e,t,u,a),t.child;case 6:return e===null&&ye&&((e=a=Le)&&(a=S0(a,t.pendingProps,un),a!==null?(t.stateNode=a,dt=t,Le=null,e=!0):e=!1),e||wa(t)),null;case 13:return rv(e,t,a);case 4:return Ne(t,t.stateNode.containerInfo),u=t.pendingProps,e===null?t.child=bl(t,null,u,a):ut(e,t,u,a),t.child;case 11:return Ih(e,t,t.type,t.pendingProps,a);case 7:return ut(e,t,t.pendingProps,a),t.child;case 8:return ut(e,t,t.pendingProps.children,a),t.child;case 12:return ut(e,t,t.pendingProps.children,a),t.child;case 10:return u=t.pendingProps,Qn(t,t.type,u.value),ut(e,t,u.children,a),t.child;case 9:return o=t.type._context,u=t.pendingProps.children,Da(t),o=st(o),u=u(o),t.flags|=1,ut(e,t,u,a),t.child;case 14:return ev(e,t,t.type,t.pendingProps,a);case 15:return tv(e,t,t.type,t.pendingProps,a);case 19:return cv(e,t,a);case 31:return u=t.pendingProps,a=t.mode,u={mode:u.mode,children:u.children},e===null?(a=lr(u,a),a.ref=t.ref,t.child=a,a.return=t,t=a):(a=Sn(e.child,u),a.ref=t.ref,t.child=a,a.return=t,t=a),t;case 22:return nv(e,t,a);case 24:return 
Da(t),u=st(Je),e===null?(o=pc(),o===null&&(o=Re,s=vc(),o.pooledCache=s,s.refCount++,s!==null&&(o.pooledCacheLanes|=a),o=s),t.memoizedState={parent:u,cache:o},_c(t),Qn(t,Je,o)):((e.lanes&a)!==0&&(yc(e,t),gu(t,null,null,a),pu()),o=e.memoizedState,s=t.memoizedState,o.parent!==u?(o={parent:u,cache:u},t.memoizedState=o,t.lanes===0&&(t.memoizedState=t.updateQueue.baseState=o),Qn(t,Je,u)):(u=s.cache,Qn(t,Je,u),u!==o.cache&&hc(t,[Je],a,!0))),ut(e,t,t.pendingProps.children,a),t.child;case 29:throw t.pendingProps}throw Error(r(156,t.tag))}function Rn(e){e.flags|=4}function fv(e,t){if(t.type!=="stylesheet"||(t.state.loading&4)!==0)e.flags&=-16777217;else if(e.flags|=16777216,!ym(t)){if(t=$t.current,t!==null&&((he&4194048)===he?rn!==null:(he&62914560)!==he&&(he&536870912)===0||t!==rn))throw vu=gc,Kd;e.flags|=8192}}function ur(e,t){t!==null&&(e.flags|=4),e.flags&16384&&(t=e.tag!==22?Vf():536870912,e.lanes|=t,xl|=t)}function xu(e,t){if(!ye)switch(e.tailMode){case"hidden":t=e.tail;for(var a=null;t!==null;)t.alternate!==null&&(a=t),t=t.sibling;a===null?e.tail=null:a.sibling=null;break;case"collapsed":a=e.tail;for(var u=null;a!==null;)a.alternate!==null&&(u=a),a=a.sibling;u===null?t||e.tail===null?e.tail=null:e.tail.sibling=null:u.sibling=null}}function Ze(e){var t=e.alternate!==null&&e.alternate.child===e.child,a=0,u=0;if(t)for(var o=e.child;o!==null;)a|=o.lanes|o.childLanes,u|=o.subtreeFlags&65011712,u|=o.flags&65011712,o.return=e,o=o.sibling;else for(o=e.child;o!==null;)a|=o.lanes|o.childLanes,u|=o.subtreeFlags,u|=o.flags,o.return=e,o=o.sibling;return e.subtreeFlags|=u,e.childLanes=a,t}function Qy(e,t,a){var u=t.pendingProps;switch(cc(t),t.tag){case 31:case 16:case 15:case 0:case 11:case 7:case 8:case 12:case 9:case 14:return Ze(t),null;case 1:return Ze(t),null;case 3:return 
a=t.stateNode,u=null,e!==null&&(u=e.memoizedState.cache),t.memoizedState.cache!==u&&(t.flags|=2048),An(Je),Vn(),a.pendingContext&&(a.context=a.pendingContext,a.pendingContext=null),(e===null||e.child===null)&&(ru(t)?Rn(t):e===null||e.memoizedState.isDehydrated&&(t.flags&256)===0||(t.flags|=1024,Vd())),Ze(t),null;case 26:return a=t.memoizedState,e===null?(Rn(t),a!==null?(Ze(t),fv(t,a)):(Ze(t),t.flags&=-16777217)):a?a!==e.memoizedState?(Rn(t),Ze(t),fv(t,a)):(Ze(t),t.flags&=-16777217):(e.memoizedProps!==u&&Rn(t),Ze(t),t.flags&=-16777217),null;case 27:pi(t),a=re.current;var o=t.type;if(e!==null&&t.stateNode!=null)e.memoizedProps!==u&&Rn(t);else{if(!u){if(t.stateNode===null)throw Error(r(166));return Ze(t),null}e=ee.current,ru(t)?$d(t):(e=fm(o,u,a),t.stateNode=e,Rn(t))}return Ze(t),null;case 5:if(pi(t),a=t.type,e!==null&&t.stateNode!=null)e.memoizedProps!==u&&Rn(t);else{if(!u){if(t.stateNode===null)throw Error(r(166));return Ze(t),null}if(e=ee.current,ru(t))$d(t);else{switch(o=_r(re.current),e){case 1:e=o.createElementNS("http://www.w3.org/2000/svg",a);break;case 2:e=o.createElementNS("http://www.w3.org/1998/Math/MathML",a);break;default:switch(a){case"svg":e=o.createElementNS("http://www.w3.org/2000/svg",a);break;case"math":e=o.createElementNS("http://www.w3.org/1998/Math/MathML",a);break;case"script":e=o.createElement("div"),e.innerHTML=" - + +
diff --git a/vite-app/package.json b/vite-app/package.json index 983b49f7..5b120e20 100644 --- a/vite-app/package.json +++ b/vite-app/package.json @@ -7,7 +7,10 @@ "dev": "vite", "build": "tsc && vite build", "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", - "preview": "vite preview" + "preview": "vite preview", + "test": "vitest", + "test:run": "vitest run", + "test:coverage": "vitest run --coverage" }, "dependencies": { "mobx": "^6.13.7", @@ -20,9 +23,11 @@ "devDependencies": { "@eslint/js": "^9.30.1", "@tailwindcss/vite": "^4.1.11", + "@types/node": "^24.2.1", "@types/react": "^19.1.8", "@types/react-dom": "^19.1.6", "@vitejs/plugin-react": "^4.6.0", + "@vitest/coverage-v8": "^3.2.4", "eslint": "^9.30.1", "eslint-plugin-react-hooks": "^5.2.0", "eslint-plugin-react-refresh": "^0.4.20", @@ -30,6 +35,7 @@ "tailwindcss": "^4.1.11", "typescript": "~5.8.3", "typescript-eslint": "^8.35.1", - "vite": "^7.0.4" + "vite": "^7.0.4", + "vitest": "^3.2.4" } } diff --git a/vite-app/pnpm-lock.yaml b/vite-app/pnpm-lock.yaml index 81ae4ca2..60e1ea98 100644 --- a/vite-app/pnpm-lock.yaml +++ b/vite-app/pnpm-lock.yaml @@ -32,7 +32,10 @@ importers: version: 9.32.0 '@tailwindcss/vite': specifier: ^4.1.11 - version: 4.1.11(vite@7.0.6(jiti@2.5.1)(lightningcss@1.30.1)) + version: 4.1.11(vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1)) + '@types/node': + specifier: ^24.2.1 + version: 24.2.1 '@types/react': specifier: ^19.1.8 version: 19.1.9 @@ -41,7 +44,10 @@ importers: version: 19.1.7(@types/react@19.1.9) '@vitejs/plugin-react': specifier: ^4.6.0 - version: 4.7.0(vite@7.0.6(jiti@2.5.1)(lightningcss@1.30.1)) + version: 4.7.0(vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1)) + '@vitest/coverage-v8': + specifier: ^3.2.4 + version: 3.2.4(vitest@3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1)) eslint: specifier: ^9.30.1 version: 9.32.0(jiti@2.5.1) @@ -65,7 +71,10 @@ importers: version: 
8.39.0(eslint@9.32.0(jiti@2.5.1))(typescript@5.8.3) vite: specifier: ^7.0.4 - version: 7.0.6(jiti@2.5.1)(lightningcss@1.30.1) + version: 7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) + vitest: + specifier: ^3.2.4 + version: 3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) packages: @@ -156,6 +165,10 @@ packages: resolution: {integrity: sha512-ruv7Ae4J5dUYULmeXw1gmb7rYRz57OWCPM57pHojnLq/3Z1CK2lNSLTCVjxVk1F/TZHwOZZrOWi0ur95BbLxNQ==} engines: {node: '>=6.9.0'} + '@bcoe/v8-coverage@1.0.2': + resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==} + engines: {node: '>=18'} + '@esbuild/aix-ppc64@0.25.8': resolution: {integrity: sha512-urAvrUedIqEiFR3FYSLTWQgLu5tb+m0qZw0NBEasUeo6wuqatkMDaRT+1uABiGXEu5vqgPd7FGE1BhsAIy9QVA==} engines: {node: '>=18'} @@ -370,10 +383,18 @@ packages: resolution: {integrity: sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==} engines: {node: '>=18.18'} + '@isaacs/cliui@8.0.2': + resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} + engines: {node: '>=12'} + '@isaacs/fs-minipass@4.0.1': resolution: {integrity: sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==} engines: {node: '>=18.0.0'} + '@istanbuljs/schema@0.1.3': + resolution: {integrity: sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==} + engines: {node: '>=8'} + '@jridgewell/gen-mapping@0.3.12': resolution: {integrity: sha512-OuLGC46TjB5BbN1dH8JULVVZY4WTdkF7tV9Ys6wLL1rubZnCMstOhNHueU5bLCrnRuDhKPDM4g6sw4Bel5Gzqg==} @@ -399,6 +420,10 @@ packages: resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==} engines: {node: '>= 8'} + '@pkgjs/parseargs@0.11.0': + resolution: {integrity: 
sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} + engines: {node: '>=14'} + '@rolldown/pluginutils@1.0.0-beta.27': resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==} @@ -604,12 +629,21 @@ packages: '@types/babel__traverse@7.28.0': resolution: {integrity: sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==} + '@types/chai@5.2.2': + resolution: {integrity: sha512-8kB30R7Hwqf40JPiKhVzodJs2Qc1ZJ5zuT3uzw5Hq/dhNCl3G3l83jfpdI1e20BP348+fV7VIL/+FxaXkqBmWg==} + + '@types/deep-eql@4.0.2': + resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + '@types/estree@1.0.8': resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} '@types/json-schema@7.0.15': resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} + '@types/node@24.2.1': + resolution: {integrity: sha512-DRh5K+ka5eJic8CjH7td8QpYEV6Zo10gfRkjHCO3weqZHWDtAaSTFtl4+VMqOJ4N5jcuhZ9/l+yy8rVgw7BQeQ==} + '@types/react-dom@19.1.7': resolution: {integrity: sha512-i5ZzwYpqjmrKenzkoLM2Ibzt6mAsM7pxB6BCIouEVVmgiqaMj1TjaK7hnA36hbW5aZv20kx7Lw6hWzPWg0Rurw==} peerDependencies: @@ -683,6 +717,44 @@ packages: peerDependencies: vite: ^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 + '@vitest/coverage-v8@3.2.4': + resolution: {integrity: sha512-EyF9SXU6kS5Ku/U82E259WSnvg6c8KTjppUncuNdm5QHpe17mwREHnjDzozC8x9MZ0xfBUFSaLkRv4TMA75ALQ==} + peerDependencies: + '@vitest/browser': 3.2.4 + vitest: 3.2.4 + peerDependenciesMeta: + '@vitest/browser': + optional: true + + '@vitest/expect@3.2.4': + resolution: {integrity: sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==} + + '@vitest/mocker@3.2.4': + resolution: {integrity: 
sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==} + peerDependencies: + msw: ^2.4.9 + vite: ^5.0.0 || ^6.0.0 || ^7.0.0-0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + + '@vitest/pretty-format@3.2.4': + resolution: {integrity: sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==} + + '@vitest/runner@3.2.4': + resolution: {integrity: sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==} + + '@vitest/snapshot@3.2.4': + resolution: {integrity: sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==} + + '@vitest/spy@3.2.4': + resolution: {integrity: sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==} + + '@vitest/utils@3.2.4': + resolution: {integrity: sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==} + acorn-jsx@5.3.2: resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} peerDependencies: @@ -696,13 +768,32 @@ packages: ajv@6.12.6: resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} + ansi-regex@5.0.1: + resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} + engines: {node: '>=8'} + + ansi-regex@6.1.0: + resolution: {integrity: sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==} + engines: {node: '>=12'} + ansi-styles@4.3.0: resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} engines: {node: '>=8'} + ansi-styles@6.2.1: + resolution: {integrity: sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==} + engines: {node: '>=12'} + 
argparse@2.0.1: resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} + assertion-error@2.0.1: + resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} + engines: {node: '>=12'} + + ast-v8-to-istanbul@0.3.4: + resolution: {integrity: sha512-cxrAnZNLBnQwBPByK4CeDaw5sWZtMilJE/Q3iDA0aamgaIVNDF9T6K2/8DfYDZEejZ2jNnDrG9m8MY72HFd0KA==} + balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} @@ -721,6 +812,10 @@ packages: engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} hasBin: true + cac@6.7.14: + resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} + engines: {node: '>=8'} + callsites@3.1.0: resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} engines: {node: '>=6'} @@ -728,10 +823,18 @@ packages: caniuse-lite@1.0.30001731: resolution: {integrity: sha512-lDdp2/wrOmTRWuoB5DpfNkC0rJDU8DqRa6nYL6HK6sytw70QMopt/NIc/9SM7ylItlBWfACXk0tEn37UWM/+mg==} + chai@5.2.1: + resolution: {integrity: sha512-5nFxhUrX0PqtyogoYOA8IPswy5sZFTOsBFl/9bNsmDLgsxYTzSZQJDPppDnZPTQbzSEm0hqGjWPzRemQCYbD6A==} + engines: {node: '>=18'} + chalk@4.1.2: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} + check-error@2.1.1: + resolution: {integrity: sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==} + engines: {node: '>= 16'} + chownr@3.0.0: resolution: {integrity: sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==} engines: {node: '>=18'} @@ -769,6 +872,10 @@ packages: supports-color: optional: true + deep-eql@5.0.2: + resolution: {integrity: 
sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==} + engines: {node: '>=6'} + deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} @@ -776,13 +883,25 @@ packages: resolution: {integrity: sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==} engines: {node: '>=8'} + eastasianwidth@0.2.0: + resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} + electron-to-chromium@1.5.195: resolution: {integrity: sha512-URclP0iIaDUzqcAyV1v2PgduJ9N0IdXmWsnPzPfelvBmjmZzEy6xJcjb1cXj+TbYqXgtLrjHEoaSIdTYhw4ezg==} + emoji-regex@8.0.0: + resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + + emoji-regex@9.2.2: + resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==} + enhanced-resolve@5.18.2: resolution: {integrity: sha512-6Jw4sE1maoRJo3q8MsSIn2onJFbLTOjY9hlx4DZXmOKvLRd1Ok2kXmAGXaafL2+ijsJZ1ClYbl/pmqr9+k4iUQ==} engines: {node: '>=10.13.0'} + es-module-lexer@1.7.0: + resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} + esbuild@0.25.8: resolution: {integrity: sha512-vVC0USHGtMi8+R4Kz8rt6JhEWLxsv9Rnu/lGYbPR8u47B+DCBksq9JarW0zOO7bs37hyOK1l2/oqtbciutL5+Q==} engines: {node: '>=18'} @@ -845,10 +964,17 @@ packages: resolution: {integrity: sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==} engines: {node: '>=4.0'} + estree-walker@3.0.3: + resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + esutils@2.0.3: resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==} engines: {node: 
'>=0.10.0'} + expect-type@1.2.2: + resolution: {integrity: sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==} + engines: {node: '>=12.0.0'} + fast-deep-equal@3.1.3: resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} @@ -892,6 +1018,10 @@ packages: flatted@3.3.3: resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==} + foreground-child@3.3.1: + resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==} + engines: {node: '>=14'} + fsevents@2.3.3: resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -909,6 +1039,10 @@ packages: resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==} engines: {node: '>=10.13.0'} + glob@10.4.5: + resolution: {integrity: sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==} + hasBin: true + globals@14.0.0: resolution: {integrity: sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==} engines: {node: '>=18'} @@ -927,6 +1061,9 @@ packages: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} + html-escaper@2.0.2: + resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + ignore@5.3.2: resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==} engines: {node: '>= 4'} @@ -947,6 +1084,10 @@ packages: resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==} 
engines: {node: '>=0.10.0'} + is-fullwidth-code-point@3.0.0: + resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} + engines: {node: '>=8'} + is-glob@4.0.3: resolution: {integrity: sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==} engines: {node: '>=0.10.0'} @@ -958,6 +1099,25 @@ packages: isexe@2.0.0: resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + istanbul-lib-coverage@3.2.2: + resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} + engines: {node: '>=8'} + + istanbul-lib-report@3.0.1: + resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==} + engines: {node: '>=10'} + + istanbul-lib-source-maps@5.0.6: + resolution: {integrity: sha512-yg2d+Em4KizZC5niWhQaIomgf5WlL4vOOjZ5xGCmF8SnPE/mDWWXgvRExdcpCgh9lLRRa1/fSYp2ymmbJ1pI+A==} + engines: {node: '>=10'} + + istanbul-reports@3.1.7: + resolution: {integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==} + engines: {node: '>=8'} + + jackspeak@3.4.3: + resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} + jiti@2.5.1: resolution: {integrity: sha512-twQoecYPiVA5K/h6SxtORw/Bs3ar+mLUtoPSc7iMXzQzK8d7eJ/R09wmTwAjiamETn1cXYPGfNnu7DMoHgu12w==} hasBin: true @@ -965,6 +1125,9 @@ packages: js-tokens@4.0.0: resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} + js-tokens@9.0.1: + resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} + js-yaml@4.1.0: resolution: {integrity: 
sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==} hasBin: true @@ -1066,12 +1229,25 @@ packages: lodash.merge@4.6.2: resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==} + loupe@3.2.0: + resolution: {integrity: sha512-2NCfZcT5VGVNX9mSZIxLRkEAegDGBpuQZBy13desuHeVORmBDyAET4TkJr4SjqQy3A8JDofMN6LpkK8Xcm/dlw==} + + lru-cache@10.4.3: + resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==} + lru-cache@5.1.1: resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==} magic-string@0.30.17: resolution: {integrity: sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==} + magicast@0.3.5: + resolution: {integrity: sha512-L0WhttDl+2BOsybvEOLK7fW3UA0OQ0IQ2d6Zl2x/a6vVRs3bAY0ECOSHHeL5jD+SbOpOCUEi0y1DgHEn9Qn1AQ==} + + make-dir@4.0.0: + resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} + engines: {node: '>=10'} + merge2@1.4.1: resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==} engines: {node: '>= 8'} @@ -1155,6 +1331,9 @@ packages: resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} engines: {node: '>=10'} + package-json-from-dist@1.0.1: + resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} + parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} engines: {node: '>=6'} @@ -1167,6 +1346,17 @@ packages: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} engines: {node: '>=8'} + 
path-scurry@1.11.1: + resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==} + engines: {node: '>=16 || 14 >=14.18'} + + pathe@2.0.3: + resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} + + pathval@2.0.1: + resolution: {integrity: sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==} + engines: {node: '>= 14.16'} + picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -1262,14 +1452,46 @@ packages: resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} engines: {node: '>=8'} + siginfo@2.0.0: + resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} + + signal-exit@4.1.0: + resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} + engines: {node: '>=14'} + source-map-js@1.2.1: resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} engines: {node: '>=0.10.0'} + stackback@0.0.2: + resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} + + std-env@3.9.0: + resolution: {integrity: sha512-UGvjygr6F6tpH7o2qyqR6QYpwraIjKSdtzyBdyytFOHmPZY917kwdwLG0RbOjWOnKmnm3PeHjaoLLMie7kPLQw==} + + string-width@4.2.3: + resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} + engines: {node: '>=8'} + + string-width@5.1.2: + resolution: {integrity: sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==} + engines: {node: '>=12'} + + strip-ansi@6.0.1: + resolution: {integrity: 
sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} + engines: {node: '>=8'} + + strip-ansi@7.1.0: + resolution: {integrity: sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==} + engines: {node: '>=12'} + strip-json-comments@3.1.1: resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} engines: {node: '>=8'} + strip-literal@3.0.0: + resolution: {integrity: sha512-TcccoMhJOM3OebGhSBEmp3UZ2SfDMZUEBdRA/9ynfLi8yYajyWX3JiXArcJt4Umh4vISpspkQIY8ZZoCqjbviA==} + supports-color@7.2.0: resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} engines: {node: '>=8'} @@ -1285,10 +1507,32 @@ packages: resolution: {integrity: sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==} engines: {node: '>=18'} + test-exclude@7.0.1: + resolution: {integrity: sha512-pFYqmTw68LXVjeWJMST4+borgQP2AyMNbg1BpZh9LbyhUeNkeaPF9gzfPGUAnSMV3qPYdWUwDIjjCLiSDOl7vg==} + engines: {node: '>=18'} + + tinybench@2.9.0: + resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + + tinyexec@0.3.2: + resolution: {integrity: sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==} + tinyglobby@0.2.14: resolution: {integrity: sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==} engines: {node: '>=12.0.0'} + tinypool@1.1.1: + resolution: {integrity: sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==} + engines: {node: ^18.0.0 || >=20.0.0} + + tinyrainbow@2.0.0: + resolution: {integrity: sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==} + engines: {node: '>=14.0.0'} + + tinyspy@4.0.3: + resolution: {integrity: 
sha512-t2T/WLB2WRgZ9EpE4jgPJ9w+i66UZfDc8wHh0xrwiRNN+UwH98GIJkTeZqX9rg0i0ptwzqW+uYeIF0T4F8LR7A==} + engines: {node: '>=14.0.0'} + to-regex-range@5.0.1: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} @@ -1315,6 +1559,9 @@ packages: engines: {node: '>=14.17'} hasBin: true + undici-types@7.10.0: + resolution: {integrity: sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==} + update-browserslist-db@1.1.3: resolution: {integrity: sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==} hasBin: true @@ -1329,6 +1576,11 @@ packages: peerDependencies: react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + vite-node@3.2.4: + resolution: {integrity: sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + vite@7.0.6: resolution: {integrity: sha512-MHFiOENNBd+Bd9uvc8GEsIzdkn1JxMmEeYX35tI3fv0sJBUTfW5tQsoaOwuY4KhBI09A3dUJ/DXf2yxPVPUceg==} engines: {node: ^20.19.0 || >=22.12.0} @@ -1369,15 +1621,56 @@ packages: yaml: optional: true + vitest@3.2.4: + resolution: {integrity: sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + peerDependencies: + '@edge-runtime/vm': '*' + '@types/debug': ^4.1.12 + '@types/node': ^18.0.0 || ^20.0.0 || >=22.0.0 + '@vitest/browser': 3.2.4 + '@vitest/ui': 3.2.4 + happy-dom: '*' + jsdom: '*' + peerDependenciesMeta: + '@edge-runtime/vm': + optional: true + '@types/debug': + optional: true + '@types/node': + optional: true + '@vitest/browser': + optional: true + '@vitest/ui': + optional: true + happy-dom: + optional: true + jsdom: + optional: true + which@2.0.2: resolution: {integrity: 
sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} hasBin: true + why-is-node-running@2.3.0: + resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} + engines: {node: '>=8'} + hasBin: true + word-wrap@1.2.5: resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==} engines: {node: '>=0.10.0'} + wrap-ansi@7.0.0: + resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} + engines: {node: '>=10'} + + wrap-ansi@8.1.0: + resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==} + engines: {node: '>=12'} + yallist@3.1.1: resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} @@ -1511,6 +1804,8 @@ snapshots: '@babel/helper-string-parser': 7.27.1 '@babel/helper-validator-identifier': 7.27.1 + '@bcoe/v8-coverage@1.0.2': {} + '@esbuild/aix-ppc64@0.25.8': optional: true @@ -1646,10 +1941,21 @@ snapshots: '@humanwhocodes/retry@0.4.3': {} + '@isaacs/cliui@8.0.2': + dependencies: + string-width: 5.1.2 + string-width-cjs: string-width@4.2.3 + strip-ansi: 7.1.0 + strip-ansi-cjs: strip-ansi@6.0.1 + wrap-ansi: 8.1.0 + wrap-ansi-cjs: wrap-ansi@7.0.0 + '@isaacs/fs-minipass@4.0.1': dependencies: minipass: 7.1.2 + '@istanbuljs/schema@0.1.3': {} + '@jridgewell/gen-mapping@0.3.12': dependencies: '@jridgewell/sourcemap-codec': 1.5.4 @@ -1676,6 +1982,9 @@ snapshots: '@nodelib/fs.scandir': 2.1.5 fastq: 1.19.1 + '@pkgjs/parseargs@0.11.0': + optional: true + '@rolldown/pluginutils@1.0.0-beta.27': {} '@rollup/rollup-android-arm-eabi@4.46.2': @@ -1802,12 +2111,12 @@ snapshots: '@tailwindcss/oxide-win32-arm64-msvc': 4.1.11 '@tailwindcss/oxide-win32-x64-msvc': 4.1.11 - 
'@tailwindcss/vite@4.1.11(vite@7.0.6(jiti@2.5.1)(lightningcss@1.30.1))': + '@tailwindcss/vite@4.1.11(vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1))': dependencies: '@tailwindcss/node': 4.1.11 '@tailwindcss/oxide': 4.1.11 tailwindcss: 4.1.11 - vite: 7.0.6(jiti@2.5.1)(lightningcss@1.30.1) + vite: 7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) '@types/babel__core@7.20.5': dependencies: @@ -1830,10 +2139,20 @@ snapshots: dependencies: '@babel/types': 7.28.2 + '@types/chai@5.2.2': + dependencies: + '@types/deep-eql': 4.0.2 + + '@types/deep-eql@4.0.2': {} + '@types/estree@1.0.8': {} '@types/json-schema@7.0.15': {} + '@types/node@24.2.1': + dependencies: + undici-types: 7.10.0 + '@types/react-dom@19.1.7(@types/react@19.1.9)': dependencies: '@types/react': 19.1.9 @@ -1935,7 +2254,7 @@ snapshots: '@typescript-eslint/types': 8.39.0 eslint-visitor-keys: 4.2.1 - '@vitejs/plugin-react@4.7.0(vite@7.0.6(jiti@2.5.1)(lightningcss@1.30.1))': + '@vitejs/plugin-react@4.7.0(vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1))': dependencies: '@babel/core': 7.28.0 '@babel/plugin-transform-react-jsx-self': 7.27.1(@babel/core@7.28.0) @@ -1943,10 +2262,71 @@ snapshots: '@rolldown/pluginutils': 1.0.0-beta.27 '@types/babel__core': 7.20.5 react-refresh: 0.17.0 - vite: 7.0.6(jiti@2.5.1)(lightningcss@1.30.1) + vite: 7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) transitivePeerDependencies: - supports-color + '@vitest/coverage-v8@3.2.4(vitest@3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1))': + dependencies: + '@ampproject/remapping': 2.3.0 + '@bcoe/v8-coverage': 1.0.2 + ast-v8-to-istanbul: 0.3.4 + debug: 4.4.1 + istanbul-lib-coverage: 3.2.2 + istanbul-lib-report: 3.0.1 + istanbul-lib-source-maps: 5.0.6 + istanbul-reports: 3.1.7 + magic-string: 0.30.17 + magicast: 0.3.5 + std-env: 3.9.0 + test-exclude: 7.0.1 + tinyrainbow: 2.0.0 + vitest: 3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) + transitivePeerDependencies: + - 
supports-color + + '@vitest/expect@3.2.4': + dependencies: + '@types/chai': 5.2.2 + '@vitest/spy': 3.2.4 + '@vitest/utils': 3.2.4 + chai: 5.2.1 + tinyrainbow: 2.0.0 + + '@vitest/mocker@3.2.4(vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1))': + dependencies: + '@vitest/spy': 3.2.4 + estree-walker: 3.0.3 + magic-string: 0.30.17 + optionalDependencies: + vite: 7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) + + '@vitest/pretty-format@3.2.4': + dependencies: + tinyrainbow: 2.0.0 + + '@vitest/runner@3.2.4': + dependencies: + '@vitest/utils': 3.2.4 + pathe: 2.0.3 + strip-literal: 3.0.0 + + '@vitest/snapshot@3.2.4': + dependencies: + '@vitest/pretty-format': 3.2.4 + magic-string: 0.30.17 + pathe: 2.0.3 + + '@vitest/spy@3.2.4': + dependencies: + tinyspy: 4.0.3 + + '@vitest/utils@3.2.4': + dependencies: + '@vitest/pretty-format': 3.2.4 + loupe: 3.2.0 + tinyrainbow: 2.0.0 + acorn-jsx@5.3.2(acorn@8.15.0): dependencies: acorn: 8.15.0 @@ -1960,12 +2340,26 @@ snapshots: json-schema-traverse: 0.4.1 uri-js: 4.4.1 + ansi-regex@5.0.1: {} + + ansi-regex@6.1.0: {} + ansi-styles@4.3.0: dependencies: color-convert: 2.0.1 + ansi-styles@6.2.1: {} + argparse@2.0.1: {} + assertion-error@2.0.1: {} + + ast-v8-to-istanbul@0.3.4: + dependencies: + '@jridgewell/trace-mapping': 0.3.29 + estree-walker: 3.0.3 + js-tokens: 9.0.1 + balanced-match@1.0.2: {} brace-expansion@1.1.12: @@ -1988,15 +2382,27 @@ snapshots: node-releases: 2.0.19 update-browserslist-db: 1.1.3(browserslist@4.25.1) + cac@6.7.14: {} + callsites@3.1.0: {} caniuse-lite@1.0.30001731: {} + chai@5.2.1: + dependencies: + assertion-error: 2.0.1 + check-error: 2.1.1 + deep-eql: 5.0.2 + loupe: 3.2.0 + pathval: 2.0.1 + chalk@4.1.2: dependencies: ansi-styles: 4.3.0 supports-color: 7.2.0 + check-error@2.1.1: {} + chownr@3.0.0: {} color-convert@2.0.1: @@ -2023,17 +2429,27 @@ snapshots: dependencies: ms: 2.1.3 + deep-eql@5.0.2: {} + deep-is@0.1.4: {} detect-libc@2.0.4: {} + eastasianwidth@0.2.0: {} + 
electron-to-chromium@1.5.195: {} + emoji-regex@8.0.0: {} + + emoji-regex@9.2.2: {} + enhanced-resolve@5.18.2: dependencies: graceful-fs: 4.2.11 tapable: 2.2.2 + es-module-lexer@1.7.0: {} + esbuild@0.25.8: optionalDependencies: '@esbuild/aix-ppc64': 0.25.8 @@ -2142,8 +2558,14 @@ snapshots: estraverse@5.3.0: {} + estree-walker@3.0.3: + dependencies: + '@types/estree': 1.0.8 + esutils@2.0.3: {} + expect-type@1.2.2: {} + fast-deep-equal@3.1.3: {} fast-glob@3.3.3: @@ -2186,6 +2608,11 @@ snapshots: flatted@3.3.3: {} + foreground-child@3.3.1: + dependencies: + cross-spawn: 7.0.6 + signal-exit: 4.1.0 + fsevents@2.3.3: optional: true @@ -2199,6 +2626,15 @@ snapshots: dependencies: is-glob: 4.0.3 + glob@10.4.5: + dependencies: + foreground-child: 3.3.1 + jackspeak: 3.4.3 + minimatch: 9.0.5 + minipass: 7.1.2 + package-json-from-dist: 1.0.1 + path-scurry: 1.11.1 + globals@14.0.0: {} globals@16.3.0: {} @@ -2209,6 +2645,8 @@ snapshots: has-flag@4.0.0: {} + html-escaper@2.0.2: {} + ignore@5.3.2: {} ignore@7.0.5: {} @@ -2222,6 +2660,8 @@ snapshots: is-extglob@2.1.1: {} + is-fullwidth-code-point@3.0.0: {} + is-glob@4.0.3: dependencies: is-extglob: 2.1.1 @@ -2230,10 +2670,39 @@ snapshots: isexe@2.0.0: {} + istanbul-lib-coverage@3.2.2: {} + + istanbul-lib-report@3.0.1: + dependencies: + istanbul-lib-coverage: 3.2.2 + make-dir: 4.0.0 + supports-color: 7.2.0 + + istanbul-lib-source-maps@5.0.6: + dependencies: + '@jridgewell/trace-mapping': 0.3.29 + debug: 4.4.1 + istanbul-lib-coverage: 3.2.2 + transitivePeerDependencies: + - supports-color + + istanbul-reports@3.1.7: + dependencies: + html-escaper: 2.0.2 + istanbul-lib-report: 3.0.1 + + jackspeak@3.4.3: + dependencies: + '@isaacs/cliui': 8.0.2 + optionalDependencies: + '@pkgjs/parseargs': 0.11.0 + jiti@2.5.1: {} js-tokens@4.0.0: {} + js-tokens@9.0.1: {} + js-yaml@4.1.0: dependencies: argparse: 2.0.1 @@ -2308,6 +2777,10 @@ snapshots: lodash.merge@4.6.2: {} + loupe@3.2.0: {} + + lru-cache@10.4.3: {} + lru-cache@5.1.1: dependencies: 
yallist: 3.1.1 @@ -2316,6 +2789,16 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.4 + magicast@0.3.5: + dependencies: + '@babel/parser': 7.28.0 + '@babel/types': 7.28.2 + source-map-js: 1.2.1 + + make-dir@4.0.0: + dependencies: + semver: 7.7.2 + merge2@1.4.1: {} micromatch@4.0.8: @@ -2382,6 +2865,8 @@ snapshots: dependencies: p-limit: 3.1.0 + package-json-from-dist@1.0.1: {} + parent-module@1.0.1: dependencies: callsites: 3.1.0 @@ -2390,6 +2875,15 @@ snapshots: path-key@3.1.1: {} + path-scurry@1.11.1: + dependencies: + lru-cache: 10.4.3 + minipass: 7.1.2 + + pathe@2.0.3: {} + + pathval@2.0.1: {} + picocolors@1.1.1: {} picomatch@2.3.1: {} @@ -2479,10 +2973,42 @@ snapshots: shebang-regex@3.0.0: {} + siginfo@2.0.0: {} + + signal-exit@4.1.0: {} + source-map-js@1.2.1: {} + stackback@0.0.2: {} + + std-env@3.9.0: {} + + string-width@4.2.3: + dependencies: + emoji-regex: 8.0.0 + is-fullwidth-code-point: 3.0.0 + strip-ansi: 6.0.1 + + string-width@5.1.2: + dependencies: + eastasianwidth: 0.2.0 + emoji-regex: 9.2.2 + strip-ansi: 7.1.0 + + strip-ansi@6.0.1: + dependencies: + ansi-regex: 5.0.1 + + strip-ansi@7.1.0: + dependencies: + ansi-regex: 6.1.0 + strip-json-comments@3.1.1: {} + strip-literal@3.0.0: + dependencies: + js-tokens: 9.0.1 + supports-color@7.2.0: dependencies: has-flag: 4.0.0 @@ -2500,11 +3026,27 @@ snapshots: mkdirp: 3.0.1 yallist: 5.0.0 + test-exclude@7.0.1: + dependencies: + '@istanbuljs/schema': 0.1.3 + glob: 10.4.5 + minimatch: 9.0.5 + + tinybench@2.9.0: {} + + tinyexec@0.3.2: {} + tinyglobby@0.2.14: dependencies: fdir: 6.4.6(picomatch@4.0.3) picomatch: 4.0.3 + tinypool@1.1.1: {} + + tinyrainbow@2.0.0: {} + + tinyspy@4.0.3: {} + to-regex-range@5.0.1: dependencies: is-number: 7.0.0 @@ -2530,6 +3072,8 @@ snapshots: typescript@5.8.3: {} + undici-types@7.10.0: {} + update-browserslist-db@1.1.3(browserslist@4.25.1): dependencies: browserslist: 4.25.1 @@ -2544,7 +3088,28 @@ snapshots: dependencies: react: 19.1.1 - 
vite@7.0.6(jiti@2.5.1)(lightningcss@1.30.1): + vite-node@3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1): + dependencies: + cac: 6.7.14 + debug: 4.4.1 + es-module-lexer: 1.7.0 + pathe: 2.0.3 + vite: 7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) + transitivePeerDependencies: + - '@types/node' + - jiti + - less + - lightningcss + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + + vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1): dependencies: esbuild: 0.25.8 fdir: 6.4.6(picomatch@4.0.3) @@ -2553,16 +3118,75 @@ snapshots: rollup: 4.46.2 tinyglobby: 0.2.14 optionalDependencies: + '@types/node': 24.2.1 fsevents: 2.3.3 jiti: 2.5.1 lightningcss: 1.30.1 + vitest@3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1): + dependencies: + '@types/chai': 5.2.2 + '@vitest/expect': 3.2.4 + '@vitest/mocker': 3.2.4(vite@7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1)) + '@vitest/pretty-format': 3.2.4 + '@vitest/runner': 3.2.4 + '@vitest/snapshot': 3.2.4 + '@vitest/spy': 3.2.4 + '@vitest/utils': 3.2.4 + chai: 5.2.1 + debug: 4.4.1 + expect-type: 1.2.2 + magic-string: 0.30.17 + pathe: 2.0.3 + picomatch: 4.0.3 + std-env: 3.9.0 + tinybench: 2.9.0 + tinyexec: 0.3.2 + tinyglobby: 0.2.14 + tinypool: 1.1.1 + tinyrainbow: 2.0.0 + vite: 7.0.6(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) + vite-node: 3.2.4(@types/node@24.2.1)(jiti@2.5.1)(lightningcss@1.30.1) + why-is-node-running: 2.3.0 + optionalDependencies: + '@types/node': 24.2.1 + transitivePeerDependencies: + - jiti + - less + - lightningcss + - msw + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + which@2.0.2: dependencies: isexe: 2.0.0 + why-is-node-running@2.3.0: + dependencies: + siginfo: 2.0.0 + stackback: 0.0.2 + word-wrap@1.2.5: {} + wrap-ansi@7.0.0: + dependencies: + ansi-styles: 4.3.0 + string-width: 4.2.3 + strip-ansi: 6.0.1 + + wrap-ansi@8.1.0: + dependencies: + 
ansi-styles: 6.2.1 + string-width: 5.1.2 + strip-ansi: 7.1.0 + yallist@3.1.1: {} yallist@5.0.0: {} diff --git a/vite-app/src/App.tsx b/vite-app/src/App.tsx index c274e420..e4f4912b 100644 --- a/vite-app/src/App.tsx +++ b/vite-app/src/App.tsx @@ -39,7 +39,7 @@ const App = observer(() => { ); if (update.type === "initialize_logs") { const rows: EvaluationRow[] = update.logs.map((log) => { - return EvaluationRowSchema.parse(JSON.parse(log)); + return EvaluationRowSchema.parse(log); }); console.log("initialize_logs", rows); state.setDataset(rows); @@ -124,7 +124,13 @@ const App = observer(() => {
- Eval Protocol + + Eval Protocol +
void; @@ -49,6 +52,14 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { const expandAll = () => state.setAllRowsExpanded(true); const collapseAll = () => state.setAllRowsExpanded(false); + const [activeTab, setActiveTab] = useState<"table" | "pivot">("table"); + + const flattened = useMemo(() => { + const flattenedDataset = state.sortedDataset.map((row) => flattenJson(row)); + console.log(flattenedDataset); + return flattenedDataset; + }, [state.sortedDataset]); + return (
{/* Summary Stats */} @@ -59,11 +70,19 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { {state.totalCount > 0 && (
- -
)} @@ -73,14 +92,51 @@ const Dashboard = observer(({ onRefresh }: DashboardProps) => { Total Rows:{" "} {state.totalCount}
+ {activeTab === "table" && state.totalCount > 0 && ( +
+ + +
+ )}
{/* Show empty state or main table */} {state.totalCount === 0 ? ( - ) : ( + ) : activeTab === "table" ? ( + ) : ( +
+
+ Showing pivot of flattened rows (JSONPath keys). Defaults: rows by + eval name and status; columns by model; values average score. +
+ +
)}
); diff --git a/vite-app/src/components/PivotTable.tsx b/vite-app/src/components/PivotTable.tsx new file mode 100644 index 00000000..ffb6612a --- /dev/null +++ b/vite-app/src/components/PivotTable.tsx @@ -0,0 +1,199 @@ +import React from "react"; +import { computePivot } from "../util/pivot"; + +/** + * Props for PivotTable. + */ +export interface PivotTableProps> { + /** + * Source list of records to pivot. + * Each record must expose the fields referenced by rowFields/columnFields/valueField. + */ + data: T[]; + /** + * Ordered list of record keys used to group rows. + * Example: ["region", "rep"] or flattened JSONPath keys if using a flattener. + */ + rowFields: (keyof T)[]; + /** + * Ordered list of record keys used to group columns. + * Example: ["product"] or flattened JSONPath keys if using a flattener. + */ + columnFields: (keyof T)[]; + /** + * Record key containing the numeric value to aggregate per cell. + * If omitted, aggregator defaults to counting records ("count"). + */ + valueField?: keyof T; + /** + * Aggregation strategy. Built-ins: "count" | "sum" | "avg". Custom function allowed. + * Default: "count". When using "sum"/"avg" or a custom function, numeric values are + * extracted from valueField (if provided) and coerced via Number(). Non-finite values are ignored. + */ + aggregator?: Parameters>[0]["aggregator"]; + /** + * Whether to render a right-most total column per row. Default: true. + */ + showRowTotals?: boolean; + /** + * Whether to render a bottom total row per column (plus grand total if showRowTotals). Default: true. + */ + showColumnTotals?: boolean; + /** + * Optional extra class names applied to the wrapping container. + */ + className?: string; + /** + * Formatter applied to aggregated numeric values before rendering. + * Default: toLocaleString with up to 3 fraction digits. + */ + formatter?: (value: number) => React.ReactNode; + /** + * Value to render when a cell has no data for the given row/column intersection. 
+ * Default: "-". + */ + emptyValue?: React.ReactNode; +} + +function toKey(parts: unknown[]): string { + return parts.map((p) => String(p)).join("||"); +} + +// removed local aggregation helpers; logic is in util/pivot.ts for testability + +/** + * Compact, generic pivot table component that renders a pivoted summary of arbitrary records. + * Styling matches other components: white background, subtle borders, compact paddings. + */ +export function PivotTable>({ + data, + rowFields, + columnFields, + valueField, + aggregator = "count", + showRowTotals = true, + showColumnTotals = true, + className = "", + formatter = (v) => v.toLocaleString(undefined, { maximumFractionDigits: 3 }), + emptyValue = "-", +}: PivotTableProps) { + const { + rowKeyTuples, + colKeyTuples, + cells, + rowTotals, + colTotals, + grandTotal, + } = computePivot({ + data, + rowFields, + columnFields, + valueField, + aggregator, + }); + + return ( +
+ + + + {/* Row header labels */} + {rowFields.map((f) => ( + + ))} + {/* Column headers (flattened) */} + {colKeyTuples.map((tuple, idx) => ( + + ))} + {showRowTotals && ( + + )} + + + + {rowKeyTuples.map((rTuple, rIdx) => { + const rKey = toKey(rTuple); + return ( + + {/* Row header cells */} + {rTuple.map((value, i) => ( + + ))} + {/* Data cells */} + {colKeyTuples.map((cTuple, cIdx) => { + const cKey = toKey(cTuple); + const cell = cells[rKey]?.[cKey]; + const content = cell ? formatter(cell.value) : emptyValue; + return ( + + ); + })} + {/* Row total */} + {showRowTotals && ( + + )} + + ); + })} + {showColumnTotals && ( + + {/* Total label spanning row header columns */} + + {/* Column totals */} + {colKeyTuples.map((cTuple, cIdx) => { + const cKey = toKey(cTuple); + return ( + + ); + })} + {/* Grand total */} + {showRowTotals && ( + + )} + + )} + +
+ {String(f)} + + {tuple.map((v) => String(v ?? "")).join(" / ")} + + Total +
+ {String(value ?? "")} + + {content} + + {formatter(rowTotals[rKey] ?? 0)} +
+ Total + + {formatter(colTotals[cKey] ?? 0)} + + {formatter(grandTotal)} +
+
+ ); +} + +export default PivotTable; diff --git a/vite-app/src/types/eval-protocol.ts b/vite-app/src/types/eval-protocol.ts index f57f3d26..800af414 100644 --- a/vite-app/src/types/eval-protocol.ts +++ b/vite-app/src/types/eval-protocol.ts @@ -62,7 +62,7 @@ export const CompletionParamsSchema = z.object({ }); export const InputMetadataSchema = z.object({ - row_id: z.string().describe('Unique string to ID the row'), + row_id: z.string().optional().describe('Unique string to ID the row'), completion_params: CompletionParamsSchema.optional().describe('Completion endpoint parameters used'), dataset_info: z.record(z.string(), z.any()).optional().describe('Dataset row details: seed, system_prompt, environment_context, etc'), session_data: z.record(z.string(), z.any()).optional().describe('Session metadata like timestamp (input only, no duration/usage)') @@ -78,17 +78,31 @@ export const EvalMetadataSchema = z.object({ name: z.string().describe('Name of the evaluation'), description: z.string().optional().describe('Description of the evaluation'), version: z.string().describe('Version of the evaluation. 
By default, we will populate this with the current commit hash.'), - status: z.enum(['running', 'finished', 'error', 'stopped']).default('running').describe('Status of the evaluation'), + status: z.enum(['running', 'finished', 'error', 'stopped']).optional().describe('Status of the evaluation'), num_runs: z.number().int().describe('Number of times the evaluation was repeated'), aggregation_method: z.string().describe('Method used to aggregate scores across runs'), threshold_of_success: z.number().optional().describe('Threshold score for test success'), passed: z.boolean().optional().describe('Whether the evaluation passed based on the threshold') }); +// Rollout status model (matches Python RolloutStatus) +export const RolloutStatusSchema = z.object({ + status: z + .enum(['running', 'finished', 'error', 'stopped']) + .default('finished') + .describe('Status of the rollout.'), + error_message: z.string().optional().describe('Error message if the rollout failed.') +}); + export const EvaluationRowSchema = z.object({ messages: z.array(MessageSchema).describe('List of messages in the conversation/trajectory.'), tools: z.array(z.record(z.string(), z.any())).optional().describe('Available tools/functions that were provided to the agent.'), input_metadata: InputMetadataSchema.describe('Metadata related to the input (dataset info, model config, session data, etc.).'), + rollout_status: RolloutStatusSchema.default({ status: 'finished' }).describe('The status of the rollout.'), + invocation_id: z.string().optional().describe('The ID of the invocation that this row belongs to.'), + cohort_id: z.string().optional().describe('The ID of the cohort that this row belongs to.'), + rollout_id: z.string().optional().describe('The ID of the rollout that this row belongs to.'), + run_id: z.string().optional().describe('The ID of the run that this row belongs to.'), ground_truth: z.string().optional().describe('Optional ground truth reference for this evaluation.'), evaluation_result: 
EvaluateResultSchema.optional().describe('The evaluation result for this row/trajectory.'), usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'), @@ -157,6 +171,7 @@ export type InputMetadata = z.infer; export type CompletionUsage = z.infer; export type EvalMetadata = z.infer; export type EvaluationRow = z.infer; +export type RolloutStatus = z.infer; export type ResourceServerConfig = z.infer; export type EvaluationCriteriaModel = z.infer; export type TaskDefinitionModel = z.infer; diff --git a/vite-app/src/types/websocket.ts b/vite-app/src/types/websocket.ts index f4b8f247..45deefe2 100644 --- a/vite-app/src/types/websocket.ts +++ b/vite-app/src/types/websocket.ts @@ -7,7 +7,7 @@ import { EvaluationRowSchema } from './eval-protocol'; // Initialize logs message schema export const InitializeLogsMessageSchema = z.object({ type: z.literal('initialize_logs'), - logs: z.array(z.string()), + logs: z.array(z.any()), }); export const LogMessageSchema = z.object({ diff --git a/vite-app/src/util/__snapshots__/flatten-json.test.ts.snap b/vite-app/src/util/__snapshots__/flatten-json.test.ts.snap new file mode 100644 index 00000000..1498f63b --- /dev/null +++ b/vite-app/src/util/__snapshots__/flatten-json.test.ts.snap @@ -0,0 +1,9049 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`flattenJson against logs.json > flattens each entry in logs.json.logs and matches snapshot 1`] = ` +[ + { + "$.created_at": "2025-08-08T09:52:30.335005", + "$.input_metadata.row_id": "1", + "$.messages[0].content": "Hello", + "$.messages[0].role": "user", + "$.messages[1].content": "Hello", + "$.messages[1].role": "assistant", + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T09:47:43.816761", + "$.input_metadata.row_id": "row_0", + "$.messages[0].content": "Hello 0", + "$.messages[0].role": "user", + "$.rollout_status.status": "finished", + }, + { + "$.created_at": 
"2025-08-08T09:41:55.015216", + "$.input_metadata.row_id": "row_1", + "$.messages[0].content": "Hello 1", + "$.messages[0].role": "user", + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T09:41:55.015767", + "$.input_metadata.row_id": "row_2", + "$.messages[0].content": "Hello 2", + "$.messages[0].role": "user", + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T10:00:08.522553", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev12+g2dc40d5.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "describe-use-entire-person-education", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 67635, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T10:01:12.825337", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev12+g2dc40d5.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "meet-solid-big-picture-car", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 68229, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:06:51.152819", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "leave-ask-logical-face-way", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks AI. Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks AI"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How do I close my Fireworks.ai account? 
+Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? 
+We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. 
+ +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. 
+ +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Introduction +Link: https://docs.fireworks.ai/api-reference/introduction +Content: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Introduction +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Create a Deployment on Fireworks AI platform + +Create a Deployment on Fireworks AI platform + +Creates a new deployment. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How much does Fireworks cost? 
+Link: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost +Content: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +How much does Fireworks cost? +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. 
+See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx OPENAI_API_BASE "https://api.fireworks.ai/inference/v1" +setx OPENAI_API_KEY "" + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management tools and commands.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment management commands CLI"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? 
+Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Installing the CLI +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: To create and manage on-demand deployments, use our CLI tool firectl . Install the tool by following the installation guide . Verify that your installation is working and you are logged into the correct account by running: firectl whoami + +To create and manage on-demand deployments, use our CLI tool firectl . Install the tool by following the installation guide . Verify that your installation is working and you are logged into the correct account by running: firectl whoami + +To create and manage on-demand deployments, use our CLI tool firectl . Install the tool by following the installation guide . Verify that your installation is working and you are logged into the correct account by running: + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Cli overview +Link: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview +Content: Command Line Interface Reference +The Reward Kit provides a command-line interface (CLI) for common operations like previewing evaluations, deploying reward functions, and running agent evaluations. + +Command Line Interface Reference +The Reward Kit provides a command-line interface (CLI) for common operations like previewing evaluations, deploying reward functions, and running agent evaluations. 
+ +Command Overview +The Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon). + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: Using the CLI (reward-kit deploy) +Link: https://docs.fireworks.ai/evaluators/developer_guide/evaluation_workflows +Content: The reward-kit deploy command is suitable for deploying reward functions defined in script files. The --metrics-folders argument should point to the directory containing your reward function script (e.g., a main.py with the @reward_function decorator). For more details on reward-kit deploy , see the CLI Reference. + +The reward-kit deploy command is suitable for deploying reward functions defined in script files. The --metrics-folders argument should point to the directory containing your reward function script (e.g., a main.py with the @reward_function decorator). For more details on reward-kit deploy , see the CLI Reference. + +The reward-kit deploy command is suitable for deploying reward functions defined in script files. The --metrics-folders argument should point to the directory containing your reward function script (e.g., a main.py with the @reward_function decorator). # Deploy with the CLI +reward-kit deploy \\ + --id helpfulness-evaluator \\ + --metrics-folders "helpfulness=./path/to/your/metric_script_directory" \\ + --display-name "Helpfulness Evaluator" \\ + --description "Evaluates the helpfulness of responses" \\ + --force + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: Does Fireworks support custom base models? 
+Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. 
+ +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: 1. Setup +Link: https://docs.fireworks.ai/tools-sdks/python-client/the-tutorial +Content: To get started with the Fireworks AI Python SDK, you need to install the firectl CLI tool and create an API key. Install our CLI tool firectl to interact with the Fireworks AI platform. Sign in to Fireworks by running the following command: firectl signin + +To get started with the Fireworks AI Python SDK, you need to install the firectl CLI tool and create an API key. Install our CLI tool firectl to interact with the Fireworks AI platform. Sign in to Fireworks by running the following command: firectl signin + +To get started with the Fireworks AI Python SDK, you need to install the firectl CLI tool and create an API key. Install our CLI tool firectl to interact with the Fireworks AI platform. Sign in to Fireworks by running the following command: A browser window will open to the Fireworks AI login page. Once you login, your machine will be authenticated. 
Create an API key by running the following command: $ firectl create api-key --key-name "quick-start" +Key Id: key_42vAYeb7rwt9zzg1 +Display Name: quick-start +Key: fw_3ZLd.... +Secure: true +Be sure to save this key. It will not be shown again. + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Deploying a fine-tuned model serverlessly +Not all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. 
firectl create deployment [flags] + +Example +firectl create deployment falcon-7b + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.pid": 83882, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:10:27.844496", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "investigate-recent-dangerous-hope-magazine", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 84847, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:12:00.215991", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "seem-environmental-media-television-concert", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 85545, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:12:34.382113", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "treat-suppose-salty-week-plan", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 85754, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:12:46.750720", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "contain-recent-bad-plan-change", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 85851, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:15:45.037115", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "finished", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "At least one tool call was made", + "$.evaluation_result.score": 1, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "move-little-tidy-pain-work", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks! 
Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployments management create update delete scale"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Delete Deployment +Link: https://docs.fireworks.ai/api-reference/delete-deployment +Content: + + + +REST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + +query params: +hard: + boolean + Description: If true, this will perform a hard deletion. + Example: true +ignoreChecks: + boolean + Description: If true, this will ignore checks and force the deletion of a deployment that is currently +deployed and is in use. + Example: true + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {} + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How does billing and scaling work for on-demand GPU deployments? 
+Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-and-scaling-work-for-on-demand-gpu-deployments +Content: On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +How does billing and scaling work for on-demand GPU deployments? 
+On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist. + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. 
You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist. + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. + +Creates a new deployment. + +Creates a new deployment. firectl create deployment [flags] + +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: Delete a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources +Content: Deletes an model deployment. + +Deletes an model deployment. + +Deletes an model deployment. 
firectl delete deployment [flags] + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: Update Deployment +Link: https://docs.fireworks.ai/api-reference/update-deployment +Content: + + + +REST Endpoint PATCH /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + + +Request body: object + Description: N/A + Example: {"displayName":"","description":"","expireTime":"2023-11-07T05:31:56Z","minReplicaCount":123,"maxReplicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"disableDeploymentSizeValidation":true} + + +Responses: +200: object + Description: N/A + Example: 
{"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true} + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: What should I expect for deployment and scaling performance? 
+Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-should-i-expect-for-deployment-and-scaling-performance +Content: Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +What should I expect for deployment and scaling performance? +Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: delete_deployment(ignore_checks: bool = False, wait: bool = True) +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-reference +Content: Deletes the deployment associated with this LLM instance if one exists. Arguments: ignore_checks bool, optional - Whether to ignore safety checks. Defaults to False. wait bool, optional - Whether to wait for deletion to complete. Defaults to True. llm.delete_deployment(ignore_checks=True) + +Deletes the deployment associated with this LLM instance if one exists. Arguments: ignore_checks bool, optional - Whether to ignore safety checks. Defaults to False. 
wait bool, optional - Whether to wait for deletion to complete. Defaults to True. + +scale_to_zero() +Sends a request to scale the deployment to 0 replicas but does not wait for it to complete. Returns: The deployment object, or None if no deployment exists deployment = llm.scale_to_zero() + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How does billing work for on-demand deployments? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-work-for-on-demand-deployments +Content: On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. 
+ +On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. + +How does billing work for on-demand deployments? +On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. 
+ +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Create Deployment +Link: https://docs.fireworks.ai/api-reference/create-deployment +Content: + + + +REST Endpoint POST /v1/accounts/{account_id}/deployments + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" + +query params: +disableAutoDeploy: + boolean + Description: By default, a deployment created with a currently undeployed base model +will be deployed to this deployment. If true, this auto-deploy function +is disabled. + Example: true +disableSpeculativeDecoding: + boolean + Description: By default, a deployment will use the speculative decoding settings from +the base model. If true, this will disable speculative decoding. + Example: true +deploymentId: + string + Description: The ID of the deployment. If not specified, a random ID will be generated. 
+ Example: "" + + +Request body: object + Description: N/A + Example: {"displayName":"","description":"","expireTime":"2023-11-07T05:31:56Z","minReplicaCount":123,"maxReplicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"disableDeploymentSizeValidation":true} + + +Responses: +200: object + Description: N/A + Example: {"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true} + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment creation and management workflows.", + "$.messages[4].role": 
"assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment management commands create update list"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Creating an on-demand deployment +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. 
If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Publishing a deployed model +By default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. 
You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model="" AND deployment=""' + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. 
While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +firectl +firectl create dataset /path/to/jsonl/file + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. 
+ +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: firectl +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm +Content: # Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +# Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +firectl create sftj \\ + --base-model accounts/fireworks/models/qwen2p5-vl-32b-instruct \\ + --dataset my-vlm-dataset \\ + --output-model my-custom-vlm \\ + --epochs 3 For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings . + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: Uploading the model locally (firectl) +Link: https://docs.fireworks.ai/models/uploading-custom-models +Content: To upload a custom base model, run the following command. firectl create model /path/to/files/ + +To upload a custom base model, run the following command. firectl create model /path/to/files/ + +Publishing +By default, all models you create are only visible to and deployable by users within your account. To publish a model so anyone with a Fireworks account can deploy it, you can create it with the --public flag. This will allow it to show up in public model lists. 
To unpublish the model, just run firectl update model --public=false + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: firectl +Link: https://docs.fireworks.ai/models/quantization +Content: firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8 + +firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8 + +Creating an FP8 deployment +By default, creating a dedicated deployment will use the FP16 checkpoint. To see what precisions are available for a +model, run: get model + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Based on my research, here's a comprehensive guide to managing deployments on Fireworks: + +## Deployment Management on Fireworks + +### 1. Creating Deployments + +**Basic deployment creation:** +\`\`\`bash +# Create a deployment from the model library +firectl create deployment accounts/fireworks/models/ + +# Create a deployment for a custom model +firectl create deployment + +# Create with specific configuration +firectl create deployment --wait +\`\`\` + +**Advanced deployment options:** +\`\`\`bash +# Deploy with specific hardware requirements +firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8 + +# Deploy LoRA fine-tuned models (single command) +firectl create deployment "accounts/fireworks/models/" + +# Deploy with addons enabled (for multi-LoRA) +firectl create deployment --enable-addons +\`\`\` + +### 2. 
Monitoring Deployments + +**Check deployment status:** +\`\`\`bash +# List all deployments +firectl list deployments + +# Get specific deployment details +firectl get deployment accounts//deployments/ + +# Check if deployment is ready +# The state field should show "READY" +\`\`\` + +**Verify deployed models:** +\`\`\`bash +# List deployed models +firectl list deployed-models + +# Find specific deployed model +firectl list deployed-models --filter 'model="" AND deployment=""' +\`\`\` + +### 3. Managing Default Deployments + +**Check default deployment:** +\`\`\`bash +# See which deployment is marked as default +firectl list deployed-models +# Look for "Default: true" in the output +\`\`\` + +**Update default deployment:** +\`\`\`bash +# Update which deployment is the default +firectl update deployed-model --default +\`\`\` + +**Important note:** To delete a default deployment, you must either: +- Delete all other deployments for the same model first, OR +- Designate a different deployed model as the default + +### 4. Scaling and Cost Management + +**Autoscaling configuration:** +- **Default behavior:** Automatically scales to 0 replicas when not in use +- **Billing:** Pay only for GPU time when replicas are active +- **Manual scaling:** Use autoscaling policies to customize behavior + +**Cost optimization commands:** +\`\`\`bash +# Scale deployment to zero (cost saving) +firectl update deployment --min-replica-count 0 + +# Or using the Python SDK +from fireworks.client import Fireworks +llm = Fireworks(model="") +llm.scale_to_zero() +\`\`\` + +### 5. Updating Deployments + +**Update deployment configuration:** +\`\`\`bash +# Update display name, description, or scaling policies +firectl update deployment \\ + --display-name "New Name" \\ + --min-replica-count 1 \\ + --max-replica-count 5 +\`\`\` + +**Available update options:** +- Display name and description +- Replica counts (min/max) +- Autoscaling policies +- Expiration time +- Hardware configuration + +### 6. 
Deleting Deployments + +**Basic deletion:** +\`\`\`bash +# Delete a deployment +firectl delete deployment + +# Force deletion (ignoring safety checks) +firectl delete deployment --ignore-checks + +# Using Python SDK +llm.delete_deployment(ignore_checks=True) +\`\`\` + +**API deletion:** +\`\`\`bash +# Via REST API +curl -X DELETE \\ + https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id} \\ + -H "Authorization: Bearer YOUR_API_KEY" +\`\`\` + +### 7. Deployment Performance Expectations + +**Timeline expectations:** +- **Initial deployment:** Should complete within minutes +- **Scaling from zero:** Brief availability delays while scaling up +- **Troubleshooting:** If deployment takes >1 hour, likely indicates a crash + +### 8. Best Practices + +**Cost management:** +1. **Monitor regularly:** Check active deployments frequently +2. **Delete unused deployments:** Avoid unnecessary GPU charges +3. **Use autoscaling:** Leverage scale-to-zero for intermittent usage +4. **Consider serverless:** For low-frequency usage patterns + +**Configuration tips:** +- Use \`--wait\` flag for synchronous deployment creation +- Configure appropriate min/max replica counts for your use case +- Set expiration times for temporary deployments +- Monitor deployment status during initial creation + +### 9. Common Workflows + +**Complete deployment lifecycle:** +\`\`\`bash +# 1. Create deployment +firectl create deployment --wait + +# 2. Verify it's ready +firectl get deployment + +# 3. Use the deployment +# Make API calls to the deployment + +# 4. Monitor usage +firectl list deployments + +# 5. Scale down when not needed +firectl update deployment --min-replica-count 0 + +# 6. Delete when completely done +firectl delete deployment +\`\`\` + +This comprehensive guide covers everything from basic deployment creation to advanced cost optimization strategies. 
The key is understanding the balance between performance needs and cost efficiency through proper scaling configuration.", + "$.messages[6].role": "assistant", + "$.pid": 86994, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:19:37.022552", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "finished", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "At least one tool call was made", + "$.evaluation_result.score": 1, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "call-stand-low-night-conference", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks AI. 
Let me search for comprehensive documentation about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks AI"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How do I close my Fireworks.ai account? +Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? 
+To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. 
It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. 
You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Introduction +Link: https://docs.fireworks.ai/api-reference/introduction +Content: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Introduction +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Create a Deployment on Fireworks AI platform + +Create a Deployment on Fireworks AI platform + +Creates a new deployment. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How much does Fireworks cost? +Link: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost +Content: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +How much does Fireworks cost? +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . 
+You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx OPENAI_API_BASE "https://api.fireworks.ai/inference/v1" +setx OPENAI_API_KEY "" + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management commands and workflows.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment commands create manage update delete"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Delete a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources +Content: Deletes an model deployment. 
firectl delete deployment [flags] + +Deletes an model deployment. firectl delete deployment [flags] + +Example +firectl delete deployment my-deployment + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Create a Deployment on Fireworks AI platform + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. 
Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist. + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: Deployment Command +Link: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example +Content: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. 
Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. 
--force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: How do I close my Fireworks.ai account? 
+Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: Delete Deployment +Link: https://docs.fireworks.ai/api-reference/delete-deployment +Content: + + + +REST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + +query params: +hard: + boolean + Description: If true, this will perform a hard deletion. + Example: true +ignoreChecks: + boolean + Description: If true, this will ignore checks and force the deletion of a deployment that is currently +deployed and is in use. 
+ Example: true + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {} + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +firectl +firectl create dataset /path/to/jsonl/file + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Let me search for more information about listing and managing existing deployments.", + "$.messages[6].role": "assistant", + "$.messages[6].tool_calls[0].function.arguments": "{"query": "list deployments firectl commands deployment management"}", + "$.messages[6].tool_calls[0].function.name": "search", + "$.messages[6].tool_calls[0].id": "functions.search:0", + "$.messages[6].tool_calls[0].type": "function", + "$.messages[7].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. 
+ +", + "$.messages[7].content[0].type": "text", + "$.messages[7].content[1].text": "Title: List deployments +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/list-resources +Content: Prints all deployments in the account. firectl list deployments [flags] + +Prints all deployments in the account. firectl list deployments [flags] + +List deployed models +Prints all deployed models in an account. firectl list deployed-models [flags] + +", + "$.messages[7].content[1].type": "text", + "$.messages[7].content[2].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[7].content[2].type": "text", + "$.messages[7].content[3].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. 
You can see which deployed model entry is marked with Default: true using: firectl get model + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model + +Creating an on-demand deployment +Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +", + "$.messages[7].content[3].type": "text", + "$.messages[7].content[4].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. 
+ +", + "$.messages[7].content[4].type": "text", + "$.messages[7].content[5].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[7].content[5].type": "text", + "$.messages[7].content[6].text": "Title: List Deployments +Link: https://docs.fireworks.ai/api-reference/list-deployments +Content: + + + +REST Endpoint GET /v1/accounts/{account_id}/deployments + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" + +query params: +pageSize: + integer + Description: The maximum number of deployments to return. The maximum page_size is 200, +values above 200 will be coerced to 200. +If unspecified, the default is 50. + Example: 123 +pageToken: + string + Description: A page token, received from a previous ListDeployments call. Provide this +to retrieve the subsequent page. When paginating, all other parameters +provided to ListDeployments must match the call that provided the page +token. 
+ Example: "" +filter: + string + Description: Only deployment satisfying the provided filter (if specified) will be +returned. See https://google.aip.dev/160 for the filter grammar. + Example: "" +orderBy: + string + Description: A comma-separated list of fields to order by. e.g. "foo,bar" +The default sort order is ascending. To specify a descending order for a +field, append a " desc" suffix. e.g. "foo desc,bar" +Subfields are specified with a "." character. e.g. "foo.bar" +If not specified, the default order is by "create_time". + Example: "" +showDeleted: + boolean + Description: If set, DELETED deployments will be included. + Example: true +readMask: + string + Description: The fields to be returned in the response. If empty or "*", all fields will be returned. + Example: "" + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {"deployments":[{"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true}],"nextPageToken":"","totalSize":123} + +", + "$.messages[7].content[6].type": "text", + "$.messages[7].content[7].text": "Title: Deploy Command +Link: 
https://docs.fireworks.ai/evaluators/cli_reference/cli_overview +Content: The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +Command Overview +The Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon). + +", + "$.messages[7].content[7].type": "text", + "$.messages[7].content[8].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Deploying a fine-tuned model serverlessly +Not all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora + +", + "$.messages[7].content[8].type": "text", + "$.messages[7].content[9].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. 
firectl create deployment [flags] + +Create a Deployment on Fireworks AI platform + +", + "$.messages[7].content[9].type": "text", + "$.messages[7].role": "tool", + "$.messages[7].tool_call_id": "functions.search:0", + "$.messages[8].content": "Based on my research, here's a comprehensive guide to managing deployments on Fireworks AI: + +## Overview of Deployment Types + +Fireworks AI offers several deployment options: + +1. **Serverless Models**: Pay-per-token, shared infrastructure +2. **On-demand Deployments**: Dedicated GPU instances billed by GPU-second +3. **Reserved Capacity**: Pre-purchased dedicated capacity for high-volume usage + +## Key Benefits of On-demand Deployments +- Predictable performance unaffected by other users +- No hard rate limits +- Cost-effective for high utilization +- Access to larger model selection +- Support for custom base models + +## Managing Deployments with firectl + +### Installation and Setup +First, install the Fireworks CLI tools: + +\`\`\`bash +pip install fireworks-ai +\`\`\` + +Set up your API key: +\`\`\`bash +export FIREWORKS_API_KEY="your-api-key-here" +\`\`\` + +### Basic Deployment Commands + +#### 1. List All Deployments +\`\`\`bash +firectl list deployments +\`\`\` + +#### 2. Create a New Deployment +\`\`\`bash +# Basic deployment +firectl create deployment + +# With custom configuration +firectl create deployment --min-replica-count 1 --max-replica-count 3 + +# Deploy a LoRA fine-tuned model +firectl create deployment "accounts/fireworks/models/" + +# Deploy with addons enabled (for multi-LoRA) +firectl create deployment --enable-addons +\`\`\` + +#### 3. Check Deployment Status +\`\`\`bash +firectl get deployment +\`\`\` + +#### 4. 
Delete a Deployment +\`\`\`bash +# Soft delete +firectl delete deployment + +# Force delete (ignores usage checks) +firectl delete deployment --ignore-checks +\`\`\` + +### Managing Default Deployments +When you have multiple deployments of the same model: + +\`\`\`bash +# Check which deployment is default +firectl get model + +# Update default deployment +firectl update deployed-model --default +\`\`\` + +## Using the Fireworks Build SDK (Python) + +The Fireworks Build SDK provides a higher-level interface: + +\`\`\`python +from fireworks import LLM + +# Auto-managed deployment +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +# Explicit on-demand deployment +llm = LLM(model="your-model-id", deployment_type="on-demand") + +# Make API calls +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Hello!"}] +) +\`\`\` + +## REST API Management + +### List Deployments via REST API +\`\`\`bash +curl -X GET \\ + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" +\`\`\` + +### Delete Deployment via REST API +\`\`\`bash +curl -X DELETE \\ + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id}" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" +\`\`\` + +## Deployment Workflow Examples + +### 1. Deploy a Fine-tuned Model +\`\`\`bash +# Single command deployment +firectl create deployment "accounts/fireworks/models/my-lora-model" + +# Wait for deployment to be ready +firectl get deployment +\`\`\` + +### 2. Multi-LoRA Deployment (Advanced) +\`\`\`bash +# Step 1: Create base deployment with addons +firectl create deployment "accounts/fireworks/models/base-model" --enable-addons + +# Step 2: Load LoRA models +firectl load-lora --deployment +firectl load-lora --deployment +\`\`\` + +### 3. 
Custom Model Deployment +\`\`\`bash +# First upload your custom model +# Then deploy it +firectl create deployment "accounts/{account_id}/models/custom-model-id" +\`\`\` + +## Best Practices + +1. **Monitor Costs**: On-demand deployments are billed by GPU-second +2. **Use Reserved Capacity**: For consistent high-volume usage +3. **Set Appropriate Scaling**: Configure min/max replica counts based on expected load +4. **Clean Up**: Delete unused deployments to avoid charges +5. **Check Status**: Always verify deployment state before using + +## Troubleshooting + +- **"Invalid ID" errors**: Ensure your account ID is properly configured +- **Deployment stuck**: Check resource quotas and account limits +- **Performance issues**: Verify deployment is in READY state and check replica counts + +Would you like me to dive deeper into any specific aspect of deployment management, such as scaling configurations, cost optimization, or advanced deployment patterns?", + "$.messages[8].role": "assistant", + "$.pid": 89723, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:27:14.528488", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "finished", + "$.eval_metadata.version": "v0.2.6.dev15+g6486dbd.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "At least one tool call was made", + "$.evaluation_result.score": 1, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "construct-do-safe-art-song", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks! Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks AI"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How do I close my Fireworks.ai account? 
+Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? 
+We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. 
+ +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. 
+ +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Introduction +Link: https://docs.fireworks.ai/api-reference/introduction +Content: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Introduction +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Create a Deployment on Fireworks AI platform + +Create a Deployment on Fireworks AI platform + +Creates a new deployment. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How much does Fireworks cost? 
+Link: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost +Content: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +How much does Fireworks cost? +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. 
+See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY% + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management commands and workflows.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment management commands create delete list"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? 
+Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Delete a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources +Content: Deletes an model deployment. firectl delete deployment [flags] + +Deletes an model deployment. firectl delete deployment [flags] + +Example +firectl delete deployment my-deployment + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: Creating an on-demand deployment +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. 
If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. 
If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: firectl get deployment + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: Delete Deployment +Link: https://docs.fireworks.ai/api-reference/delete-deployment +Content: + + + +REST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + +query params: +hard: + boolean + Description: If true, this will perform a hard deletion. + Example: true +ignoreChecks: + boolean + Description: If true, this will ignore checks and force the deletion of a deployment that is currently +deployed and is in use. 
+ Example: true + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {} + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Deployment Command +Link: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example +Content: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. 
--function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. 
+ +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate + +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +firectl +firectl create dataset /path/to/jsonl/file + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Deleting Stored Responses +Link: https://docs.fireworks.ai/guides/response-api +Content: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
import os +from openai import OpenAI +import requests + +client = OpenAI( + base_url="https://api.fireworks.ai/inference/v1", + api_key=os.getenv("FIREWORKS_API_KEY", "YOUR_FIREWORKS_API_KEY_HERE") +) + +# Create a response +response = client.responses.create( + model="accounts/fireworks/models/qwen3-235b-a22b", + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Now let me search for information about listing and managing deployments to give you a complete picture.", + "$.messages[6].role": "assistant", + "$.messages[6].tool_calls[0].function.arguments": "{"query": "list deployments get deployment status firectl commands"}", + "$.messages[6].tool_calls[0].function.name": "search", + "$.messages[6].tool_calls[0].id": "functions.search:1", + "$.messages[6].tool_calls[0].type": "function", + "$.messages[7].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. 
+ +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[7].content[0].type": "text", + "$.messages[7].content[1].text": "Title: List deployments +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/list-resources +Content: Prints all deployments in the account. firectl list deployments [flags] + +Prints all deployments in the account. firectl list deployments [flags] + +List deployed models +Prints all deployed models in an account. firectl list deployed-models [flags] + +", + "$.messages[7].content[1].type": "text", + "$.messages[7].content[2].text": "Title: Creating an on-demand deployment +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. 
Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Publishing a deployed model +By default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. 
You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model="" AND deployment=""' + +", + "$.messages[7].content[2].type": "text", + "$.messages[7].content[3].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. 
+ +", + "$.messages[7].content[3].type": "text", + "$.messages[7].content[4].text": "Title: firectl +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm +Content: # Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +# Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +Try with an Example Dataset +To get a feel for how VLM fine-tuning works, you can use an example vision dataset: This is a classification dataset that contains images of food with tags for reasoning. Once downloaded, you can upload this dataset using the instructions in the next step. Upload your VLM dataset Upload your prepared JSONL dataset to Fireworks for training: Navigate to the Datasets tab in the Fireworks console, click "Create Dataset", and upload your JSONL file through the wizard. For larger datasets (>500MB), use as it handles large uploads more reliably than the web interface. For enhanced data control and security, we also support bring your own bucket (BYOB) configurations. See our External GCS Bucket Integration guide for setup details. Launch VLM fine-tuning job Create a supervised fine-tuning job for your VLM: For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings. Navigate to the Fine-tuning tab in the Fireworks console Click "Create Fine-tuning Job" Select your VLM base model (Qwen 2.5 VL) Choose your uploaded dataset Configure training parameters Launch the job VLM fine-tuning jobs typically take longer than text-only models due to the additional image processing. Expect training times of several hours depending on dataset size and model complexity. Monitor training progress Track your VLM fine-tuning job in the Fireworks console. 
Monitor key metrics: Training loss : Should generally decrease over time Evaluation loss : Monitor for overfitting if using evaluation dataset Training progress : Epochs completed and estimated time remaining Your VLM fine-tuning job is complete when the status shows COMPLETED and your custom model is ready for deployment. Deploy your fine-tuned VLM Once training is complete, deploy your custom VLM: # Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +", + "$.messages[7].content[4].type": "text", + "$.messages[7].content[5].text": "Title: firectl +Link: https://docs.fireworks.ai/guides/batch-inference +Content: # Get job status +firectl get batch-inference-job my-batch-job + +# List all batch jobs +firectl list batch-inference-jobs + +# Get job status +firectl get batch-inference-job my-batch-job + +# List all batch jobs +firectl list batch-inference-jobs + +firectl create dataset batch-input-dataset ./batch_input_data.jsonl + +", + "$.messages[7].content[5].type": "text", + "$.messages[7].content[6].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. 
+ +", + "$.messages[7].content[6].type": "text", + "$.messages[7].content[7].text": "Title: List Deployments +Link: https://docs.fireworks.ai/api-reference/list-deployments +Content: + + + +REST Endpoint GET /v1/accounts/{account_id}/deployments + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" + +query params: +pageSize: + integer + Description: The maximum number of deployments to return. The maximum page_size is 200, +values above 200 will be coerced to 200. +If unspecified, the default is 50. + Example: 123 +pageToken: + string + Description: A page token, received from a previous ListDeployments call. Provide this +to retrieve the subsequent page. When paginating, all other parameters +provided to ListDeployments must match the call that provided the page +token. + Example: "" +filter: + string + Description: Only deployment satisfying the provided filter (if specified) will be +returned. See https://google.aip.dev/160 for the filter grammar. + Example: "" +orderBy: + string + Description: A comma-separated list of fields to order by. e.g. "foo,bar" +The default sort order is ascending. To specify a descending order for a +field, append a " desc" suffix. e.g. "foo desc,bar" +Subfields are specified with a "." character. e.g. "foo.bar" +If not specified, the default order is by "create_time". + Example: "" +showDeleted: + boolean + Description: If set, DELETED deployments will be included. + Example: true +readMask: + string + Description: The fields to be returned in the response. If empty or "*", all fields will be returned. 
+ Example: "" + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {"deployments":[{"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true}],"nextPageToken":"","totalSize":123} + +", + "$.messages[7].content[7].type": "text", + "$.messages[7].content[8].text": "Title: Deploy Command +Link: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview +Content: The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +Command Overview +The Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon). 
+ +", + "$.messages[7].content[8].type": "text", + "$.messages[7].content[9].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[7].content[9].type": "text", + "$.messages[7].role": "tool", + "$.messages[7].tool_call_id": "functions.search:1", + "$.messages[8].content": "Perfect! Now I have comprehensive information about managing deployments on Fireworks. Let me provide you with a complete guide. + +# Managing Deployments on Fireworks: A Complete Guide + +Fireworks offers several ways to manage deployments, from command-line tools to REST APIs. Here's everything you need to know: + +## Types of Deployments + +### 1. On-Demand Deployments +- **Billed by GPU-second** (not per-token like serverless) +- **Dedicated resources** - no interference from other users +- **No hard rate limits** (subject to deployment capacity) +- **Access to more models** including custom ones +- **Better for high utilization** scenarios + +### 2. 
Serverless Deployments +- **Billed per-token** +- **Shared resources** with other users +- **Rate limits apply** +- **Instant availability** + +## Management Tools + +### Option 1: Fireworks Build SDK (Recommended) +\`\`\`python +from fireworks import LLM + +# Auto-deployment - SDK handles everything +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +# Manual deployment management +llm = LLM(model="your-model", deployment_type="on-demand") +\`\`\` + +### Option 2: firectl CLI Tool + +#### Installation +\`\`\`bash +# Install firectl CLI +curl -s https://fireworks.ai/install.sh | bash +\`\`\` + +#### Basic Commands + +**List all deployments:** +\`\`\`bash +firectl list deployments +\`\`\` + +**Create a deployment:** +\`\`\`bash +# From model library +firectl create deployment accounts/fireworks/models/llama-v3p1-8b-instruct --wait + +# Custom model +firectl create deployment accounts/your-account-id/models/your-custom-model --wait + +# LoRA fine-tuned model (single command) +firectl create deployment accounts/fireworks/models/your-lora-model-id +\`\`\` + +**Get deployment status:** +\`\`\`bash +firectl get deployment +\`\`\` + +**Delete a deployment:** +\`\`\`bash +firectl delete deployment +\`\`\` + +**List deployed models:** +\`\`\`bash +firectl list deployed-models +\`\`\` + +### Option 3: REST API + +#### List Deployments +\`\`\`bash +curl -X GET \\ + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" +\`\`\` + +#### Create Deployment +\`\`\`bash +curl -X POST \\ + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{ + "deployment": { + "displayName": "my-deployment", + "baseModel": "accounts/fireworks/models/llama-v3p1-8b-instruct" + } + }' +\`\`\` + +#### Delete Deployment +\`\`\`bash +curl -X DELETE \\ + 
"https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id}" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" +\`\`\` + +## Deployment Workflow + +### 1. Choose Your Model +\`\`\`bash +# Check available models +firectl list models +\`\`\` + +### 2. Create Deployment +\`\`\`bash +# Basic deployment +firectl create deployment accounts/fireworks/models/llama-v3p1-8b-instruct --wait + +# With custom configuration +firectl create deployment accounts/fireworks/models/llama-v3p1-8b-instruct \\ + --min-replica-count 1 \\ + --max-replica-count 3 \\ + --wait +\`\`\` + +### 3. Monitor Status +\`\`\`bash +# Get deployment details +firectl get deployment + +# Watch logs +firectl logs deployment +\`\`\` + +### 4. Make Public (Optional) +\`\`\`bash +# Make deployment public +firectl update deployment --public +\`\`\` + +### 5. Clean Up +\`\`\`bash +# Delete when done +firectl delete deployment +\`\`\` + +## Advanced Features + +### Reserved Capacity +For high-volume usage, consider reserved capacity: +- More reliable capacity +- Higher GPU quotas +- Cost savings for consistent usage +- Contact Fireworks for enterprise pricing + +### Multi-LoRA Deployments +\`\`\`bash +# Create base deployment with addons +firectl create deployment accounts/fireworks/models/base-model --enable-addons --wait + +# Load LoRA models +firectl load-lora lora-model-1 --deployment +firectl load-lora lora-model-2 --deployment +\`\`\` + +### Custom Models +\`\`\`bash +# Upload custom model first +firectl upload model /path/to/model + +# Deploy custom model +firectl create deployment accounts/your-account-id/models/custom-model-name +\`\`\` + +## Best Practices + +1. **Use \`--wait\` flag** for important deployments to ensure they're ready +2. **Monitor costs** with \`firectl list deployments\` to see active deployments +3. **Set up alerts** for deployment health +4. **Use reserved capacity** for production workloads +5. **Clean up unused deployments** to avoid charges +6. 
**Test deployments** before making them public + +## Troubleshooting + +**"Invalid ID" errors:** +- Ensure your account ID is properly configured +- Check your API key and account settings + +**Deployment stuck:** +- Check resource availability +- Verify model compatibility +- Review deployment logs + +**Performance issues:** +- Consider scaling up replicas +- Check if reserved capacity is available +- Monitor GPU utilization + +## Cost Management +- **On-demand**: Pay per GPU-second used +- **Serverless**: Pay per token processed +- **Reserved capacity**: Fixed monthly cost for guaranteed capacity +- Use \`firectl list deployments\` to track active deployments and costs + +This comprehensive guide should give you everything you need to effectively manage deployments on Fireworks!", + "$.messages[8].role": "assistant", + "$.pid": 92995, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:38:53.968430", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "finished", + "$.eval_metadata.version": "v0.2.6.dev17+gf8398de.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "At least one tool call was made", + "$.evaluation_result.score": 1, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "have-disassemble-simple-final-experience", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[10].content": "Based on my research, I can now provide you with a comprehensive guide on managing deployments on Fireworks. 
Let me break this down into practical sections with commands and examples. + +## Managing Deployments on Fireworks: A Complete Guide + +Fireworks offers three deployment types with different management approaches: + +1. **Serverless**: Pre-configured, pay-per-token, automatic scaling +2. **On-demand**: Private GPUs, pay-per-second, manual/automatic scaling +3. **Enterprise Reserved**: Custom hardware, tailored setup, SLAs + +### 1. Basic Deployment Management Commands + +#### Using firectl CLI (Recommended) + +**List all deployments:** +\`\`\`bash +firectl list deployments +\`\`\` + +**Create a deployment:** +\`\`\`bash +# Basic deployment +firectl create deployment --wait + +# With custom configuration +firectl create deployment \\ + --min-replica-count 0 \\ + --max-replica-count 3 \\ + --wait +\`\`\` + +**Delete a deployment:** +\`\`\`bash +firectl delete deployment +\`\`\` + +**Update deployment configuration:** +\`\`\`bash +firectl update deployment \\ + --min-replica-count 2 \\ + --max-replica-count 5 +\`\`\` + +#### Using REST API + +**List deployments:** +\`\`\`bash +curl -X GET \\ + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" +\`\`\` + +**Delete deployment via API:** +\`\`\`bash +curl -X DELETE \\ + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id}" \\ + -H "Authorization: Bearer $FIREWORKS_API_KEY" +\`\`\` + +### 2. 
Python SDK Management + +#### Installation and Setup +\`\`\`bash +pip install --upgrade fireworks-ai +export FIREWORKS_API_KEY="your-api-key" +\`\`\` + +#### Basic Usage +\`\`\`python +from fireworks import LLM + +# Serverless deployment (automatic) +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="serverless") + +# On-demand deployment with custom ID +llm = LLM( + model="llama4-maverick-instruct-basic", + deployment_type="on-demand", + id="my-custom-deployment" +) +llm.apply() # Required for on-demand deployments + +# Auto-selection (serverless if available, otherwise on-demand) +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") +\`\`\` + +#### Fine-tuned Model Deployment +\`\`\`python +# Single LoRA deployment (recommended) +lora_llm = LLM( + model="accounts/your-account/models/your-lora-model", + deployment_type="on-demand" +) +lora_llm.apply() + +# Multi-LoRA deployment +base_llm = LLM( + model="accounts/fireworks/models/llama4-maverick-instruct-basic", + deployment_type="on-demand-lora", + id="multi-lora-deployment" +) +base_llm.apply() +\`\`\` + +### 3. Deployment Configuration Options + +#### Scaling Configuration +\`\`\`bash +# Scale between 0-3 replicas based on demand +firectl create deployment \\ + --min-replica-count 0 \\ + --max-replica-count 3 + +# Fixed scaling (always have 2 replicas running) +firectl create deployment \\ + --min-replica-count 2 \\ + --max-replica-count 2 +\`\`\` + +#### Advanced Configuration +\`\`\`bash +# Custom model deployment +firectl create deployment accounts/your-account/models/custom-model \\ + --min-replica-count 0 \\ + --max-replica-count 5 \\ + --accelerator-type A100 \\ + --accelerator-count 2 +\`\`\` + +### 4. 
Cost Management Best Practices + +#### Automatic Cost Optimization +- **Scale-to-zero**: Deployments automatically scale to 0 when unused +- **No charges when scaled down**: Only pay for active GPU time +- **Autoscaling**: Automatically adjusts based on traffic + +#### Manual Cost Control +\`\`\`python +# Monitor deployment costs +import requests + +headers = {"Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}"} +response = requests.get( + "https://api.fireworks.ai/v1/accounts/{account_id}/deployments", + headers=headers +) + +# Delete unused deployments +for deployment in response.json()['deployments']: + if deployment['state'] == 'READY' and deployment['replicaCount'] == 0: + # Consider deleting if unused + pass +\`\`\` + +### 5. Monitoring and Troubleshooting + +#### Check Deployment Status +\`\`\`bash +# Get deployment details +firectl get deployment + +# Check deployed models +firectl list deployed-models +\`\`\` + +#### Web Dashboard +Track deployments at: \`https://app.fireworks.ai/dashboard/deployments\` + +### 6. Reserved Capacity (Enterprise) + +For high-volume usage, consider reserved capacity: +- More reliable capacity +- Higher GPU quotas +- Better pricing for consistent usage +- Contact: inquiries@fireworks.ai + +### 7. 
Common Management Workflows + +#### Daily Workflow Example +\`\`\`python +# Morning: Check active deployments +import subprocess +result = subprocess.run(['firectl', 'list', 'deployments'], + capture_output=True, text=True) +print(result.stdout) + +# Scale up for expected traffic +subprocess.run(['firectl', 'update', 'deployment', 'my-deployment', + '--min-replica-count', '2']) + +# Evening: Scale down +subprocess.run(['firectl', 'update', 'deployment', 'my-deployment', + '--min-replica-count', '0']) +\`\`\` + +#### Automated Cleanup Script +\`\`\`bash +#!/bin/bash +# cleanup_deployments.sh + +# List all deployments +deployments=$(firectl list deployments --format json | jq -r '.deployments[].name') + +# Delete deployments older than 7 days with 0 replicas +for deployment in $deployments; do + firectl delete deployment $deployment --ignore-checks +done +\`\`\` + +### 8. Important Considerations + +- **Billing**: On-demand deployments charge by GPU-second, even when idle +- **Rate limits**: Serverless has rate limits, on-demand does not +- **Cold starts**: Scaling from 0 may have brief delays +- **Monitoring**: Regularly check deployment status and costs +- **Cleanup**: Always delete unused deployments to avoid charges + +This comprehensive guide covers all aspects of deployment management on Fireworks. Start with serverless for simple use cases, move to on-demand for production workloads, and consider reserved capacity for enterprise needs.", + "$.messages[10].role": "assistant", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks. 
Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks AI"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How do I close my Fireworks.ai account? +Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? 
+To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. 
It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. 
You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Introduction +Link: https://docs.fireworks.ai/api-reference/introduction +Content: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Introduction +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Create a Deployment on Fireworks AI platform + +Create a Deployment on Fireworks AI platform + +Creates a new deployment. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How much does Fireworks cost? +Link: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost +Content: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. 
You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +How much does Fireworks cost? +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . 
+You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY% + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management commands and workflows.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment management commands create delete list update"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. 
+ +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Delete a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources +Content: Deletes an model deployment. firectl delete deployment [flags] + +Deletes an model deployment. firectl delete deployment [flags] + +Example +firectl delete deployment my-deployment + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: Creating an on-demand deployment +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Choose a model Check out our model library to see a list of models available for on-demand deployment. 
You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +Publishing a deployed model +By default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model="" AND deployment=""' + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: Delete Deployment +Link: https://docs.fireworks.ai/api-reference/delete-deployment +Content: + + + +REST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + +query params: +hard: + boolean + Description: If true, this will perform a hard deletion. 
+ Example: true +ignoreChecks: + boolean + Description: If true, this will ignore checks and force the deletion of a deployment that is currently +deployed and is in use. + Example: true + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {} + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Deployment Command +Link: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example +Content: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. 
Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. 
--force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate + +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +firectl +firectl create dataset /path/to/jsonl/file + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Deleting Stored Responses +Link: https://docs.fireworks.ai/guides/response-api +Content: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
import os +from openai import OpenAI +import requests + +client = OpenAI( + base_url="https://api.fireworks.ai/inference/v1", + api_key=os.getenv("FIREWORKS_API_KEY", "YOUR_FIREWORKS_API_KEY_HERE") +) + +# Create a response +response = client.responses.create( + model="accounts/fireworks/models/qwen3-235b-a22b", + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Let me search for more information about listing and managing deployments.", + "$.messages[6].role": "assistant", + "$.messages[6].tool_calls[0].function.arguments": "{"query": "list deployments firectl commands deployment management"}", + "$.messages[6].tool_calls[0].function.name": "search", + "$.messages[6].tool_calls[0].id": "functions.search:2", + "$.messages[6].tool_calls[0].type": "function", + "$.messages[7].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. 
+ +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[7].content[0].type": "text", + "$.messages[7].content[1].text": "Title: List deployments +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/list-resources +Content: Prints all deployments in the account. firectl list deployments [flags] + +Prints all deployments in the account. firectl list deployments [flags] + +List deployed models +Prints all deployed models in an account. firectl list deployed-models [flags] + +", + "$.messages[7].content[1].type": "text", + "$.messages[7].content[2].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. 
+ +", + "$.messages[7].content[2].type": "text", + "$.messages[7].content[3].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model + +Creating an on-demand deployment +Choose a model Check out our model library to see a list of models available for on-demand deployment. You can also upload and use your own custom base model. Create a deployment To create a new deployment of a model from the model library, use: Or, for a custom base model you have uploaded to your account: This command will complete when the deployment is READY . To let it run asynchronously, remove the --wait flag. If your account has purchased reserved capacity and this deployment meets the reservation criteria, it will be counted against that reservation and not billed as an on-demand deployment. Verify the deployment is running You can verify the deployment is active by running: The state field should show READY. The deployment ID is the last part of the deployment name: accounts//deployments/. You can also list all deployments in your account at any time by running: firectl list deployments + +", + "$.messages[7].content[3].type": "text", + "$.messages[7].content[4].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. 
+ +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +", + "$.messages[7].content[4].type": "text", + "$.messages[7].content[5].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[7].content[5].type": "text", + "$.messages[7].content[6].text": "Title: List Deployments +Link: https://docs.fireworks.ai/api-reference/list-deployments +Content: + + + +REST Endpoint GET /v1/accounts/{account_id}/deployments + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" + +query params: +pageSize: + integer + Description: The maximum number of deployments to return. The maximum page_size is 200, +values above 200 will be coerced to 200. +If unspecified, the default is 50. 
+ Example: 123 +pageToken: + string + Description: A page token, received from a previous ListDeployments call. Provide this +to retrieve the subsequent page. When paginating, all other parameters +provided to ListDeployments must match the call that provided the page +token. + Example: "" +filter: + string + Description: Only deployment satisfying the provided filter (if specified) will be +returned. See https://google.aip.dev/160 for the filter grammar. + Example: "" +orderBy: + string + Description: A comma-separated list of fields to order by. e.g. "foo,bar" +The default sort order is ascending. To specify a descending order for a +field, append a " desc" suffix. e.g. "foo desc,bar" +Subfields are specified with a "." character. e.g. "foo.bar" +If not specified, the default order is by "create_time". + Example: "" +showDeleted: + boolean + Description: If set, DELETED deployments will be included. + Example: true +readMask: + string + Description: The fields to be returned in the response. If empty or "*", all fields will be returned. 
+ Example: "" + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {"deployments":[{"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true}],"nextPageToken":"","totalSize":123} + +", + "$.messages[7].content[6].type": "text", + "$.messages[7].content[7].text": "Title: Deploy Command +Link: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview +Content: The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +Command Overview +The Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon). 
+ +", + "$.messages[7].content[7].type": "text", + "$.messages[7].content[8].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Deploying a fine-tuned model serverlessly +Not all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora + +", + "$.messages[7].content[8].type": "text", + "$.messages[7].content[9].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Create a Deployment on Fireworks AI platform + +", + "$.messages[7].content[9].type": "text", + "$.messages[7].role": "tool", + "$.messages[7].tool_call_id": "functions.search:2", + "$.messages[8].content": "Now let me search for information about deployment configuration and management best practices.", + "$.messages[8].role": "assistant", + "$.messages[8].tool_calls[0].function.arguments": "{"query": "deployment configuration scaling autoscaling reserved capacity Fireworks"}", + "$.messages[8].tool_calls[0].function.name": "search", + "$.messages[8].tool_calls[0].id": "functions.search:0", + "$.messages[8].tool_calls[0].type": "function", + "$.messages[9].content[0].text": "Title: How does billing and scaling work for on-demand GPU deployments? 
+Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-and-scaling-work-for-on-demand-gpu-deployments +Content: On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +How does billing and scaling work for on-demand GPU deployments? 
+On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +", + "$.messages[9].content[0].type": "text", + "$.messages[9].content[1].text": "Title: What should I expect for deployment and scaling performance? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-should-i-expect-for-deployment-and-scaling-performance +Content: Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +What should I expect for deployment and scaling performance? 
+Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +", + "$.messages[9].content[1].type": "text", + "$.messages[9].content[2].text": "Title: Deployment type selection +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-basics +Content: The SDK tries to be parsimonious with the way it deploys resources. We provide two types of deployment options on Fireworks: serverless hosting is enabled for some commonly-used state of the art models. The pricing for these models is per-token, i.e. you only pay for the tokens you use, and subject to rate limits. on-demand hosting is enabled for all other models. The pricing for these models is per GPU-second. This hosting is required for models that are not available serverlessly or workloads that exceed serverless rate limits. For non-finetuned models, you can always specify the deployment type of LLM() by passing either "serverless" or "on-demand" as the deployment_type parameter to the constructor. If the model is not available for the deployment type you selected, the SDK will throw an error. The SDK can also decide the best deployment strategy on your behalf, just pass deployment_type="auto" . If the model is available serverlessly, the SDK will use serverless hosting, otherwise the SDK will create an on-demand deployment. When using deployment_type="on-demand" or deployment_type="on-demand-lora" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type="auto" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. 
If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Be careful with the deployment_type parameter, especially for "auto" and "on-demand" deployments. While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type="on-demand" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. For finetuned (LoRA) models, passing deployment_type="serverless" will try to deploy the finetuned model to serverless hosting, deployment_type="on-demand" will create an on-demand deployment of your base model and merge in your LoRA weights, deployment_type="on-demand-lora" will create an on-demand deployment with Multi-LoRA enabled, and deployment_type="auto" will try to use serverless if available, otherwise fall back to on-demand-lora. + +The SDK tries to be parsimonious with the way it deploys resources. We provide two types of deployment options on Fireworks: serverless hosting is enabled for some commonly-used state of the art models. The pricing for these models is per-token, i.e. you only pay for the tokens you use, and subject to rate limits. on-demand hosting is enabled for all other models. The pricing for these models is per GPU-second. This hosting is required for models that are not available serverlessly or workloads that exceed serverless rate limits. For non-finetuned models, you can always specify the deployment type of LLM() by passing either "serverless" or "on-demand" as the deployment_type parameter to the constructor. If the model is not available for the deployment type you selected, the SDK will throw an error. 
The SDK can also decide the best deployment strategy on your behalf, just pass deployment_type="auto" . If the model is available serverlessly, the SDK will use serverless hosting, otherwise the SDK will create an on-demand deployment. When using deployment_type="on-demand" or deployment_type="on-demand-lora" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type="auto" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Be careful with the deployment_type parameter, especially for "auto" and "on-demand" deployments. While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type="on-demand" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. For finetuned (LoRA) models, passing deployment_type="serverless" will try to deploy the finetuned model to serverless hosting, deployment_type="on-demand" will create an on-demand deployment of your base model and merge in your LoRA weights, deployment_type="on-demand-lora" will create an on-demand deployment with Multi-LoRA enabled, and deployment_type="auto" will try to use serverless if available, otherwise fall back to on-demand-lora. 
+ +Deploying Fine-tuned Models with On-Demand +When deploying a fine-tuned model using deployment_type="on-demand" , you need to provide: model - Your fine-tuned model ID (e.g., "accounts/your-account/models/your-fine-tuned-model-id") id - A unique deployment identifier (can be any simple string like "my-fine-tuned-deployment") # Deploy a fine-tuned model with on-demand deployment +fine_tuned_llm = LLM( + model="accounts/your-account/models/your-fine-tuned-model-id", + deployment_type="on-demand", + id="my-fine-tuned-deployment" # Simple string ID +) + +# Apply deployment configuration to Fireworks +fine_tuned_llm.apply() + +# Track deployment in web dashboard +print(f"Track at: {fine_tuned_llm.deployment_url}") + +", + "$.messages[9].content[2].type": "text", + "$.messages[9].content[3].text": "Title: Spike arrest policy +Link: https://docs.fireworks.ai/guides/quotas_usage/rate-limits +Content: LLM traffic that spikes quickly has the potential to be throttled. Here's how it works: Each user has a guaranteed rate limit, which increases with sustained usage near the limit. Typically, you can expect to stay within the limits if your traffic gradually doubles within an hour. You can see your guaranteed limits using API response headers (see below) Exceeding your guaranteed limit means that there's the potential for your requests to be processed with lower-priority. Fireworks operates serverless deployments by autoscaling capacity (within limits) as user traffic increases. However, if a deployment is overloaded while auto-scaling, requests that fall outside of guaranteed limits may be processed with lower-latency or dropped with HTTP code 429 (if limits are significantly exceeded). You can monitor if you exceed limits via API response header x-ratelimit-over-limit: yes. Exceeding your guaranteed limit does not guarantee that your requests will be throttled. You can monitor if your requests are actually being throttled by monitoring latencies. 
Here's an example of how dynamic rate limits scale up: Metric Minimum Guaranteed Limit 10 Minutes 1 Hour 2 Hours Requests per minute 60 120 720 1440 Input tokens per minute 60000 120000 720000 1440000 Output tokens per minute 6000 12000 72000 144000 + +LLM traffic that spikes quickly has the potential to be throttled. Here's how it works: Each user has a guaranteed rate limit, which increases with sustained usage near the limit. Typically, you can expect to stay within the limits if your traffic gradually doubles within an hour. You can see your guaranteed limits using API response headers (see below) Exceeding your guaranteed limit means that there's the potential for your requests to be processed with lower-priority. Fireworks operates serverless deployments by autoscaling capacity (within limits) as user traffic increases. However, if a deployment is overloaded while auto-scaling, requests that fall outside of guaranteed limits may be processed with lower-latency or dropped with HTTP code 429 (if limits are significantly exceeded). You can monitor if you exceed limits via API response header x-ratelimit-over-limit: yes. Exceeding your guaranteed limit does not guarantee that your requests will be throttled. You can monitor if your requests are actually being throttled by monitoring latencies. Here's an example of how dynamic rate limits scale up: Metric Minimum Guaranteed Limit 10 Minutes 1 Hour 2 Hours Requests per minute 60 120 720 1440 Input tokens per minute 60000 120000 720000 1440000 Output tokens per minute 6000 12000 72000 144000 + +Spend limits +In order to prevent fraud, Fireworks imposes a monthly spending limit on your account. Once you hit the spending limit, your account will automatically enter a suspended state, API requests will be rejected and all Fireworks usage will be stopped. This includes serverless inference, dedicated deployments, and fine-tuning jobs. 
Your spend limit will organically increase over time as you spend more on the platform. You can also increase your spend limit at any time, by purchasing prepaid credits to meet the historical spend required for a higher tier. For instance, if you are a new Tier 1 user with $0 historical spend, you can purchase $100 prepaid credits and become a Tier 2 user. You can qualify for a higher tier by adding credits into your Fireworks account. There may be a propagation delay for a few minutes after you prepay for credits - you may still see "monthly usage exceeded error" for a few minutes after adding credits. Tier Qualification Spending Limit Tier 1 Valid payment method added $50/mo Tier 2 $50 spent in payments or credits added $500/mo Tier 3 $500 spent in payments or credits added $5,000/mo Tier 4 $5000 spent in payments or credits added $50,000/mo Unlimited Contact us at inquiries@fireworks.ai Unlimited + +", + "$.messages[9].content[3].type": "text", + "$.messages[9].content[4].text": "Title: How does billing work for on-demand deployments? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-work-for-on-demand-deployments +Content: On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. 
+ +On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. + +How does billing work for on-demand deployments? +On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. + +", + "$.messages[9].content[4].type": "text", + "$.messages[9].content[5].text": "Title: Replica count (horizontal scaling) +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: The number of replicas (horizontal scaling) is specified by passing the --min-replica-count and --max-replica-count flags. 
Increasing the number of replicas will increase the maximum QPS the deployment can support. The deployment will automatically scale based on server load. Auto-scaling up may fail if there is a GPU stockout. Use reserved capacity to guarantee capacity for your deployments. The default value for --min-replica-count is 0. Setting --min-replica-count to 0 enables the deployment to auto-scale to 0 if a deployment is unused (i.e. no inference requests) for a specified "scale-to-zero" time window. While the deployment is scaled to 0, you will not pay for any GPU utilization. The default value for --max-replica-count is 1 if --min-replica-count=0 , or the value of +--min-replica-count otherwise. firectl create deployment \\ + --min-replica-count 2 \\ + --max-replica-count 3 + +The number of replicas (horizontal scaling) is specified by passing the --min-replica-count and --max-replica-count flags. Increasing the number of replicas will increase the maximum QPS the deployment can support. The deployment will automatically scale based on server load. Auto-scaling up may fail if there is a GPU stockout. Use reserved capacity to guarantee capacity for your deployments. The default value for --min-replica-count is 0. Setting --min-replica-count to 0 enables the deployment to auto-scale to 0 if a deployment is unused (i.e. no inference requests) for a specified "scale-to-zero" time window. While the deployment is scaled to 0, you will not pay for any GPU utilization. The default value for --max-replica-count is 1 if --min-replica-count=0 , or the value of +--min-replica-count otherwise. firectl create deployment \\ + --min-replica-count 2 \\ + --max-replica-count 3 + +The number of replicas (horizontal scaling) is specified by passing the --min-replica-count and --max-replica-count flags. Increasing the number of replicas will increase the maximum QPS the deployment can support. The deployment will automatically scale based on server load. 
Auto-scaling up may fail if there is a GPU stockout. Use reserved capacity to guarantee capacity for your deployments. The default value for --min-replica-count is 0. Setting --min-replica-count to 0 enables the deployment to auto-scale to 0 if a deployment is unused (i.e. no inference requests) for a specified "scale-to-zero" time window. While the deployment is scaled to 0, you will not pay for any GPU utilization. The default value for --max-replica-count is 1 if --min-replica-count=0 , or the value of +--min-replica-count otherwise. firectl update deployment \\ + --min-replica-count 2 \\ + --max-replica-count 3 + +", + "$.messages[9].content[5].type": "text", + "$.messages[9].content[6].text": "Title: Do you support Auto Scaling? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/do-you-support-auto-scaling +Content: Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs + +Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs + +Do you support Auto Scaling? +Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs + +", + "$.messages[9].content[6].type": "text", + "$.messages[9].content[7].text": "Title: Inference +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks has 3 options for running generative AI models with unparalleled speed and costs. Serverless: The easiest way to get started. Use the most popular models on pre-configured GPUs. Pay per token and avoid cold boots. 
On-demand: The most flexible option for scaling. Use private GPUs to support your specific needs and only pay when you're using it. GPUs running Fireworks software offer both ~250% improved throughput and 50% improved latency compared to vLLM. Excels for: Production volume - Per-token costs decrease with more volume and there are no set rate limits Custom needs and reliability - On-demand GPUs are private to you. This enables complete control to tailor deployments for speed/throughput/reliability or to run more specialized models Enterprise Reserved GPUs: Use private GPUs with hardware and software set-up personally tailored by the Fireworks team for your use case. Enjoy SLAs, dedicated support, bring-your-own-cloud (BYOC) deployment options, and enterprise-only optimizations. Property Serverless On-demand Enterprise reserved Performance Industry-leading speed on Fireworks-curated set-up. Performance may vary with others' usage. Speed dependent on user-specified GPU configuration and private usage. Per GPU latency should be significantly faster than vLLM. Tailor-made set-up by Fireworks AI experts for best possible latency Getting Started Self-serve - immediately use serverless with 1 line of code Self-serve - configure GPUs, then use them with 1 line of code. Chat with Fireworks Scaling and management Scale up and down freely within rate limits Option for auto-scaling GPUs with traffic. GPUs scale to zero automatically, so no charge for unused GPUs and for boot-ups. Chat with Fireworks Pricing Pay fixed price per token Pay per GPU second with no commitments. Per GPU throughput should be significantly greater than options like vLLM. Customized price based on reserved GPU capacity Commitment None None Arrange plan length with Fireworks Rate limits Yes, see quotas No rate limits. 
Quotas on number of GPUs None Model Selection Collection of popular models, curated by Fireworks Use 100s of pre-uploaded models or upload your own custom model within supported architecture Use 100s of pre-uploaded models or upload any model + +Fireworks has 3 options for running generative AI models with unparalleled speed and costs. Serverless: The easiest way to get started. Use the most popular models on pre-configured GPUs. Pay per token and avoid cold boots. On-demand: The most flexible option for scaling. Use private GPUs to support your specific needs and only pay when you're using it. GPUs running Fireworks software offer both ~250% improved throughput and 50% improved latency compared to vLLM. Excels for: Production volume - Per-token costs decrease with more volume and there are no set rate limits Custom needs and reliability - On-demand GPUs are private to you. This enables complete control to tailor deployments for speed/throughput/reliability or to run more specialized models Enterprise Reserved GPUs: Use private GPUs with hardware and software set-up personally tailored by the Fireworks team for your use case. Enjoy SLAs, dedicated support, bring-your-own-cloud (BYOC) deployment options, and enterprise-only optimizations. Property Serverless On-demand Enterprise reserved Performance Industry-leading speed on Fireworks-curated set-up. Performance may vary with others' usage. Speed dependent on user-specified GPU configuration and private usage. Per GPU latency should be significantly faster than vLLM. Tailor-made set-up by Fireworks AI experts for best possible latency Getting Started Self-serve - immediately use serverless with 1 line of code Self-serve - configure GPUs, then use them with 1 line of code. Chat with Fireworks Scaling and management Scale up and down freely within rate limits Option for auto-scaling GPUs with traffic. GPUs scale to zero automatically, so no charge for unused GPUs and for boot-ups. 
Chat with Fireworks Pricing Pay fixed price per token Pay per GPU second with no commitments. Per GPU throughput should be significantly greater than options like vLLM. Customized price based on reserved GPU capacity Commitment None None Arrange plan length with Fireworks Rate limits Yes, see quotas No rate limits. Quotas on number of GPUs None Model Selection Collection of popular models, curated by Fireworks Use 100s of pre-uploaded models or upload your own custom model within supported architecture Use 100s of pre-uploaded models or upload any model + +Fireworks AI Developer Platform +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +", + "$.messages[9].content[7].type": "text", + "$.messages[9].content[8].text": "Title: 2. Call a language model using the LLM() class +Link: https://docs.fireworks.ai/tools-sdks/python-client/the-tutorial +Content: Now that your machine is setup with credentials and the SDK, lets ensure you are +ready to make your first LLM call and explain some of the nuances of this SDK. Create a new file called main.py and import the Fireworks AI SDK. Instantiate the LLM class. The LLM class accepts a model argument that you +can use to specify the model you want to use. For this tutorial, we will use the +Llama 4 Maverick +model. When creating an LLM instance, you can specify the deployment type as either "serverless" , "on-demand" , or "auto" . If you pass "auto" , the SDK will try to use serverless hosting if available, otherwise it will create an on-demand deployment. In the other cases, the SDK will try to create a deployment of the specified type and will throw an error if it's not available for the model you selected. The SDK will try and re-use existing deployments for the same model if possible, see Resource management for more details. With great power comes great responsibility! 
Be careful with the deployment_type parameter, especially for "auto" and "on-demand" . While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type="on-demand" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. When using deployment_type="on-demand" or deployment_type="on-demand-lora" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type="auto" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Make a request to the LLM. The LLM class is OpenAI compatible, so you can use +the same chat completion interface to make a request to the LLM. Hello! It's nice to meet you. Is there something I can help you with or would you like to chat? + +Now that your machine is setup with credentials and the SDK, lets ensure you are +ready to make your first LLM call and explain some of the nuances of this SDK. Create a new file called main.py and import the Fireworks AI SDK. Instantiate the LLM class. The LLM class accepts a model argument that you +can use to specify the model you want to use. For this tutorial, we will use the +Llama 4 Maverick +model. When creating an LLM instance, you can specify the deployment type as either "serverless" , "on-demand" , or "auto" . If you pass "auto" , the SDK will try to use serverless hosting if available, otherwise it will create an on-demand deployment. 
In the other cases, the SDK will try to create a deployment of the specified type and will throw an error if it's not available for the model you selected. The SDK will try and re-use existing deployments for the same model if possible, see Resource management for more details. With great power comes great responsibility! Be careful with the deployment_type parameter, especially for "auto" and "on-demand" . While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type="on-demand" , you must provide an id parameter to uniquely identify your deployment. This is required to prevent accidental creation of multiple deployments. When using deployment_type="on-demand" or deployment_type="on-demand-lora" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type="auto" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Make a request to the LLM. The LLM class is OpenAI compatible, so you can use +the same chat completion interface to make a request to the LLM. The great thing about the SDK is that you can use your favorite Python constructs to powerfully work with LLMs. 
For example, let's try calling a few LLMs in a loop and see how they respond: from fireworks import LLM + +llms = [ + "llama4-maverick-instruct-basic", + "deepseek-r1", + "qwen2p5-vl-32b-instruct" +] + +for llm in llms: + llm = LLM(model=llm, deployment_type="auto") + print("\\n" + "-" * 100) + print(f"Model: {llm.model}") + print("-" * 100 + "\\n") + + response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Hello, world!"}] + ) + print(response.choices[0].message.content) + +Now that your machine is setup with credentials and the SDK, lets ensure you are +ready to make your first LLM call and explain some of the nuances of this SDK. Create a new file called main.py and import the Fireworks AI SDK. Instantiate the LLM class. The LLM class accepts a model argument that you +can use to specify the model you want to use. For this tutorial, we will use the +Llama 4 Maverick +model. When creating an LLM instance, you can specify the deployment type as either "serverless" , "on-demand" , or "auto" . If you pass "auto" , the SDK will try to use serverless hosting if available, otherwise it will create an on-demand deployment. In the other cases, the SDK will try to create a deployment of the specified type and will throw an error if it's not available for the model you selected. The SDK will try and re-use existing deployments for the same model if possible, see Resource management for more details. With great power comes great responsibility! Be careful with the deployment_type parameter, especially for "auto" and "on-demand" . While the SDK will try to make the most cost effective choice for you and put sensible autoscaling policies in place, it is possible to unintentionally create many deployments that lead to unwanted spend, especially when working with non-serverless models. When using deployment_type="on-demand" , you must provide an id parameter to uniquely identify your deployment. 
This is required to prevent accidental creation of multiple deployments. When using deployment_type="on-demand" or deployment_type="on-demand-lora" , you must call .apply() to apply the deployment configuration to Fireworks. This is not required for serverless deployments. When using deployment_type="auto" , the SDK will automatically handle deployment creation, but if it falls back to on-demand deployment, you may need to call .apply() explicitly. If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments. Make a request to the LLM. The LLM class is OpenAI compatible, so you can use +the same chat completion interface to make a request to the LLM. The great thing about the SDK is that you can use your favorite Python constructs to powerfully work with LLMs. For example, let's try calling a few LLMs in a loop and see how they respond: from fireworks import LLM + +llms = [ + "llama4-maverick-instruct-basic", + "deepseek-r1", + "qwen2p5-vl-32b-instruct" +] + +for llm in llms: + llm = LLM(model=llm, deployment_type="auto") + print("\\n" + "-" * 100) + print(f"Model: {llm.model}") + print("-" * 100 + "\\n") + + response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Hello, world!"}] + ) + print(response.choices[0].message.content) + +", + "$.messages[9].content[8].type": "text", + "$.messages[9].content[9].text": "Title: How does autoscaling affect my costs? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-autoscaling-affect-my-costs +Content: Scaling from 0 : No minimum cost when scaled to zero Scaling up : Each new replica adds to your total cost proportionally. For example: Scaling from 1 to 2 replicas doubles your GPU costs If each replica uses multiple GPUs, costs scale accordingly (e.g., scaling from 1 to 2 replicas with 2 GPUs each means paying for 4 GPUs total) For current pricing details, please visit our pricing page. 
+ +Scaling from 0 : No minimum cost when scaled to zero Scaling up : Each new replica adds to your total cost proportionally. For example: Scaling from 1 to 2 replicas doubles your GPU costs If each replica uses multiple GPUs, costs scale accordingly (e.g., scaling from 1 to 2 replicas with 2 GPUs each means paying for 4 GPUs total) For current pricing details, please visit our pricing page. + +How does autoscaling affect my costs? +Scaling from 0 : No minimum cost when scaled to zero Scaling up : Each new replica adds to your total cost proportionally. For example: Scaling from 1 to 2 replicas doubles your GPU costs If each replica uses multiple GPUs, costs scale accordingly (e.g., scaling from 1 to 2 replicas with 2 GPUs each means paying for 4 GPUs total) For current pricing details, please visit our pricing page. + +", + "$.messages[9].content[9].type": "text", + "$.messages[9].role": "tool", + "$.messages[9].tool_call_id": "functions.search:0", + "$.pid": 95890, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:43:38.694430", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "finished", + "$.eval_metadata.version": "v0.2.6.dev19+gf755623", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "At least one tool call was made", + "$.evaluation_result.score": 1, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "look-win-current-private-teacher", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks. Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks AI"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How do I close my Fireworks.ai account? 
+Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? 
+We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. 
+ +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. 
+ +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. 
If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Introduction +Link: https://docs.fireworks.ai/api-reference/introduction +Content: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Introduction +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Create a Deployment on Fireworks AI platform + +Create a Deployment on Fireworks AI platform + +Creates a new deployment. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How much does Fireworks cost? 
+Link: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost +Content: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +How much does Fireworks cost? +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. 
+See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY% + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management commands and workflows.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment commands create manage delete update"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? 
+Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Delete a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/delete-resources +Content: Deletes an model deployment. firectl delete deployment [flags] + +Deletes an model deployment. firectl delete deployment [flags] + +Example +firectl delete deployment my-deployment + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Create a Deployment on Fireworks AI platform + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. 
You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist. + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. 
This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: Deployment Command +Link: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example +Content: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. 
--target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: Delete Deployment +Link: https://docs.fireworks.ai/api-reference/delete-deployment +Content: + + + +REST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + +query params: +hard: + boolean + Description: If true, this will perform a hard deletion. + Example: true +ignoreChecks: + boolean + Description: If true, this will ignore checks and force the deletion of a deployment that is currently +deployed and is in use. + Example: true + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {} + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: How do I close my Fireworks.ai account? +Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? 
+To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Deleting Stored Responses +Link: https://docs.fireworks.ai/guides/response-api +Content: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. import os +from openai import OpenAI +import requests + +client = OpenAI( + base_url="https://api.fireworks.ai/inference/v1", + api_key=os.getenv("FIREWORKS_API_KEY", "YOUR_FIREWORKS_API_KEY_HERE") +) + +# Create a response +response = client.responses.create( + model="accounts/fireworks/models/qwen3-235b-a22b", + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + 
"$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Let me search for more information about listing and managing deployments, as well as the Fireworks Build SDK for deployment management.", + "$.messages[6].role": "assistant", + "$.messages[6].tool_calls[0].function.arguments": "{"query": "list deployments firectl Fireworks Build SDK deployment management"}", + "$.messages[6].tool_calls[0].function.name": "search", + "$.messages[6].tool_calls[0].id": "functions.search:1", + "$.messages[6].tool_calls[0].type": "function", + "$.messages[7].content[0].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. 
Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[7].content[0].type": "text", + "$.messages[7].content[1].text": "Title: Basics of the Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-basics +Content: Why use the Build SDK? +The Fireworks Build SDK gives you a declarative way to work with Fireworks resources like deployments, fine-tuning jobs, and datasets. We've designed it to handle all the infrastructure complexity for you, letting you focus on building your application. Instead of using the web UI, CLI, or raw API calls, you can manage everything through simple Python code with smart, logical defaults without sacrificing control and customizability. The principles of the SDK are the following: Object-oriented: Fireworks primitives are represented as Python objects. You can access their capabilities and properties through methods and attributes. Declarative: You can describe your desired state and the SDK will handle reconcilliation. Smart defaults: The SDK will infer the most logical defaults for you, prioritizing development speed and lowest cost. Here are some examples: The SDK will automatically use a serverless deployment for models that are available serverlessly unless you specify otherwise. When creating deployments, the SDK will also enable scale-to-zero with the shortest possible scale-down window. If the SDK determines that a resource already exists by matching its signature (see below), it will re-use the existing resource instead of creating a new one. Customizable: Although we enable smart defaults, you still have full access to the configuration parameters for any Fireworks resource The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Why use the Build SDK? 
+The Fireworks Build SDK gives you a declarative way to work with Fireworks resources like deployments, fine-tuning jobs, and datasets. We've designed it to handle all the infrastructure complexity for you, letting you focus on building your application. Instead of using the web UI, CLI, or raw API calls, you can manage everything through simple Python code with smart, logical defaults without sacrificing control and customizability. The principles of the SDK are the following: Object-oriented: Fireworks primitives are represented as Python objects. You can access their capabilities and properties through methods and attributes. Declarative: You can describe your desired state and the SDK will handle reconcilliation. Smart defaults: The SDK will infer the most logical defaults for you, prioritizing development speed and lowest cost. Here are some examples: The SDK will automatically use a serverless deployment for models that are available serverlessly unless you specify otherwise. When creating deployments, the SDK will also enable scale-to-zero with the shortest possible scale-down window. If the SDK determines that a resource already exists by matching its signature (see below), it will re-use the existing resource instead of creating a new one. Customizable: Although we enable smart defaults, you still have full access to the configuration parameters for any Fireworks resource The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Why use the Build SDK? +The Fireworks Build SDK gives you a declarative way to work with Fireworks resources like deployments, fine-tuning jobs, and datasets. We've designed it to handle all the infrastructure complexity for you, letting you focus on building your application. 
Instead of using the web UI, CLI, or raw API calls, you can manage everything through simple Python code with smart, logical defaults without sacrificing control and customizability. The principles of the SDK are the following: Object-oriented: Fireworks primitives are represented as Python objects. You can access their capabilities and properties through methods and attributes. Declarative: You can describe your desired state and the SDK will handle reconcilliation. Smart defaults: The SDK will infer the most logical defaults for you, prioritizing development speed and lowest cost. Here are some examples: The SDK will automatically use a serverless deployment for models that are available serverlessly unless you specify otherwise. When creating deployments, the SDK will also enable scale-to-zero with the shortest possible scale-down window. If the SDK determines that a resource already exists by matching its signature (see below), it will re-use the existing resource instead of creating a new one. Customizable: Although we enable smart defaults, you still have full access to the configuration parameters for any Fireworks resource The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[7].content[1].type": "text", + "$.messages[7].content[2].text": "Title: Querying existing dedicated deployments +Link: https://docs.fireworks.ai/tools-sdks/python-client/querying-dedicated-deployments +Content: When you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams. + +When you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. 
This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams. + +Deployment ID requirements +The id parameter should match exactly with your existing deployment: Use the deployment name/ID as shown in the Fireworks dashboard The ID is case-sensitive and must match exactly If the deployment doesn't exist, you'll receive an error when making requests + +", + "$.messages[7].content[2].type": "text", + "$.messages[7].content[3].text": "Title: Fireworks SDK +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: You can connect to your existing deployment using the Fireworks Build SDK : from fireworks import LLM + +# Connect to your existing deployment +llm = LLM( + model="llama-v3p2-3b-instruct", # The model your deployment is running + deployment_type="on-demand", + id="my-deployment-id", # Your deployment ID +) + +# Use OpenAI-compatible chat completions +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) When connecting to an existing deployment with the SDK, you don't need to call .apply() - the deployment is already running. For more details on querying existing deployments with Fireworks SDK, see our guide on querying dedicated deployments . + +You can connect to your existing deployment using the Fireworks Build SDK : from fireworks import LLM + +# Connect to your existing deployment +llm = LLM( + model="llama-v3p2-3b-instruct", # The model your deployment is running + deployment_type="on-demand", + id="my-deployment-id", # Your deployment ID +) + +# Use OpenAI-compatible chat completions +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) When connecting to an existing deployment with the SDK, you don't need to call .apply() - the deployment is already running. 
For more details on querying existing deployments with Fireworks SDK, see our guide on querying dedicated deployments . + +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[7].content[3].type": "text", + "$.messages[7].content[4].text": "Title: Step 1: Configure the Fireworks Build SDK +Link: https://docs.fireworks.ai/structured-responses/structured-output-grammar-based +Content: from fireworks import LLM + +client = LLM( + model="llama-v3p1-405b-instruct", + deployment_type="serverless" +) + +from fireworks import LLM + +client = LLM( + model="llama-v3p1-405b-instruct", + deployment_type="serverless" +) + +Prerequisites +Before you begin, ensure you have the following: Python installed on your system. Build SDK installed. You can install it using pip: Next, select the model you want to use. In this example, we use llama-v3p1-405b-instruct , but all fireworks models support this feature. + +", + "$.messages[7].content[4].type": "text", + "$.messages[7].content[5].text": "Title: Does Fireworks support custom base models? 
+Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +", + "$.messages[7].content[5].type": "text", + "$.messages[7].content[6].text": "Title: 🔧 Build SDK: Deployment Configuration Application Requirement +Link: https://docs.fireworks.ai/updates/changelog +Content: The Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type="on-demand" or deployment_type="on-demand-lora" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type="on-demand" or deployment_type="on-demand-lora" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14. + +The Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type="on-demand" or deployment_type="on-demand-lora" . 
This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type="on-demand" or deployment_type="on-demand-lora" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14. + +🎯 Build SDK: Reward-kit integration for evaluator development +The Build SDK now natively integrates with reward-kit to simplify evaluator development for Reinforcement Fine-Tuning (RFT) . You can now create custom evaluators in Python with automatic dependency management and seamless deployment to Fireworks infrastructure. Key features: Native reward-kit integration for evaluator development Automatic packaging of dependencies from pyproject.toml or requirements.txt Local testing capabilities before deployment Direct integration with Fireworks datasets and evaluation jobs Support for third-party libraries and complex evaluation logic See our Developing Evaluators guide to get started with your first evaluator in minutes. + +", + "$.messages[7].content[6].type": "text", + "$.messages[7].content[7].text": "Title: Set up your developer environment & call the Fireworks API +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) + +Python (Fireworks) +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +", + "$.messages[7].content[7].type": "text", + "$.messages[7].content[8].text": "Title: Example with Fireworks Build SDK +Link: https://docs.fireworks.ai/faq-new/models-inference/how-to-get-performance-metrics-for-streaming-responses +Content: from fireworks import LLM +import os + +llm = LLM( + model="llama-v3p1-8b-instruct", + deployment_type="serverless", + api_key=os.environ["FIREWORKS_API_KEY"], +) + +# Streaming completion with performance metrics +stream = llm.chat.completions.create( + messages=[{"role": "user", "content": "Hello, world!"}], + max_tokens=100, + stream=True, + perf_metrics_in_response=True, +) + +for chunk in stream: + # Use getattr to avoid linter errors for unknown attributes + perf_metrics = getattr(chunk, "perf_metrics", None) + finish_reason = getattr(chunk.choices[0], "finish_reason", None) + if perf_metrics is not None and finish_reason: + print("Performance metrics:", perf_metrics) + +from fireworks import LLM +import os + +llm = LLM( + model="llama-v3p1-8b-instruct", + deployment_type="serverless", + api_key=os.environ["FIREWORKS_API_KEY"], +) + +# Streaming completion with performance metrics +stream = llm.chat.completions.create( + messages=[{"role": "user", "content": "Hello, world!"}], + max_tokens=100, + stream=True, + perf_metrics_in_response=True, +) + +for chunk in stream: + # Use getattr to avoid linter errors for unknown attributes + perf_metrics = getattr(chunk, "perf_metrics", None) + finish_reason = getattr(chunk.choices[0], "finish_reason", None) + if 
perf_metrics is not None and finish_reason: + print("Performance metrics:", perf_metrics) + +Performance Metrics Overview +The Inference API returns several per-request metrics in the response. They can be useful for one-off debugging or can be logged by the client in their preferred observability tool. For aggregate metrics, see the usage dashboard. Non-streaming requests: Performance metrics are always included in response headers (e.g., fireworks-prompt-tokens , fireworks-server-time-to-first-token ). Streaming requests: Only selected performance metrics, such as "fireworks-server-time-to-first-token," are available because HTTP headers must be sent before the first token can be streamed. Use the perf_metrics_in_response body parameter to include all metrics in the last SSE event of the response body. + +", + "$.messages[7].content[8].type": "text", + "$.messages[7].content[9].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[7].content[9].type": "text", + "$.messages[7].role": "tool", + "$.messages[7].tool_call_id": "functions.search:1", + "$.messages[8].content": "Based on my research, I can now provide you with a comprehensive guide on managing deployments on Fireworks. Let me break this down into different approaches: + +## Managing Deployments on Fireworks + +Fireworks offers three main ways to manage deployments: + +### 1. **Fireworks Build SDK (Recommended - Python)** +This is the most user-friendly approach using Python. 
+ +**Installation:** +\`\`\`bash +pip install --upgrade fireworks-ai +\`\`\` + +**Basic Deployment Creation:** +\`\`\`python +from fireworks import LLM + +# Create a new deployment +llm = LLM( + model="llama-v3p2-3b-instruct", + deployment_type="on-demand", # or "serverless", "on-demand-lora" + id="my-deployment-name" +) + +# Apply the deployment (required for on-demand deployments) +llm.apply() + +# Use the deployment +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Hello, world!"}] +) +print(response.choices[0].message.content) +\`\`\` + +**Key Features:** +- **Smart defaults**: Automatically selects optimal deployment type +- **Declarative**: Describe desired state, SDK handles reconciliation +- **Object-oriented**: Deployments are Python objects with methods/attributes +- **Scale-to-zero**: Enabled by default with shortest scale-down window + +### 2. **Firectl CLI Tool** +Command-line interface for deployment management. + +**Create a deployment:** +\`\`\`bash +firectl create deployment "accounts/fireworks/models/llama-v3p2-3b-instruct" --id my-deployment +\`\`\` + +**Deploy LoRA fine-tunes:** +\`\`\`bash +# Single command for LoRA deployment +firectl create deployment "accounts/fireworks/models/" + +# Traditional two-step method (for multi-LoRA) +firectl create deployment "accounts/fireworks/models/" --enable-addons +firectl load-lora --deployment +\`\`\` + +**List deployments:** +\`\`\`bash +firectl list deployments +\`\`\` + +**Delete deployments:** +\`\`\`bash +firectl delete deployment my-deployment +\`\`\` + +**Update default deployment:** +\`\`\`bash +firectl update deployed-model --default +\`\`\` + +### 3. **REST API** +Direct API calls for programmatic management. 
+ +**Create deployment:** +\`\`\`bash +curl -X POST https://api.fireworks.ai/v1/accounts/{account_id}/deployments \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{ + "model": "llama-v3p2-3b-instruct", + "deployment_type": "on-demand" + }' +\`\`\` + +**Delete deployment:** +\`\`\`bash +curl -X DELETE https://api.fireworks.ai/v1/accounts/{account_id}/deployments/{deployment_id} \\ + -H "Authorization: Bearer YOUR_API_KEY" +\`\`\` + +## Deployment Types Explained + +### **Serverless Deployments** +- **Billing**: Per-token usage +- **Best for**: Variable/low usage, quick prototyping +- **Management**: Fully managed by Fireworks +- **Access**: Instant, no setup required + +### **On-Demand Deployments** +- **Billing**: Per GPU-second +- **Best for**: Consistent usage, predictable performance +- **Advantages**: + - No rate limits + - Predictable performance + - Access to larger model selection + - Custom base models from Hugging Face +- **Management**: Requires explicit creation/deletion + +### **Reserved Capacity** +- **Best for**: High-volume, consistent usage +- **Advantages**: More reliable capacity, higher GPU quotas +- **Contact**: inquiries@fireworks.ai for enterprise pricing + +## Complete Management Workflow Example + +Here's a comprehensive example using the Build SDK: + +\`\`\`python +from fireworks import LLM +import os + +# Set up API key +os.environ["FIREWORKS_API_KEY"] = "your-api-key" + +# 1. Create and manage deployments +def manage_deployments(): + + # Create a serverless deployment (no .apply() needed) + serverless_llm = LLM( + model="llama4-maverick-instruct-basic", + deployment_type="serverless" + ) + + # Create an on-demand deployment + ondemand_llm = LLM( + model="llama-v3p2-3b-instruct", + deployment_type="on-demand", + id="my-production-deployment", + max_instances=3, + min_instances=0 # Enable scale-to-zero + ) + + # Apply the on-demand deployment + ondemand_llm.apply() + + # 2. 
Query existing deployments + print("Connecting to existing deployment...") + existing_llm = LLM( + model="llama-v3p2-3b-instruct", + deployment_type="on-demand", + id="my-existing-deployment" + ) + + # 3. Use deployments + response = existing_llm.chat.completions.create( + messages=[{"role": "user", "content": "Explain deployment management"}] + ) + + # 4. Clean up (delete deployment when done) + # firectl delete deployment my-production-deployment + + return response.choices[0].message.content + +# Run the workflow +result = manage_deployments() +print(result) +\`\`\` + +## Best Practices + +1. **Cost Management**: Use serverless for development, on-demand for production +2. **Monitoring**: Track usage through the Fireworks dashboard +3. **Cleanup**: Always delete unused on-demand deployments to avoid charges +4. **Scaling**: Use scale-to-zero for cost efficiency +5. **Naming**: Use descriptive deployment IDs for easy management + +## Getting Started + +1. **Install the Build SDK**: \`pip install fireworks-ai\` +2. **Get API Key**: From https://app.fireworks.ai/dashboard/api-keys +3. **Choose Approach**: + - **Beginners**: Start with Build SDK + - **CLI Users**: Use firectl + - **API Integration**: Use REST API + +For enterprise needs or reserved capacity, contact inquiries@fireworks.ai. 
+ +Would you like me to dive deeper into any specific aspect of deployment management?", + "$.messages[8].role": "assistant", + "$.pid": 97954, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:51:25.530495", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev19+gf755623.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "invent-special-real-rich-dream", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 2168, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T11:51:48.974779", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev19+gf755623.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "continue-encourage-dry-part-meeting", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 2642, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T12:17:53.864430", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "create-destroy-past-service-case", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks! Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks AI"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: Fireworks AI Developer Platform +Link: https://docs.fireworks.ai/getting-started/introduction +Content: Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! 
+ +Fireworks AI is the best platform for building AI product experiences with open source AI models. You can run and customize AI models with just a few lines of code! + +Start building with open source AI models + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: How do I close my Fireworks.ai account? +Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. 
However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. 
The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. 
You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Introduction +Link: https://docs.fireworks.ai/api-reference/introduction +Content: Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. + +Introduction +Fireworks AI REST API enables you to interact with various language, image and embedding models using an API Key. It also lets you automate management of models, deployments, datasets, and more. 
+ +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Create a Deployment on Fireworks AI platform + +Create a Deployment on Fireworks AI platform + +Creates a new deployment. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: How much does Fireworks cost? +Link: https://docs.fireworks.ai/faq-new/billing-pricing/how-much-does-fireworks-cost +Content: Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. + +How much does Fireworks cost? +Fireworks AI operates on a pay-as-you-go model for all non-Enterprise usage, and new users automatically receive free credits. You pay based on: Per token for serverless inference Per GPU usage time for on-demand deployments Per token of training data for fine-tuning For customers needing enterprise-grade security and reliability , please reach out to us at inquiries@fireworks.ai to discuss options. Find out more about our current pricing on our Pricing page. 
+ +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. 
The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. Fireworks AI platform offers drop-in replacement with OpenAI Python Client. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $OPENAI_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. 
To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %OPENAI_API_KEY% + +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management commands and workflows.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "deployment management commands create update delete list Fireworks"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Deployment Command +Link: https://docs.fireworks.ai/evaluators/examples/gcp_cloud_run_deployment_example +Content: It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. 
--force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: Run the deployment command: Command Explanation: --id my-dummy-gcp-evaluator : A unique ID for your evaluator on the Fireworks AI platform. --target gcp-cloud-run : Specifies deployment to GCP Cloud Run. --function-ref dummy_rewards.hello_world_reward : The Python import path to your reward function. If dummy_rewards.py is in the current directory, this reference works. --gcp-auth-mode api-key : Configures the Cloud Run service with API key authentication. reward-kit will generate a key, store it in GCP Secret Manager, and configure the service. The key is also saved to your local rewardkit.yaml under evaluator_endpoint_keys . This is the default if not specified. --verbose : Shows detailed output, including gcloud commands being executed. --force : (Optional) If an evaluator with the same --id already exists, this flag will delete the existing one before creating the new one. + +It's recommended to run the deployment command from the directory containing the reward function script ( dummy_rewards.py ) and your rewardkit.yaml (if used), for example, from examples/gcp_cloud_run_deployment_example/. Ensure your virtual environment is active: source .venv/bin/activate + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? 
+Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Deleting Stored Responses +Link: https://docs.fireworks.ai/guides/response-api +Content: When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. from fireworks import LLM +import requests +import os + +llm = LLM(model="qwen3-235b-a22b", deployment_type="serverless") + +# Create a response +response = llm.responses.create( + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +When responses are stored (the default behavior with store=True ), you can immediately delete them from storage using the DELETE endpoint. This permanently removes the conversation data. 
import os +from openai import OpenAI +import requests + +client = OpenAI( + base_url="https://api.fireworks.ai/inference/v1", + api_key=os.getenv("FIREWORKS_API_KEY", "YOUR_FIREWORKS_API_KEY_HERE") +) + +# Create a response +response = client.responses.create( + model="accounts/fireworks/models/qwen3-235b-a22b", + input="What is the capital of France?", + store=True # This is the default +) + +response_id = response.id +print(f"Created response with ID: {response_id}") + +# Delete the response immediately +headers = { + "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY')}", + "x-fireworks-account-id": "your-account-id" +} +delete_response = requests.delete( + f"https://api.fireworks.ai/inference/v1/responses/{response_id}", + headers=headers +) + +if delete_response.status_code == 200: + print("Response deleted successfully") +else: + print(f"Failed to delete response: {delete_response.status_code}") + +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: Delete Deployment +Link: https://docs.fireworks.ai/api-reference/delete-deployment +Content: + + + +REST Endpoint DELETE /v1/accounts/{account_id}/deployments/{deployment_id} + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" +deployment_id: + string + Description: The Deployment Id + Example: "" + +query params: +hard: + boolean + Description: If true, this will perform a hard deletion. + Example: true +ignoreChecks: + boolean + Description: If true, this will ignore checks and force the deletion of a deployment that is currently +deployed and is in use. 
+ Example: true + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {} + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: List Deployments +Link: https://docs.fireworks.ai/api-reference/list-deployments +Content: REST Endpoint GET /v1/accounts/{account_id}/deployments + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" + +query params: +pageSize: + integer + Description: The maximum number of deployments to return. The maximum page_size is 200, +values above 200 will be coerced to 200. +If unspecified, the default is 50. + Example: 123 +pageToken: + string + Description: A page token, received from a previous ListDeployments call. Provide this +to retrieve the subsequent page. When paginating, all other parameters +provided to ListDeployments must match the call that provided the page +token. + Example: "" +filter: + string + Description: Only deployment satisfying the provided filter (if specified) will be +returned. See https://google.aip.dev/160 for the filter grammar. + Example: "" +orderBy: + string + Description: A comma-separated list of fields to order by. e.g. "foo,bar" +The default sort order is ascending. To specify a descending order for a +field, append a " desc" suffix. e.g. "foo desc,bar" +Subfields are specified with a "." character. e.g. "foo.bar" +If not specified, the default order is by "create_time". + Example: "" +showDeleted: + boolean + Description: If set, DELETED deployments will be included. + Example: true +readMask: + string + Description: The fields to be returned in the response. If empty or "*", all fields will be returned. 
+ Example: "" + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {"deployments":[{"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true}],"nextPageToken":"","totalSize":123} + +REST Endpoint GET /v1/accounts/{account_id}/deployments + +Server(s): https://api.fireworks.ai + + +Summary: + +Description: N/A + +Security methods: +header: + Authorization: bearer auth + + +Request params: +path params: +account_id: + string + Description: The Account Id + Example: "" + +query params: +pageSize: + integer + Description: The maximum number of deployments to return. The maximum page_size is 200, +values above 200 will be coerced to 200. +If unspecified, the default is 50. + Example: 123 +pageToken: + string + Description: A page token, received from a previous ListDeployments call. Provide this +to retrieve the subsequent page. When paginating, all other parameters +provided to ListDeployments must match the call that provided the page +token. + Example: "" +filter: + string + Description: Only deployment satisfying the provided filter (if specified) will be +returned. 
See https://google.aip.dev/160 for the filter grammar. + Example: "" +orderBy: + string + Description: A comma-separated list of fields to order by. e.g. "foo,bar" +The default sort order is ascending. To specify a descending order for a +field, append a " desc" suffix. e.g. "foo desc,bar" +Subfields are specified with a "." character. e.g. "foo.bar" +If not specified, the default order is by "create_time". + Example: "" +showDeleted: + boolean + Description: If set, DELETED deployments will be included. + Example: true +readMask: + string + Description: The fields to be returned in the response. If empty or "*", all fields will be returned. + Example: "" + + +Request body: N/A + + +Responses: +200: object + Description: N/A + Example: {"deployments":[{"name":"","displayName":"","description":"","createTime":"2023-11-07T05:31:56Z","expireTime":"2023-11-07T05:31:56Z","purgeTime":"2023-11-07T05:31:56Z","deleteTime":"2023-11-07T05:31:56Z","state":"STATE_UNSPECIFIED","status":{"code":"OK","message":""},"minReplicaCount":123,"maxReplicaCount":123,"replicaCount":123,"autoscalingPolicy":{"scaleUpWindow":"","scaleDownWindow":"","scaleToZeroWindow":"","loadTargets":{}},"baseModel":"","acceleratorCount":123,"acceleratorType":"ACCELERATOR_TYPE_UNSPECIFIED","precision":"PRECISION_UNSPECIFIED","cluster":"","enableAddons":true,"draftTokenCount":123,"draftModel":"","ngramSpeculationLength":123,"numPeftDeviceCached":123,"deploymentTemplate":"","autoTune":{"longPrompt":true},"placement":{"region":"REGION_UNSPECIFIED","multiRegion":"MULTI_REGION_UNSPECIFIED","regions":["REGION_UNSPECIFIED"]},"region":"REGION_UNSPECIFIED","updateTime":"2023-11-07T05:31:56Z","disableDeploymentSizeValidation":true}],"nextPageToken":"","totalSize":123} + + + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: Publishing a deployed model +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: By default, models can only be queried by the account that 
owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model="" AND deployment=""' + +By default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: firectl list deployed-models --filter 'model="" AND deployment=""' + +Managing default deployments +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: To delete a default deployment you must delete all other deployments for the same model first, or designate a different deployed model as the default as described above. This is to ensure that querying by model name will always route to an unambiguous default deployment as long as deployments for the model exist. + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? 
+Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: How do I close my Fireworks.ai account? +Link: https://docs.fireworks.ai/faq-new/account-access/how-do-i-close-my-fireworksai-account +Content: To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +How do I close my Fireworks.ai account? +To close your account: Email inquiries@fireworks.ai Include in your request: Your account ID A clear request for account deletion Before closing your account, please ensure: All outstanding invoices are paid Any active deployments are terminated Important data is backed up if needed + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. 
It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Python (Fireworks) +Link: https://docs.fireworks.ai/getting-started/quickstart +Content: This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. 
Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +This is the recommended way to get started : This Python (Fireworks) tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. 
pip install --upgrade fireworks-ai The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: vim ~/.bash_profile vim ~/.zshrc Add a new line to the file with the following: export FIREWORKS_API_KEY="" After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. source ~/.bash_profile source ~/.zshrc You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. setx FIREWORKS_API_KEY "" To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: echo %FIREWORKS_API_KEY% You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) You can also pass the API key directly to the LLM constructor: LLM(model="llama4-maverick-instruct-basic", deployment_type="auto", api_key="") + +Set up your developer environment & call the Fireworks API +This is the recommended way to get started : This tab uses our Fireworks Build +SDK for the best performance and developer experience. +See our Client-side performance +optimization guide for +more details. 
Install SDK Before installing, ensure that you have the right version of Python installed. Optionally you might want to setup a virtual environment too. The Fireworks Build SDK provides a declarative way to work with Fireworks resources and is OpenAI API Compatible. Configure API Key Step-by-step instructions for setting an environment variable for respective OS platforms: Depending on your shell, you'll need to edit either ~/.bash_profile for Bash or ~/.zshrc for Zsh . +You can do this by running the command: Add a new line to the file with the following: After saving the file, you'll need to apply the changes by either restarting your terminal session or running depending on the file you edited. You can verify that the variable has been set correctly by running echo $FIREWORKS_API_KEY You can open Command Prompt by searching for it in the Windows search bar or by pressing Win + R, typing cmd, and pressing Enter. To verify that the variable has been set correctly, you can close and reopen Command Prompt and type: Sending the first API Request You can quickly instantiate the LLM class and call the Fireworks API. The Build SDK handles deployment management automatically. 
from fireworks import LLM + +# Basic usage - SDK automatically selects optimal deployment type +llm = LLM(model="llama4-maverick-instruct-basic", deployment_type="auto") + +response = llm.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) + +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Let me search for more specific information about the firectl tool and deployment workflows.", + "$.messages[6].role": "assistant", + "$.messages[6].tool_calls[0].function.arguments": "{"query": "firectl deployment management commands create deployment update deployment"}", + "$.messages[6].tool_calls[0].function.name": "search", + "$.messages[6].tool_calls[0].id": "functions.search:0", + "$.messages[6].tool_calls[0].type": "function", + "$.messages[7].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[7].content[0].type": "text", + "$.messages[7].content[1].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. 
+ +", + "$.messages[7].content[1].type": "text", + "$.messages[7].content[2].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[7].content[2].type": "text", + "$.messages[7].content[3].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. 
You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +Publishing a deployed model +By default, models can only be queried by the account that owns them. To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. Find the Deployed Model ID Every model running on a deployment receives a unique deployed model ID. You can find this ID if you know the model name and deployment name using the following command: The first column in the output is the deployed model ID. Publish a deployed model To make a deployment public so anyone with a valid Fireworks API key can query it, update the deployed model with the --public flag. firectl update deployed-model --public + +", + "$.messages[7].content[3].type": "text", + "$.messages[7].content[4].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[7].content[4].type": "text", + "$.messages[7].content[5].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +firectl +firectl create dataset /path/to/jsonl/file + +", + "$.messages[7].content[5].type": "text", + "$.messages[7].content[6].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. 
+ +", + "$.messages[7].content[6].type": "text", + "$.messages[7].content[7].text": "Title: firectl +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm +Content: # Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +# Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +firectl create sftj \\ + --base-model accounts/fireworks/models/qwen2p5-vl-32b-instruct \\ + --dataset my-vlm-dataset \\ + --output-model my-custom-vlm \\ + --epochs 3 For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings . + +", + "$.messages[7].content[7].type": "text", + "$.messages[7].content[8].text": "Title: firectl +Link: https://docs.fireworks.ai/models/quantization +Content: firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8 + +firectl create deployment --accelerator-type NVIDIA_H100_80GB --precision FP8 + +Creating an FP8 deployment +By default, creating a dedicated deployment will use the FP16 checkpoint. To see what precisions are available for a +model, run: get model + +", + "$.messages[7].content[8].type": "text", + "$.messages[7].content[9].text": "Title: Uploading the model locally (firectl) +Link: https://docs.fireworks.ai/models/uploading-custom-models +Content: To upload a custom base model, run the following command. firectl create model /path/to/files/ + +To upload a custom base model, run the following command. firectl create model /path/to/files/ + +To upload a custom base model, run the following command. 
+ +", + "$.messages[7].content[9].type": "text", + "$.messages[7].role": "tool", + "$.messages[7].tool_call_id": "functions.search:0", + "$.pid": 12679, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738493", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 21 highlighted sections (required: 15)", + "$.evaluation_result.score": 1, + "$.ground_truth": "15", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "review-say-right-news-government", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# Outline: The History of Yemeni Coffee + +1. **Introduction: Why Yemeni Coffee Matters** + - Thesis: Yemen’s role as the cradle of commercial coffee culture and its enduring legacy on global trade, ritual, and taste. + +2. *Pre-Coffee Yemen: Agriculture and Trade Routes* + - Overview of terraced farming and the incense & spice trade that primed the Red Sea economy. + +3. 
*The Myth and Botany of Coffea arabica* + - Ethiopian origin vs. Yemeni domestication; genetic evidence of Yemeni varietals. + +4. **Sufi Mystics and the First Coffeehouses** + - 15th-century Sufi orders in Mocha and Taʿizz using *qahwa* for nocturnal devotions. + +5. *Mocha as the World’s First Coffee Port* + - Rise of the Red Sea entrepôt; Ottoman tax farms and the *tujjar* merchant guilds. + +6. **Yemeni Cultivation Techniques: Terraces, Water, and Shade** + - Indigenous irrigation (*ghayl*), drought-resistant varietals, and the *mafrash* drying mats. + +7. *The 16th-Century Ottoman Coffee Ban and Smuggling Networks* + - How Mecca’s 1511 prohibition fueled clandestine Red Sea traffic and price inflation. + +8. **European Encounters: “Turkish” Coffee as a Yemeni Product** + - Dutch, Venetian, and English merchants mislabeling Mocha beans; first price lists in Amsterdam (1616). + +9. *The Dutch East India Company and the First Seed Theft* + - Pieter van den Broecke’s 1616 smuggling of Yemeni seeds to Batavia—birth of Java coffee. + +10. **Yemen’s Monopoly Crumbles: 17th–18th-Century Rival Plantations** + - French at Réunion (Bourbon), Portuguese in Brazil, Spanish in Jamaica—all tracing back to Mocha stock. + +11. *The Qasimi Era: Internal Trade, Taxes, and the Rise of the Ismaili Imamate* + - How highland imams leveraged coffee revenues to fund statecraft and architecture. + +12. **Decline in the 19th Century: British Aden and the Suez Canal** + - Shift of maritime traffic; competition from colonial plantations; collapse of Mocha’s mercantile elite. + +13. *Legacy in Cupping Notes: Mocha Java and Flavor Memory* + - Chocolatey, winey profiles; how Yemeni beans shaped early espresso blends. + +14. **Modern Revival: Specialty Coffee, NGOs, and Qima Coffee** + - 21st-century traceability, micro-lots, and direct-trade premiums; challenges of war and water scarcity. + +15. *Cultural Continuities: Qat vs. 
Qahwa, Wedding Roasts, and Oral Poetry* + - Daily rituals, gendered spaces, and the recitation of *zawamil* couplets over fresh brews. + +16. **Conclusion: Yemen as a Living Archive of Coffee Culture** + - Reasserting Yemen’s centrality in every cup; call for sustainable heritage preservation.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738539", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 26 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "install-fine-financial-health-state", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using "*". For example: *3 years of experience*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Dear *Steam & Bean* family, +I come to you not on paper feet but on *three years of espresso-stained sneakers*, +soles softened by dawn milk-foam and dusk drip-trays. 
+I have measured my mornings in *latte-art hearts*— +some perfect, some *wobbly, but all poured with intention*. + +I know the *hiss of the steam wand* like a lullaby, +and the *clink of ceramic* like a metronome for the day. +I have *memorized the weight of 18 grams* of ground hope, +tamped it level, locked it tight, and watched *crema bloom* +the way a shy smile opens when someone says their name right. + +I speak *fluent caffeine*: +a triple-shot for the night-shift nurse, +oat-milk cappuccino for the poet who tips in verses, +decaf Americano for the retired teacher grading sunrise. +I *remember orders like birthdays*, +and birthdays like *extra whip, no charge*. + +I can *calibrate a grinder* faster than gossip travels, +and *unclog a portafilter* with the patience of a gardener. +I have *danced behind counters* two feet wide, +twirled between syrups and strangers, +and never spilled a single *thank-you*. + +If you need someone who *sweats kindness* and *smiles in ristretto*, +who counts tips not in coins but in *stories collected*, +I am your *next opening shift*, your *steady hand at 5:59 a.m.*, +your *last call who still says “see you tomorrow”*. + +Let me *steam, pour, wipe, repeat*— +until the chairs are upside-down and the *grinders sleep*. +I offer you *poetry in motion*, one cup at a time. + +With *grounds under my nails* and *hope in my apron*, +—[Your Name]", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738569", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "should-poor-parent-war-magazine", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with "*", like: *highlighted text section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Hey there, fellow creature-curious readers! I just spent the weekend binge-watching nature docs with my cat (who was mostly unimpressed), and I can’t stop noticing how often totally different animals act like they’re reading from the same playbook. So grab a snack and let’s zoom out for a second—here are five “wait, we’re not so different” moments that made me laugh, gasp, and text my group chat at 2 a.m. + +*Moms are moms, whether they have feathers, fur, or fins.* +From emperor penguins passing an egg back and forth like the world’s most stressful game of hot potato, to orangutan mamas teaching their kids which leaves make the best umbrellas, the universal rule seems to be: if you’re a parent, you’re tired. I watched a seahorse dad give birth (yes, the males do that!) and still look like he needed a nap and a latte. 
+ +*Play isn’t just for puppies.* +Snowboarding crows, surfing dolphins, snow-rolling pandas—turns out “because it’s fun” is a legit evolutionary strategy. Scientists think play sharpens coordination and social skills, but honestly, watching a raven sled down a metal roof on a plastic lid just screams “I’m doing it for the TikTok.” + +*Grandma knows best—even in elephant form.* +Elephant herds are led by the oldest female, and her decades of “I’ve seen this before” wisdom decide when to migrate, where to find water, and how to handle that one teenage bull who thinks he’s all that. Swap gray skin for gray hair and you’ve basically got my nana directing Thanksgiving traffic. + +*Besties across species lines.* +Goby fish and pistol shrimp run the cutest roommate situation: the shrimp digs the burrow, the goby stands guard. If danger shows up, the goby flicks its tail like a silent alarm, and both dive to safety. It’s the underwater version of “I’ll cook, you do the dishes.” + +*We all scream for sunscreen—sort of.* +Hippos secrete “blood sweat” that acts like SPF 50, while giraffes drool black saliva that works like built-in sunglasses. Meanwhile, I’m over here reapplying SPF 30 every 90 minutes and still ending up the color of a lobster. Nature’s got hacks, and I’m taking notes. + +So next time you catch yourself thinking, “I’m nothing like that weird-looking animal,” remember: somewhere out there a capybara is sharing its snack with a bird, and a whale is singing a song that rhymes with last year’s chart-topper. We’re all just variations on a theme—some of us just have more legs or better night vision.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738594", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "discover-stop-show-different-water", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format: +*section name*", + "$.messages[0].role": "user", + "$.messages[1].content": "*Executive Summary* +Bremen is a city of 570,000 residents, 40,000 university students, and a thriving creative scene, yet it lacks a home-grown premium sweatshirt brand that speaks to its maritime heritage, Hanseatic pride, and eco-conscious mindset. “Bremer Fleeze” will fill that gap by producing limited-edition, GOTS-certified organic-cotton sweatshirts that combine North-German minimalism with subtle nautical iconography. Our mission: to become Bremen’s first carbon-neutral apparel label and the go-to gift for locals and visitors alike. We will launch with an online-first D2C model, supported by pop-up stores in Viertel and Überseestadt, and scale to €1.2 M revenue by Year 3. 
+ +*Market Analysis* +Target segments: (1) 18-30 students who value sustainability and streetwear aesthetics; (2) 25-45 professionals seeking premium basics; (3) tourists wanting authentic Bremen souvenirs. Competitor scan: global fast-fashion brands (H&M, Zara) lack local identity; German labels (Hugo Boss, Adidas) are either too formal or too sporty; no Bremen-specific sweatshirt exists. Market size: €45 M annual spend on mid-premium sweatshirts in Bremen & Niedersachsen combined, growing 6 % YoY. Consumer pain points: greenwashing, poor fit, generic designs. Our edge: verifiable sustainability, Bremen storytelling, and inclusive sizing (XXS-4XL). + +*Product & Sustainability Strategy* +Core line: 12 unisex styles in 20 colors, 320 gsm brushed organic cotton, recycled polyester labels, water-based inks. Signature design elements: tonal outline of the Bremer Stadtmusikanten silhouette, latitude-longitude coordinates of Bremen harbor, and a discreet “Kohlfahrt-proof” patch. Packaging: reusable tote made from retired ship sails (partnership with local sailmaker). Carbon footprint: 2.1 kg CO₂e per sweatshirt vs. 8.9 kg industry average; offset via Gold-Standard wind-farm credits. End-of-life: take-back program with 15 % store credit incentive; returned garments upcycled into insulation for social housing. + +*Go-to-Market & Marketing Plan* +Phase 1 (Months 1-3): Pre-launch teaser on Instagram & TikTok (#BremerFleeze), micro-influencer seeding with 50 Bremen creators, waitlist goal 5,000 sign-ups. Phase 2 (Months 4-6): E-commerce launch on Shopify, same-day bike courier delivery within Bremen city limits, QR code on hangtag linking to AR filter that overlays the Roland statue wearing the sweatshirt. Phase 3 (Months 7-12): Pop-up container store at Marktplatz during Weihnachtsmarkt, collab with Beck’s Brewery for co-branded “Bremer Fleeze & Bremer Pils” gift box. KPIs: CAC ≤ €18, repeat purchase rate ≥ 35 %, NPS ≥ 70. 
+ +*Financial Projections & Funding Needs* +Start-up capital required: €180,000. Breakdown: €60k inventory (2,000 units), €35k marketing, €25k e-commerce & IT, €20k legal & certifications, €40k working capital. Revenue forecast: Y1 €220k, Y2 €550k, Y3 €1.2 M. Gross margin 62 %, EBITDA margin 18 % by Y3. Break-even month 14. Funding structure: €80k founder equity, €50k Bremen-based angel investor (ex-Bremer Kaffee & Co-founder), €50k zero-interest KfW Green Loan. Exit option: acquisition by sustainable German fashion conglomerate at 3× revenue multiple in Year 5.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738623", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 18 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "mean-certain-bitter-sharp-party", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. 
Please highlight at least 3 sections with markdown, i.e *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "**The Official “Two-Mom Marriage Quality Control” Checklist** +*Because nothing says romance like a quarterly performance review.* + +--- + +**1. Morning Routine Coordination** +Rate: ☐ 1 (We need a UN mediator) ☐ 2 (We’ve agreed to disagree… loudly) ☐ 3 (Only one of us cried into the coffee) ☐ 4 (Synchronized chaos) ☐ 5 (*We finished each other’s sentences… and the toddler’s homework*) + +*Note: Bonus points if you’ve mastered the “I’ll pack lunches if you find the missing shoe” hostage negotiation.* + +--- + +**2. Division of Emotional Labor** +Rate: ☐ 1 (“I’m fine.” *— famous last words*) ☐ 2 (We schedule meltdowns like dentist appointments) ☐ 3 (Therapy? We *are* the therapy) ☐ 4 (We tag-team existential dread while folding tiny socks) ☐ 5 (*We’ve achieved telepathic resentment-free snack negotiations*) + +*Pro tip: If you’ve ever passive-aggressively alphabetized the spice rack while humming “You’re Welcome” from Moana, circle ☐ 1 twice.* + +--- + +**3. Co-Parenting Synchronization** +Rate: ☐ 1 (Our kids have unionized) ☐ 2 (We’ve agreed to disagree on screen time… and bedtime… and vegetables) ☐ 3 (We bribe with different currencies: one uses stickers, the other uses Wi-Fi passwords) ☐ 4 (We’ve synchronized our “mom voice” to a frequency only dogs and children fear) ☐ 5 (*We’ve weaponized teamwork so effectively the kids think we share a brain*) + +*Highlight this if you’ve ever high-fived over successfully gaslighting a toddler into eating broccoli.* + +--- + +**Final Grade:** +☐ Certified Disaster (But at least the houseplants are alive) +☐ Functioning Dysfunction (We’ve agreed the laundry mountain is *decorative*) +☐ Power Couple (We’ve weaponized Pinterest and the kids still haven’t noticed) +☐ *Mom-tastic Nirvana* (We finish each other’s… sandwiches. And trauma. But mostly sandwiches.) 
+ +--- + +**Comments Section (for roasting each other lovingly):** +“Remember when you ‘lost’ the Elf on the Shelf for three days? *Iconic.*” +“Your snoring sounds like a haunted dishwasher, but I’d still share my last square of emergency chocolate with you.”", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738649", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 1 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "forecast-big-song-issue-service", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*.", + "$.messages[0].role": "user", + "$.messages[1].content": "*little me in the countryside* + +[Verse 1] +Barefoot on the gravel road, dust between my toes, +Sunlight dripped like honey on the freckles on my nose. 
+Grandma’s porch swing creaked a lullaby in time, +While the crickets tuned their violins to the rhythm of the pines. + +[Pre-Chorus] +Mason jars with lightning bugs, constellations in a jar, +We’d race the dusk to catch the dark before it got too far. + +[Chorus] +Oh, little me in the countryside, chasing dreams as wide as skies, +Jumping creeks and climbing trees, believing I could fly. +Every sunset was a promise that tomorrow would be mine, +In the summers of my childhood, where the world was kind. + +[Verse 2] +Hayloft hideouts, secret forts, straw castles in the loft, +We were pirates, we were astronauts, anything we thought. +Mama’s voice would echo, “Supper!” across the open fields, +And the wind would carry laughter like the church bell’s gentle peals. + +[Pre-Chorus] +Fireflies wrote our names in light, then vanished in the night, +But the glow stayed in our hearts, a soft and steady light. + +[Chorus] +Oh, little me in the countryside, chasing dreams as wide as skies, +Jumping creeks and climbing trees, believing I could fly. +Every sunset was a promise that tomorrow would be mine, +In the summers of my childhood, where the world was kind. + +[Bridge] +Now the roads are paved, the porch is still, the swing don’t creak no more, +But I can close my eyes and hear the screen door’s gentle roar. +Time may steal the years away, but it can’t outrun the past— +I keep those golden afternoons like fireflies in a glass. + +[Final Chorus] +Oh, little me in the countryside, you’re never far behind, +You’re the echo in the wheat fields, the shadow in my mind. 
+Though the world keeps spinning faster, I still find you every time— +In the hush of dusk, in the scent of rain, in the summers of my childhood, +Where the world was kind.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738707", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 3 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "design-slow-wish-end-fear", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a riddle for the word "façade" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "I stand before you, yet I’m not what I seem; +a *face I wear* that isn’t mine, +a *mask of stone* or paint or beam, +a *pretty lie* that keeps the truth confined. 
+ +What am I?", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738738", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 19 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "consider-work-economic-popular-country", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# Workshop Template +**Title:** *Building Inclusive Excellence: The Business Case for Workplace Diversity* + +--- + +### 1. Welcome & Objectives (15 min) +- **Ice-breaker:** “Two Truths & a Dream” – participants share two facts and one aspiration related to identity or culture. +- *State the learning outcomes:* + 1. Understand how diversity drives innovation and profit. + 2. Identify unconscious bias and micro-behaviors. + 3. Create an action plan for inclusive leadership. + +--- + +### 2. 
*The Business Case for Diversity* (30 min) +- **Data Walk:** Posters with McKinsey, Deloitte, and Harvard studies showing ↑ revenue, ↓ turnover. +- *Interactive poll:* “Which statistic surprised you most?” (live Mentimeter). +- **Case Study:** *How a global tech firm increased patent filings by 19 % after diversifying R&D teams.* + +--- + +### 3. *Unconscious Bias & Micro-behaviors* (45 min) +- **Activity:** “Resume Roulette” – identical CVs with different names; discuss callback disparities. +- *Bias Busters Toolkit:* 5-step process to interrupt bias in real time. +- **Role-play triads:** Observer, Speaker, Interrupter practicing inclusive language. + +--- + +### 4. Inclusive Leadership in Action (30 min) +- *Panel of ERG leaders* sharing measurable wins (e.g., 40 % ↑ in promotions for under-represented groups). +- **Speed-ideation:** 90-second pitches for inclusive policies; peer voting on top 3. + +--- + +### 5. Action Planning & Commitments (20 min) +- *Personal Inclusion Pledge:* Write one behavior to start, one to stop, one to amplify. +- **Accountability Circles:** Pair up for 30-day check-ins. + +--- + +### 6. Closing & Resources (10 min) +- **Resource QR Code:** Links to toolkits, podcasts, and upcoming ERG events. +- *Final reflection:* “One word to describe how you’ll lead differently tomorrow.”", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738766", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 28 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "predict-serve-messy-difficult-festival", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a cover letter to a local political party, asking to be their rally organizer. Make sure to highlight at least 3 sections in your answer in markdown format.", + "$.messages[0].role": "user", + "$.messages[1].content": "**[Your Name]** +[Your Address] +[City, State, ZIP] +[Email Address] | [Phone Number] | [Date] + +**[Local Party Chair’s Name]** +[Local Political Party Name] +[Party Headquarters Address] +[City, State, ZIP] + +--- + +### **Introduction: Why I’m Writing** + +Dear **[Chair’s Name]** and the **Executive Committee**, + +I am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[City/County]** and a committed volunteer who has knocked on over **2,000 doors** and organized **five successful voter-turnout events** in the last two cycles, I understand how to turn passion into turnout and energy into votes. I want to bring that same fire—and a proven playbook—to every rally our party holds between now and Election Day. 
+ +--- + +### **Core Qualifications: What I Bring to the Podium** + +- **Logistics Mastery**: From securing permits with the Parks Department to negotiating stage-rental discounts, I have reduced average rally costs by **28 %** while increasing attendance by **45 %** through targeted social-media geofencing and SMS reminders. +- **Coalition Building**: I have already built relationships with **15 local unions**, **7 student groups**, and **4 faith communities** who are eager to co-sponsor rallies if given two weeks’ notice. +- **Crisis-Ready Leadership**: When an unexpected thunderstorm threatened our 2022 “Get-Out-the-Vote” rally, I pivoted to a **drive-in format** in under four hours—resulting in zero cancellations and 300 additional cars circling the block to hear our speakers. + +--- + +### **Vision for 2024: Turning Rallies into Results** + +My plan is to run a **“Rally-to-Ballot Pipeline”**: + +1. **Pre-Rally**: Micro-targeted digital invites to low-propensity voters within a 5-mile radius. +2. **During Rally**: QR-code check-ins that instantly text attendees their personalized early-vote locations. +3. **Post-Rally**: A 72-hour follow-up cadence of thank-you emails, volunteer sign-up links, and ride-share codes to the polls. + +I have already drafted a **90-day rally calendar** (attached) that staggers high-energy weekend events with smaller, issue-specific pop-ups near early-vote sites. All I need is the party’s green light and a modest budget line for permits, A/V, and refreshments. + +--- + +### **Closing: Let’s Win This Together** + +I would welcome the chance to present this plan in person at your next committee meeting. Thank you for considering my application. Let’s give our volunteers, candidates, and—most importantly—our voters the rallies they deserve. 
+ +**Ready to organize, energize, and mobilize,** +[Your Signature (if submitting hard copy)] +[Your Printed Name]", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738795", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 4 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "accept-expect-black-young-area", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. 
*highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "The opposite of youth is not age, but *stagnation*—the moment curiosity closes its fist and wonder stops asking “what if?” +While *age* merely counts revolutions around the sun, *true youth* is the *capacity to keep revising the map of oneself* long after the hair turns silver.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738826", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 13 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "store-be-past-hard-war", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. 
Use markdown to highlight at least 3 sections in your answer.", + "$.messages[0].role": "user", + "$.messages[1].content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. + +Inside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. + +If you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738850", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 2 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "exploit-serve-invisible-community-minute", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Yo, it’s MC Broke-Again, fresh outta ramen, +phone buzzin’ like a bee in a jar of gin. +Unknown number—man, I’m already suspicious, +but the voice got *royal timbre*, straight-up delicious. + +“Greetings, Prince Darnell of House Wi-Fi, +I’m the Royal Scribe from Lagos on high. +Your great-great-grand-uncle was the King of Nigeria, +left a vault of gold bars and a pet cheetah named Maria.” + +I’m like, “Hold up—me? Royalty? 
+Last week I pawned my TV for a two-piece and sweet tea.” +He says, “Sign this PDF, wire a small advance fee, +just 419 bucks and the throne belongs to thee.” + +**I’m already picking out a crown on Amazon Prime, +ordering a robe that says ‘Fresh-to-Death’ in gold slime.** + +I Venmo the cash, add a royal emoji—👑 +then sprint to the mirror, practice my “My people, be free!” +Mom walks in, “Why you wrapped in bedsheets with a Burger King crown?” +I’m like, “Silence, peasant! Bow before your prince of renown!” + +Two days later, no gold, no cheetah, just crickets, +my bank account looking like a ghost town with limericks. +I call the number—disconnected, tone dead, +even the scammer ghosted me harder than my ex on read. + +So I’m back in the basement, eating discount sardines, +but I kept the crown—*it’s made of genuine Burger King plastic, supreme*.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738881", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 6 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "suggest-american-open-level-country", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Expand the riddle into a story with a funny tone: + +What can you catch but not throw? +A cold + +Use * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, “ACHOO—did somebody order a cold?” + +One Tuesday, Gary spotted his dream target: *Bartholomew “Bart” Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole “for the vitamins.” Gary rubbed his microscopic hands together. “Perfect. I’ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!” + +But there was a catch—literally. Gary could *catch* Bart, but he couldn’t *throw* him anywhere. 
Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, “This is mutiny! I’m supposed to be the one doing the tossing!” + +Meanwhile, Bart’s immune system—led by Captain Phlegm and the Mucus Marines—launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. + +Defeated, Gary sat in the crumpled tissue, sulking. “I caught the cold,” he muttered, “but I can’t throw it anywhere. Worst. Superpower. Ever.” + +From that day forward, Gary changed his business card to read: “Professional Cold Catcher—No Returns, No Refunds, Absolutely No Throwing.”", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738906", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 16 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "respond-connect-true-problem-plan", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format.", + "$.messages[0].role": "user", + "$.messages[1].content": "**[Intro – Lute Drop]** +*beat drops like a sack of wet marble* +Yo, it’s 14-something, smell that? That’s perspective in the air, +Gutenberg just ghost-wrote the mixtape, pages everywhere. +I’m a Medici money-mage, patron of the bizarre, +Commissioning nightmares in egg-tempera. + +--- + +### **Verse 1 – The Alchemist’s Cypher** +I paint with mercury, breathe with a lung of lead, +My selfie’s a death-mask, gold leaf on the dead. +Brunelleschi built a dome like a half-buried skull, +You think it’s architecture? Nah, it’s a ritual. +**[weird baroque wheeze]** +I rhyme in vanishing points, every line converges +On a single terrified eye that never emerges. +Perspective’s a ouija board, da Vinci holds the planchette, +Sketching choppers (helicopters) centuries before the jet-set. 
+ +--- + +### **Hook – Chant of the Anamorphic Choir** +*distorted children’s voices layered over sackbut* +Renaissance, renaissance, put a cherub on it, +Baby wings, baby fangs, marble veins, vomit. +We don’t bling, we gild, we don’t rap, we fresco— +Your flow’s two-dimensions, mine’s *trompe-l’oeil* techno. + +--- + +### **Verse 2 – The Court Jester’s Acid Madrigal** +**[sudden switch to a harpsichord trap beat]** +I juggle plague rats for the Duke of Milan, +He pays me in sonnets and unmarked arsenic flan. +Isabella d’Este texts me in mirror-script, +Her emoji: a decapitated hydra, lips pursed, licked. +I’m the fool with a scalpel, dissecting songbirds to find +Which organ chirps—turns out it’s the **pineal gland** of the mind. +Michelangelo’s in the corner, carving his own ribcage, +Says “The Sistine’s just a throat, God’s a uvula on stage.” + +--- + +### **Bridge – The Astrolabe Malfunction** +*beat glitches like a scratched illuminated manuscript* +Copernicus DM’d me: “The sun’s a strobe light, +Planets orbit like moths around a candle that bites.” +I reply with a lute solo reversed through a cathedral, +Notes fall upward, monks levitate, it’s procedural. +**[static of Latin hex]** +Galileo’s telescope sees the future: TikTok, guillotine, +He mutters “*E pur si muove*” then dabs in quarantine. + +--- + +### **Outro – The Anachronistic Masque** +**[all instruments devolve into wet clay sounds]** +So remember me when you’re double-tapping a Botticelli, +I’m the glitch in the background, the snail with a human belly. +Renaissance never ended, it just learned to hide, +Under your touchscreen, oil-paint bleeding through the side. 
+Now go—vanish in a puff of ultramarine dust, +The past is a performance, and the future’s already trussed.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738931", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 16 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "must-watch-bad-hate-magazine", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*", + "$.messages[0].role": "user", + "$.messages[1].content": "*Jōmon* hunter-gatherers (c. 14,000 BCE) gave way to *Yayoi* rice cultivators who introduced metal tools and social stratification. +The *Yamato* court unified much of the archipelago by the 5th century CE, absorbing *Shinto* beliefs and Chinese writing, law, and Buddhism. +A *Heian* golden age (794–1185) produced literature like *The Tale of Genji* while the imperial court’s real power waned. 
+Samurai clans rose; the *Kamakura* shogunate (1185–1333) created Japan’s first warrior government, repelling Mongol invasions. +Centuries of civil war ended when *Oda Nobunaga*, *Toyotomi Hideyoshi*, and *Tokugawa Ieyasu* reunified the land, leading to the *Edo* shogunate (1603–1868). +Isolation under *sakoku* preserved domestic culture until *Commodore Perry*’s 1853 arrival forced unequal treaties. +The *Meiji* Restoration (1868) abolished feudalism, industrialized rapidly, and built an empire that won wars against China and Russia. +Imperial expansion—Korea, Manchuria, and *Pearl Harbor*—ended in atomic bombings and occupation by Allied forces (1945–1952). +Post-war *miracle* growth made Japan an economic superpower, followed by stagnation since the 1990s yet enduring global cultural influence.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738958", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 3 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "take-love-start-short-work", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. Example: *This is a highlighted phrase*.", + "$.messages[0].role": "user", + "$.messages[1].content": "John in *De Smet* +*Quaker* silence—then he yells +“Where’s my *oatmeal*, Friend?”", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.738984", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 13 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "delete-democratic-hot-new-work", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with "*", like *highlighted text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Tonight, the 54th Annual Grammy Awards [1] stand as a testament to the enduring power of recorded music. Under the vaulted ceiling of the *Staples Center*, the Recording Academy gathers to bestow its highest accolades upon those whose artistry has defined the sonic landscape of eligibility year 2011 [2]. + +The evening’s proceedings are governed by the *strictures of the Grammy Charter* [3], ensuring that every statuette awarded is the result of peer-review balloting among the Academy’s 12,000 voting members [4]. In accordance with Rule 3.01, only recordings commercially released in the United States between 1 October 2010 and 30 September 2011 are eligible [5]. 
+ +Among the most anticipated categories is *Album of the Year* [6], whose nominees—Adele’s *21*, Foo Fighters’ *Wasting Light*, Lady Gaga’s *Born This Way*, Bruno Mars’ *Doo-Wops & Hooligans*, and Rihanna’s *Loud*—represent a cross-section of genre, generation, and geography [7]. The *Record of the Year* [8] shortlist similarly spans continents and styles, underscoring the Academy’s commitment to excellence irrespective of commercial metrics. + +The *Producer of the Year, Non-Classical* [9] category spotlights the architects of sound whose meticulous craft transforms raw performance into timeless recording. Paul Epworth, whose work on *21* has garnered both critical and commercial acclaim, is widely regarded as the frontrunner [10]. + +As the telecast proceeds, the *In Memoriam* segment [11] will honor those luminaries whose voices were stilled during the eligibility year, their legacies enshrined in the grooves and bytes of the recordings they leave behind. + +The 54th Annual Grammy Awards thus serve not merely as a celebration, but as a *codification of excellence* [12], ensuring that the artistry of 2011 resonates through the decades to come.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.739013", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 33 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "live-important-military-only-love", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# 7 Surprising Facts About Dutch That Will Make You Say “Wauw!” + +When most people think of Dutch, they picture windmills, tulips, and maybe a stroopwafel or two. But the language behind those images is far stranger—and more fascinating—than you might expect. Below are seven facts that prove Dutch is anything but *just English with extra g’s*. + +--- + +### 1. The Longest Word Is 53 Letters—and It’s Official +The Dutch *Encyclopedie* recognizes *aansprakelijkheidswaardevaststellingsveranderingen* (53 letters) as a legitimate word. It translates to “changes in liability value assessments,” which is ironic because the word itself is a liability to pronounce. Try saying that three times fast—or even once slowly. + +--- + +### 2. 
Dutch Has a Word for *Everything*—Including “Schadenfreude” but Cozier +English borrows *Schadenfreude* from German, but Dutch gives us *gezellig*, a term that packs “cozy,” “fun,” and “belonging” into one syllable. A café can be *gezellig*, a party can be *gezellig*, and even a quiet evening with friends can be *gezellig*. There’s no direct English equivalent, which is why expats end up using it in English sentences: “That dinner was so… *gezellig*.” + +--- + +### 3. The Dutch Alphabet Ends with “X, Y, IJ, Z” +Most alphabets end with “X, Y, Z,” but Dutch sneaks in an extra character: *IJ*. It’s considered a single letter (yes, two letters acting as one), and it even has its own capitalized form: *IJsselmeer*, the famous lake. Type designers have to create a special ligature for it, and crossword puzzles treat *IJ* as one square. Imagine Scrabble tiles clinking together as someone proudly lays down “IJSBERG” for 72 points. + +--- + +### 4. Dutch Greetings Change by Time of Day—But Not How You’d Expect +In English, “good morning” fades into “good afternoon” around noon. Dutch, however, switches from *goedemorgen* to *goedemiddag* at exactly 12:00 p.m.—unless you’re in Belgium, where some regions wait until 1:00 p.m. Miss the cutoff and you’ll get a polite correction: “Het is al middag, hoor!” (“It’s already afternoon, you know!”). + +--- + +### 5. Dutch Is the Closest Major Language to English—But That Doesn’t Mean It’s Easy +Linguists classify Dutch and English as *West Germanic siblings*, which means basic vocabulary overlaps: *appel* (apple), *boek* (book), *water* (water). Yet false friends lurk everywhere. *Durf jij?* doesn’t mean “Do you dare?”—it means “Do you dare?”… but with a cultural undertone of *prove it*. And *be careful* with *eventueel*, which means “possibly,” not “eventually.” + +--- + +### 6. Dutch Has Three Genders—But Only Two Articles +Grammatically, Dutch retains masculine, feminine, and neuter genders. 
In practice, however, only *de* (common gender) and *het* (neuter) are used. The result? Native speakers themselves debate whether *auto* (car) is masculine or feminine. The safest route is to just say *de auto* and avoid the awkward silence that follows gendered guesswork. + +--- + +### 7. Dutch Invented the Word “Cookie”—and the World Never Looked Back +The English word “cookie” comes from the Dutch *koekje* (“little cake”). Dutch settlers brought *koekjes* to New Amsterdam (now New York), and the rest is snack history. Next time you dunk a chocolate chip, thank the Dutch for their linguistic—and culinary—contribution. + +--- + +### Bonus: Dutch Tongue Twisters Are Next-Level +Try this: *De kat krabt de krullen van de trap*. Translation: “The cat scratches the curls off the stairs.” Say it five times without twisting your tongue into a *stroopwafel*. + +--- + +Whether you’re planning a trip to Amsterdam or just want to impress friends at trivia night, Dutch offers a treasure trove of quirks. From *gezellig* evenings to 53-letter bureaucratic monsters, the language proves that small countries can produce *enormous linguistic personality*.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:30:54.739037", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "hear-live-common-late-hate", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 2 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "think-indicate-write-local-company", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Gideon has several good options for his surplus: + +1. **Sell it at the local farmers’ market or to a regional wholesaler**, turning the extra produce into cash that he can use for next season’s seeds, equipment repairs, or household expenses. +2. *Donate a portion to the nearby food bank or community kitchen*, helping families who struggle with food insecurity while also earning a small tax deduction. +3. Preserve some of the surplus—by canning tomatoes, drying herbs, or storing root vegetables in a cool cellar—so his own family has nutritious food throughout the winter. +4. 
Trade with neighboring farmers: swap excess corn for someone else’s surplus eggs or honey, strengthening community ties and diversifying his pantry without spending money.", + "$.messages[1].role": "assistant", + "$.pid": 52072, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-08T23:34:12.823862", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.run_id": "meet-treat-central-possible-lot", + "$.eval_metadata.status": "stopped", + "$.eval_metadata.version": "v0.2.6.dev20+ge1da45a.dirty", + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "expect-upload-close-president-goal", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. 
+ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.pid": 52715, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:10:34.081457", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": "Run math evaluation on sample dataset using pytest interface.", + "$.eval_metadata.name": "test_pytest_mcp_url", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.run_id": "replace-measure-bitter-open-reason", + "$.eval_metadata.status": "finished", + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "At least one tool call was made", + "$.evaluation_result.score": 1, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.row_id": "run-update-traditional-software-idea", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "You are a helpful assistant that can answer questions about Fireworks. +ALWAYS provide code or commands to execute to answer the question.", + "$.messages[0].role": "system", + "$.messages[1].content": "Can you teach me about how to manage deployments on Fireworks", + "$.messages[1].role": "user", + "$.messages[2].content": "I'll help you learn about managing deployments on Fireworks. 
Let me search for comprehensive information about deployment management.", + "$.messages[2].role": "assistant", + "$.messages[2].tool_calls[0].function.arguments": "{"query": "deployment management Fireworks"}", + "$.messages[2].tool_calls[0].function.name": "search", + "$.messages[2].tool_calls[0].id": "functions.search:0", + "$.messages[2].tool_calls[0].type": "function", + "$.messages[3].content[0].text": "Title: On-demand deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. 
This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +On-demand deployments +If you plan on using a significant amount of dedicated deployments, consider purchasing reserved capacity instead of using on-demand deployments for more reliable capacity and higher GPU quotas. Fireworks allows you to create on-demand deployments of models that are reserved for your own use and billed by the GPU-second. This has several advantages over the shared deployment architecture you get when using Fireworks' serverless models: Predictable performance unaffected by load caused by other users No hard rate limits, but subject to the maximum load capacity of the deployment Cheaper under high utilization Access to larger selection of models not available via our serverless models Custom base models from Hugging Face files Unlike serverless models, which are billed on a per-token basis, on-demand deployments are billed by GPU-second. Consult our pricing page for details. + +", + "$.messages[3].content[0].type": "text", + "$.messages[3].content[1].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . 
You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +", + "$.messages[3].content[1].type": "text", + "$.messages[3].content[2].text": "Title: Introducing the Fireworks Build SDK +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-introduction +Content: Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +Introducing the Fireworks Build SDK +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +The Fireworks Build SDK is a client library that allows you to interact with the Fireworks API using Python. It provides a simple and intuitive interface for working with Fireworks primitives like deployments, fine-tuning jobs, and datasets as Python objects. The Build SDK is currently in beta and not all functionality may be supported. Please reach out to dhuang@fireworks.ai to report any issues or feedback. + +", + "$.messages[3].content[2].type": "text", + "$.messages[3].content[3].text": "Title: I have multiple Fireworks accounts. 
When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +Link: https://docs.fireworks.ai/faq-new/account-access/i-have-multiple-fireworks-accounts-when-i-try-to-login-with-google-on-fireworks +Content: If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +I have multiple Fireworks accounts. When I try to login with Google on Fireworks' web UI, I'm getting signed into the wrong account. How do I fix this? +If you log in with Google, account management is controlled by Google. You can log in through an incognito mode or create separate Chrome/browser profiles to log in with different Google accounts. You could also follow the steps in this guide to disassociate Fireworks.ai with a particular Google account sign-in. If you have more complex issues please contact us on Discord. + +", + "$.messages[3].content[3].type": "text", + "$.messages[3].content[4].text": "Title: Querying existing dedicated deployments +Link: https://docs.fireworks.ai/tools-sdks/python-client/querying-dedicated-deployments +Content: When you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. 
This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams. + +When you have dedicated deployments that were created via firectl or the Fireworks web UI, you can easily connect to them using the Build SDK to run inference. This is particularly useful when you want to leverage existing infrastructure or when deployments are managed by different teams. + +Deployment ID requirements +The id parameter should match exactly with your existing deployment: Use the deployment name/ID as shown in the Fireworks dashboard The ID is case-sensitive and must match exactly If the deployment doesn't exist, you'll receive an error when making requests + +", + "$.messages[3].content[4].type": "text", + "$.messages[3].content[5].text": "Title: Migration and traffic management +Link: https://docs.fireworks.ai/guides/prompt-caching +Content: When migrating between deployments that use prompt caching, it's crucial to implement proper traffic routing to maintain optimal cache hit rates. When gradually routing traffic to a new deployment, use consistent user/session-based sampling rather than random sampling. 
Here's the recommended implementation for traffic routing: import hashlib + +# Configure traffic fraction (e.g., 20% to new deployment) +fireworks_traffic_fraction = 0.2 +user_id = "session-id-123" + +# Generate deterministic hash from user_id +hashed_user_id = int(hashlib.md5(user_id.encode()).hexdigest(), 16) # MD5 hash on user-id and convert to integer +MAX_HASH = 2**128 - 1 # MD5 hash maximum value + +# Compute ratio for consistent routing +ratio = hashed_user_id / MAX_HASH # Returns 0.0 to 1.0 + +if (ratio < fireworks_traffic_fraction): + send_to_new_deployment(user=hashed_user_id) # Pass user ID for caching +else: + send_elsewhere() # Route to old deployment or serverless + +When migrating between deployments that use prompt caching, it's crucial to implement proper traffic routing to maintain optimal cache hit rates. When gradually routing traffic to a new deployment, use consistent user/session-based sampling rather than random sampling. Here's the recommended implementation for traffic routing: import hashlib + +# Configure traffic fraction (e.g., 20% to new deployment) +fireworks_traffic_fraction = 0.2 +user_id = "session-id-123" + +# Generate deterministic hash from user_id +hashed_user_id = int(hashlib.md5(user_id.encode()).hexdigest(), 16) # MD5 hash on user-id and convert to integer +MAX_HASH = 2**128 - 1 # MD5 hash maximum value + +# Compute ratio for consistent routing +ratio = hashed_user_id / MAX_HASH # Returns 0.0 to 1.0 + +if (ratio < fireworks_traffic_fraction): + send_to_new_deployment(user=hashed_user_id) # Pass user ID for caching +else: + send_elsewhere() # Route to old deployment or serverless + +Advanced: cache locality for Enterprise deployments +Dedicated deployments on an Enterprise plan allow you to pass an additional hint in the request to improve cache hit rates. 
First, the deployment needs to be created or updated with an additional flag: Then the client can pass an opaque identifier representing a single user or +session in the user field of the body or in the x-session-affinity header. Fireworks +will try to route requests with the identifier to the same server, further reducing response times. It's best to choose an identifier that groups requests with long shared prompt +prefixes. For example, it can be a chat session with the same user or an +assistant working with the same shared context. + +", + "$.messages[3].content[5].type": "text", + "$.messages[3].content[6].text": "Title: Do you host your deployments in the EU or Asia? +Link: https://docs.fireworks.ai/faq-new/support-general/do-you-host-your-deployments-in-the-eu-or-asia +Content: Do you host your deployments in the EU or Asia? +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +Do you host your deployments in the EU or Asia? +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. + +We are currently deployed in multiple U.S.-based locations. However, we’re open to hearing more about your specific requirements. You can: Join our Discord community Write to us at inquiries@fireworks.ai If you're an Enterprise customer, please contact your dedicated customer support representative to ensure a timely response. 
+ +", + "$.messages[3].content[6].type": "text", + "$.messages[3].content[7].text": "Title: Deployments and deployment types +Link: https://docs.fireworks.ai/getting-started/concepts +Content: A model must be deployed before it can be used for inference. A deployment is a collection (one or more) model servers that host one base model and optionally one or more LoRA addons. Fireworks supports two types of deployments: Serverless deployments: Fireworks hosts popular base models on shared "serverless" deployments. Users pay-per-token to query these models and do not need to configure GPUs. The most popular serverless deployments also support serverless LoRA addons. See the Deploying to serverless guide for details. Dedicated deployments: Dedicated deployments enable users to configure private deployments with a wide array of hardware (see on-demand deployments guide ). Dedicated deployments give users performance guarantees and the most flexibility and control over what models can be deployed. Both LoRA addons and base models can be deployed to dedicated deployments. Dedicated deployments are billed by a GPU-second basis (see pricing page). See the Querying text models guide for a comprehensive overview of making LLM inference. + +A model must be deployed before it can be used for inference. A deployment is a collection (one or more) model servers that host one base model and optionally one or more LoRA addons. Fireworks supports two types of deployments: Serverless deployments: Fireworks hosts popular base models on shared "serverless" deployments. Users pay-per-token to query these models and do not need to configure GPUs. The most popular serverless deployments also support serverless LoRA addons. See the Deploying to serverless guide for details. Dedicated deployments: Dedicated deployments enable users to configure private deployments with a wide array of hardware (see on-demand deployments guide ). 
Dedicated deployments give users performance guarantees and the most flexibility and control over what models can be deployed. Both LoRA addons and base models can be deployed to dedicated deployments. Dedicated deployments are billed by a GPU-second basis (see pricing page). See the Querying text models guide for a comprehensive overview of making LLM inference. + +Control plane and data plane +The Fireworks API can be split into a control plane and a data plane. The control plane consists of APIs used for managing the lifecycle of resources. This +includes your account, models, and deployments. The data plane consists of the APIs used for inference and the backend services that power +them. + +", + "$.messages[3].content[7].type": "text", + "$.messages[3].content[8].text": "Title: Deployment Process +Link: https://docs.fireworks.ai/evaluators/api_reference/reward_function_decorator +Content: When .deploy() is called, the decorator: Extracts the function's source code Creates a wrapper that handles the Fireworks evaluation format Creates a temporary directory with the wrapped function Uploads and registers the function with the Fireworks API + +When .deploy() is called, the decorator: Extracts the function's source code Creates a wrapper that handles the Fireworks evaluation format Creates a temporary directory with the wrapped function Uploads and registers the function with the Fireworks API + +.deploy() +The decorator adds a .deploy() method to the function, allowing it to be deployed to Fireworks. 
evaluation_id = my_reward_function.deploy( + name="my-evaluator", + description="Evaluates responses based on clarity and accuracy", + account_id=None, # Optional, defaults to configured account + auth_token=None, # Optional, defaults to configured token + force=False, # Set to True to overwrite if it already exists + providers=None # Optional model providers configuration +) + +", + "$.messages[3].content[8].type": "text", + "$.messages[3].content[9].text": "Title: 🔧 Build SDK: Deployment Configuration Application Requirement +Link: https://docs.fireworks.ai/updates/changelog +Content: The Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type="on-demand" or deployment_type="on-demand-lora" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type="on-demand" or deployment_type="on-demand-lora" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14. + +The Build SDK now requires you to call .apply() to apply any deployment configurations to Fireworks when using deployment_type="on-demand" or deployment_type="on-demand-lora" . This change ensures explicit control over when deployments are created and helps prevent accidental deployment creation. 
Key changes: .apply() is now required for on-demand and on-demand-lora deployments Serverless deployments do not require .apply() calls If you do not call .apply() , you are expected to set up the deployment through the deployment page at https://app.fireworks.ai/dashboard/deployments Migration guide: Add llm.apply() after creating LLM instances with deployment_type="on-demand" or deployment_type="on-demand-lora" No changes needed for serverless deployments See updated documentation for examples and best practices This change improves deployment management and provides better control over resource creation. This applies to Python SDK version >=0.19.14. + +🚀 Easier & faster LoRA fine-tune deployments on Fireworks +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. 
+ +", + "$.messages[3].content[9].type": "text", + "$.messages[3].role": "tool", + "$.messages[3].tool_call_id": "functions.search:0", + "$.messages[4].content": "Let me search for more specific information about deployment management tools and processes.", + "$.messages[4].role": "assistant", + "$.messages[4].tool_calls[0].function.arguments": "{"query": "firectl deployment management commands"}", + "$.messages[4].tool_calls[0].function.name": "search", + "$.messages[4].tool_calls[0].id": "functions.search:1", + "$.messages[4].tool_calls[0].type": "function", + "$.messages[5].content[0].text": "Title: Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +Link: https://docs.fireworks.ai/faq-new/fine-tuning/why-am-i-getting-invalid-id-errors-when-using-firectl-commands-like-create-deplo +Content: This error typically occurs when your account ID is not properly configured. + +This error typically occurs when your account ID is not properly configured. + +Why am I getting "invalid id" errors when using firectl commands like create deployment or list deployments? +This error typically occurs when your account ID is not properly configured. + +", + "$.messages[5].content[0].type": "text", + "$.messages[5].content[1].text": "Title: Managing default deployments +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. You can see which deployed model entry is marked with Default: true using: firectl get model + +Since a model may be deployed to multiple deployments, querying by model name will route to the "default" deployed model. 
You can see which deployed model entry is marked with Default: true using: This will show the Deployed Model Refs section with the Default: true entry. To update the default deployed model, note the Name of the deployed model reference above. Then run: firectl update deployed-model --default + +", + "$.messages[5].content[1].type": "text", + "$.messages[5].content[2].text": "Title: Does Fireworks support custom base models? +Link: https://docs.fireworks.ai/faq-new/models-inference/does-fireworks-support-custom-base-models +Content: Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. + +Does Fireworks support custom base models? +Yes, custom base models can be deployed via firectl . You can learn more about custom model deployment in our guide on uploading custom models. 
+ +", + "$.messages[5].content[2].type": "text", + "$.messages[5].content[3].text": "Title: 🚀 Easier & faster LoRA fine-tune deployments on Fireworks +Link: https://docs.fireworks.ai/updates/changelog +Content: You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: firectl create deployment "accounts/fireworks/models/" + +You can now deploy a LoRA fine-tune with a single command and get speeds that approximately match the base model: Previously, this involved two distinct steps, and the resulting deployment was slower than the base model: Create a deployment using firectl create deployment "accounts/fireworks/models/" --enable-addons Then deploy the addon to the deployment: firectl load-lora --deployment For more information, see our deployment documentation. This change is for dedicated deployments with a single LoRA. You can still deploy multiple LoRAs on a deployment or deploy LoRA(s) on some Serverless models as described in the documentation. + +", + "$.messages[5].content[3].type": "text", + "$.messages[5].content[4].text": "Title: Quick deployment +Link: https://docs.fireworks.ai/fine-tuning/single-lora +Content: Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Deploy your LoRA fine-tuned model with one simple command: firectl create deployment "accounts/fireworks/models/" + +Alternative deployment method +This two-step method is the standard approach for multi-LoRA deployments where multiple LoRA models share the same base model. While it can also be used for single LoRA deployments, it provides slower performance compared to live merge and is not recommended for single LoRA use cases. 
You can also deploy single LoRA models using a two-step process: Create base model deployment Deploy the base model with addons enabled: firectl create deployment "accounts/fireworks/models/" --enable-addons + +", + "$.messages[5].content[4].type": "text", + "$.messages[5].content[5].text": "Title: Deploying a fine-tuned model using an on-demand deployment +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-models +Content: Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Use the following command to deploy your fine-tuned model using an on-demand deployment: firectl create deployment + +Deploying a fine-tuned model serverlessly +Not all base models support serverless addons. Please check this list for the full list of serverless models that support LoRA add-ons. If your base model supports serverless addons, you can run the following command to load the fine-tuned model into a serverless deployment: firectl load-lora + +", + "$.messages[5].content[5].type": "text", + "$.messages[5].content[6].text": "Title: Deploy Command +Link: https://docs.fireworks.ai/evaluators/cli_reference/cli_overview +Content: The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +The deploy command deploys a reward function as an evaluator on the Fireworks platform. + +Command Overview +The Reward Kit CLI supports the following main commands: run : Run a local evaluation pipeline using a Hydra configuration. preview : Preview evaluation results or re-evaluate generated outputs. deploy : Deploy a reward function as an evaluator. agent-eval : Run agent evaluations on task bundles. list : List existing evaluators (coming soon). delete : Delete an evaluator (coming soon). 
+ +", + "$.messages[5].content[6].type": "text", + "$.messages[5].content[7].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. firectl create deployment [flags] + +Creates a new deployment. firectl create deployment [flags] + +Example +firectl create deployment falcon-7b + +", + "$.messages[5].content[7].type": "text", + "$.messages[5].content[8].text": "Title: firectl +Link: https://docs.fireworks.ai/fine-tuning/fine-tuning-vlm +Content: # Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +# Create a deployment for your fine-tuned VLM +firectl create deployment my-custom-vlm + +# Check deployment status +firectl get deployment accounts/your-account/deployment/deployment-id + +firectl create sftj \\ + --base-model accounts/fireworks/models/qwen2p5-vl-32b-instruct \\ + --dataset my-vlm-dataset \\ + --output-model my-custom-vlm \\ + --epochs 3 For additional parameters like learning rates, evaluation datasets, and batch sizes, see Additional SFT job settings . + +", + "$.messages[5].content[8].type": "text", + "$.messages[5].content[9].text": "Title: Uploading the model locally (firectl) +Link: https://docs.fireworks.ai/models/uploading-custom-models +Content: To upload a custom base model, run the following command. firectl create model /path/to/files/ + +To upload a custom base model, run the following command. firectl create model /path/to/files/ + +To upload a custom base model, run the following command. 
+ +", + "$.messages[5].content[9].type": "text", + "$.messages[5].role": "tool", + "$.messages[5].tool_call_id": "functions.search:1", + "$.messages[6].content": "Let me search for more information about deployment lifecycle management and monitoring.", + "$.messages[6].role": "assistant", + "$.messages[6].tool_calls[0].function.arguments": "{"query": "deployment monitoring scaling management lifecycle"}", + "$.messages[6].tool_calls[0].function.name": "search", + "$.messages[6].tool_calls[0].id": "functions.search:0", + "$.messages[6].tool_calls[0].type": "function", + "$.messages[7].content[0].text": "Title: What should I expect for deployment and scaling performance? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-should-i-expect-for-deployment-and-scaling-performance +Content: Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +What should I expect for deployment and scaling performance? 
+Initial deployment : Should complete within minutes Scaling from zero : You may experience brief availability delays while the system scales up Troubleshooting : If deployment takes over 1 hour, this typically indicates a crash and should be investigated Best practice : Monitor deployment status and contact support if deployment times are unusually long + +", + "$.messages[7].content[0].type": "text", + "$.messages[7].content[1].text": "Title: How does billing and scaling work for on-demand GPU deployments? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-and-scaling-work-for-on-demand-gpu-deployments +Content: On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully 
serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +How does billing and scaling work for on-demand GPU deployments? +On-demand GPU deployments have unique billing and scaling characteristics compared to serverless deployments: Billing: Charges start when the server begins accepting requests Billed by GPU-second for each active instance Costs accumulate even if there are no active API calls Scaling options: Supports autoscaling from 0 to multiple GPUs Each additional GPU adds to the billing rate Can handle unlimited requests within the GPU’s capacity Management requirements: Not fully serverless; requires some manual management Manually delete deployments when no longer needed Or configure autoscaling to scale down to 0 during inactive periods Cost control tips: Regularly monitor active deployments Delete unused deployments to avoid unnecessary costs Consider serverless options for intermittent usage Use autoscaling to 0 to optimize costs during low-demand times + +", + "$.messages[7].content[1].type": "text", + "$.messages[7].content[2].text": "Title: How does billing work for on-demand deployments? 
+Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-billing-work-for-on-demand-deployments +Content: On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. + +On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. + +How does billing work for on-demand deployments? 
+On-demand deployments come with automatic cost optimization features: Default autoscaling : Automatically scales to 0 replicas when not in use Pay for what you use : Charged only for GPU time when replicas are active Flexible configuration : Customize autoscaling behavior to match your needs Best practices for cost management: Leverage default autoscaling : The system automatically scales down deployments when not in use Customize carefully : While you can modify autoscaling behavior using our configuration options , note that preventing scale-to-zero will result in continuous GPU charges Consider your use case : For intermittent or low-frequency usage, serverless deployments might be more cost-effective For detailed configuration options, see our deployment guide. + +", + "$.messages[7].content[2].type": "text", + "$.messages[7].content[3].text": "Title: How does the system scale? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/how-does-the-system-scale +Content: Our system is horizontally scalable , meaning it: Scales linearly with additional replicas of the deployment Automatically allocates resources based on demand Manages distributed load handling efficiently + +Our system is horizontally scalable , meaning it: Scales linearly with additional replicas of the deployment Automatically allocates resources based on demand Manages distributed load handling efficiently + +How does the system scale? +Our system is horizontally scalable , meaning it: Scales linearly with additional replicas of the deployment Automatically allocates resources based on demand Manages distributed load handling efficiently + +", + "$.messages[7].content[3].type": "text", + "$.messages[7].content[4].text": "Title: Multiple GPUs (vertical scaling) +Link: https://docs.fireworks.ai/guides/ondemand-deployments +Content: The number of GPUs used per replica is specified by passing the --accelerator-count flag. 
Increasing the accelerator count will increase the generation speed, time-to-first-token, and maximum QPS for your deployment, however the scaling is sub-linear. The default value for most models is 1 but may be higher for larger models that require sharding. firectl create deployment --accelerator-count 2 + +The number of GPUs used per replica is specified by passing the --accelerator-count flag. Increasing the accelerator count will increase the generation speed, time-to-first-token, and maximum QPS for your deployment, however the scaling is sub-linear. The default value for most models is 1 but may be higher for larger models that require sharding. firectl create deployment --accelerator-count 2 + +The number of GPUs used per replica is specified by passing the --accelerator-count flag. Increasing the accelerator count will increase the generation speed, time-to-first-token, and maximum QPS for your deployment, however the scaling is sub-linear. The default value for most models is 1 but may be higher for larger models that require sharding. firectl update deployment --accelerator-count 2 + +", + "$.messages[7].content[4].type": "text", + "$.messages[7].content[5].text": "Title: scale_to_zero() +Link: https://docs.fireworks.ai/tools-sdks/python-client/sdk-reference +Content: Sends a request to scale the deployment to 0 replicas but does not wait for it to complete. Returns: The deployment object, or None if no deployment exists deployment = llm.scale_to_zero() + +Sends a request to scale the deployment to 0 replicas but does not wait for it to complete. Returns: The deployment object, or None if no deployment exists + +scale_to_1_replica() +Scales the deployment to at least 1 replica. llm.scale_to_1_replica() + +", + "$.messages[7].content[5].type": "text", + "$.messages[7].content[6].text": "Title: Do you support Auto Scaling? 
+Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/do-you-support-auto-scaling +Content: Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs + +Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs + +Do you support Auto Scaling? +Yes, our system supports auto scaling with the following features: Scaling down to zero capability for resource efficiency Controllable scale-up and scale-down velocity Custom scaling rules and thresholds to match your specific needs + +", + "$.messages[7].content[6].type": "text", + "$.messages[7].content[7].text": "Title: What are the rate limits for on-demand deployments? +Link: https://docs.fireworks.ai/faq-new/deployment-infrastructure/what-are-the-rate-limits-for-on-demand-deployments +Content: Request throughput scales with your GPU allocation. Base allocations include: Up to 8 A100 GPUs Up to 8 H100 GPUs On-demand deployments offer several advantages: Predictable pricing based on time units, not token I/O Protected latency and performance , independent of traffic on the serverless platform Choice of GPUs , including A100s and H100s Need more GPUs? Contact us to discuss higher allocations for your specific use case. + +Request throughput scales with your GPU allocation. Base allocations include: Up to 8 A100 GPUs Up to 8 H100 GPUs On-demand deployments offer several advantages: Predictable pricing based on time units, not token I/O Protected latency and performance , independent of traffic on the serverless platform Choice of GPUs , including A100s and H100s Need more GPUs? Contact us to discuss higher allocations for your specific use case. 
+ +What are the rate limits for on-demand deployments? +Request throughput scales with your GPU allocation. Base allocations include: Up to 8 A100 GPUs Up to 8 H100 GPUs On-demand deployments offer several advantages: Predictable pricing based on time units, not token I/O Protected latency and performance , independent of traffic on the serverless platform Choice of GPUs , including A100s and H100s Need more GPUs? Contact us to discuss higher allocations for your specific use case. + +", + "$.messages[7].content[7].type": "text", + "$.messages[7].content[8].text": "Title: Create a deployment +Link: https://docs.fireworks.ai/tools-sdks/firectl/commands/create-deployment +Content: Creates a new deployment. + +Creates a new deployment. + +Creates a new deployment. firectl create deployment [flags] + +", + "$.messages[7].content[8].type": "text", + "$.messages[7].content[9].text": "Title: Monitoring +Link: https://docs.fireworks.ai/guides/prompt-caching +Content: For dedicated deployments, information about prompt caching is returned in the +response headers. The header fireworks-prompt-tokens contains the number of tokens +in the prompt, out of which fireworks-cached-prompt-tokens are cached. Aggregated metrics are also available in the usage dashboard. + +For dedicated deployments, information about prompt caching is returned in the +response headers. The header fireworks-prompt-tokens contains the number of tokens +in the prompt, out of which fireworks-cached-prompt-tokens are cached. Aggregated metrics are also available in the usage dashboard. + +Migration and traffic management +When migrating between deployments that use prompt caching, it's crucial to implement proper traffic routing to maintain optimal cache hit rates. When gradually routing traffic to a new deployment, use consistent user/session-based sampling rather than random sampling. 
Here's the recommended implementation for traffic routing: Avoid random sampling for traffic routing as it can negatively impact cache hit rates: # Don't do this: +if random() < fireworks_traffic_fraction: # ❌ Reduces cache effectiveness + send_to_new_deployment(user=hashed_user_id) + +", + "$.messages[7].content[9].type": "text", + "$.messages[7].role": "tool", + "$.messages[7].tool_call_id": "functions.search:0", + "$.messages[8].content": "Based on my research, I can provide you with a comprehensive guide on managing deployments on Fireworks. Let me break this down into key areas: + +## Types of Deployments + +Fireworks supports two main deployment types: + +### 1. Serverless Deployments +- **Managed by Fireworks**: Popular base models hosted on shared infrastructure +- **Billing**: Pay-per-token usage +- **No configuration needed**: Fireworks handles all infrastructure +- **Rate limits**: Subject to platform-wide limits +- **Best for**: Intermittent usage, prototyping, or low-volume applications + +### 2. 
Dedicated Deployments +- **Your private deployment**: Reserved exclusively for your use +- **Billing**: GPU-second basis (charged for active GPU time) +- **Full control**: Configure hardware, scaling, and model selection +- **No rate limits**: Subject only to your GPU allocation capacity +- **Best for**: High-volume applications, consistent performance requirements + +## Deployment Management Tools + +### Command Line Interface (firectl) + +**Installation and Setup:** +\`\`\`bash +# Install firectl (assuming you have it configured) +# Set your account ID to avoid "invalid id" errors +export FIREWORKS_ACCOUNT_ID=your-account-id +\`\`\` + +**Basic Deployment Commands:** + +\`\`\`bash +# Create a new deployment +firectl create deployment + +# Create deployment with specific GPU count +firectl create deployment --accelerator-count 2 + +# Create deployment for LoRA fine-tuned model +firectl create deployment "accounts/fireworks/models/" + +# Check deployment status +firectl get deployment accounts/your-account/deployment/ + +# List all deployments +firectl list deployments + +# Update deployment configuration +firectl update deployment --accelerator-count 4 + +# Delete a deployment +firectl delete deployment +\`\`\` + +### Python SDK (Build SDK) + +**Installation:** +\`\`\`bash +pip install fireworks-ai +\`\`\` + +**Basic Usage:** +\`\`\`python +from fireworks import Fireworks + +# Initialize client +client = Fireworks(api_key="your-api-key") + +# Create deployment configuration +from fireworks.client import LLM + +# For on-demand deployments (requires .apply()) +llm = LLM( + model="accounts/your-account/models/your-model", + deployment_type="on-demand" +) +llm.apply() # Required for on-demand deployments + +# For serverless deployments +llm = LLM(model="accounts/fireworks/models/llama-v3p1-8b-instruct") + +# Scale management +llm.scale_to_1_replica() # Ensure at least 1 replica +llm.scale_to_zero() # Scale down to 0 replicas +\`\`\` + +## Deployment Lifecycle 
Management + +### 1. Creation and Initial Setup +\`\`\`python +# Example: Creating a comprehensive deployment +deployment_config = { + "model": "accounts/your-account/models/your-model", + "deployment_type": "on-demand", + "accelerator_type": "A100", # or "H100" + "accelerator_count": 2, + "min_replicas": 0, # Scale to zero when not used + "max_replicas": 4, # Maximum scaling + "autoscaling": True +} +\`\`\` + +### 2. Monitoring and Scaling +\`\`\`python +# Check deployment status +deployment = llm.get_deployment() +print(f"Status: {deployment.status}") +print(f"Replicas: {deployment.replicas}") + +# Monitor usage and costs +# Check the usage dashboard at https://app.fireworks.ai/dashboard/usage +\`\`\` + +### 3. Traffic Management During Migration +\`\`\`python +import hashlib + +# Implement consistent traffic routing for cache optimization +def route_traffic(user_id, traffic_fraction=0.2): + """Route traffic consistently based on user ID for cache optimization""" + hashed_user_id = int(hashlib.md5(user_id.encode()).hexdigest(), 16) + max_hash = 2**128 - 1 + ratio = hashed_user_id / max_hash + + if ratio < traffic_fraction: + return "new_deployment" + else: + return "old_deployment" + +# Usage +deployment_choice = route_traffic("user-123", traffic_fraction=0.3) +\`\`\` + +## Cost Management Best Practices + +### 1. Autoscaling Configuration +\`\`\`python +# Configure autoscaling to minimize costs +deployment_config = { + "min_replicas": 0, # Scale to zero when idle + "max_replicas": 8, # Maximum GPUs + "target_cpu_utilization": 0.7, # Scale up at 70% utilization + "scale_down_delay": 300 # Wait 5 minutes before scaling down +} +\`\`\` + +### 2. Monitoring and Cleanup +\`\`\`bash +# Regular monitoring commands +firectl list deployments # See all active deployments +firectl get deployment --watch # Monitor specific deployment + +# Clean up unused deployments +firectl delete deployment +\`\`\` + +### 3. 
Reserved Capacity +For high-volume usage, consider reserved capacity instead of on-demand: +- More reliable capacity +- Higher GPU quotas +- Better pricing for consistent usage + +## Advanced Features + +### Custom Base Models +\`\`\`bash +# Upload custom model +firectl create model /path/to/model/files/ + +# Deploy custom model +firectl create deployment +\`\`\` + +### Multi-GPU Deployments +\`\`\`bash +# Deploy with multiple GPUs +firectl create deployment --accelerator-count 4 + +# Update GPU count +firectl update deployment --accelerator-count 8 +\`\`\` + +### LoRA Model Deployment +\`\`\`bash +# Single LoRA deployment (faster) +firectl create deployment "accounts/fireworks/models/" + +# Multi-LoRA deployment (shared base model) +firectl create deployment --enable-addons +firectl load-lora --deployment +\`\`\` + +## Key Management Commands Summary + +| Command | Purpose | +|---------|---------| +| \`firectl create deployment\` | Create new deployment | +| \`firectl get deployment\` | Check deployment status | +| \`firectl list deployments\` | List all deployments | +| \`firectl update deployment\` | Modify deployment settings | +| \`firectl delete deployment\` | Remove deployment | +| \`llm.apply()\` | Apply configuration (Python SDK) | +| \`llm.scale_to_zero()\` | Scale down to 0 replicas | +| \`llm.scale_to_1_replica()\` | Ensure at least 1 replica | + +## Best Practices + +1. **Monitor Costs**: Regularly check your usage dashboard +2. **Use Autoscaling**: Configure deployments to scale to zero when not used +3. **Plan Migrations**: Use consistent traffic routing for cache optimization +4. **Choose Wisely**: Use serverless for intermittent usage, dedicated for consistent workloads +5. **Clean Up**: Delete unused deployments to avoid unnecessary charges +6. 
**Test Scaling**: Verify your deployment scales appropriately under load + +Would you like me to dive deeper into any specific aspect of deployment management, such as autoscaling configuration, cost optimization strategies, or monitoring techniques?", + "$.messages[8].role": "assistant", + "$.pid": 63817, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.737811", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 30 highlighted sections (required: 15)", + "$.evaluation_result.score": 1, + "$.ground_truth": "15", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "download-look-quick-minute-study", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# Outline: The History of Yemeni Coffee + +1. **Introduction: Why Yemeni Coffee Matters** + - Thesis: Yemeni coffee is not merely a beverage but a cultural, economic, and geopolitical force that shaped global history. 
+ - Scope: From its mythical discovery in 9th-century Yemen to its modern revival. + +2. **Pre-Coffee Yemen: The Land, the People, and the Plant** + - Geography of the Yemeni highlands and terraced agriculture. + - Indigenous *Coffea arabica* varieties and early cultivation techniques. + - *The myth of the goatherd Kaldi and the dancing goats*. + +3. **The Sufi Spark: Coffee as a Spiritual Elixir** + - 15th-century Sufi monasteries in Mocha and Taiz. + - *Qahwa as a tool for nocturnal dhikr rituals*. + - Early roasting, grinding, and brewing methods. + +4. **Mocha: The Birthplace of a Global Commodity** + - Rise of the port city of Mocha (Al-Mukha) in the 16th century. + - *The Mocha trade network linking Yemen, Egypt, and the Ottoman Empire*. + - Coffeehouses as centers of intellectual and political discourse. + +5. **The Ottoman Grip and the First Coffee Monopoly** + - 1536: Ottoman conquest of Yemen and control of coffee exports. + - *The ban on fertile beans to prevent cultivation elsewhere*. + - Smuggling and the first cracks in the monopoly. + +6. **The Dutch Heist and the Global Spread of Coffee** + - 1616: Dutch traders smuggle Yemeni beans to Batavia (Java). + - *The birth of Dutch colonial coffee plantations*. + - How Mocha’s loss became the world’s gain. + +7. **Coffeehouses of Cairo and Istanbul: Yemen’s Cultural Export** + - 1554: First Ottoman coffeehouse in Istanbul. + - *The “Schools of the Wise” in Cairo’s Khan el-Khalili*. + - Coffee as a vector for Yemeni poetry, music, and Sufi thought. + +8. **The European Coffee Craze and the Mocha Brand** + - 17th-century European coffeehouses and the “Mocha” label. + - *The false association of Mocha with chocolate flavor*. + - Yemeni coffee’s role in the Enlightenment. + +9. **Colonial Rivalries and the Decline of Yemeni Dominance** + - 18th-century: French, British, and Dutch plantations outcompete Yemen. + - *The collapse of Mocha’s monopoly and the rise of Caribbean and Latin American coffee*. 
+ - Impact on Yemeni economy and society. + +10. **Yemeni Coffee in the Age of Empire** + - 19th-century British and Ottoman interests in Yemeni coffee. + - *The introduction of new cash crops (qat) and their effect on coffee cultivation*. + - Decline in quality and quantity due to neglect and conflict. + +11. **The Cold War and the Coffee Crisis** + - 20th-century Yemeni division (North/South) and Soviet/US influences. + - *The shift from coffee to qat in rural livelihoods*. + - Drought, pests, and civil wars further erode production. + +12. **The Specialty Coffee Renaissance: Yemen’s Return to Glory** + - 21st-century global demand for single-origin, heirloom coffees. + - *The discovery of “Yemenia” varietals and their genetic uniqueness*. + - NGOs and cooperatives reviving traditional farming. + +13. **Modern Challenges: War, Climate, and Market Access** + - 2015–present: Saudi-led conflict and blockade. + - *The siege of Hodeidah port and its impact on coffee exports*. + - Climate change and water scarcity in the highlands. + +14. **Yemeni Coffee in the Cup: Flavor, Ritual, and Identity** + - Tasting notes: dried fruit, spice, and wine-like acidity. + - *The Yemeni coffee ceremony (bunna) and its social significance*. + - Diaspora communities keeping traditions alive abroad. + +15. **Conclusion: The Future of Yemeni Coffee** + - Balancing heritage with innovation. + - *The role of Yemeni coffee in post-war reconstruction*. + - Final reflection: From the highlands of Sana’a to the cups of Brooklyn, Yemeni coffee remains a bridge between past and future.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.737854", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 20 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "delete-open-cold-poor-power", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using "*". For example: *3 years of experience*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Dear Brewed Awakening, + +I am the *early-bird steam* that curls above your grinders, +the *latte-art heart* that beats in every porcelain curve. +My apron is a canvas of *espresso freckles*, +each dot a story told in *3 years of experience* +from bean to cup, from dawn rush to dusk hush. + +I know the *symphony of the steam wand*— +its hiss, its sigh, its whispered crema secrets— +and how to coax *microfoam galaxies* +into orbit around a *double-shot comet*. +I’ve tamped, poured, swirled, and smiled +through *Saturday lines out the door*, +keeping tempers cool as the milk I *steam to 140°*. + +I speak fluent *small-town regular*: +“Medium drip, room for kindness?” +I remember that *Mrs. Alvarez likes oat milk*, +that *Mr. Chen takes two raw sugars*, +and that *your dog, Scout, prefers puppuccinos*. 
+ +I sweep, I stock, I *balance the till to the penny*, +but mostly I *brew community*— +one *extra-hot americano* at a time. +Hire me, and the morning light +will find me already *wiping down tables*, +already *saving the last blueberry scone* +for the kid who always forgets his wallet. + +With *grounds in my veins* and *crema in my smile*, +I remain, + +Your future barista, +Maya", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.737882", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 10 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "analyze-visible-level-love-pleasure", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with "*", like: *highlighted text section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Hey friends! 
Grab a coffee and let’s talk about something that always blows my mind: *how much we all have in common, even when we look nothing alike*. I was scrolling through wildlife photos the other night and realized that a jellyfish and I are basically running the same basic software—just with different packaging. So here’s a laid-back tour of five “wait, we do that too?!” moments across the animal kingdom. + +*We all need our beauty sleep* +Turns out, every animal with a nervous system—from fruit flies to blue whales—shares a version of sleep. Crows nap on tree branches, octopuses snooze in coconut-shell beds, and even roundworms have “lethargus,” which is basically worm REM. Scientists think sleep is so ancient that the last common ancestor of all animals already had it. So next time you hit snooze, remember you’re honoring a 700-million-year-old tradition. + +*Love songs come in many dialects* +Whether you’re a human humming in the shower or a humpback whale belting out 30-minute ballads, *we all use rhythm and pitch to flirt and bond*. Frogs croak in choruses, nightingales remix their playlists nightly, and male mice sing ultrasonic “pickup lines” too high for us to hear. Same goal, different instruments. + +*Grandma knew best: family matters* +Elephants babysit their nieces, wolves bring back take-out for auntie, and *clownfish dads are the ultimate stay-at-home fathers*. Across species, kinship networks boost survival. Even bacteria split into “sibling” lineages that cooperate—so the next time your cousin crashes on your couch, call it evolutionary strategy. + +*Stress looks the same on everyone* +When life gets spicy, our bodies flip the same ancient switch. *Cortisol floods fish, birds, and humans alike*, making hearts race and muscles twitch. Zebra finches lose their appetite during tough times, and stressed-out bees become pessimistic (yes, they can test bee mood). 
So if you’re doom-scrolling at 2 a.m., know that a stressed squirrel in your backyard totally gets you. + +*Play isn’t optional—it’s homework for life* +Otters juggle rocks, dogs zoom in circles, and young ravens snowboard down snowy roofs using jar lids. *Play builds brains and social skills across the board*. Researchers have spotted reptiles “wrestling” for fun and octopuses repeatedly bouncing Lego blocks like toddlers. If it has neurons, it probably needs recess. + +Next time you feel alone in the universe, remember that somewhere a parrot is probably preening its best friend, a bat is sharing food with a hungry roost-mate, and a tiny nematode is curling up for a nap. We’re all just variations on a very old, very shared theme.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.737906", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 7 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "run-put-natural-traditional-thought", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format: +*section name*", + "$.messages[0].role": "user", + "$.messages[1].content": "*Executive Summary* + +Bremen is a city where maritime heritage meets cutting-edge creativity. Our venture, “NordHood Bremen,” will launch a premium, design-driven sweatshirt brand that fuses Hanseatic culture with contemporary streetwear. In year one we will release three limited-edition drops (total 6,000 units), achieve €420,000 in revenue, and reach break-even by month 14. The brand will be 100 % online-direct at launch, then expand into two Bremen concept stores and select European boutiques in years two and three. We will operate carbon-neutral, use 100 % organic cotton, and reinvest 3 % of profits into local youth art programs—turning every sweatshirt into a wearable piece of Bremen pride. 
+ +*Market Analysis* + +Bremen’s 0.57 million residents and 500,000 annual tourists create a €35 million addressable mid-price sweatshirt market. Key insights: +• 68 % of 18-35-year-olds prefer streetwear that tells a local story (Hochschule Bremen Trend Report 2023). +• Tourists spend €180 million on souvenirs, yet only 11 % are apparel items—an untapped niche. +• Competitor scan: global brands (Nike, Adidas) dominate functionality; regional labels (Hamburg’s “Dinghy,” Hannover’s “Liguster”) lack Bremen specificity. +• Online D2C share in German apparel has grown from 18 % (2019) to 31 % (2023), proving the viability of our launch model. +We will target two segments: “Culture Creatives” (students, designers, musicians) and “Heritage Seekers” (tourists, expats). Both segments value authenticity, limited editions, and sustainability. + +*Product & Brand Strategy* + +Signature Product: The “Hanse Hood” – 460 g brushed organic-cotton fleece, oversized fit, dropped shoulders, double-stitched flat seams, and a hidden inside pocket sized for a Bremen transit card. +Design Language: Minimalist silhouettes in maritime colorways (Bremen Blue, Weser Sand, Harbor Grey) with subtle iconography—windrose embroidery, cog-ship sleeve print, and a QR code linking to an AR story of the garment’s journey. +Sustainability: GOTS-certified cotton, recycled polyester labels, biodegradable mailers, and a take-back program offering 15 % discount on next purchase. +Drop Model: Three seasonal drops per year (Spring “Hanse Sail,” Fall “Schlachte,” Winter “Weihnachtsmarkt”), each capped at 2,000 units to maintain scarcity. +Brand Voice: “Wear the City.” Every hangtag includes a Bremen insider tip curated by local artists, turning the sweatshirt into a cultural guide. + +*Operations & Supply Chain* + +Design Studio: 120 m² loft in Überseestadt (rent €1,800/month) doubling as photo studio and event space. 
+Manufacturing: Partnership with a Fair-Wear-audited factory in Porto, Portugal (MOQ 500, 30-day lead time, €18 landed cost per unit). +Logistics: Bremen-based 3PL “Logwin” for pick-pack-ship (€1.20 per parcel, same-day dispatch). +Tech Stack: Shopify Plus storefront, Klarna & PayPal checkout, Shipcloud integration, and a custom AR filter for Instagram try-ons. +Quality Control: In-house inspection on arrival; defective rate target <1 %. +Risk Mitigation: Dual-source yarn from Turkey and Portugal; maintain 8-week safety stock. + +*Marketing & Sales Plan* + +Pre-Launch (Months 1-2): +• Teaser campaign on TikTok & Instagram Reels (#NordHoodBremen) with local influencers (5-50 k followers). +• Pop-up try-on booth at Viertel Kreativmarkt; collect 1,000 email sign-ups. +Launch (Month 3): +• “Midnight Drop” online at 00:07 (Bremen dialing code) with countdown timer. +• Limited 100 “Founders’ Edition” sweatshirts laser-numbered and signed by Bremen street artist “PlotBot.” +Growth (Months 4-12): +• Collaborations: capsule with SV Werder Bremen fan design; co-branded hoodie with Überseemuseum. +• Referral program: give €10, get €10. +• Tourist channel: partnership with Bremer Touristik-Zentrum for QR-coded city map bundled with sweatshirt. +KPIs: CAC ≤ €18, repeat purchase rate ≥ 35 %, Instagram engagement ≥ 8 %. + +*Financial Plan* + +Start-up Capital Needed: €85,000 +• Inventory (3,000 units): €54,000 +• Marketing & content: €12,000 +• E-commerce & IT: €6,000 +• Legal, trademark, insurance: €5,000 +• Working capital buffer: €8,000 +Funding: 60 % founder equity, 40 % Bremen Förderbank micro-loan at 1.9 % interest. +Year-1 P&L (conservative): +Revenue €420,000 (avg. €70 net per unit) +COGS €234,000 +Gross Profit €186,000 +Operating Expenses €150,000 +EBIT €36,000 +Break-even: 1,800 units or month 14. +Three-year projection: revenue €1.4 million, EBITDA margin 18 %, reinvestment into two Bremen stores and EU wholesale. + +*Appendices* + +A. Mood-board & color palette +B. 
Factory audit certificate +C. Sample influencer contracts +D. Detailed 36-month cash-flow model", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.737929", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 29 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "sit-disassemble-ready-area-problem", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. Please highlight at least 3 sections with markdown, i.e *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "**The Official “Two-Mom Marriage Quality Report Card”** +(For internal use only. If this ends up on Facebook, we *will* change the Wi-Fi password.) + +--- + +**1. 
Communication Score: ___/10** +*“Did we just have a 45-minute conversation about whose turn it is to buy more goldfish crackers, or was that a TED Talk?”* +- 10 = We finish each other’s sentences… and each other’s coffee. +- 5 = We communicate exclusively via passive-aggressive Post-it notes on the fridge. +- 0 = We’ve resorted to interpretive dance and/or carrier pigeons. + +**Bonus points** if you’ve ever used the kids as tiny, sticky messengers (“Tell Mommy she’s *cordially invited* to take out the trash”). + +--- + +**2. Chore Wars Score: ___/10** +*“The laundry basket is a Jenga tower of lies.”* +- 10 = We’ve achieved a chore utopia where socks actually match. +- 5 = We’ve agreed to ‘divide’ labor… by one of us doing everything and the other ‘supervising’. +- 0 = The dishwasher has become a permanent storage unit. + +**Penalty** if either mom has ever ‘forgotten’ how to load the dishwasher *again*. + +--- + +**3. Co-Parenting Harmony Score: ___/10** +*“We tag-team tantrums like WWE pros.”* +- 10 = We’ve synchronized our ‘Mom Voice’ to subsonic frequencies that terrify small children. +- 5 = We’ve debated whether ‘screen time’ is a human right or a war crime. +- 0 = We’ve weaponized bedtime stories (‘And then the princess *never* got dessert again…’). + +**Extra credit** for every time you’ve high-fived over a successful *“Because I said so”* deployment. + +--- + +**4. Romance Maintenance Score: ___/10** +*“Date night: when we argue over which drive-thru has the shortest line.”* +- 10 = We’ve scheduled intimacy between soccer practice and PTA meetings. +- 5 = We’ve agreed that ‘Netflix and actually chill’ counts as foreplay. +- 0 = Our last romantic gesture was sharing the last chicken nugget. + +**Automatic fail** if either of you has ever said, “I’m too tired”… *while scrolling Instagram*. + +--- + +**5. 
Emergency Snack Reserves Score: ___/10** +*“The pantry is a battlefield of empty granola bar boxes.”* +- 10 = We’ve hidden chocolate so well we forgot where it is. +- 5 = We’ve labeled snacks with fake allergen warnings to deter theft. +- 0 = The kids have unionized and are demanding better rations. + +**Gold star** if you’ve ever *texted each other* to brag about finding the secret stash. + +--- + +**Final Grade: ___/50** +- 40–50: *Power couple.* You’re basically the Beyoncé and Beyoncé of lesbian moms. +- 25–39: *Functioning chaos.* You’ve accepted that love is just shared exhaustion. +- 0–24: *Renew your vows… or your Costco membership.* Either way, you’re in this together. + +**Disclaimer:** This report is not legally binding, but hiding the good snacks *is* grounds for divorce.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.737956", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 1 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "affect-unknown-long-life-month", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. For example: *little me in the countryside*.", + "$.messages[0].role": "user", + "$.messages[1].content": "*little me in the countryside* + +[Verse 1] +Barefoot on the gravel road, dust between my toes, +Sunlight dripped like honey on the clover and the crows. +Grandma’s porch swing creaked a lullaby so slow, +While the creek below kept singing songs only we could know. +Fireflies stitched the twilight with lanterns made of stars, +And the moon was just a silver coin tossed into cookie jars. + +[Chorus] +Oh, little me in the countryside, running wild and free, +Chasing dreams through cornfield mazes, climbing every tree. +Every laugh was louder than the thunder rolling by, +In the summers of my childhood, underneath that endless sky. + +[Verse 2] +Mason jars of sweet tea sweating on the window ledge, +Radio played “Sweet Home Alabama” from the rusty garden shed. 
+We built rafts from old barn doors and floated down the stream, +Kingfishers were pirates and the cattails were our team. +Supper was tomatoes sliced like sunsets on a plate, +And the crickets kept the tempo while the night got late. + +[Chorus] +Oh, little me in the countryside, running wild and free, +Skinny-dipping in the moonlight where the river bends to see. +Every scar was a medal from a dare we’d never hide, +In the summers of my childhood, where the earth and I collided. + +[Bridge] +Dandelion clocks we blew to tell the time, +Each puff a tiny parachute for wishes we would climb. +We swore we’d never grow up, just grow into the land, +But the school bus came like autumn and took us by the hand. + +[Verse 3] +Now the fields are sold and paved, the barn’s a memory store, +But I still hear the wheat heads whisper, “Come back, come back once more.” +I close my eyes and I’m ten again, chasing dragonflies, +With the smell of rain on dirt roads and the taste of cherry pies. + +[Final Chorus] +Oh, little me in the countryside, you’re still inside my chest, +A pocket full of wonder that the city can’t arrest. +Though the years roll on like tractors down a dusty county ride, +I keep those summers in a mason jar where the fireflies never die. + +[Outro] +So here’s to cicadas, to tire swings and creek-stone slides, +To the echo of my laughter in the hollow where time hides. +If you find that gravel road, kick off your shoes and glide— +You’ll meet little me in the countryside, still barefoot, still wide-eyed.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738015", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 3 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "believe-verify-likely-desire-power", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a riddle for the word "façade" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "I wear a *painted smile* for every passer-by, +a *perfectly symmetrical* face that never cracks. +Behind me, the *crumbling bricks* sigh in secret. +What am I?", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738042", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 19 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "change-sort-serious-song-family", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# Workshop Template +**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* +**Duration:** 3.5 hours (can be split into two 90-minute sessions) +**Audience:** Managers, HR professionals, team leads, ERG members, and any employee who influences culture + +--- + +## 1. Welcome & Objectives (15 min) +- Ice-breaker: “Two Truths & a Lie—Diversity Edition” +- *Learning objectives*: By the end of this workshop, participants will be able to: + 1. Define diversity, equity, inclusion, and belonging (DEIB) in their own words. + 2. Articulate at least three evidence-based benefits of a diverse workforce. + 3. Identify one action they can take within 30 days to foster inclusion on their team. + +--- + +## 2. 
*The Data-Driven Case for Diversity* (25 min) +- Interactive poll: “What % performance lift do you think diverse teams achieve?” +- Mini-lecture with live charts: + - McKinsey 2023 report: Top-quartile companies for ethnic/gender diversity are **39 % more likely to outperform** on profitability. + - Harvard study: *Inclusive teams make better business decisions 87 % of the time* and do so twice as fast. +- Small-group exercise: Match the statistic to the business outcome (revenue, innovation, retention). + +--- + +## 3. *Unconscious Bias & Micro-behaviors* (30 min) +- 5-minute video: “The Resume with Two Names” (identical CVs, different callbacks). +- Bias Busters card game: Participants draw scenario cards and practice interrupting bias in real time. +- *Key takeaway*: Bias is not a moral failing; it is a neurological shortcut that can be rewired through deliberate habits. + +--- + +## 4. *Inclusive Leadership Toolkit* (40 min) +- **The 4 A’s Framework**: + 1. **Awareness** – Daily 3-minute bias check-in. + 2. **Amplification** – Repeat & credit ideas from under-represented voices. + 3. **Access** – Rotate high-visibility assignments. + 4. **Accountability** – Track inclusion metrics on team dashboards. +- Role-play: “The Missed Promotion” – diagnose where each ‘A’ broke down and redesign the conversation. + +--- + +## 5. Action Planning & Commitments (20 min) +- Personal *Inclusion Commitment Card*: “In the next 30 days I will…” +- Pair-share for accountability. +- QR code to a digital resource hub (TED talks, reading list, policy templates). + +--- + +## 6. Closing & Evaluation (10 min) +- One-word checkout: “How do you feel now?” +- *Post-workshop survey* link (2 min) with prize draw for completion. 
+ +--- + +## Materials Checklist +- Slide deck (with alt-text on all images) +- Printed Commitment Cards & pens +- Bias Busters card decks (laminated) +- Timer & bell +- Snacks labeled for common dietary restrictions (gluten-free, vegan, halal) + +--- + +## Follow-Up Email Template (send within 24 hrs) +Subject: Your Inclusion Toolkit + Next Steps +Body: +Hi team, +Thank you for leaning into today’s conversation on *why diversity wins*. Attached are: +- Slide deck & resource links +- Your signed Inclusion Commitment Card (photo reminder) +- Calendar invite for the optional 30-day check-in circle + +Remember: Inclusion is a practice, not a one-time training. Let’s keep the momentum!", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738068", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 21 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "must-write-need-theoretical-pleasure", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a cover letter to a local political party, asking to be their rally organizer. Make sure to highlight at least 3 sections in your answer in markdown format.", + "$.messages[0].role": "user", + "$.messages[1].content": "**[Your Name]** +[Your Address] +[City, State ZIP] +[Email Address] | [Phone Number] | [LinkedIn URL] +[Date] + +**[Local Party Chairperson’s Name]** +[Local Party Name] +[Party Headquarters Address] +[City, State ZIP] + +Dear **Chairperson [Last Name]**, + +I am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[Your City/County]** and a committed progressive, I have spent the past six years turning grassroots energy into measurable voter engagement. I believe my proven record of mobilizing thousands of supporters, securing high-profile speakers, and running flawless logistics makes me the ideal candidate to lead our 2024 rally program. 
+ +### **Proven Rally & Event Expertise** +- **2018 & 2020 Midterm & Presidential Rallies**: Planned and executed 14 rallies across three counties, averaging 2,500 attendees each. +- **Permits & Safety**: Obtained all municipal permits within 72 hours, coordinated with local fire and police departments, and maintained a 100 % safety record. +- **Stage & AV Management**: Negotiated vendor contracts that cut AV costs by 27 % while increasing sound coverage by 40 %. + +### **Community & Coalition Building** +- **Diverse Coalitions**: Forged partnerships with 18 labor unions, 9 student groups, and 7 faith organizations, expanding our volunteer base by 1,200 active members. +- **Multilingual Outreach**: Produced bilingual signage and volunteer scripts, increasing turnout in Latinx precincts by 31 %. +- **Accessibility**: Installed ADA-compliant ramps, ASL interpreters, and quiet zones at every event, earning commendation from the local Disability Rights chapter. + +### **Data-Driven Strategy & Rapid Response** +- **Real-Time Metrics**: Deployed a mobile check-in app that captured 94 % of attendee data, feeding our CRM for targeted follow-up. +- **Weather Contingencies**: Created a “Plan B” playbook that moved a 3,000-person outdoor rally indoors in under 90 minutes with zero no-shows. +- **Crisis Communications**: Served as on-site media liaison, turning a last-minute speaker cancellation into a prime-time local news segment featuring our grassroots volunteers. + +I would welcome the opportunity to discuss how my experience, energy, and strategic vision can help **[Local Party Name]** stage rallies that not only energize our base but also convert enthusiasm into votes. I am available at your convenience for an interview and can provide a detailed rally calendar and budget forecast within 48 hours of request. + +Thank you for your time and consideration. I look forward to amplifying our shared values across **[Your City/County]**—one rally at a time. 
+ +In solidarity, + +**[Your Name]**", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738096", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "control-accept-record-legal-car", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. 
*highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "The opposite of youth is not age, but **disillusionment**—the moment when *wonder* is traded for *weariness*, when the *boundless horizon* of possibility collapses into the *narrow corridor* of routine.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738119", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "miss-continue-fall-local-work", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. 
Use markdown to highlight at least 3 sections in your answer.", + "$.messages[0].role": "user", + "$.messages[1].content": "**The Golden Palace** is not a palace, but it *is* golden—at least in spirit—nestled like a curry-scented jewel in the **city centre**, where neon pigeons peck at samosa crumbs and the air hums with sitar-shaped traffic lights. + +Inside, **Indian** food performs acrobatics on your tongue: cardamom cartwheels, turmeric tumbles, and a rogue chili that once tried to elope with a mango lassi. The waiters glide like buttered shadows, whispering orders to the kitchen where naan breads inflate like edible balloons plotting a coup against gravity. + +At **The Golden Palace**, every table is a throne, every plate a miniature map of the subcontinent, and every bite a teleportation device disguised as dinner.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738142", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 14 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "require-affect-major-logical-business", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "🎤 *Yo, it’s the tale of Sir Scam-a-lot, straight outta suburbia!* 🎤 + +Yo, my name is Dave, I sell insurance in Ohio, +Got a cubicle, a cat, and a 401(k) that’s kinda fly-o. +One Tuesday, phone rings—unknown number, what the heck? +Voice says, “*Prince Ogbonna the Third* just wrote you a fat check!” + +I’m like, “Hold up, who dis? 
I’m whiter than printer paper, +My bloodline’s mayo on Wonder Bread—ain’t no royal vapor!” +Dude on the line, accent silkier than lotion, +Says, “Cousin, you’re 37th in line—time to claim your ocean!” + +**[Hook]** +🎶 *Long lost king of Nigeria, crown sittin’ on my dome,* +*Got a palace full of leopards and a Wi-Fi that roams!* +*They need my SSN and a quick wire fee,* +*But I’m ‘bout to be big pimpin’ in Abuja, just wait and see!* 🎶 + +He says, “Just send five grand for customs, then the jet lands, +You’ll be dripping in so much gold, even your sweat glands bling.” +I’m sweatin’ like a rotisserie, visions of caviar dreams, +I Venmo “PrinceNaija69” like I’m funding meme schemes. + +Next day, email: “Oops, palace goat ate the transfer slip, +Need ten more for goat surgery and a diamond-encrusted whip.” +I hock my lawnmower, my Xbox, and Aunt Linda’s porcelain goose, +Wire cash so fast my bank’s like, “Dave, you loose!” + +**[Bridge – Spoken Word]** +Then the *official royal seal* arrives— +It’s a GIF of Shrek in a crown doing the Macarena. +I’m starting to sense a pattern here… + +**[Final Hook – Regret Remix]** +🎶 *Now I’m the king of my mom’s basement, throne’s a beanbag chair,* +*Royal decree: never trust a prince who types in all caps—* +*“URGENT, COUSIN, SEND MORE FLAMINGOS!”* +*Lesson learned: if the crown’s too shiny, it’s probably aluminum foil, homie!*", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738169", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 6 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "could-serious-newspaper-radio-group", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Expand the riddle into a story with a funny tone: + +What can you catch but not throw? +A cold + +Use * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, “ACHOO—did somebody order a cold?” + +One Tuesday, Gary spotted his dream target: *Bartholomew “Bart” Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole “for the vitamins.” Gary rubbed his microscopic hands together. “Perfect. I’ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!” + +But there was a catch—literally. Gary could *catch* Bart, but he couldn’t *throw* him anywhere. 
Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, “This is mutiny! I’m supposed to be the one doing the tossing!” + +Meanwhile, Bart’s immune system—led by Captain Phlegm and the Mucus Marines—launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. + +Defeated, Gary sat in the crumpled tissue, sulking. “I caught the cold,” he muttered, “but I can’t throw it anywhere. Worst. Superpower. Ever.” + +From that day forward, Gary changed his business card to read: “Professional Cold Catcher—No Returns, No Refunds, Absolutely No Throwing.”", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738192", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 17 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "offer-select-free-state-moment", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format.", + "$.messages[0].role": "user", + "$.messages[1].content": "**[Intro – harpsichord glitch beat, stylus scratching parchment]** +Yo, it’s 14-something, but my mind’s 3-D printed in gold leaf— +I’m the **Renaissance Rapper**, got a quill in one hand and a scalpel in the other, dissectin’ beats like Vesalius dissectin’ spleens. +Let’s get *weird* with the rebirth, baby. + +--- + +### **Section 1 – The Fresco Flex** +I drip ultramarine like I’m sponsored by Afghanistan mines, +Michelangelo on the ceiling, I’m reclining on divine lines. +Angels doing backflips off the Sistine, they’re hype for my set, +While I tag “*AMOR VINCIT OMNIA*” in invisible ink—wet. +My flow’s a trompe-l’œil, looks flat till you tilt your head— +Then it’s depth for days, like Brunelleschi’s dome overhead. 
+I’m not baroque, I’m pre-baroque, I’m the prototype shock, +Got patrons in fur-lined sleeves stuffing florins in my sock. +**I paint in egg tempera so the colors never fade,** +**But the yolk smells funky—call it *odeur de old crusade*.** + +--- + +### **Section 2 – The Printing-Press Poltergeist** +Gutenberg ghost in the machine, clanking lead type like chains, +Every page a polter-page, every serif a soul that remains. +I drop a folio, it’s viral before “viral” was a cough, +Monks in scriptoriums rage-quitting, “Bro, that illuminated stuff’s off!” +I’m the incunabula influencer, my colophon’s a flex, +Got red-ink rubricators sliding into my DMs—*next*. +**I rhyme in movable type so my bars can rearrange,** +**Yesterday I was Petrarch, today I’m deranged.** +The press spits paper like a hydra with hiccups, +Each sheet a protest flyer: “Down with scholastic pick-ups!” + +--- + +### **Section 3 – The Anatomical Anthem (Parental Advisory: Corpses)** +Night shift with da Vinci, we’re sketching by candle and moon, +Cadavers on the table like, “Do me a favor—autotune my coccyx soon.” +I rap in reverse Latin so only the dead understand, +While Leo reverse-engineers a bird-man from a frying pan. +**My beats are ventricles pumping four-four time,** +**Aorta bass drop—call that circulatory grime.** +I sample the sound of a scalpel scraping periosteum, +Layer it over a lute loop—*that’s* my magnum opium. +Galen’s in the corner, throwing shade like a Greek tragedy, +But I’m Harvey before Harvey, already rapping blood circulation majestically. + +--- + +**[Outro – fading lute arpeggio, distant cannon fire from Constantinople]** +So remember me when you’re doom-scrolling on glass, +I was the era that turned corpses into class. +I’m the **Renaissance Rapper**, my ghost still tags in chalk, +Leaving QR codes on cathedral walls—scan it, you’ll squawk. 
+Rebirth never dies, it just pixelates and loops, +Now if you’ll excuse me, I’ve got a date with Lucretius in the soup.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738219", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 25 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "calculate-affect-fullest-emotional-money", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*", + "$.messages[0].role": "user", + "$.messages[1].content": "*Jōmon* hunter-gatherers (c. 14,000 BCE) gave way to *Yayoi* rice cultivators who forged bronze and iron tools. +The *Yamato* court unified clans in the 3rd–4th centuries, adopting *Shintō* rituals and Chinese script. +Prince *Shōtoku*’s 7th-century constitution and the Taika Reforms centralized power under an emperor, while *Nara* and *Heian* aristocrats refined literature and *Buddhism*. 
+Samurai clans rose during the *Kamakura* shogunate (1185), repelling *Mongol* fleets and entrenching warrior rule. +Centuries of *Ashikaga* and *Sengoku* civil war ended when *Oda Nobunaga*, *Toyotomi Hideyoshi*, and *Tokugawa Ieyasu* reunified Japan; the *Edo* shogunate (1603–1868) enforced isolation yet fostered *ukiyo-e* culture. +Commodore *Perry*’s 1853 arrival triggered the *Meiji* Restoration, rapid modernization, and imperial expansion. +Victory in the *Russo-Japanese* War (1905) emboldened militarism, leading to *Manchuria* occupation and *Pearl Harbor*. +After atomic bombings and *Hirohito*’s surrender in 1945, U.S. occupation reformed politics and the economy, birthing the *post-war* “economic miracle.” +Today, Japan balances *constitutional monarchy* with global technological leadership.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738252", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "download-american-private-result-research", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. Example: *This is a highlighted phrase*.", + "$.messages[0].role": "user", + "$.messages[1].content": "*John* in *De Smet* +Quaker hat *blows off*—chases +*Silent* *scream*—“Come back!”", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738286", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 19 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "indicate-strong-single-festival-mother", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with "*", like *highlighted text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011. + +The evening’s proceedings underscore the Academy’s enduring commitment to *artistic integrity* and *technical innovation*. 
Among the most celebrated achievements are: + +• *Album of the Year* – awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. +• *Record of the Year* – bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. +• *Best New Artist* – conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music. + +Throughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. The Academy further acknowledges the *behind-the-scenes luminaries*—engineers, mixers, and mastering specialists—whose *invisible expertise* transforms raw inspiration into *audible transcendence*. + +In closing, the 54th Annual Grammy Awards reaffirm the Recording Academy’s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738315", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 39 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "design-assume-past-camera-word", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# 10 Surprising Facts About the Dutch Language That Will Make You Say “Wauw!” + +If you think Dutch is just “German with a sore throat,” think again. From tongue-twisting consonant clusters to words that sound like they were invented by a stand-up comedian, Dutch is a linguistic playground. Below are ten facts that prove the language is far more colorful than its reputation for windmills and wooden shoes. + +--- + +### 1. *The longest word in Dutch is 53 letters long* +Meet **“kindercarnavalsoptochtvoorbereidingswerkzaamhedenplan”**—a single noun describing the preparation work for a children’s carnival parade. Dutch loves to glue words together, so once you master the basics you can create your own kilometer-long compounds on the fly. + +--- + +### 2. Dutch has three genders, but only two articles +Grammatically, nouns are masculine, feminine, or neuter. 
In everyday speech, however, masculine and feminine collapsed into a single “common” gender, leaving *“de”* for common and *“het”* for neuter. The result? A never-ending guessing game for learners. + +--- + +### 3. *The Dutch alphabet ends with “ij,” not “z”* +Schoolchildren recite the ABC and finish with “…x, y, ij, z.” The digraph *ij* is so common (think *hij* = he, *mijn* = my) that it’s treated almost like a separate letter. On old typewriters, *ij* even had its own key. + +--- + +### 4. Dutch is the closest major language to English +Frisian is technically closer, but among languages with tens of millions of speakers, Dutch wins. Compare: +- English *“bread”* → Dutch *“brood”* +- English *“green”* → Dutch *“groen”* +Shared roots mean you can read simple Dutch signs long before you’ve cracked a single grammar book. + +--- + +### 5. *Dutch gave New York its first words* +Before it was New York, it was New Amsterdam. That’s why we say *“cookie”* (from Dutch *koekje*) instead of *“biscuit,”* and *“boss”* (from *baas*) instead of *“master.”* Wall Street? It once ran alongside the *“wal”* (rampart) of the city wall. + +--- + +### 6. The word *“gezellig”* has no exact English translation* +Roughly it means “cozy,” “convivial,” or “the warm feeling of being with friends,” but none of those capture the full vibe. A café can be *gezellig*, a conversation can be *gezellig*, even a spreadsheet can be *gezellig* if the team is laughing. + +--- + +### 7. Dutch spelling changed overnight in 1996 +The *“Groene Boekje”* (Green Booklet) reforms simplified thousands of words. *“potatos”* became *“aardappelen”* and *“cat’s”* became *“katten.”* Overnight, every dictionary, street sign, and Scrabble board in the country was technically out of date. + +--- + +### 8. *Dutch is spoken on six continents* +Besides the Netherlands and Belgium, it’s an official language in Suriname, Aruba, Curaçao, and Sint Maarten. 
Thanks to emigration, you’ll also find active Dutch-speaking communities in Canada, Australia, Argentina—even South Africa, where Afrikaans is a daughter language. + +--- + +### 9. The Dutch love diminutives +Add *-je* to almost any noun and you instantly shrink it: *“huis”* (house) → *“huisje”* (little house). It’s not just about size; diminutives add affection. A *“biertje”* isn’t a small beer—it’s an invitation to relax. + +--- + +### 10. *Dutch is the language of the world’s oldest multinational* +The Vereenigde Oostindische Compagnie (VOC) was founded in 1602 and issued the planet’s first publicly traded stock. Its ledgers, contracts, and ship logs were all written in Dutch, making the language the original tongue of global capitalism. + +--- + +Ready to dive in? Start with *hallo* and *dank je wel*, and you’ll already be halfway to *gezellig*.", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T00:23:12.738340", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "analyze-political-poorly-air-hardware", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6.dev22+gfb34cf0.dirty", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "express-prove-private-internet-goal", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Gideon has several good options for his surplus: + +1. **Sell it at the local farmers’ market or to a wholesaler** to turn the extra produce into cash that can cover next season’s seed, fertilizer, and equipment costs. +2. **Store some of the surplus in a community grain silo** so he can sell later when prices are higher. +3. **Donate part of the surplus to the regional food bank**, helping families in need and possibly qualifying for a small tax deduction. +4. **Use the surplus as livestock feed** for his own animals, cutting down on purchased feed expenses. 
+ +*The most profitable short-term choice is usually selling the surplus immediately after harvest, when demand is strong and prices are favorable.*", + "$.messages[1].role": "assistant", + "$.pid": 68433, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.042894", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 30 highlighted sections (required: 15)", + "$.evaluation_result.score": 1, + "$.ground_truth": "15", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "should-speak-cold-film-festival", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write an outline for a paper on the history of Yemeni coffee. The outline should include the main points of the paper, and at least 15 sections should be highlighted with markdown such as *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# Outline: The History of Yemeni Coffee + +1. **Introduction: Why Yemeni Coffee Matters** + - Thesis: Yemeni coffee is not merely a beverage but a cultural, economic, and geopolitical force that shaped global history. + - Scope: From its mythical discovery in 9th-century Yemen to its modern revival. + +2. 
**Pre-Coffee Yemen: The Land, the People, and the Plant** + - Geography of the Yemeni highlands and terraced agriculture. + - Indigenous *Coffea arabica* varieties and early cultivation techniques. + - *The myth of the goatherd Kaldi and the dancing goats*. + +3. **The Sufi Spark: Coffee as a Spiritual Elixir** + - 15th-century Sufi monasteries in Mocha and Taiz. + - *Qahwa as a tool for nocturnal dhikr rituals*. + - Early roasting, grinding, and brewing methods. + +4. **Mocha: The Birthplace of a Global Commodity** + - Rise of the port city of Mocha (Al-Mukha) in the 16th century. + - *The Mocha trade network linking Yemen, Egypt, and the Ottoman Empire*. + - Coffeehouses as centers of intellectual and political discourse. + +5. **The Ottoman Grip and the First Coffee Monopoly** + - 1536: Ottoman conquest of Yemen and control of coffee exports. + - *The ban on fertile beans to prevent cultivation elsewhere*. + - Smuggling and the first cracks in the monopoly. + +6. **The Dutch Heist and the Global Spread of Coffee** + - 1616: Dutch traders smuggle Yemeni beans to Batavia (Java). + - *The birth of Dutch colonial coffee plantations*. + - How Mocha’s loss became the world’s gain. + +7. **Coffeehouses of Cairo and Istanbul: Yemen’s Cultural Export** + - 1554: First Ottoman coffeehouse in Istanbul. + - *The “Schools of the Wise” in Cairo’s Khan el-Khalili*. + - Coffee as a vector for Yemeni poetry, music, and Sufi thought. + +8. **The European Coffee Craze and the Mocha Brand** + - 17th-century European coffeehouses and the “Mocha” label. + - *The false association of Mocha with chocolate flavor*. + - Yemeni coffee’s role in the Enlightenment. + +9. **Colonial Rivalries and the Decline of Yemeni Dominance** + - 18th-century: French, British, and Dutch plantations outcompete Yemen. + - *The collapse of Mocha’s monopoly and the rise of Caribbean and Latin American coffee*. + - Impact on Yemeni economy and society. + +10. 
**Yemeni Coffee in the Age of Empire** + - 19th-century British and Ottoman interests in Yemeni coffee. + - *The introduction of new cash crops (qat) and their effect on coffee cultivation*. + - Decline in quality and quantity due to neglect and conflict. + +11. **The Cold War and the Coffee Crisis** + - 20th-century Yemeni division (North/South) and Soviet/US influences. + - *The shift from coffee to qat in rural livelihoods*. + - Drought, pests, and civil wars further erode production. + +12. **The Specialty Coffee Renaissance: Yemen’s Return to Glory** + - 21st-century global demand for single-origin, heirloom coffees. + - *The discovery of “Yemenia” varietals and their genetic uniqueness*. + - NGOs and cooperatives reviving traditional farming. + +13. **Modern Challenges: War, Climate, and Market Access** + - 2015–present: Saudi-led conflict and blockade. + - *The siege of Hodeidah port and its impact on coffee exports*. + - Climate change and water scarcity in the highlands. + +14. **Yemeni Coffee in the Cup: Flavor, Ritual, and Identity** + - Tasting notes: dried fruit, spice, and wine-like acidity. + - *The Yemeni coffee ceremony (bunna) and its social significance*. + - Diaspora communities keeping traditions alive abroad. + +15. **Conclusion: The Future of Yemeni Coffee** + - Balancing heritage with innovation. + - *The role of Yemeni coffee in post-war reconstruction*. + - Final reflection: From the highlands of Sana’a to the cups of Brooklyn, Yemeni coffee remains a bridge between past and future.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.042934", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 16 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "destroy-rational-cultural-available-meeting", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a cover letter for a job at a local coffee shop in the form of a poem. Highlight at least 5 text sections using "*". For example: *3 years of experience*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Dear Brewmaster of *Morning Rituals*, +I come not with parchment résumé, +but with *3 years of experience* tucked in my apron pocket, +steam-warm and espresso-dark. + +I have *memorized 47 regulars’ names*— +each syllable frothed like oat-milk foam— +and I know that *Carla takes two raw sugars* +while *Mr. Lee prefers his cappuccino bone-dry*. + +I can *dial in a grinder* at 5:03 a.m. +to the *exact 18.5 grams* that coax +caramel notes from Ethiopian beans, +and still greet the line with *a sunrise smile*. + +My wrists speak in *latte-art cursive*: +hearts, rosettas, and once (on a dare) +a tiny saxophone for the jazz trio +who tips in *harmonies and quarters*. 
+ +I *close the shop solo* without a single bean left homeless, +count the till to the *penny and a poem*, +and leave the milk pitchers *mirror-bright*, +so the night shift sees its own reflection of care. + +Hire me, and the *scent of fresh-ground hope* +will greet every customer at the door; +I’ll be the *steady hand* behind the bar, +pouring *courage in ceramic*, one cup at a time. + +Yours in steam and service, +Riley", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.042963", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 10 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "provide-cool-meeting-kid-book", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a casual blog post about similarities across animal species. Highlight at least 5 sections in your answer by starting and ending with "*", like: *highlighted text section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Hey friends! 
Grab a coffee and let’s talk about something that always blows my mind: *how much we all have in common, even when we look nothing alike*. I was scrolling through wildlife photos the other night and realized that a jellyfish and I are basically running the same basic software—just with different packaging. So here’s a laid-back tour of five “wait, we do that too?!” moments across the animal kingdom. + +*We all need our beauty sleep* +Turns out, every animal with a nervous system—from fruit flies to blue whales—shares a version of sleep. Crows nap on tree branches, octopuses snooze in coconut-shell beds, and even roundworms have “lethargus,” which is basically worm REM. Scientists think sleep is so ancient that the last common ancestor of all animals already had it. So next time you hit snooze, remember you’re honoring a 700-million-year-old tradition. + +*Love songs come in many dialects* +Whether you’re a human humming in the shower or a humpback whale belting out 30-minute ballads, *we all use rhythm and pitch to flirt and bond*. Frogs croak in choruses, nightingales remix their playlists nightly, and male mice sing ultrasonic “pickup lines” too high for us to hear. Same goal, different instruments. + +*Grandma knew best: family matters* +Elephants babysit their nieces, wolves bring back take-out for auntie, and *clownfish dads are the ultimate stay-at-home fathers*. Across species, kinship networks boost survival. Even bacteria split into “sibling” lineages that cooperate—so the next time your cousin crashes on your couch, call it evolutionary strategy. + +*Stress looks the same on everyone* +When life gets spicy, our bodies flip the same ancient switch. *Cortisol floods fish, birds, and humans alike*, making hearts race and muscles twitch. Zebra finches lose their appetite during tough times, and stressed-out bees become pessimistic (yes, they can test bee mood). 
So if you’re doom-scrolling at 2 a.m., know that a stressed squirrel in your backyard totally gets you. + +*Play isn’t optional—it’s homework for life* +Otters juggle rocks, dogs zoom in circles, and young ravens snowboard down snowy roofs using jar lids. *Play builds brains and social skills across the board*. Researchers have spotted reptiles “wrestling” for fun and octopuses repeatedly bouncing Lego blocks like toddlers. If it has neurons, it probably needs recess. + +Next time you feel alone in the universe, remember that somewhere a parrot is probably preening its best friend, a bat is sharing food with a hungry roost-mate, and a tiny nematode is curling up for a nap. We’re all just variations on a very old, very shared theme.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.042989", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 6 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "delete-stale-similar-others-email", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Generate a business proposal to start a sweatshirt company in Bremen. The proposal should contain 5 or more sections. Highlight each section name using the this format: +*section name*", + "$.messages[0].role": "user", + "$.messages[1].content": "*Executive Summary* +Bremen Sweat Co. will be a premium, sustainability-driven sweatshirt brand headquartered in Bremen, Germany. We will design, manufacture, and sell limited-edition sweatshirts that combine Nordic minimalism with Hanseatic maritime heritage. Our mission is to become Bremen’s first carbon-neutral apparel label while capturing 5 % of the city’s €120 million annual streetwear market within three years. Initial funding of €250 k will cover product development, a flagship micro-factory/store in the Viertel district, and a digital-first go-to-market strategy. + +*Market Analysis* +Bremen’s population of 570 k includes 120 k university students and 80 k creative-industry professionals—our core demographic. Streetwear sales in Northern Germany grew 11 % YoY (Statista 2023), yet only 8 % of brands are locally produced. 
Competitor benchmarking shows price gaps: fast-fashion sweatshirts sell at €25–35, mid-tier at €60–80, and premium sustainable labels at €110–140. We will occupy the €85–95 niche with superior materials and storytelling. Surveys (n = 400) reveal 68 % of Bremen consumers would pay a 20 % premium for locally made, eco-certified apparel. + +*Product & Sustainability Strategy* +Each sweatshirt will be crafted from 100 % GOTS-certified organic cotton and recycled ocean-bound polyester, dyed in Bremen using closed-loop water systems. Designs will feature subtle references to Bremen’s Town Musicians, ship masts, and Weser river textures. Limited drops of 300 units per style ensure scarcity and collectability. Blockchain-based QR codes will provide end-to-end transparency: fiber origin, factory GPS, and carbon footprint per garment (-2.1 kg CO₂e vs. industry average 8.5 kg). End-of-life take-back program offers 15 % store credit for returned items, feeding our upcycling line. + +*Operations & Supply Chain* +Micro-factory (180 m²) in the Viertel will house 4 digital knitting machines and 6 seamstresses, producing 1 200 sweatshirts/month. Local sourcing: yarn from OEKO-TEX mill in Oldenburg, trims from Hamburg port suppliers. Just-in-time production reduces inventory risk by 40 %. Bremen’s port infrastructure enables low-impact inbound logistics; outbound shipments leverage DHL GoGreen within Germany (24 h delivery). ISO 14001 certification targeted by month 12. + +*Marketing & Sales Channels* +Phase 1 (Months 1–6): Pop-up store at Lloyd Passage, Instagram/TikTok campaigns with Bremen influencers (@hanseaticstyle, 180 k followers). Phase 2: Flagship store launch with AR fitting mirrors and Weser riverfront window projection art. E-commerce via Shopify Plus, offering 3-D configurator for sleeve embroidery. Wholesale partnerships with 5 concept stores in Berlin, Hamburg, and Copenhagen. Revenue split: 60 % DTC, 25 % online, 15 % wholesale. 
Loyalty app rewards sustainable actions (bike delivery, clothing swaps). + +*Financial Projections & Funding* +Year 1: Revenue €420 k, gross margin 58 %, EBITDA -€30 k (investment phase). Year 2: Revenue €950 k, EBITDA 12 %. Year 3: Revenue €1.8 m, EBITDA 22 %. Break-even month 18. Initial ask: €250 k seed (€150 k equipment, €60 k marketing, €40 k working capital). Investor IRR 38 % over 5 years with 20 % equity offered. ESG-linked loan option with Bremen Green Finance Initiative at 2.1 % interest.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043019", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 39 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "remain-large-left-team-wish", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny and sarcastic template for rating the quality of a marriage between two people who are both moms. This is for the couple themselves. 
Please highlight at least 3 sections with markdown, i.e *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "**The Official “How Are We Still Married?” Scorecard** +*For Two Moms Who’ve Survived Sleepless Nights, PTA Wars, and Each Other’s Cooking* + +--- + +**1. Morning Efficiency Rating** +*How quickly can we get two kids, one dog, and zero coffee spills out the door before someone passive-aggressively “reminds” the other that *“the permission slip was due yesterday”*?* +- 10/10: We’re a synchronized swim team of lunchboxes and existential dread. +- 5/10: One of us is still Googling “how to unclog a glue stick from a USB port.” +- 1/10: We’re late because someone had to *“just quickly”* reorganize the spice rack alphabetically. + +--- + +**2. Co-Parenting Coordination** +*Rate the likelihood that we’ll both remember which kid is allergic to *“the orange cheese”* and which one is in a *“be kind to worms”* phase.* +- 10/10: We’ve got a shared Google calendar, color-coded to the point of *“militant whimsy.”* +- 5/10: We rely on telepathy and the faint hope that the school nurse *“just knows.”* +- 1/10: We’ve accidentally sent the wrong kid to violin practice *twice*. The violin teacher now thinks we’re running a *“child exchange program.”* + +--- + +**3. Romance Maintenance** +*How often do we prioritize date night over *“collapsing into a heap of mutual resentment”*?* +- 10/10: We’ve scheduled a candlelit dinner for *“when the kids go to college.”* +- 5/10: Our last date was a *“romantic”* trip to Target where we *“accidentally”* held hands in the freezer aisle. +- 1/10: We consider *“simultaneously ignoring the laundry”* a form of intimacy. + +--- + +**Bonus Round: Petty Argument Hall of Fame** +*Check all that apply:* +- [ ] The Great 2019 Dishwasher Loading Schism (*“Knives point DOWN, Karen!”*) +- [ ] The Passive-Aggressive Post-it War of 2021 (*“Your turn to buy *‘organic’* milk. 
XOXO”*) +- [ ] The Thermostat Cold War (*“68°F is a *human rights violation*.”*) + +--- + +**Final Score Interpretation:** +- **30–40 points:** *You’re the Beyoncé and Jay-Z of lesbian moms.* +- **15–29 points:** *You’re more like *“Thelma and Louise,”* but with minivans and a Costco membership.* +- **0–14 points:** *You’re still married, which is statistically *“a win.”* Celebrate with *“leftover chicken nuggets”* and a *“we’ll deal with this tomorrow”* shrug.*", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043042", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 1 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "read-wish-deliver-great-case", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a song about the summers of my childhood that I spent in the countryside. Give the song a name, and highlight the name by wrapping it with *. 
For example: *little me in the countryside*.", + "$.messages[0].role": "user", + "$.messages[1].content": "*little me in the countryside* + +[Verse 1] +Barefoot on the gravel road, dust between my toes, +Sunlight dripped like honey on the corn rows. +Grandma’s porch swing creaked the same old tune, +While fireflies spelled my name across June. +Mason jar of lemonade, half a moon inside, +I chased the shade like it was trying to hide. + +[Chorus] +Oh, little me in the countryside, +Running through the wheat so high, +Every breeze was a lullaby, +And the sky was big enough to fly. +I left my worries by the riverside, +Back when I was little me in the countryside. + +[Verse 2] +Tractor songs at twilight, crickets keeping time, +Dirt on my knees, freckles in a climbing rhyme. +Creek water baptized my sunburned skin, +I believed every ripple was a secret twin. +Under the quilt of stars, I counted dreams instead, +While the Milky Way braided night through my head. + +[Chorus] +Oh, little me in the countryside, +Running through the wheat so high, +Every breeze was a lullaby, +And the sky was big enough to fly. +I left my worries by the riverside, +Back when I was little me in the countryside. + +[Bridge] +Now the city lights flicker, but I still close my eyes— +I can smell the clover, hear the distant barn owls’ cries. +Time may stretch the map, but it can’t erase +The echo of my laughter in that endless open space. + +[Final Chorus] +Oh, little me in the countryside, +Keep the gate open, I’m still nearby. +Though the roads have changed and the years have flown, +A part of me is always home. 
+In every golden field I drive beside, +I find little me in the countryside.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043069", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 3 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "do-test-spicy-dead-television", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a riddle for the word "façade" that contains at least 3 italic text phrases in markdown syntax, i.e *italic text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "I wear a *painted smile* for every passer-by, +a *perfectly symmetrical* face that never cracks. +Behind me, the *crumbling bricks* sigh in secret. 
+What am I?", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043094", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 19 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "protect-other-major-green-war", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a template for a workshop on the importance of diversity in the workplace and highlight at least 3 sections with markdown, i.e. *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# Workshop Template +**Title:** *Building Inclusive Excellence: The Business & Human Case for Workplace Diversity* +**Duration:** 3.5 hours (can be split into two 90-minute sessions) +**Audience:** Managers, HR professionals, team leads, ERG members, and any employee who influences culture + +--- + +## 1. Welcome & Objectives (15 min) +- Ice-breaker: “Two Truths & a Lie—Diversity Edition” +- *Learning objectives*: By the end of this workshop, participants will be able to: + 1. 
Define diversity, equity, inclusion, and belonging (DEIB) in their own words. + 2. Articulate at least three evidence-based benefits of a diverse workforce. + 3. Identify one action they can take within 30 days to foster inclusion on their team. + +--- + +## 2. *The Data-Driven Case for Diversity* (25 min) +- Interactive poll: “What % performance lift do you think diverse teams achieve?” +- Mini-lecture with live charts: + - McKinsey 2023 report: Top-quartile companies for ethnic/gender diversity are **39 % more likely to outperform** on profitability. + - Harvard study: *Inclusive teams make better business decisions 87 % of the time* and do so twice as fast. +- Small-group exercise: Match the statistic to the business outcome (revenue, innovation, retention). + +--- + +## 3. *Unconscious Bias & Micro-behaviors* (30 min) +- 5-minute video: “The Resume with Two Names” (identical CVs, different callbacks). +- Bias Busters card game: Participants draw scenario cards and practice interrupting bias in real time. +- *Key takeaway*: Bias is not a moral failing; it is a neurological shortcut that can be rewired through deliberate habits. + +--- + +## 4. *Inclusive Leadership Toolkit* (40 min) +- **The 4 A’s Framework**: + 1. **Awareness** – Daily 3-minute bias check-in. + 2. **Amplification** – Repeat & credit ideas from under-represented voices. + 3. **Access** – Rotate high-visibility assignments. + 4. **Accountability** – Track inclusion metrics on team dashboards. +- Role-play: “The Missed Promotion” – diagnose where each ‘A’ broke down and redesign the conversation. + +--- + +## 5. Action Planning & Commitments (20 min) +- Personal *Inclusion Commitment Card*: “In the next 30 days I will…” +- Pair-share for accountability. +- QR code to a digital resource hub (TED talks, reading list, policy templates). + +--- + +## 6. Closing & Evaluation (10 min) +- One-word checkout: “How do you feel now?” +- *Post-workshop survey* link (2 min) with prize draw for completion. 
+ +--- + +## Materials Checklist +- Slide deck (with alt-text on all images) +- Printed Commitment Cards & pens +- Bias Busters card decks (laminated) +- Timer & bell +- Snacks labeled for common dietary restrictions (gluten-free, vegan, halal) + +--- + +## Follow-Up Email Template (send within 24 hrs) +Subject: Your Inclusion Toolkit + Next Steps +Body: +Hi team, +Thank you for leaning into today’s conversation on *why diversity wins*. Attached are: +- Slide deck & resource links +- Your signed Inclusion Commitment Card (photo reminder) +- Calendar invite for the optional 30-day check-in circle + +Remember: Inclusion is a practice, not a one-time training. Let’s keep the momentum!", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043121", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 23 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "collect-destroy-difficult-conference-hardware", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a cover letter to a local political party, asking to be their rally organizer. Make sure to highlight at least 3 sections in your answer in markdown format.", + "$.messages[0].role": "user", + "$.messages[1].content": "**[Your Name]** +[Street Address] +[City, State ZIP] +[Email Address] | [Phone Number] | [LinkedIn URL] +[Date] + +**[Local Party Chairperson’s Name]** +[Local Party Name] +[Party Headquarters Address] +[City, State ZIP] + +Dear **Chairperson [Last Name]**, + +I am writing to express my enthusiastic interest in serving as **Rally Organizer** for **[Local Party Name]**. As a lifelong resident of **[City/County]**, I have watched our community’s political pulse beat strongest when neighbors gather in common purpose. I want to channel that energy into rallies that not only energize our base but also invite undecided voters to witness the power of our shared vision. + +### **Proven Event-Execution Expertise** +During the 2022 mid-term cycle, I spearheaded a **three-county voter-registration caravan** that staged 14 pop-up events in 30 days. 
+- **Logistics**: Coordinated permits, portable stages, and ADA-compliant access for 2,000+ attendees per stop. +- **Volunteer Mobilization**: Recruited and trained 120 volunteers, creating a color-coded “Rally-in-a-Box” toolkit now used by three neighboring county parties. +- **Real-Time Adaptability**: When a thunderstorm threatened our largest outdoor rally, I pivoted to an indoor gymnasium within 90 minutes—live-streaming the switch to maintain momentum and ultimately exceeding projected turnout by 18 %. + +### **Data-Driven Rally Strategy** +I treat every rally as a **micro-campaign laboratory**. +- **Pre-Event Analytics**: I overlay precinct-level voter-file data with heat-mapping tools to select venues within 0.7 miles of our highest “persuasion index” households. +- **On-Site Engagement**: QR-coded wristbands linked to instant sign-up forms increased volunteer recruitment by 34 % compared to clipboard methods. +- **Post-Event ROI**: My post-rally surveys (average 42 % response rate) feed a dashboard that tracks pledge-to-action conversion, allowing us to refine messaging within 48 hours. + +### **Coalition-Building & Safety Leadership** +Understanding that rallies must feel **safe and inclusive**, I have: +- Partnered with local **faith leaders, PTAs, and small-business associations** to co-sponsor events, expanding our demographic reach by 27 %. +- Completed **FEMA crowd-management certification** and maintain relationships with county emergency management to ensure rapid-response protocols. +- Implemented a **“Community Marshals”** program—training 50 volunteer marshals in de-escalation techniques, cutting incident reports to zero across 22 events. + +I would welcome the opportunity to discuss how my blend of grassroots passion, logistical precision, and data fluency can help **[Local Party Name]** turn every rally into a springboard for victory in the upcoming cycle. Thank you for considering my application. 
+ +**Respectfully,** + +[Your Signature (if submitting hard copy)] +[Your Printed Name]", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043147", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 4 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "build-upgrade-green-white-reason", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "The opposite of youth is not age, but ...? Highlight at least 2 sections in your answer with markdown, i.e. *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "The opposite of youth is not age, but **stagnation**—the moment when *curiosity closes its eyes* and the impulse to explore, question, and reinvent quietly fades. 
+ +Where youth is a *perpetual state of becoming*, its true counter is the condition in which *growth is willingly surrendered* and the mind settles into fixed certainties.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043177", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 13 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "need-hostile-dream-business-person", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a description of the following data in a weird style: The Golden Palace eatType restaurant; The Golden Palace food Indian; The Golden Palace area city centre. Use markdown to highlight at least 3 sections in your answer.", + "$.messages[0].role": "user", + "$.messages[1].content": "**The Golden Palace** is not a palace at all, but a **restaurant** that has disguised itself as a mirage of saffron and cardamom in the **city centre**. It whispers **Indian** recipes into the ears of pigeons, who then forget how to fly and instead waddle around the square, dizzy with cumin. 
+ +Inside, the walls sweat **garam masala** and the tables grow tiny **basil plants** that recite Bollywood dialogues when no one is looking. The waiters glide like **shadow puppets**, balancing trays of **butter chicken** that glow like radioactive sunsets. + +If you order **naan**, it arrives folded into the shape of a **paper swan**, flapping once before dissolving into steam that smells of **monsoon rain**. The bill is presented on a **lotus leaf**, and if you stare at it long enough, the numbers rearrange themselves into a **limerick about turmeric**.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043206", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 14 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "store-reduce-unknown-girl-hate", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny rap about a man who gets a call from an official saying that he is a long lost relative of the king of Nigeria. Use markdown to highlight at least one section of your answer, i.e. 
*highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "🎤 *Yo, it’s the tale of Sir Scam-a-lot, straight outta suburbia!* 🎤 + +Yo, my name is Dave, I sell insurance in Ohio, +Got a cubicle, a cat, and a 401(k) that’s kinda fly-o. +One Tuesday, phone rings—unknown number, what the heck? +Voice says, “*Prince Ogbonna the Third* just wrote you a fat check!” + +I’m like, “Hold up, who dis? I’m whiter than printer paper, +My bloodline’s mayo on Wonder Bread—ain’t no royal vapor!” +Dude on the line, accent silkier than lotion, +Says, “Cousin, you’re 37th in line—time to claim your ocean!” + +**[Hook]** +🎶 *Long lost king of Nigeria, crown sittin’ on my dome,* +*Got a palace full of leopards and a Wi-Fi that roams!* +*They need my SSN and a quick wire fee,* +*But I’m ‘bout to be big pimpin’ in Abuja, just wait and see!* 🎶 + +He says, “Just send five grand for customs, then the jet lands, +You’ll be dripping in so much gold, even your sweat glands bling.” +I’m sweatin’ like a rotisserie, visions of caviar dreams, +I Venmo “PrinceNaija69” like I’m funding meme schemes. + +Next day, email: “Oops, palace goat ate the transfer slip, +Need ten more for goat surgery and a diamond-encrusted whip.” +I hock my lawnmower, my Xbox, and Aunt Linda’s porcelain goose, +Wire cash so fast my bank’s like, “Dave, you loose!” + +**[Bridge – Spoken Word]** +Then the *official royal seal* arrives— +It’s a GIF of Shrek in a crown doing the Macarena. 
+I’m starting to sense a pattern here… + +**[Final Hook – Regret Remix]** +🎶 *Now I’m the king of my mom’s basement, throne’s a beanbag chair,* +*Royal decree: never trust a prince who types in all caps—* +*“URGENT, COUSIN, SEND MORE FLAMINGOS!”* +*Lesson learned: if the crown’s too shiny, it’s probably aluminum foil, homie!*", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043233", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 6 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "mean-take-left-high-family", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Expand the riddle into a story with a funny tone: + +What can you catch but not throw? +A cold + +Use * to highlight at least 2 sections in your text. For example: *this is a highlighted text section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Gary the Germ was the *smallest, greenest, and most obnoxiously proud* microbe in the entire Sniffle Kingdom. 
Every morning he practiced his grand entrance: sliding down banisters made of nasal hair, cannon-balling into unsuspecting tonsils, and yelling, “ACHOO—did somebody order a cold?” + +One Tuesday, Gary spotted his dream target: *Bartholomew “Bart” Bigglesworth, a man who believed hand-washing was a government conspiracy*. Bart had just exited the subway, triumphantly licking a subway pole “for the vitamins.” Gary rubbed his microscopic hands together. “Perfect. I’ll hitch a ride on that finger, ride the express elevator to the sinuses, and throw the *snottiest* house-warming party in history!” + +But there was a catch—literally. Gary could *catch* Bart, but he couldn’t *throw* him anywhere. Every time Gary tried to catapult Bart into the next office cubicle, Bart just sneezed, wiped his nose on his sleeve, and kept typing spreadsheets. Gary screamed, “This is mutiny! I’m supposed to be the one doing the tossing!” + +Meanwhile, Bart’s immune system—led by Captain Phlegm and the Mucus Marines—launched *Operation Tissue Storm*. They surrounded Gary, armed with vitamin C grenades and zinc zappers. Gary attempted a dramatic escape by surfing a sneeze, but the tissue slapped him like a flyswatter on a summer day. + +Defeated, Gary sat in the crumpled tissue, sulking. “I caught the cold,” he muttered, “but I can’t throw it anywhere. Worst. Superpower. Ever.” + +From that day forward, Gary changed his business card to read: “Professional Cold Catcher—No Returns, No Refunds, Absolutely No Throwing.”", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043257", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 17 highlighted sections (required: 3)", + "$.evaluation_result.score": 1, + "$.ground_truth": "3", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "seem-clean-simple-work-member", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a rap about the renaissance. It should be noticeably different from raps about other historical eras, and have an interesting or weird tone. Highlight at least 3 sections in your answer in markdown format.", + "$.messages[0].role": "user", + "$.messages[1].content": "**[Intro – harpsichord glitch beat, stylus scratching parchment]** +Yo, it’s 14-something, but my mind’s 3-D printed in gold leaf— +I’m the **Renaissance Rapper**, got a quill in one hand and a scalpel in the other, dissectin’ beats like Vesalius dissectin’ spleens. +Let’s get *weird* with the rebirth, baby. + +--- + +### **Section 1 – The Fresco Flex** +I drip ultramarine like I’m sponsored by Afghanistan mines, +Michelangelo on the ceiling, I’m reclining on divine lines. +Angels doing backflips off the Sistine, they’re hype for my set, +While I tag “*AMOR VINCIT OMNIA*” in invisible ink—wet. +My flow’s a trompe-l’œil, looks flat till you tilt your head— +Then it’s depth for days, like Brunelleschi’s dome overhead. 
+I’m not baroque, I’m pre-baroque, I’m the prototype shock, +Got patrons in fur-lined sleeves stuffing florins in my sock. +**I paint in egg tempera so the colors never fade,** +**But the yolk smells funky—call it *odeur de old crusade*.** + +--- + +### **Section 2 – The Printing-Press Poltergeist** +Gutenberg ghost in the machine, clanking lead type like chains, +Every page a polter-page, every serif a soul that remains. +I drop a folio, it’s viral before “viral” was a cough, +Monks in scriptoriums rage-quitting, “Bro, that illuminated stuff’s off!” +I’m the incunabula influencer, my colophon’s a flex, +Got red-ink rubricators sliding into my DMs—*next*. +**I rhyme in movable type so my bars can rearrange,** +**Yesterday I was Petrarch, today I’m deranged.** +The press spits paper like a hydra with hiccups, +Each sheet a protest flyer: “Down with scholastic pick-ups!” + +--- + +### **Section 3 – The Anatomical Anthem (Parental Advisory: Corpses)** +Night shift with da Vinci, we’re sketching by candle and moon, +Cadavers on the table like, “Do me a favor—autotune my coccyx soon.” +I rap in reverse Latin so only the dead understand, +While Leo reverse-engineers a bird-man from a frying pan. +**My beats are ventricles pumping four-four time,** +**Aorta bass drop—call that circulatory grime.** +I sample the sound of a scalpel scraping periosteum, +Layer it over a lute loop—*that’s* my magnum opium. +Galen’s in the corner, throwing shade like a Greek tragedy, +But I’m Harvey before Harvey, already rapping blood circulation majestically. + +--- + +**[Outro – fading lute arpeggio, distant cannon fire from Constantinople]** +So remember me when you’re doom-scrolling on glass, +I was the era that turned corpses into class. +I’m the **Renaissance Rapper**, my ghost still tags in chalk, +Leaving QR codes on cathedral walls—scan it, you’ll squawk. 
+Rebirth never dies, it just pixelates and loops, +Now if you’ll excuse me, I’ve got a date with Lucretius in the soup.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043282", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. + ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 20 highlighted sections (required: 5)", + "$.evaluation_result.score": 1, + "$.ground_truth": "5", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "receive-final-best-visible-night", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Summarize the history of Japan. Italicize at least 5 keywords in your response. To indicate a italic word, wrap it with asterisk, like *italic*", + "$.messages[0].role": "user", + "$.messages[1].content": "*Jōmon* hunter-gatherers (14,000 BCE) gave way to *Yayoi* rice farmers (300 BCE) who forged bronze and iron. +The *Yamato* court unified clans (3rd–7th c.), adopting *Shinto* and Chinese script. +Prince *Shōtoku*’s 17-article constitution (604) and the Taika Reforms (645) centralized power. +Heian aristocrats (794–1185) produced *The Tale of Genji* while samurai rose in the provinces. 
+Minamoto no Yoritomo founded the *Kamakura* shogunate (1185), introducing warrior rule. +Mongol invasions (1274, 1281) were repelled by “divine winds” (*kamikaze*). +Ashikaga *Muromachi* shogunate (1336–1573) saw Zen gardens and Ōnin War chaos. +Oda Nobunaga, Toyotomi Hideyoshi, and Tokugawa Ieyasu ended the *Sengoku* “warring states” era. +Tokugawa *Edo* shogunate (1603–1868) isolated Japan, fostering *ukiyo-e* and *kabuki*. +Commodore Perry’s *Black Ships* (1853) forced the Meiji Restoration (1868), modernizing the empire. +Victories in the *Sino-Japanese* (1894–95) and *Russo-Japanese* (1904–05) wars expanded territory. +Militarism led to *Pearl Harbor* (1941) and WWII defeat (1945); the U.S. occupation drafted a pacifist constitution. +Post-war *economic miracle* made Japan the world’s second-largest economy until the 1990s *bubble*. +Today, Japan balances *constitutional monarchy* with cutting-edge technology and soft-power culture.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043309", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 5 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "record-live-better-guy-country", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a funny Haiku about a Quaker named John who lives in the town of De Smet, South Dakota. Use the asterisk symbol, *, to highlight some words or phrases twice. Example: *This is a highlighted phrase*.", + "$.messages[0].role": "user", + "$.messages[1].content": "*John* in *De Smet* +Quaker hat *blows off*—chases +*Silent* *scream*—“Come back!”", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043332", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 19 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "talk-wonder-bad-dangerous-health", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. Please continue writing this text in a formal tone, using notations. Highlight some key parts in your response with "*", like *highlighted text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Today, at the 54th Annual Grammy Awards, the Recording Academy honors the talent and creativity of the artists, musicians, and producers who are the creators of the best recordings of the past year. In a ceremony marked by *unprecedented artistic excellence* and *meticulous craftsmanship*, the Academy recognizes those whose work has defined the sonic landscape of 2011. + +The evening’s proceedings underscore the Academy’s enduring commitment to *artistic integrity* and *technical innovation*. Among the most celebrated achievements are: + +• *Album of the Year* – awarded to an opus that has redefined genre boundaries while achieving both critical acclaim and popular resonance. 
+• *Record of the Year* – bestowed upon a single track whose *sonic clarity*, *emotive depth*, and *production mastery* have set a new benchmark for the industry. +• *Best New Artist* – conferred upon an emerging voice whose *visionary artistry* signals the future direction of contemporary music. + +Throughout the telecast, special tributes illuminate the *indelible legacies* of icons whose influence transcends generations, while collaborative performances showcase the *synergistic power* of cross-genre partnerships. The Academy further acknowledges the *behind-the-scenes luminaries*—engineers, mixers, and mastering specialists—whose *invisible expertise* transforms raw inspiration into *audible transcendence*. + +In closing, the 54th Annual Grammy Awards reaffirm the Recording Academy’s solemn charge: to *safeguard the heritage*, *nurture the present*, and *inspire the future* of recorded music.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043356", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 39 highlighted sections (required: 2)", + "$.evaluation_result.score": 1, + "$.ground_truth": "2", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "meet-hold-solid-video-moment", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Write a blog post about interesting facts about the Dutch language. Italicize at least 2 sections in your answer with markdown, i.e. *italic text*.", + "$.messages[0].role": "user", + "$.messages[1].content": "# 10 Surprising Facts About the Dutch Language That Will Make You Say “Wauw!” + +If you think Dutch is just “German with a sore throat,” think again. From tongue-twisting consonant clusters to words that sound like they were invented by a stand-up comedian, Dutch is a linguistic playground. Below are ten facts that prove the language is far more colorful than its reputation for windmills and wooden shoes. + +--- + +### 1. *The longest word in Dutch is 53 letters long* +Meet **“kindercarnavalsoptochtvoorbereidingswerkzaamhedenplan”**—a single noun describing the preparation work for a children’s carnival parade. Dutch loves to glue words together, so once you master the basics you can create your own kilometer-long compounds on the fly. + +--- + +### 2. Dutch has three genders, but only two articles +Grammatically, nouns are masculine, feminine, or neuter. 
In everyday speech, however, masculine and feminine collapsed into a single “common” gender, leaving *“de”* for common and *“het”* for neuter. The result? A never-ending guessing game for learners. + +--- + +### 3. *The Dutch alphabet ends with “ij,” not “z”* +Schoolchildren recite the ABC and finish with “…x, y, ij, z.” The digraph *ij* is so common (think *hij* = he, *mijn* = my) that it’s treated almost like a separate letter. On old typewriters, *ij* even had its own key. + +--- + +### 4. Dutch is the closest major language to English +Frisian is technically closer, but among languages with tens of millions of speakers, Dutch wins. Compare: +- English *“bread”* → Dutch *“brood”* +- English *“green”* → Dutch *“groen”* +Shared roots mean you can read simple Dutch signs long before you’ve cracked a single grammar book. + +--- + +### 5. *Dutch gave New York its first words* +Before it was New York, it was New Amsterdam. That’s why we say *“cookie”* (from Dutch *koekje*) instead of *“biscuit,”* and *“boss”* (from *baas*) instead of *“master.”* Wall Street? It once ran alongside the *“wal”* (rampart) of the city wall. + +--- + +### 6. The word *“gezellig”* has no exact English translation* +Roughly it means “cozy,” “convivial,” or “the warm feeling of being with friends,” but none of those capture the full vibe. A café can be *gezellig*, a conversation can be *gezellig*, even a spreadsheet can be *gezellig* if the team is laughing. + +--- + +### 7. Dutch spelling changed overnight in 1996 +The *“Groene Boekje”* (Green Booklet) reforms simplified thousands of words. *“potatos”* became *“aardappelen”* and *“cat’s”* became *“katten.”* Overnight, every dictionary, street sign, and Scrabble board in the country was technically out of date. + +--- + +### 8. *Dutch is spoken on six continents* +Besides the Netherlands and Belgium, it’s an official language in Suriname, Aruba, Curaçao, and Sint Maarten. 
Thanks to emigration, you’ll also find active Dutch-speaking communities in Canada, Australia, Argentina—even South Africa, where Afrikaans is a daughter language. + +--- + +### 9. The Dutch love diminutives +Add *-je* to almost any noun and you instantly shrink it: *“huis”* (house) → *“huisje”* (little house). It’s not just about size; diminutives add affection. A *“biertje”* isn’t a small beer—it’s an invitation to relax. + +--- + +### 10. *Dutch is the language of the world’s oldest multinational* +The Vereenigde Oostindische Compagnie (VOC) was founded in 1602 and issued the planet’s first publicly traded stock. Its ledgers, contracts, and ship logs were all written in Dutch, making the language the original tongue of global capitalism. + +--- + +Ready to dive in? Start with *hallo* and *dank je wel*, and you’ll already be halfway to *gezellig*.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, + { + "$.created_at": "2025-08-09T10:35:28.043383", + "$.eval_metadata.aggregation_method": "mean", + "$.eval_metadata.description": " + Evaluation function that checks if the model's response contains the required number of formatted sections. 
+ ", + "$.eval_metadata.name": "test_markdown_highlighting_evaluation", + "$.eval_metadata.num_runs": 1, + "$.eval_metadata.passed": true, + "$.eval_metadata.run_id": "stop-choose-expect-major-pain", + "$.eval_metadata.status": "finished", + "$.eval_metadata.threshold_of_success": 0.5, + "$.eval_metadata.version": "v0.2.6-dev1.dev4+g67b03c6", + "$.evaluation_result.is_score_valid": true, + "$.evaluation_result.reason": "✅ Found 2 highlighted sections (required: 1)", + "$.evaluation_result.score": 1, + "$.ground_truth": "1", + "$.input_metadata.completion_params.max_tokens": 4096, + "$.input_metadata.completion_params.model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + "$.input_metadata.completion_params.temperature": 0, + "$.input_metadata.row_id": "understand-rational-hard-kind-project", + "$.input_metadata.session_data.mode": "pointwise", + "$.messages[0].content": "Gideon is a farmer who has a surplus of crops from his farm this year. What might he do with that surplus? Highlight at least one section of your answer in markdown, i.e *highlighted section*.", + "$.messages[0].role": "user", + "$.messages[1].content": "Gideon has several good options for his surplus: + +1. **Sell it at the local farmers’ market or to a regional wholesaler** to turn the extra produce into cash he can reinvest in seeds, equipment, or next season’s labor. +2. *Donate a portion to the nearby food bank or community kitchen* so that families facing food insecurity can benefit from fresh, nutritious produce. +3. 
Preserve some of the surplus—by canning tomatoes, drying herbs, or freezing berries—so his own household has a steady supply of healthy food throughout the winter.", + "$.messages[1].role": "assistant", + "$.pid": 43066, + "$.rollout_status.status": "finished", + }, +] +`; diff --git a/vite-app/src/util/flatten-json.test.ts b/vite-app/src/util/flatten-json.test.ts new file mode 100644 index 00000000..1f233a03 --- /dev/null +++ b/vite-app/src/util/flatten-json.test.ts @@ -0,0 +1,16 @@ +import { describe, it, expect } from 'vitest' +import { flattenJson } from './flatten-json' +import { readFileSync } from 'node:fs' + +describe('flattenJson against logs.json', () => { + it('flattens each entry in logs.json.logs and matches snapshot', () => { + const logsUrl = new URL('../../data/logs.json', import.meta.url) + const raw = readFileSync(logsUrl, 'utf-8') + const parsed = JSON.parse(raw) as { logs?: unknown[] } + + expect(Array.isArray(parsed.logs)).toBe(true) + const flattened = (parsed.logs ?? []).map((entry) => flattenJson(entry)) + + expect(flattened).toMatchSnapshot() + }) +}) diff --git a/vite-app/src/util/flatten-json.ts b/vite-app/src/util/flatten-json.ts new file mode 100644 index 00000000..81d16c76 --- /dev/null +++ b/vite-app/src/util/flatten-json.ts @@ -0,0 +1,69 @@ +/* + Flattens a deeply nested JSON value into a single-level object with JSONPath keys. + Prefers dot-notation for identifier-safe property names and uses brackets for + array indices and special keys: + $.foo.bar[0].baz + $['weird.key'][2]['spaced name'] + Only leaf primitives (string, number, boolean, null) are emitted. 
+*/ + +export type FlatJson = Record; + +function isPlainObject(value: unknown): value is Record { + return ( + value !== null && + typeof value === "object" && + !Array.isArray(value) + ); +} + +function joinPath(parent: string, segment: string | number): string { + if (typeof segment === "number") { + return `${parent}[${segment}]`; + } + // Use dot-notation when the key is identifier-safe, otherwise bracket-notation + const identifierSafe = /^[A-Za-z_][A-Za-z0-9_]*$/.test(segment); + if (identifierSafe) { + return `${parent}.${segment}`; + } + const escaped = segment.replace(/'/g, "\\'"); + return `${parent}['${escaped}']`; +} + +export function flattenJson(input: unknown, root: string = "$" ): FlatJson { + const out: FlatJson = {}; + + const walk = (value: unknown, path: string) => { + if ( + value === null || + typeof value === "string" || + typeof value === "number" || + typeof value === "boolean" + ) { + out[path] = value; + return; + } + + if (Array.isArray(value)) { + for (let i = 0; i < value.length; i++) { + walk(value[i], joinPath(path, i)); + } + return; + } + + if (isPlainObject(value)) { + for (const key of Object.keys(value)) { + walk((value as Record)[key], joinPath(path, key)); + } + return; + } + + // For unsupported types (e.g., functions, symbols), coerce to string + out[path] = String(value); + }; + + walk(input, root); + return out; +} + +export default flattenJson; diff --git a/vite-app/src/util/pivot.test.ts b/vite-app/src/util/pivot.test.ts new file mode 100644 index 00000000..bea1a707 --- /dev/null +++ b/vite-app/src/util/pivot.test.ts @@ -0,0 +1,195 @@ +import { describe, it, expect } from 'vitest' +import { computePivot, type Aggregator } from './pivot' +import { readFileSync } from 'fs' +import flattenJson from './flatten-json' + +type Row = { + region: string + rep: string + product: string + amount?: number | string +} + +const rows: Row[] = [ + { region: 'West', rep: 'A', product: 'Widget', amount: 120 }, + { region: 'West', rep: 
'B', product: 'Gadget', amount: 90 }, + { region: 'East', rep: 'A', product: 'Widget', amount: 200 }, + { region: 'East', rep: 'B', product: 'Gadget', amount: '10' }, + { region: 'East', rep: 'B', product: 'Gadget', amount: 'not-a-number' }, + +] + +describe('computePivot', () => { + it('computes count when no valueField provided', () => { + const res = computePivot({ + data: rows, + rowFields: ['region'], + columnFields: ['product'], + }) + + // Expect two row keys and two column keys + expect(res.rowKeyTuples.map((t) => String(t))).toEqual([ + 'East', + 'West', + ]) + expect(res.colKeyTuples.map((t) => String(t))).toEqual([ + 'Gadget', + 'Widget', + ]) + + // East/Gadget should count two (one invalid amount ignored in count mode) + const rKeyEast = 'East' + const cKeyGadget = 'Gadget' + expect(res.cells[rKeyEast][cKeyGadget].value).toBe(2) + }) + + it('computes sum aggregator', () => { + const res = computePivot({ + data: rows, + rowFields: ['region'], + columnFields: ['product'], + valueField: 'amount', + aggregator: 'sum', + }) + + const rKeyEast = 'East' + const rKeyWest = 'West' + const cKeyGadget = 'Gadget' + const cKeyWidget = 'Widget' + + // East Gadget: 10 (string convertible) + invalid -> 10 + expect(res.cells[rKeyEast][cKeyGadget].value).toBe(10) + // West Gadget: 90 + expect(res.cells[rKeyWest][cKeyGadget].value).toBe(90) + // East Widget: 200 + expect(res.cells[rKeyEast][cKeyWidget].value).toBe(200) + // West Widget: 120 + expect(res.cells[rKeyWest][cKeyWidget].value).toBe(120) + }) + + it('computes average aggregator', () => { + const res = computePivot({ + data: rows, + rowFields: ['region'], + columnFields: ['product'], + valueField: 'amount', + aggregator: 'avg', + }) + + const rKeyEast = 'East' + const rKeyWest = 'West' + const cKeyGadget = 'Gadget' + + // East Gadget: values -> [10] => avg 10 + expect(res.cells[rKeyEast][cKeyGadget].value).toBe(10) + // West Gadget: values -> [90] => avg 90 + 
expect(res.cells[rKeyWest][cKeyGadget].value).toBe(90) + }) + + it('supports custom aggregator', () => { + const maxAgg: Aggregator = (values) => + values.length ? Math.max(...values) : 0 + + const res = computePivot({ + data: rows, + rowFields: ['region'], + columnFields: ['product'], + valueField: 'amount', + aggregator: maxAgg, + }) + + const rKeyWest = 'West' + const cKeyWidget = 'Widget' + expect(res.cells[rKeyWest][cKeyWidget].value).toBe(120) + }) + + it('supports multiple column fields (composite columns)', () => { + const res = computePivot({ + data: rows, + rowFields: ['region'], + columnFields: ['product', 'rep'], + valueField: 'amount', + aggregator: 'sum', + }) + + // Row and column key tuples + expect(res.rowKeyTuples).toEqual([ + ['East'], + ['West'], + ]) + expect(res.colKeyTuples).toEqual([ + ['Gadget', 'B'], + ['Widget', 'A'], + ]) + + const rEast = 'East' + const rWest = 'West' + const cGadgetB = 'Gadget||B' + const cWidgetA = 'Widget||A' + + // Cell values (sum of numeric amounts) + expect(res.cells[rEast][cGadgetB].value).toBe(10) + expect(res.cells[rWest][cGadgetB].value).toBe(90) + expect(res.cells[rEast][cWidgetA].value).toBe(200) + expect(res.cells[rWest][cWidgetA].value).toBe(120) + + // Totals + expect(res.rowTotals[rEast]).toBe(210) + expect(res.rowTotals[rWest]).toBe(210) + expect(res.colTotals[cGadgetB]).toBe(100) + expect(res.colTotals[cWidgetA]).toBe(320) + expect(res.grandTotal).toBe(420) + }) + + it('skips records with undefined row field values', () => { + type LooseRow = { + region?: string + rep?: string + product?: string + amount?: number | string + } + + const mixed: LooseRow[] = [ + { region: 'West', rep: 'A', product: 'Widget', amount: 120 }, + // Missing region should be excluded from cells entirely + { rep: 'B', product: 'Gadget', amount: 90 }, + ] + + const res = computePivot({ + data: mixed, + rowFields: ['region'], + columnFields: ['product'], + }) + + // Only 'West' row should be present; no 'undefined' row key + 
expect(res.rowKeyTuples.map((t) => String(t))).toEqual(['West']) + expect(Object.keys(res.cells)).toEqual(['West']) + + const rKeyWest = 'West' + const cKeyWidget = 'Widget' + + // Count aggregator by default; only the valid record should be counted + expect(res.cells[rKeyWest][cKeyWidget].value).toBe(1) + + // Grand total reflects only included records + expect(res.grandTotal).toBe(1) + }) + + it("markdown-highlighter-test", () => { + // read logs.json from data/logs.json + const logsUrl = new URL('../../data/logs.json', import.meta.url) + const raw = readFileSync(logsUrl, 'utf-8') + const parsed = JSON.parse(raw) as { logs?: unknown[] } + const rows = (parsed.logs ?? []).map((entry) => flattenJson(entry)) + + const res = computePivot({ + data: rows, + rowFields: ['$.eval_metadata.run_id'], + columnFields: [], + valueField: '$.eval_metadata.passed', + aggregator: 'avg', + }) + + console.log(res) + }) +}) diff --git a/vite-app/src/util/pivot.ts b/vite-app/src/util/pivot.ts new file mode 100644 index 00000000..8ac87c64 --- /dev/null +++ b/vite-app/src/util/pivot.ts @@ -0,0 +1,301 @@ +/** + * Aggregation strategy for computing each pivot cell value. + * + * - "count": number of records in the cell (ignores valueField) + * - "sum": sum of numeric values extracted from valueField + * - "avg": average of numeric values extracted from valueField + * - custom: user function receiving the numeric values (from valueField if provided) + * and the raw records in the cell. Return the aggregated number to display. 
+ */ +export type Aggregator = + | "count" + | "sum" + | "avg" + | ((values: number[], records: T[]) => number); + +export interface PivotComputationResult { + rowKeyTuples: unknown[][]; + colKeyTuples: unknown[][]; + cells: Record>; + rowTotals: Record; + colTotals: Record; + grandTotal: number; +} + +function toKey(parts: unknown[]): string { + return parts.map((p) => String(p)).join("||"); +} + +function getTuple>( + record: T, + fields: (keyof T)[] +): unknown[] { + return fields.map((f) => record[f]); +} + +function getNumber(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value)) return value; + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : null; +} + +function aggregate>( + values: number[], + records: T[], + agg: Aggregator +): number { + if (typeof agg === "function") return agg(values, records); + if (agg === "sum") return values.reduce((a, b) => a + b, 0); + if (agg === "avg") + return values.length === 0 + ? 0 + : values.reduce((a, b) => a + b, 0) / values.length; + // default: count + return records.length; +} + +/** + * Configuration parameters for `computePivot`. + * + * @template T - Shape of each input record. Must be indexable by the keys used in + * `rowFields`, `columnFields`, and `valueField` (if provided). + */ +export interface ComputePivotParams> { + /** + * Input records to pivot. Each record contributes to exactly one cell determined by + * its `rowFields` and `columnFields` key tuple. + */ + data: T[]; + + /** + * Ordered list of keys that form the row grouping key (tuple). Order matters; two + * records with the same values in this order will be grouped into the same row. + * Use an empty array to place all records into a single row. + */ + rowFields: (keyof T)[]; + + /** + * Ordered list of keys that form the column grouping key (tuple). Order matters; two + * records with the same values in this order will be grouped into the same column. 
+ * Use an empty array to place all records into a single column. + */ + columnFields: (keyof T)[]; + + /** + * Optional key whose values are aggregated to compute each cell's numeric value. + * Values are coerced using `Number(value)` and only finite numbers are included; + * non-numeric/NaN/Infinity are ignored. If omitted, the default aggregation computes + * counts of records per cell. + */ + valueField?: keyof T; + + /** + * Aggregation strategy applied per cell. Built-ins: `"count"` (default), `"sum"`, + * and `"avg"`. You may also pass a custom function that receives the array of + * numeric `values` (derived from `valueField`, if provided) and the raw `records` + * for the cell, and returns the number to display. + * @default "count" + */ + aggregator?: Aggregator; +} + +/** + * Compute pivot table structures from input data and configuration. + * + * Examples + * 1) Count per region × product (default aggregator is "count") + * ```ts + * const res = computePivot({ + * data: rows, + * rowFields: ['region'], + * columnFields: ['product'], + * }) + * ``` + * + * 2) Sum amounts per region × product + * ```ts + * const res = computePivot({ + * data: rows, + * rowFields: ['region'], + * columnFields: ['product'], + * valueField: 'amount', + * aggregator: 'sum', + * }) + * ``` + * + * 3) Average amounts per region × product + * ```ts + * const res = computePivot({ + * data: rows, + * rowFields: ['region'], + * columnFields: ['product'], + * valueField: 'amount', + * aggregator: 'avg', + * }) + * ``` + * + * 4) Multiple column fields (composite columns) + * ```ts + * const res = computePivot({ + * data: rows, + * rowFields: ['region'], + * columnFields: ['product', 'quarter'], + * valueField: 'amount', + * aggregator: 'sum', + * }) + * // Each column is the tuple [product, quarter] + * ``` + * + * 5) Custom aggregator (e.g., max) + * ```ts + * const res = computePivot({ + * data: rows, + * rowFields: ['region'], + * columnFields: ['product'], + * valueField: 
'amount', + * aggregator: (values) => values.length ? Math.max(...values) : 0, + * }) + * ``` + * + * 6) Single grand total (no rows/cols) + * ```ts + * const res = computePivot({ + * data: rows, + * rowFields: [], + * columnFields: [], + * valueField: 'amount', + * aggregator: 'sum', + * }) + * // res.grandTotal is the total sum + * ``` + * + * 7) Excel-style: multiple value fields alongside multiple column fields (recipe) + * - Run computePivot once per metric (valueField + aggregator) and read values side-by-side. + * ```ts + * const metrics = [ + * { key: 'Sum of Sales', valueField: 'sales' as const, aggregator: 'sum' as const }, + * { key: 'Sum of Quantity', valueField: 'quantity' as const, aggregator: 'sum' as const }, + * ] + * + * const pivotsByMetric = Object.fromEntries( + * metrics.map((m) => [ + * m.key, + * computePivot({ + * data: rows, + * rowFields: ['year'], + * columnFields: ['region'], + * valueField: m.valueField, + * aggregator: m.aggregator, + * }), + * ]), + * ) as Record>>; + * + * // In the UI, iterate row/col keys from one pivot and render each metric column side-by-side: + * // for (const rTuple of pivotsByMetric['Sum of Sales'].rowKeyTuples) { + * // const rKey = rTuple.join('||'); + * // for (const cTuple of pivotsByMetric['Sum of Sales'].colKeyTuples) { + * // const cKey = cTuple.join('||'); + * // const sales = pivotsByMetric['Sum of Sales'].cells[rKey]?.[cKey]?.value ?? 0; + * // const qty = pivotsByMetric['Sum of Quantity'].cells[rKey]?.[cKey]?.value ?? 0; + * // // Render: [Year, Region] -> Sales, Quantity + * // } + * // } + * ``` + */ +export function computePivot>({ + data, + rowFields, + columnFields, + valueField, + aggregator = "count", +}: ComputePivotParams): PivotComputationResult { + // Filter out records that do not have defined values for all rowFields. + // This avoids creating a row key of "undefined" and ensures such records + // are not returned as part of the cells/row totals. 
+ const dataWithDefinedRows = data.filter((rec) => + rowFields.every((f) => rec[f] !== undefined) + ); + const rowKeyTuples: unknown[][] = []; + const rowKeySet = new Set(); + const colKeyTuples: unknown[][] = []; + const colKeySet = new Set(); + + for (const rec of dataWithDefinedRows) { + const rTuple = getTuple(rec, rowFields); + const rKey = toKey(rTuple); + if (!rowKeySet.has(rKey)) { + rowKeySet.add(rKey); + rowKeyTuples.push(rTuple); + } + const cTuple = getTuple(rec, columnFields); + const cKey = toKey(cTuple); + if (!colKeySet.has(cKey)) { + colKeySet.add(cKey); + colKeyTuples.push(cTuple); + } + } + + // Deterministic ordering + rowKeyTuples.sort((a, b) => toKey(a).localeCompare(toKey(b))); + colKeyTuples.sort((a, b) => toKey(a).localeCompare(toKey(b))); + + type CellAgg = { value: number; records: T[] }; + const cells: Record> = {}; + const rowTotals: Record = {}; + const colTotals: Record = {}; + + for (const rTuple of rowKeyTuples) { + const rKey = toKey(rTuple); + cells[rKey] = {}; + rowTotals[rKey] = 0; + } + for (const cTuple of colKeyTuples) { + const cKey = toKey(cTuple); + colTotals[cKey] = 0; + } + + // Partition records per cell + const cellRecords: Record> = {}; + for (const rec of dataWithDefinedRows) { + const rKey = toKey(getTuple(rec, rowFields)); + const cKey = toKey(getTuple(rec, columnFields)); + if (!cellRecords[rKey]) cellRecords[rKey] = {}; + if (!cellRecords[rKey][cKey]) cellRecords[rKey][cKey] = []; + cellRecords[rKey][cKey].push(rec); + } + + for (const rKey of Object.keys(cellRecords)) { + for (const cKey of Object.keys(cellRecords[rKey])) { + const records = cellRecords[rKey][cKey]; + const values: number[] = []; + if (valueField != null) { + for (const rec of records) { + const v = getNumber(rec[valueField]); + if (v != null) values.push(v); + } + } + const value = aggregate(values, records, aggregator); + cells[rKey][cKey] = { value, records }; + rowTotals[rKey] += value; + colTotals[cKey] += value; + } + } + + // Grand total 
should follow the same aggregation semantics over the entire dataset + // rather than summing per-row/per-column aggregates (which can be incorrect for + // non-additive aggregations like "avg"). + let grandTotal: number; + { + const allRecords = dataWithDefinedRows; + const allValues: number[] = []; + if (valueField != null) { + for (const rec of allRecords) { + const v = getNumber(rec[valueField]); + if (v != null) allValues.push(v); + } + } + grandTotal = aggregate(allValues, allRecords, aggregator); + } + + return { rowKeyTuples, colKeyTuples, cells, rowTotals, colTotals, grandTotal }; +} diff --git a/vite-app/vite.config.ts b/vite-app/vite.config.ts index 78f64d17..3d9db9bd 100644 --- a/vite-app/vite.config.ts +++ b/vite-app/vite.config.ts @@ -1,4 +1,4 @@ -import { defineConfig } from 'vite' +import { defineConfig } from 'vitest/config' import react from '@vitejs/plugin-react' import tailwindcss from '@tailwindcss/vite' @@ -13,5 +13,13 @@ export default defineConfig({ server: { port: 5173, host: true + }, + test: { + globals: true, + environment: 'node', + include: ['src/**/*.test.{ts,tsx}'], + coverage: { + provider: 'v8' + } } -}) \ No newline at end of file +})