From b023d0b60786eceafcbf71ee65dabe9efbb5f2b2 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 22 Mar 2026 20:25:01 +0800 Subject: [PATCH 01/15] Add missing deps for more models --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 36b6a33..1c307bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "numpy>=1.24.0", "torch>=2.4.0", "transformers>=4.40.0", + "bitsandbytes>=0.46.1", "accelerate>=0.26.0", "huggingface-hub>=0.23.0", "datasets>=2.14.0", @@ -32,6 +33,8 @@ dependencies = [ "wonderwords>=2.2.0", "openai>=1.0.0", "tiktoken>=0.7.0", + "python-dotenv>=1.0.0", + "auto-gptq>=0.5.0", ] [project.optional-dependencies] From 36c1c031b7cd20e4d76ddc83fea6f74973bc6b5f Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 22 Mar 2026 20:28:30 +0800 Subject: [PATCH 02/15] Correct model classification --- src/llm_dna/models/ModelLoader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llm_dna/models/ModelLoader.py b/src/llm_dna/models/ModelLoader.py index f3c8b65..b2c52ee 100644 --- a/src/llm_dna/models/ModelLoader.py +++ b/src/llm_dna/models/ModelLoader.py @@ -82,7 +82,9 @@ def _detect_model_type(self, model_path_or_name: str) -> str: "cohere/command", "perplexity/", ] - if any(model_lower.startswith(prefix) for prefix in openrouter_prefixes): + huggingface_prefixes_openai = "openai/gpt-oss" + + if any(model_lower.startswith(prefix) for prefix in openrouter_prefixes) and not model_lower.startswith(huggingface_prefixes_openai): return "openrouter" # Check for Google Gemini model names From e6efb18b60254f4981253005fbb0a0183014a794 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 22 Mar 2026 20:39:45 +0800 Subject: [PATCH 03/15] More deps --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1c307bd..f561357 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,9 @@ dependencies = [ "tiktoken>=0.7.0", "python-dotenv>=1.0.0", "auto-gptq>=0.5.0", + "optimum>=1.16.0", + "mamba-ssm>=1.0.0", + "compressed-tensors>=0.1.0", ] [project.optional-dependencies] From 353c144352888df2ba7c664ba862307fa163a72b Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 22 Mar 2026 21:08:03 +0800 Subject: [PATCH 04/15] More deps --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f561357..6890fb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ dependencies = [ "optimum>=1.16.0", "mamba-ssm>=1.0.0", "compressed-tensors>=0.1.0", + "mlx>=0.10.0", + "mlx-lm>=0.10.0", ] [project.optional-dependencies] From c7e54db0f93f3875bfbcc47f642e921e0daa7945 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 22 Mar 2026 21:52:19 +0800 Subject: [PATCH 05/15] Update dep --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6890fb5..4497cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "compressed-tensors>=0.1.0", "mlx>=0.10.0", "mlx-lm>=0.10.0", + "timm>=0.9.0", ] [project.optional-dependencies] From 7e9d0f5954507c9ea14b035fdb3e0b4b2cbf4d61 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 18:49:15 +0800 Subject: [PATCH 06/15] Refactor deps --- README.md | 21 +++++++++++++++++++++ pyproject.toml | 30 +++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8d66ea7..796b863 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,27 @@ pip install llm-dna Use `llm-dna` for install/package naming, and `llm_dna` for Python imports. +Optional extras are available for model families that need additional runtime dependencies: + +```bash +# Apple Silicon / MLX-backed models +pip install "llm-dna[apple]" + +# Quantized HuggingFace models (bitsandbytes, GPTQ, compressed-tensors, optimum) +pip install "llm-dna[quantization]" + +# Architecture-specific model families such as Mamba or TIMM-backed models +pip install "llm-dna[model_families]" + +# Everything above +pip install "llm-dna[full]" +``` + +Extra guidance: +- `apple`: required for MLX and `mlx-community/*` style model families on Apple Silicon. +- `quantization`: required for many GPTQ, bitsandbytes, and compressed-tensors model families. +- `model_families`: required for specific architectures whose modeling code depends on packages like `mamba-ssm` or `timm`. + ## Quick Start ```python diff --git a/pyproject.toml b/pyproject.toml index 4497cba..608182f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ dependencies = [ "numpy>=1.24.0", "torch>=2.4.0", "transformers>=4.40.0", - "bitsandbytes>=0.46.1", "accelerate>=0.26.0", "huggingface-hub>=0.23.0", "datasets>=2.14.0", @@ -34,18 +33,39 @@ dependencies = [ "openai>=1.0.0", "tiktoken>=0.7.0", "python-dotenv>=1.0.0", +] + +[project.optional-dependencies] +model_scraping = ["requests>=2.31.0"] + +apple = [ + "mlx>=0.10.0", + "mlx-lm>=0.10.0", +] + +quantization = [ + "bitsandbytes>=0.46.1", "auto-gptq>=0.5.0", "optimum>=1.16.0", - "mamba-ssm>=1.0.0", "compressed-tensors>=0.1.0", +] + +model_families = [ + "mamba-ssm>=1.0.0", + "timm>=0.9.0", +] + +full = [ "mlx>=0.10.0", "mlx-lm>=0.10.0", + "bitsandbytes>=0.46.1", + "auto-gptq>=0.5.0", + "optimum>=1.16.0", + "compressed-tensors>=0.1.0", + "mamba-ssm>=1.0.0", "timm>=0.9.0", ] -[project.optional-dependencies] -model_scraping = ["requests>=2.31.0"] - vllm = ["vllm>=0.4.0"] dev = [ From 2e939e8242e4cb127b4cdf95083829a8d2533c90 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 18:50:34 +0800 Subject: [PATCH 07/15] Improve model loader classification --- src/llm_dna/models/ModelLoader.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/llm_dna/models/ModelLoader.py b/src/llm_dna/models/ModelLoader.py index b2c52ee..9a10b40 100644 --- a/src/llm_dna/models/ModelLoader.py +++ b/src/llm_dna/models/ModelLoader.py @@ -75,16 +75,15 @@ def _detect_model_type(self, model_path_or_name: str) -> str: "openrouter:", "anthropic/claude-", "deepseek/", - "openai/gpt-", + "openai/gpt-3", + "openai/gpt-4", "google/gemini-", - "z-ai/", "x-ai/grok-", "cohere/command", "perplexity/", ] - huggingface_prefixes_openai = "openai/gpt-oss" - if any(model_lower.startswith(prefix) for prefix in openrouter_prefixes) and not model_lower.startswith(huggingface_prefixes_openai): + if any(model_lower.startswith(prefix) for prefix in openrouter_prefixes): return "openrouter" # Check for Google Gemini model names From 38a3450acb33765eb4d7955e671dd9cdcaf411b4 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 18:51:19 +0800 Subject: [PATCH 08/15] Protect sensitive info like token --- src/llm_dna/core/extraction.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/llm_dna/core/extraction.py b/src/llm_dna/core/extraction.py index de46f42..a953fc1 100644 --- a/src/llm_dna/core/extraction.py +++ b/src/llm_dna/core/extraction.py @@ -629,6 +629,13 @@ def main(): logging.info(f"DNA signature saved to: {output_path}") # Save summary + # Create safe args dict without sensitive information + safe_args = vars(args).copy() + # Remove sensitive fields that should not be saved to output files + sensitive_fields = ['token', 'OPENROUTER_API_KEY', 'OPENAI_API_KEY'] + for field in sensitive_fields: + safe_args.pop(field, None) + summary = { "model_name": args.model_name, "dataset": args.dataset, @@ -641,7 +648,7 @@ def main(): "signature_stats": signature.get_statistics(), "metadata": signature.metadata.__dict__, "output_file": str(output_path), - "args": vars(args) + "args": safe_args } # Keep summary filename model-only as well From 5a89dac6531cbb3303ab2aa06253098eec44fe70 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 19:20:48 +0800 Subject: [PATCH 09/15] Fix: save responses.json for single model dna generation as well --- src/llm_dna/api.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/llm_dna/api.py b/src/llm_dna/api.py index 858e9a0..c5a60e9 100644 --- a/src/llm_dna/api.py +++ b/src/llm_dna/api.py @@ -642,6 +642,31 @@ def calc_dna(config: DNAExtractionConfig) -> DNAExtractionResult: ) vector = _validate_signature(signature) + if config.save: + cached_responses = _load_cached_responses(response_path, expected_count=len(probe_texts)) + if cached_responses is None: + logging.info( + "Generating and saving responses for '%s' to %s to align single-model caching with batch mode.", + config.model_name, + response_path, + ) + responses = _generate_responses_for_model( + model_name=config.model_name, + config=config, + model_meta=model_meta, + probe_texts=probe_texts, + device=resolved_device, + resolved_token=resolved_token, + incremental_save_path=response_path, + ) + _save_response_cache( + path=response_path, + model_name=config.model_name, + dataset=config.dataset, + prompts=probe_texts, + responses=responses, + ) + elapsed_seconds = time.time() - start_time output_path: Optional[Path] = None From b26887c6e5de313eeea6e564f6bfa2ce92a6dde6 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 19:48:56 +0800 Subject: [PATCH 10/15] Add awq dep --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b41e62b..9c9622a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ apple = [ quantization = [ "bitsandbytes>=0.46.1", + "autoawq>=0.2.0", "auto-gptq>=0.5.0", "optimum>=1.16.0", "compressed-tensors>=0.1.0", @@ -59,6 +60,7 @@ full = [ "mlx>=0.10.0", "mlx-lm>=0.10.0", "bitsandbytes>=0.46.1", + "autoawq>=0.2.0", "auto-gptq>=0.5.0", "optimum>=1.16.0", "compressed-tensors>=0.1.0", From 4c819e2d8200f4d0cdf05910e991b4fa939e2a05 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 20:51:23 +0800 Subject: [PATCH 11/15] Change cli to load env properly --- src/llm_dna/cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/llm_dna/cli.py b/src/llm_dna/cli.py index 10d203f..ef98247 100644 --- a/src/llm_dna/cli.py +++ b/src/llm_dna/cli.py @@ -8,6 +8,8 @@ from pathlib import Path from typing import Iterable, List, Optional +from dotenv import load_dotenv + def _load_models_from_file(path: Path) -> List[str]: """Load model names from a file, one per line.""" @@ -183,6 +185,10 @@ def main(argv: Optional[Iterable[str]] = None) -> int: """Main CLI entrypoint for DNA extraction.""" from .api import DNAExtractionConfig, calc_dna, calc_dna_parallel + project_root = Path(__file__).resolve().parents[2] + load_dotenv(project_root / ".env", override=False) + load_dotenv(override=False) + args = parse_arguments(argv) # Resolve model names From ce7cc7722eb392863227fbf73b5b443056d606a1 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sat, 28 Mar 2026 20:51:43 +0800 Subject: [PATCH 12/15] Directly read from openrouter list to prevent misclassification --- src/llm_dna/models/ModelLoader.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/llm_dna/models/ModelLoader.py b/src/llm_dna/models/ModelLoader.py index 9a10b40..bde650d 100644 --- a/src/llm_dna/models/ModelLoader.py +++ b/src/llm_dna/models/ModelLoader.py @@ -3,6 +3,7 @@ """ import os +import json from typing import Optional, Dict, Any, Union from pathlib import Path import logging @@ -14,10 +15,34 @@ class ModelLoader: """Factory class for loading different types of LLMs.""" + _openrouter_model_ids: Optional[set[str]] = None def __init__(self, config_dict: Optional[Dict[str, Any]] = None): self.logger = logging.getLogger(__name__) self.config_dict = config_dict or {} + + @classmethod + def _load_openrouter_model_ids(cls) -> set[str]: + if cls._openrouter_model_ids is not None: + return cls._openrouter_model_ids + + model_ids: set[str] = set() + config_path = Path(__file__).resolve().parents[3] / "configs" / "openrouter_llm_list.jsonl" + try: + with config_path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + record = json.loads(line) + model_id = str(record.get("model_id", "")).strip().lower() + if model_id: + model_ids.add(model_id) + except Exception: + model_ids = set() + + cls._openrouter_model_ids = model_ids + return model_ids def load_model( self, @@ -86,6 +111,9 @@ def _detect_model_type(self, model_path_or_name: str) -> str: if any(model_lower.startswith(prefix) for prefix in openrouter_prefixes): return "openrouter" + if model_lower in self._load_openrouter_model_ids(): + return "openrouter" + # Check for Google Gemini model names gemini_prefixes = [ "gemini-", From 4ec8af6182a175d76690d9aaf1172b96865e5677 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 5 Apr 2026 20:13:09 +0800 Subject: [PATCH 13/15] Organise pyproject.toml --- pyproject.toml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9c9622a..d202f2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,8 @@ dependencies = [ model_scraping = ["requests>=2.31.0"] apple = [ - "mlx>=0.10.0", - "mlx-lm>=0.10.0", + "mlx>=0.10.0; sys_platform == 'darwin' and platform_machine == 'arm64'", + "mlx-lm>=0.10.0; sys_platform == 'darwin' and platform_machine == 'arm64'", ] quantization = [ @@ -52,20 +52,14 @@ quantization = [ ] model_families = [ - "mamba-ssm>=1.0.0", + "mamba-ssm>=1.0.0; sys_platform == 'linux'", "timm>=0.9.0", ] full = [ - "mlx>=0.10.0", - "mlx-lm>=0.10.0", - "bitsandbytes>=0.46.1", - "autoawq>=0.2.0", - "auto-gptq>=0.5.0", - "optimum>=1.16.0", - "compressed-tensors>=0.1.0", - "mamba-ssm>=1.0.0", - "timm>=0.9.0", + "llm-dna[apple]", + "llm-dna[quantization]", + "llm-dna[model_families]", ] vllm = ["vllm>=0.4.0"] From 1ac10eb99e5a9d43a839429782b3a6d92ab96394 Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 5 Apr 2026 20:16:05 +0800 Subject: [PATCH 14/15] Fix hardcode --- src/llm_dna/cli.py | 2 -- src/llm_dna/models/ModelLoader.py | 30 +----------------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/src/llm_dna/cli.py b/src/llm_dna/cli.py index ef98247..56a4d64 100644 --- a/src/llm_dna/cli.py +++ b/src/llm_dna/cli.py @@ -185,8 +185,6 @@ def main(argv: Optional[Iterable[str]] = None) -> int: """Main CLI entrypoint for DNA extraction.""" from .api import DNAExtractionConfig, calc_dna, calc_dna_parallel - project_root = Path(__file__).resolve().parents[2] - load_dotenv(project_root / ".env", override=False) load_dotenv(override=False) args = parse_arguments(argv) diff --git a/src/llm_dna/models/ModelLoader.py b/src/llm_dna/models/ModelLoader.py index bde650d..15ac4ba 100644 --- a/src/llm_dna/models/ModelLoader.py +++ b/src/llm_dna/models/ModelLoader.py @@ -3,7 +3,6 @@ """ import os -import json from typing import Optional, Dict, Any, Union from pathlib import Path import logging @@ -15,34 +14,10 @@ class ModelLoader: """Factory class for loading different types of LLMs.""" - _openrouter_model_ids: Optional[set[str]] = None def __init__(self, config_dict: Optional[Dict[str, Any]] = None): self.logger = logging.getLogger(__name__) self.config_dict = config_dict or {} - - @classmethod - def _load_openrouter_model_ids(cls) -> set[str]: - if cls._openrouter_model_ids is not None: - return cls._openrouter_model_ids - - model_ids: set[str] = set() - config_path = Path(__file__).resolve().parents[3] / "configs" / "openrouter_llm_list.jsonl" - try: - with config_path.open("r", encoding="utf-8") as handle: - for line in handle: - line = line.strip() - if not line: - continue - record = json.loads(line) - model_id = str(record.get("model_id", "")).strip().lower() - if model_id: - model_ids.add(model_id) - except Exception: - model_ids = set() - - cls._openrouter_model_ids = model_ids - return model_ids def load_model( self, @@ -103,17 +78,14 @@ def _detect_model_type(self, model_path_or_name: str) -> str: "openai/gpt-3", "openai/gpt-4", "google/gemini-", + "z-ai/", "x-ai/grok-", "cohere/command", "perplexity/", ] - if any(model_lower.startswith(prefix) for prefix in openrouter_prefixes): return "openrouter" - if model_lower in self._load_openrouter_model_ids(): - return "openrouter" - # Check for Google Gemini model names gemini_prefixes = [ "gemini-", From b7ee090bcacd8d5fcbcfbe0898d6a9c616009a0d Mon Sep 17 00:00:00 2001 From: yuqiannemo Date: Sun, 5 Apr 2026 21:14:34 +0800 Subject: [PATCH 15/15] Do not regenerate for single mode --- src/llm_dna/api.py | 140 ++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 102 deletions(-) diff --git a/src/llm_dna/api.py b/src/llm_dna/api.py index c5a60e9..ff6cac1 100644 --- a/src/llm_dna/api.py +++ b/src/llm_dna/api.py @@ -11,7 +11,6 @@ from datetime import datetime from dataclasses import asdict, dataclass, replace from pathlib import Path -from types import SimpleNamespace from typing import TYPE_CHECKING, Any, Dict, Optional import numpy as np @@ -310,6 +309,12 @@ def _load_cached_responses(path: Path, expected_count: int) -> Optional[list[str logging.warning("Failed to parse cached responses from %s: %s", path, exc) return None + if isinstance(payload, dict): + complete = payload.get("complete") + if complete is False: + logging.warning("Ignoring incomplete cached responses at %s", path) + return None + responses: list[str] if isinstance(payload, dict) and isinstance(payload.get("items"), list): responses = [str(item.get("response", "")) for item in payload["items"] if isinstance(item, dict)] @@ -322,6 +327,11 @@ def _load_cached_responses(path: Path, expected_count: int) -> Optional[list[str if not responses: return None + non_empty_count = sum(1 for response in responses if response.strip()) + if non_empty_count == 0: + logging.warning("Ignoring cached responses at %s because all responses are empty.", path) + return None + if len(responses) != expected_count: logging.warning( "Cached responses at %s have probe count mismatch (%s != %s); normalizing by truncating/padding.", @@ -537,11 +547,11 @@ def calc_dna(config: DNAExtractionConfig) -> DNAExtractionResult: signature: "DNASignature" vector: np.ndarray - is_api_mode = _is_api_parallel_mode(config, [config.model_name]) + if config.extractor_type != "embedding": + raise ValueError(f"Unsupported extractor_type for calc_dna: {config.extractor_type}") + response_path = _response_cache_path(config, config.model_name) - cached_responses: Optional[list[str]] = None - if is_api_mode and config.extractor_type == "embedding": - cached_responses = _load_cached_responses(response_path, expected_count=len(probe_texts)) + cached_responses = _load_cached_responses(response_path, expected_count=len(probe_texts)) if cached_responses is not None: logging.info( @@ -550,21 +560,22 @@ def calc_dna(config: DNAExtractionConfig) -> DNAExtractionResult: response_path, ) model_meta = _default_model_metadata(config.model_name) - signature, vector, _ = _extract_signature_from_text_responses( - model_name=config.model_name, - responses=cached_responses, - config=config, - model_meta=model_meta, - generation_device=resolved_device, - encoder_device=resolved_device, - ) - elif is_api_mode and config.extractor_type == "embedding": - # API model without cached responses: generate via API, then encode - logging.info( - "Generating responses for API model '%s' via provider API...", - config.model_name, - ) + responses = cached_responses + else: + if _is_api_model_type(config.model_type): + logging.info( + "Generating responses for API model '%s' via provider API...", + config.model_name, + ) + model_meta = _load_model_metadata_for_model(config.model_name, metadata_file, token=resolved_token) + is_generative = model_meta.get("architecture", {}).get("is_generative") + if is_generative is False: + arch_type = model_meta.get("architecture", {}).get("type") + raise ValueError( + f"Model '{config.model_name}' is non-generative (architecture={arch_type})." + ) + responses = _generate_responses_for_model( model_name=config.model_name, config=config, @@ -574,7 +585,6 @@ def calc_dna(config: DNAExtractionConfig) -> DNAExtractionResult: resolved_token=resolved_token, incremental_save_path=response_path if config.save else None, ) - # Save final response cache if config.save: _save_response_cache( path=response_path, @@ -583,89 +593,15 @@ def calc_dna(config: DNAExtractionConfig) -> DNAExtractionResult: prompts=probe_texts, responses=responses, ) - signature, vector, _ = _extract_signature_from_text_responses( - model_name=config.model_name, - responses=responses, - config=config, - model_meta=model_meta, - generation_device=resolved_device, - encoder_device=resolved_device, - ) - else: - # Non-API model: use hidden-state extraction - model_meta = _load_model_metadata_for_model(config.model_name, metadata_file, token=resolved_token) - - is_generative = model_meta.get("architecture", {}).get("is_generative") - if is_generative is False: - arch_type = model_meta.get("architecture", {}).get("type") - raise ValueError( - f"Model '{config.model_name}' is non-generative (architecture={arch_type})." - ) - - resolved_model_path = _resolve_model_path(config.model_path, model_meta) - - args = SimpleNamespace( - model_name=config.model_name, - model_path=resolved_model_path, - model_type=config.model_type, - dataset=config.dataset, - probe_set=config.probe_set, - max_samples=config.max_samples, - data_root=config.data_root, - extractor_type=config.extractor_type, - dna_dim=config.dna_dim, - reduction_method=config.reduction_method, - embedding_merge=config.embedding_merge, - max_length=config.max_length, - save_format="json", - output_dir=Path(config.output_dir), - load_in_8bit=config.load_in_8bit, - load_in_4bit=config.load_in_4bit, - no_quantization=config.no_quantization, - metadata_file=metadata_file, - token=resolved_token, - trust_remote_code=config.trust_remote_code, - device=resolved_device, - log_level=config.log_level, - random_seed=config.random_seed, - use_chat_template=config.use_chat_template, - ) - - signature = core.extract_dna_signature( - model_name=config.model_name, - model_path=resolved_model_path, - model_type=config.model_type, - probe_texts=probe_texts, - extractor_type=config.extractor_type, - model_metadata=model_meta, - args=args, - ) - vector = _validate_signature(signature) - if config.save: - cached_responses = _load_cached_responses(response_path, expected_count=len(probe_texts)) - if cached_responses is None: - logging.info( - "Generating and saving responses for '%s' to %s to align single-model caching with batch mode.", - config.model_name, - response_path, - ) - responses = _generate_responses_for_model( - model_name=config.model_name, - config=config, - model_meta=model_meta, - probe_texts=probe_texts, - device=resolved_device, - resolved_token=resolved_token, - incremental_save_path=response_path, - ) - _save_response_cache( - path=response_path, - model_name=config.model_name, - dataset=config.dataset, - prompts=probe_texts, - responses=responses, - ) + signature, vector, _ = _extract_signature_from_text_responses( + model_name=config.model_name, + responses=responses, + config=config, + model_meta=model_meta, + generation_device=resolved_device, + encoder_device=resolved_device, + ) elapsed_seconds = time.time() - start_time