From c5b148e1fe27ce52ae02f8bc56125dec08cabb33 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 13 Apr 2026 15:40:08 +0200 Subject: [PATCH 1/6] rules: introduce helper to parse features from parts --- capa/rules/__init__.py | 690 ++++++++++++++++++++++++++++------------- 1 file changed, 483 insertions(+), 207 deletions(-) diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 23fd0dd3c..88e8cf9fc 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -13,14 +13,14 @@ # limitations under the License. +import binascii +import collections +import copy import io +import logging import os import re -import copy import uuid -import logging -import binascii -import collections from enum import Enum from pathlib import Path @@ -33,26 +33,26 @@ # https://github.com/python/mypy/issues/1153 from backports.functools_lru_cache import lru_cache # type: ignore -from typing import Any, Union, Callable, Iterator, Optional, cast from dataclasses import asdict, dataclass +from typing import Any, Callable, Iterator, Optional, Union, cast -import yaml import pydantic +import yaml import yaml.parser -import capa.perf import capa.engine as ceng import capa.features -import capa.optimizer +import capa.features.basicblock import capa.features.com +import capa.features.common import capa.features.file import capa.features.insn -import capa.features.common -import capa.features.basicblock -from capa.engine import Statement, FeatureSet +import capa.optimizer +import capa.perf +from capa.engine import FeatureSet, Statement +from capa.features.address import Address from capa.features.com import ComType from capa.features.common import MAX_BYTES_FEATURE_SIZE, Feature -from capa.features.address import Address logger = logging.getLogger(__name__) @@ -138,7 +138,9 @@ def __repr__(self) -> str: elif self.dynamic: return f"dynamic-scope: {self.dynamic}" else: - raise ValueError("invalid rules class. 
at least one scope must be specified") + raise ValueError( + "invalid rules class. at least one scope must be specified" + ) @classmethod def from_dict(self, scopes: dict[str, str]) -> "Scopes": @@ -162,7 +164,9 @@ def from_dict(self, scopes: dict[str, str]) -> "Scopes": scopes_["dynamic"] = None if (not scopes_["static"]) and (not scopes_["dynamic"]): - raise InvalidRule("invalid scopes value. At least one scope must be specified") + raise InvalidRule( + "invalid scopes value. At least one scope must be specified" + ) # check that all the specified scopes are valid if scopes_["static"] and scopes_["static"] not in STATIC_SCOPES: @@ -356,13 +360,17 @@ def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Statement: guid_bytes = bytes.fromhex("".join(reordered_hex_pairs)) prefix = capa.features.com.COM_PREFIXES[com_type] symbol = prefix + com_name - com_features.append(capa.features.common.String(guid, f"{symbol} as GUID string")) - com_features.append(capa.features.common.Bytes(guid_bytes, f"{symbol} as bytes")) + com_features.append( + capa.features.common.String(guid, f"{symbol} as GUID string") + ) + com_features.append( + capa.features.common.Bytes(guid_bytes, f"{symbol} as bytes") + ) return ceng.Or(com_features) def parse_int(s: str) -> int: - if s.startswith("0x"): + if s.startswith(("0x", "-0x")): return int(s, 0x10) else: return int(s, 10) @@ -461,7 +469,9 @@ def parse_bytes(s: str) -> bytes: try: b = bytes.fromhex(s.replace(" ", "")) except binascii.Error: - raise InvalidRule(f'unexpected bytes value: must be a valid hex sequence: "{s}"') + raise InvalidRule( + f'unexpected bytes value: must be a valid hex sequence: "{s}"' + ) if len(b) > MAX_BYTES_FEATURE_SIZE: raise InvalidRule( @@ -495,7 +505,9 @@ def parse_description(s: Union[str, int, bytes], value_type: str, description=No if description == "": # sanity check: # there is an empty description, like `number: 10 =` - raise InvalidRule(f'unexpected value: "{s}", description cannot be 
empty') + raise InvalidRule( + f'unexpected value: "{s}", description cannot be empty' + ) else: # this is a string, but there is no description, # like: `api: CreateFileA` @@ -516,13 +528,18 @@ def parse_description(s: Union[str, int, bytes], value_type: str, description=No or value_type.startswith(("number/", "offset/")) or ( value_type.startswith("operand[") - and (value_type.endswith("].number") or value_type.endswith("].offset")) + and ( + value_type.endswith("].number") + or value_type.endswith("].offset") + ) ) ): try: value = parse_int(value) except ValueError: - raise InvalidRule(f'unexpected value: "{value}", must begin with numerical value') + raise InvalidRule( + f'unexpected value: "{value}", must begin with numerical value' + ) else: # the value might be a number, like: `number: 10` @@ -553,7 +570,9 @@ def pop_statement_description_entry(d): return None # identify child of form '{ "description": }' - descriptions = list(filter(lambda c: isinstance(c, dict) and len(c) == 1 and "description" in c, d)) + descriptions = list( + filter(lambda c: isinstance(c, dict) and len(c) == 1 and "description" in c, d) + ) if len(descriptions) > 1: raise InvalidRule("statements can only have one description") @@ -617,7 +636,9 @@ def is_subscope_compatible(scope: Scope | None, subscope: Scope) -> bool: elif subscope in DYNAMIC_SCOPE_ORDER: try: - return DYNAMIC_SCOPE_ORDER.index(subscope) >= DYNAMIC_SCOPE_ORDER.index(scope) + return DYNAMIC_SCOPE_ORDER.index(subscope) >= DYNAMIC_SCOPE_ORDER.index( + scope + ) except ValueError: return False @@ -625,121 +646,16 @@ def is_subscope_compatible(scope: Scope | None, subscope: Scope) -> bool: raise ValueError("unexpected scope") -def build_statements(d, scopes: Scopes): - if len(d.keys()) > 2: - raise InvalidRule("too many statements") - - key = list(d.keys())[0] - description = pop_statement_description_entry(d[key]) - if key == "and": - return ceng.And(unique(build_statements(dd, scopes) for dd in d[key]), 
description=description) - elif key == "or": - return ceng.Or(unique(build_statements(dd, scopes) for dd in d[key]), description=description) - elif key == "not": - if len(d[key]) != 1: - raise InvalidRule("not statement must have exactly one child statement") - return ceng.Not(build_statements(d[key][0], scopes), description=description) - elif key.endswith(" or more"): - count = int(key[: -len("or more")]) - return ceng.Some(count, unique(build_statements(dd, scopes) for dd in d[key]), description=description) - elif key == "optional": - # `optional` is an alias for `0 or more` - # which is useful for documenting behaviors, - # like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`. - return ceng.Some(0, unique(build_statements(dd, scopes) for dd in d[key]), description=description) - - elif key == "process": - if not is_subscope_compatible(scopes.dynamic, Scope.PROCESS): - raise InvalidRule("`process` subscope supported only for `file` scope") - - if len(d[key]) != 1: - raise InvalidRule("subscope must have exactly one child statement") - - return ceng.Subscope( - Scope.PROCESS, build_statements(d[key][0], Scopes(dynamic=Scope.PROCESS)), description=description - ) - - elif key == "thread": - if not is_subscope_compatible(scopes.dynamic, Scope.THREAD): - raise InvalidRule("`thread` subscope supported only for the `process` scope") - - if len(d[key]) != 1: - raise InvalidRule("subscope must have exactly one child statement") - - return ceng.Subscope( - Scope.THREAD, build_statements(d[key][0], Scopes(dynamic=Scope.THREAD)), description=description - ) - - elif key == "span of calls": - if not is_subscope_compatible(scopes.dynamic, Scope.SPAN_OF_CALLS): - raise InvalidRule("`span of calls` subscope supported only for the `process` and `thread` scopes") - - if len(d[key]) != 1: - raise InvalidRule("subscope must have exactly one child statement") - - return ceng.Subscope( - Scope.SPAN_OF_CALLS, - 
build_statements(d[key][0], Scopes(dynamic=Scope.SPAN_OF_CALLS)), - description=description, - ) - - elif key == "call": - if not is_subscope_compatible(scopes.dynamic, Scope.CALL): - raise InvalidRule("`call` subscope supported only for the `process`, `thread`, and `call` scopes") - - if len(d[key]) != 1: - raise InvalidRule("subscope must have exactly one child statement") - - return ceng.Subscope( - Scope.CALL, build_statements(d[key][0], Scopes(dynamic=Scope.CALL)), description=description - ) - - elif key == "function": - if not is_subscope_compatible(scopes.static, Scope.FUNCTION): - raise InvalidRule("`function` subscope supported only for `file` scope") - - if len(d[key]) != 1: - raise InvalidRule("subscope must have exactly one child statement") - - return ceng.Subscope( - Scope.FUNCTION, build_statements(d[key][0], Scopes(static=Scope.FUNCTION)), description=description - ) - - elif key == "basic block": - if not is_subscope_compatible(scopes.static, Scope.BASIC_BLOCK): - raise InvalidRule("`basic block` subscope supported only for `function` scope") - - if len(d[key]) != 1: - raise InvalidRule("subscope must have exactly one child statement") - - return ceng.Subscope( - Scope.BASIC_BLOCK, build_statements(d[key][0], Scopes(static=Scope.BASIC_BLOCK)), description=description - ) - - elif key == "instruction": - if not is_subscope_compatible(scopes.static, Scope.INSTRUCTION): - raise InvalidRule("`instruction` subscope supported only for `function` and `basic block` scope") - - if len(d[key]) == 1: - statements = build_statements(d[key][0], Scopes(static=Scope.INSTRUCTION)) - else: - # for instruction subscopes, we support a shorthand in which the top level AND is implied. 
- # the following are equivalent: - # - # - instruction: - # - and: - # - arch: i386 - # - mnemonic: cmp - # - # - instruction: - # - arch: i386 - # - mnemonic: cmp - # - statements = ceng.And(unique(build_statements(dd, Scopes(static=Scope.INSTRUCTION)) for dd in d[key])) - - return ceng.Subscope(Scope.INSTRUCTION, statements, description=description) +def build_feature( + key: str, initial_value: str | int, initial_description: str | None = None +) -> Feature | ceng.Range | ceng.Statement: + """ + from a key-value pair, like ("number": "12 = Foo"), return a Feature (or Range or Statement). + parses the description from the value, or uses the initial_description if provided. - elif key.startswith("count(") and key.endswith(")"): + returns: Feature usually, or Range for count(...) features, or Statement for COM-derived features. + """ + if key.startswith("count(") and key.endswith(")"): # e.g.: # # count(basic block) @@ -767,6 +683,7 @@ def build_statements(d, scopes: Scopes): value, description = parse_description(arg, term) if term == "api": + assert isinstance(value, str) value = trim_dll_part(value) feature = Feature(value, description=description) @@ -780,77 +697,95 @@ def build_statements(d, scopes: Scopes): feature = Feature(arg) else: feature = Feature() - ensure_feature_valid_for_scopes(scopes, feature) - count = d[key] + count = initial_value if isinstance(count, int): - return ceng.Range(feature, min=count, max=count, description=description) + return ceng.Range( + feature, min=count, max=count, description=initial_description + ) elif count.endswith(" or more"): min = parse_int(count[: -len(" or more")]) max = None - return ceng.Range(feature, min=min, max=max, description=description) + return ceng.Range( + feature, min=min, max=max, description=initial_description + ) elif count.endswith(" or fewer"): min = None max = parse_int(count[: -len(" or fewer")]) - return ceng.Range(feature, min=min, max=max, description=description) + return ceng.Range( +
feature, min=min, max=max, description=initial_description + ) elif count.startswith("("): min, max = parse_range(count) - return ceng.Range(feature, min=min, max=max, description=description) + return ceng.Range( + feature, min=min, max=max, description=initial_description + ) else: raise InvalidRule(f"unexpected range: {count}") - elif key == "string" and not isinstance(d[key], str): - raise InvalidRule(f"ambiguous string value {d[key]}, must be defined as explicit string") + + elif key == "string" and not isinstance(initial_value, str): + raise InvalidRule( + f"ambiguous string value {initial_value}, must be defined as explicit string" + ) elif key.startswith("operand[") and key.endswith("].number"): - index = key[len("operand[") : -len("].number")] try: - index = int(index) + index = int(key[len("operand[") : -len("].number")]) except ValueError as e: raise InvalidRule("operand index must be an integer") from e - value, description = parse_description(d[key], key, d.get("description")) + value, description = parse_description( + initial_value, key, description=initial_description + ) assert isinstance(value, int) try: - feature = capa.features.insn.OperandNumber(index, value, description=description) + feature = capa.features.insn.OperandNumber( + index, value, description=description + ) except ValueError as e: raise InvalidRule(str(e)) from e - ensure_feature_valid_for_scopes(scopes, feature) return feature elif key.startswith("operand[") and key.endswith("].offset"): - index = key[len("operand[") : -len("].offset")] try: - index = int(index) + index = int(key[len("operand[") : -len("].offset")]) except ValueError as e: raise InvalidRule("operand index must be an integer") from e - value, description = parse_description(d[key], key, d.get("description")) + value, description = parse_description( + initial_value, key, description=initial_description + ) assert isinstance(value, int) try: - feature = capa.features.insn.OperandOffset(index, value, 
description=description) + feature = capa.features.insn.OperandOffset( + index, value, description=description + ) except ValueError as e: raise InvalidRule(str(e)) from e - ensure_feature_valid_for_scopes(scopes, feature) return feature elif ( - (key == "os" and d[key] not in capa.features.common.VALID_OS) - or (key == "format" and d[key] not in capa.features.common.VALID_FORMAT) - or (key == "arch" and d[key] not in capa.features.common.VALID_ARCH) + (key == "os" and initial_value not in capa.features.common.VALID_OS) + or (key == "format" and initial_value not in capa.features.common.VALID_FORMAT) + or (key == "arch" and initial_value not in capa.features.common.VALID_ARCH) ): - raise InvalidRule(f"unexpected {key} value {d[key]}") + raise InvalidRule(f"unexpected {key} value {initial_value}") elif key.startswith("property/"): access = key[len("property/") :] if access not in capa.features.common.VALID_FEATURE_ACCESS: raise InvalidRule(f"unexpected {key} access {access}") - value, description = parse_description(d[key], key, d.get("description")) + value, description = parse_description( + initial_value, key, description=initial_description + ) + assert isinstance(value, str) try: - feature = capa.features.insn.Property(value, access=access, description=description) + feature = capa.features.insn.Property( + value, access=access, description=description + ) except ValueError as e: raise InvalidRule(str(e)) from e - ensure_feature_valid_for_scopes(scopes, feature) return feature elif key.startswith("com/"): @@ -859,21 +794,259 @@ def build_statements(d, scopes: Scopes): com_type = ComType(com_type_name) except ValueError: raise InvalidRule(f"unexpected COM type: {com_type_name}") - value, description = parse_description(d[key], key, d.get("description")) + value, description = parse_description( + initial_value, key, description=initial_description + ) + assert isinstance(value, str) return translate_com_feature(value, com_type) else: Feature = parse_feature(key) 
- value, description = parse_description(d[key], key, d.get("description")) - - if key == "api": - value = trim_dll_part(value) + value, description = parse_description( + initial_value, key, description=initial_description + ) try: - feature = Feature(value, description=description) + match Feature: + case ( + capa.features.insn.OperandNumber | capa.features.insn.OperandOffset + ): + raise RuntimeError("should be impossible") + + case capa.features.insn.Offset | capa.features.insn.Number: + assert isinstance(value, int) + return Feature(value, description=description) + + case capa.features.insn.API: + assert isinstance(value, str) + # users can specify an API name with or without the DLL part (e.g. `CreateFileA` or `kernel32.CreateFileA`) + # and capa matches only the API name part, not the DLL part. + # the DLL name is ignored, it's essentially just for human-oriented documentation. + # see #1824 + value = trim_dll_part(value) + return Feature(value, description=description) + + case capa.features.insn.Mnemonic: + assert isinstance(value, str) + return Feature(value, description=description) + + case capa.features.basicblock.BasicBlock: + return Feature(description=description) + + case ( + capa.features.file.Export + | capa.features.file.Import + | capa.features.file.Section + | capa.features.file.FunctionName + ): + assert isinstance(value, str) + return Feature(value, description=description) + + case ( + capa.features.common.MatchedRule + | capa.features.common.Characteristic + ): + assert isinstance(value, str) + return Feature(value, description=description) + + case ( + capa.features.common.StringFactory | capa.features.common.Substring + ): + assert isinstance(value, str) + return Feature(value, description=description) + + case ( + capa.features.common.Class + | capa.features.common.Namespace + | capa.features.insn.Property + ): + assert isinstance(value, str) + return Feature(value, description=description) + + case ( + capa.features.common.Arch + |
capa.features.common.OS + | capa.features.common.Format + ): + assert isinstance(value, str) + return Feature(value, description=description) + + case capa.features.common.Bytes: + assert isinstance(value, bytes) + return Feature(value, description=description) + + case _ as unreachable: + assert_never(unreachable) except ValueError as e: raise InvalidRule(str(e)) from e - ensure_feature_valid_for_scopes(scopes, feature) + + +def build_statements(d, scopes: Scopes): + if len(d.keys()) > 2: + raise InvalidRule("too many statements") + + key = list(d.keys())[0] + description = pop_statement_description_entry(d[key]) + if key == "and": + return ceng.And( + unique(build_statements(dd, scopes) for dd in d[key]), + description=description, + ) + elif key == "or": + return ceng.Or( + unique(build_statements(dd, scopes) for dd in d[key]), + description=description, + ) + elif key == "not": + if len(d[key]) != 1: + raise InvalidRule("not statement must have exactly one child statement") + return ceng.Not(build_statements(d[key][0], scopes), description=description) + elif key.endswith(" or more"): + count = int(key[: -len("or more")]) + return ceng.Some( + count, + unique(build_statements(dd, scopes) for dd in d[key]), + description=description, + ) + elif key == "optional": + # `optional` is an alias for `0 or more` + # which is useful for documenting behaviors, + # like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`. 
+ return ceng.Some( + 0, + unique(build_statements(dd, scopes) for dd in d[key]), + description=description, + ) + + elif key == "process": + if not is_subscope_compatible(scopes.dynamic, Scope.PROCESS): + raise InvalidRule("`process` subscope supported only for `file` scope") + + if len(d[key]) != 1: + raise InvalidRule("subscope must have exactly one child statement") + + return ceng.Subscope( + Scope.PROCESS, + build_statements(d[key][0], Scopes(dynamic=Scope.PROCESS)), + description=description, + ) + + elif key == "thread": + if not is_subscope_compatible(scopes.dynamic, Scope.THREAD): + raise InvalidRule( + "`thread` subscope supported only for the `process` scope" + ) + + if len(d[key]) != 1: + raise InvalidRule("subscope must have exactly one child statement") + + return ceng.Subscope( + Scope.THREAD, + build_statements(d[key][0], Scopes(dynamic=Scope.THREAD)), + description=description, + ) + + elif key == "span of calls": + if not is_subscope_compatible(scopes.dynamic, Scope.SPAN_OF_CALLS): + raise InvalidRule( + "`span of calls` subscope supported only for the `process` and `thread` scopes" + ) + + if len(d[key]) != 1: + raise InvalidRule("subscope must have exactly one child statement") + + return ceng.Subscope( + Scope.SPAN_OF_CALLS, + build_statements(d[key][0], Scopes(dynamic=Scope.SPAN_OF_CALLS)), + description=description, + ) + + elif key == "call": + if not is_subscope_compatible(scopes.dynamic, Scope.CALL): + raise InvalidRule( + "`call` subscope supported only for the `process`, `thread`, and `call` scopes" + ) + + if len(d[key]) != 1: + raise InvalidRule("subscope must have exactly one child statement") + + return ceng.Subscope( + Scope.CALL, + build_statements(d[key][0], Scopes(dynamic=Scope.CALL)), + description=description, + ) + + elif key == "function": + if not is_subscope_compatible(scopes.static, Scope.FUNCTION): + raise InvalidRule("`function` subscope supported only for `file` scope") + + if len(d[key]) != 1: + raise 
InvalidRule("subscope must have exactly one child statement") + + return ceng.Subscope( + Scope.FUNCTION, + build_statements(d[key][0], Scopes(static=Scope.FUNCTION)), + description=description, + ) + + elif key == "basic block": + if not is_subscope_compatible(scopes.static, Scope.BASIC_BLOCK): + raise InvalidRule( + "`basic block` subscope supported only for `function` scope" + ) + + if len(d[key]) != 1: + raise InvalidRule("subscope must have exactly one child statement") + + return ceng.Subscope( + Scope.BASIC_BLOCK, + build_statements(d[key][0], Scopes(static=Scope.BASIC_BLOCK)), + description=description, + ) + + elif key == "instruction": + if not is_subscope_compatible(scopes.static, Scope.INSTRUCTION): + raise InvalidRule( + "`instruction` subscope supported only for `function` and `basic block` scope" + ) + + if len(d[key]) == 1: + statements = build_statements(d[key][0], Scopes(static=Scope.INSTRUCTION)) + else: + # for instruction subscopes, we support a shorthand in which the top level AND is implied. + # the following are equivalent: + # + # - instruction: + # - and: + # - arch: i386 + # - mnemonic: cmp + # + # - instruction: + # - arch: i386 + # - mnemonic: cmp + # + statements = ceng.And( + unique( + build_statements(dd, Scopes(static=Scope.INSTRUCTION)) + for dd in d[key] + ) + ) + + return ceng.Subscope(Scope.INSTRUCTION, statements, description=description) + + else: + initial_value = d[key] + initial_description = d.get("description") + + feature = build_feature(key, initial_value, initial_description) + + # for count(...) features, validate the inner feature rather than the Range wrapper. + # for com/... 
features, translate_com_feature returns a compound Or(String, Bytes) Statement; + if isinstance(feature, ceng.Range): + ensure_feature_valid_for_scopes(scopes, feature.child) + elif isinstance(feature, Feature): + ensure_feature_valid_for_scopes(scopes, feature) + return feature @@ -886,7 +1059,9 @@ def second(s: list[Any]) -> Any: class Rule: - def __init__(self, name: str, scopes: Scopes, statement: Statement, meta, definition=""): + def __init__( + self, name: str, scopes: Scopes, statement: Statement, meta, definition="" + ): super().__init__() self.name = name self.scopes = scopes @@ -1062,13 +1237,19 @@ def from_dict(cls, d: dict[str, Any], definition: str) -> "Rule": # each rule has two scopes, a static-flavor scope, and a # dynamic-flavor one. which one is used depends on the analysis type. if "scope" in meta: - raise InvalidRule(f"legacy rule detected (rule.meta.scope), please update to the new syntax: {name}") + raise InvalidRule( + f"legacy rule detected (rule.meta.scope), please update to the new syntax: {name}" + ) elif "scopes" in meta: scopes_ = meta.get("scopes") else: - raise InvalidRule("please specify at least one of this rule's (static/dynamic) scopes") + raise InvalidRule( + "please specify at least one of this rule's (static/dynamic) scopes" + ) if not isinstance(scopes_, dict): - raise InvalidRule("the scopes field must contain a dictionary specifying the scopes") + raise InvalidRule( + "the scopes field must contain a dictionary specifying the scopes" + ) scopes: Scopes = Scopes.from_dict(scopes_) statements = d["rule"]["features"] @@ -1087,7 +1268,9 @@ def from_dict(cls, d: dict[str, Any], definition: str) -> "Rule": if not isinstance(meta.get("mbc", []), list): raise InvalidRule("MBC mapping must be a list") - return cls(name, scopes, build_statements(statements[0], scopes), meta, definition) + return cls( + name, scopes, build_statements(statements[0], scopes), meta, definition + ) @staticmethod @lru_cache() @@ -1099,7 +1282,9 @@ def 
_get_yaml_loader(): logger.debug("using libyaml CSafeLoader.") return yaml.CSafeLoader except Exception: - logger.debug("unable to import libyaml CSafeLoader, falling back to Python yaml parser.") + logger.debug( + "unable to import libyaml CSafeLoader, falling back to Python yaml parser." + ) logger.debug("this will be slower to load rules.") return yaml.SafeLoader @@ -1254,7 +1439,9 @@ def move_to_end(m, k): # only do this for the features section, so the meta description doesn't get reformatted # assumes features section always exists features_offset = doc.find("features") - doc = doc[:features_offset] + doc[features_offset:].replace(" description:", " description:") + doc = doc[:features_offset] + doc[features_offset:].replace( + " description:", " description:" + ) # for negative hex numbers, yaml dump outputs: # - offset: !!int '0x-30' @@ -1441,19 +1628,25 @@ def __init__( self.rules = {rule.name: rule for rule in rules} self.rules_by_namespace = index_rules_by_namespace(rules) - self.rules_by_scope = {scope: self._get_rules_for_scope(rules, scope) for scope in scopes} + self.rules_by_scope = { + scope: self._get_rules_for_scope(rules, scope) for scope in scopes + } # these structures are unstable and may change before the next major release. scores_by_rule: dict[str, int] = {} self._feature_indexes_by_scopes = { - scope: self._index_rules_by_feature(scope, self.rules_by_scope[scope], scores_by_rule) for scope in scopes + scope: self._index_rules_by_feature( + scope, self.rules_by_scope[scope], scores_by_rule + ) + for scope in scopes } # Pre-compute the topological index mapping for each scope. # This avoids rebuilding the dict on every call to _match (which runs once per # instruction/basic-block/function/file scope, i.e. potentially millions of times). 
self._rule_index_by_scope: dict[Scope, dict[str, int]] = { - scope: {rule.name: i for i, rule in enumerate(self.rules_by_scope[scope])} for scope in scopes + scope: {rule.name: i for i, rule in enumerate(self.rules_by_scope[scope])} + for scope in scopes } @property @@ -1499,7 +1692,9 @@ def __contains__(self, rulename): # this routine is unstable and may change before the next major release. @staticmethod - def _score_feature(scores_by_rule: dict[str, int], node: capa.features.common.Feature) -> int: + def _score_feature( + scores_by_rule: dict[str, int], node: capa.features.common.Feature + ) -> int: """ Score the given feature by how "uncommon" we think it will be. Features that we expect to be very selective (ie. uniquely identify a rule and be required to match), @@ -1554,7 +1749,9 @@ def _score_feature(scores_by_rule: dict[str, int], node: capa.features.common.Fe return scores_by_rule[rule_name] - elif isinstance(node, (capa.features.insn.Number, capa.features.insn.OperandNumber)): + elif isinstance( + node, (capa.features.insn.Number, capa.features.insn.OperandNumber) + ): v = node.value assert isinstance(v, int) @@ -1574,7 +1771,14 @@ def _score_feature(scores_by_rule: dict[str, int], node: capa.features.common.Fe # Other numbers are assumed to be uncommon. return 7 - elif isinstance(node, (capa.features.common.Substring, capa.features.common.Regex, capa.features.common.Bytes)): + elif isinstance( + node, + ( + capa.features.common.Substring, + capa.features.common.Regex, + capa.features.common.Bytes, + ), + ): # Scanning features (non-hashable), which we can't use for quick matching/filtering. return 0 @@ -1653,7 +1857,9 @@ class _RuleFeatureIndex: # this routine is unstable and may change before the next major release. 
@staticmethod - def _index_rules_by_feature(scope: Scope, rules: list[Rule], scores_by_rule: dict[str, int]) -> _RuleFeatureIndex: + def _index_rules_by_feature( + scope: Scope, rules: list[Rule], scores_by_rule: dict[str, int] + ) -> _RuleFeatureIndex: """ Index the given rules by their minimal set of most "uncommon" features required to match. @@ -1814,21 +2020,42 @@ def and_score_key(item): string_features = [ feature for feature in features - if isinstance(feature, (capa.features.common.Substring, capa.features.common.Regex)) + if isinstance( + feature, + (capa.features.common.Substring, capa.features.common.Regex), + ) + ] + bytes_features = [ + feature + for feature in features + if isinstance(feature, capa.features.common.Bytes) ] - bytes_features = [feature for feature in features if isinstance(feature, capa.features.common.Bytes)] hashable_features = [ feature for feature in features if not isinstance( - feature, (capa.features.common.Substring, capa.features.common.Regex, capa.features.common.Bytes) + feature, + ( + capa.features.common.Substring, + capa.features.common.Regex, + capa.features.common.Bytes, + ), ) ] - logger.debug("indexing: features: %d, score: %d, rule: %s", len(features), score, rule_name) + logger.debug( + "indexing: features: %d, score: %d, rule: %s", + len(features), + score, + rule_name, + ) scores_by_rule[rule_name] = score for feature in features: - logger.debug(" : [%d] %s", RuleSet._score_feature(scores_by_rule, feature), feature) + logger.debug( + " : [%d] %s", + RuleSet._score_feature(scores_by_rule, feature), + feature, + ) if string_features: string_rules[rule_name] = cast(list[Feature], string_features) @@ -1839,13 +2066,23 @@ def and_score_key(item): for feature in hashable_features: rules_by_feature[feature].add(rule_name) - logger.debug("indexing: %d features indexed for scope %s", len(rules_by_feature), scope) + logger.debug( + "indexing: %d features indexed for scope %s", len(rules_by_feature), scope + ) logger.debug( 
"indexing: %d indexed features are shared by more than 3 rules", - len([feature for feature, rules in rules_by_feature.items() if len(rules) > 3]), + len( + [ + feature + for feature, rules in rules_by_feature.items() + if len(rules) > 3 + ] + ), ) logger.debug( - "indexing: %d scanning string features, %d scanning bytes features", len(string_rules), len(bytes_rules) + "indexing: %d scanning string features, %d scanning bytes features", + len(string_rules), + len(bytes_rules), ) return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules) @@ -1909,20 +2146,36 @@ def filter_rules_by_meta(self, tag: str) -> "RuleSet": for rule in rules: for k, v in rule.meta.items(): if isinstance(v, str) and tag in v: - logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, v) - rules_filtered.update(set(get_rules_and_dependencies(rules, rule.name))) + logger.debug( + 'using rule "%s" and dependencies, found tag in meta.%s: %s', + rule.name, + k, + v, + ) + rules_filtered.update( + set(get_rules_and_dependencies(rules, rule.name)) + ) break if isinstance(v, list): for vv in v: if tag in vv: - logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, vv) - rules_filtered.update(set(get_rules_and_dependencies(rules, rule.name))) + logger.debug( + 'using rule "%s" and dependencies, found tag in meta.%s: %s', + rule.name, + k, + vv, + ) + rules_filtered.update( + set(get_rules_and_dependencies(rules, rule.name)) + ) break return RuleSet(list(rules_filtered)) # this routine is unstable and may change before the next major release. @staticmethod - def _sort_rules_by_index(rule_index_by_rule_name: dict[str, int], rules: list[Rule]): + def _sort_rules_by_index( + rule_index_by_rule_name: dict[str, int], rules: list[Rule] + ): """ Sort (in place) the given rules by their index provided by the given dict. 
This mapping is intended to represent the topologic index of the given rule; @@ -1931,7 +2184,9 @@ def _sort_rules_by_index(rule_index_by_rule_name: dict[str, int], rules: list[Ru """ rules.sort(key=lambda r: rule_index_by_rule_name[r.name]) - def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[FeatureSet, ceng.MatchResults]: + def _match( + self, scope: Scope, features: FeatureSet, addr: Address + ) -> tuple[FeatureSet, ceng.MatchResults]: """ Match rules from this ruleset at the given scope against the given features. @@ -1939,7 +2194,9 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea It uses its knowledge of all the rules to evaluate a minimal set of candidate rules for the given features. """ - feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[scope] + feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[ + scope + ] # Topologic location of rule given its name. # That is, rules with a lower index should be evaluated first, since their dependencies # will be evaluated later. @@ -2075,7 +2332,9 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea # such as rule or namespace matches. if augmented_features is features: # lazily create the copy of features only when a rule matches, since it could be expensive. 
- augmented_features = collections.defaultdict(set, copy.copy(features)) + augmented_features = collections.defaultdict( + set, copy.copy(features) + ) ceng.index_rule_matches(augmented_features, rule, [addr]) @@ -2090,12 +2349,18 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea if new_features: new_candidates: list[str] = [] for new_feature in new_features: - new_candidates.extend(feature_index.rules_by_feature.get(new_feature, ())) + new_candidates.extend( + feature_index.rules_by_feature.get(new_feature, ()) + ) if new_candidates: candidate_rule_names.update(new_candidates) - candidate_rules.extend([self.rules[rule_name] for rule_name in new_candidates]) - RuleSet._sort_rules_by_index(rule_index_by_rule_name, candidate_rules) + candidate_rules.extend( + [self.rules[rule_name] for rule_name in new_candidates] + ) + RuleSet._sort_rules_by_index( + rule_index_by_rule_name, candidate_rules + ) candidate_rules.reverse() return (augmented_features, results) @@ -2125,17 +2390,25 @@ def match( if paranoid: rules: list[Rule] = self.rules_by_scope[scope] - paranoid_features, paranoid_matches = capa.engine.match(rules, features, addr) + paranoid_features, paranoid_matches = capa.engine.match( + rules, features, addr + ) if features != paranoid_features: logger.warning("paranoid: %s: %s", scope, addr) - for feature in sorted(set(features.keys()) & set(paranoid_features.keys())): + for feature in sorted( + set(features.keys()) & set(paranoid_features.keys()) + ): logger.warning("paranoid: %s", feature) - for feature in sorted(set(features.keys()) - set(paranoid_features.keys())): + for feature in sorted( + set(features.keys()) - set(paranoid_features.keys()) + ): logger.warning("paranoid: + %s", feature) - for feature in sorted(set(paranoid_features.keys()) - set(features.keys())): + for feature in sorted( + set(paranoid_features.keys()) - set(features.keys()) + ): logger.warning("paranoid: - %s", feature) assert features == 
paranoid_features @@ -2180,7 +2453,10 @@ def collect_rule_file_paths(rule_paths: list[Path]) -> list[Path]: continue for file in files: if not file.endswith(".yml"): - if not (file.startswith(".git") or file.endswith((".git", ".md", ".txt"))): + if not ( + file.startswith(".git") + or file.endswith((".git", ".md", ".txt")) + ): # expect to see .git* files, readme.md, format.md, and maybe a .git directory # other things maybe are rules, but are mis-named. logger.warning("skipping non-.yml file: %s", file) From 65d514af51708415830ba2127c6cf3e88a7b184c Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 2 Apr 2026 14:10:31 +0200 Subject: [PATCH 2/6] tests: migrate to data-driven fixtures for feature presence wip --- tests/fixtures.py | 640 ++++++++------- tests/fixtures/feature-presence.json | 1084 ++++++++++++++++++++++++++ 2 files changed, 1396 insertions(+), 328 deletions(-) create mode 100644 tests/fixtures/feature-presence.json diff --git a/tests/fixtures.py b/tests/fixtures.py index 6f15d0365..44005f141 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import json import logging import contextlib import collections @@ -20,6 +21,8 @@ import pytest +import capa.rules +import capa.engine as ceng import capa.loader import capa.features.file import capa.features.insn @@ -59,6 +62,75 @@ DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles" +def parse_feature_string(s: str) -> Feature | ceng.Range | ceng.Statement: + key, _, value = s.partition(": ") + return capa.rules.build_feature(key, value, initial_description=None) + + +FEATURE_MARKS: dict[tuple[str, str, str], list[dict]] = {} + + +def _load_feature_tests() -> tuple[list[tuple], list[tuple]]: + with (CD / "fixtures" / "feature-presence.json").open("r") as f: + data = json.load(f) + + presence_tests = [] + symtab_tests = [] + + for entry in data["features"]: + feature = parse_feature_string(entry["feature"]) + test_tuple = (entry["file"], entry["location"], feature, entry["expected"]) + + if "marks" in entry: + FEATURE_MARKS[(entry["file"], entry["location"], entry["feature"])] = entry["marks"] + + if "symtab" in entry.get("tags", []): + symtab_tests.append(test_tuple) + else: + presence_tests.append(test_tuple) + + presence_tests.sort(key=lambda t: (t[0], t[1])) + symtab_tests.sort(key=lambda t: (t[0], t[1])) + return presence_tests, symtab_tests + + +@lru_cache(maxsize=1) +def _load_fixture_file_paths() -> dict[str, Path]: + with (CD / "fixtures" / "feature-presence.json").open("r") as f: + data = json.load(f) + return {entry["key"]: CD / entry["path"] for entry in data["files"]} + + +def get_fixture_file_path(key: str) -> Path: + paths = _load_fixture_file_paths() + if key not in paths: + raise ValueError(f"unknown fixture file key: {key}") + return paths[key] + + +def apply_backend_marks(backend: str, sample_key: str, feature: Feature): + """Apply skip/xfail marks from fixtures for a specific backend. + + Args: + backend: backend name matching marks in fixtures (e.g. "idalib", "freeze") + sample_key: the file key from fixtures (e.g. 
"mimikatz", "pma12-04") + feature: the parsed Feature object to match against + """ + for (mk, _ml, mf), marks in FEATURE_MARKS.items(): + if mk != sample_key: + continue + if parse_feature_string(mf) != feature: + continue + for m in marks: + if m["backend"] != backend: + continue + if m["mark"] == "skip": + pytest.skip(m["reason"]) + elif m["mark"] == "xfail": + pytest.xfail(m["reason"]) + return + + @contextlib.contextmanager def xfail(condition, reason=None): """ @@ -233,7 +305,9 @@ def get_idalib_extractor(path: Path): # -1 - Generic errors (database already open, auto-analysis failed, etc.) # -2 - User cancelled operation ret = idapro.open_database( - str(path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R" + str(path), + run_auto_analysis=True, + args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R", ) if ret != 0: raise RuntimeError("failed to analyze input file") @@ -317,7 +391,12 @@ def get_ghidra_extractor(path: Path): # We use a larger cache size to avoid re-opening the same file multiple times # which is very slow with Ghidra. 
extractor = capa.loader.get_extractor( - path, FORMAT_AUTO, OS_AUTO, capa.loader.BACKEND_GHIDRA, [], disable_progress=True + path, + FORMAT_AUTO, + OS_AUTO, + capa.loader.BACKEND_GHIDRA, + [], + disable_progress=True, ) ctx = capa.features.extractors.ghidra.context.get_context() @@ -498,6 +577,8 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_" elif name.startswith("2bf18d"): return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_" + elif name.startswith("2d3edc"): + return CD / "data" / "2d3edc218a90f03089cc01715a9f047f.exe_" elif name.startswith("0000a657"): return ( CD @@ -632,6 +713,8 @@ def get_sample_md5_by_name(name): return "3db3e55b16a7b1b1afb970d5e77c5d98" elif name.startswith("2bf18d"): return "2bf18d0403677378adad9001b1243211" + elif name.startswith("2d3edc"): + return "2d3edc218a90f03089cc01715a9f047f" elif name.startswith("ea2876"): return "76fa734236daa023444dec26863401dc" else: @@ -857,292 +940,8 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -FEATURE_PRESENCE_TESTS = sorted( - [ - # file/characteristic("embedded pe") - ("pma12-04", "file", capa.features.common.Characteristic("embedded pe"), True), - # file/string - ("mimikatz", "file", capa.features.common.String("SCardControl"), True), - ("mimikatz", "file", capa.features.common.String("SCardTransmit"), True), - ("mimikatz", "file", capa.features.common.String("ACR > "), True), - ("mimikatz", "file", capa.features.common.String("nope"), False), - # file/sections - ("mimikatz", "file", capa.features.file.Section(".text"), True), - ("mimikatz", "file", capa.features.file.Section(".nope"), False), - # IDA doesn't extract unmapped sections by default - # ("mimikatz", "file", capa.features.file.Section(".rsrc"), True), - # file/exports - ("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True), - ("kernel32", "file", 
capa.features.file.Export("lstrlenW"), True), - ("kernel32", "file", capa.features.file.Export("nope"), False), - # forwarded export - ("ea2876", "file", capa.features.file.Export("vresion.GetFileVersionInfoA"), True), - # file/imports - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True), - ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True), - ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True), - ("mimikatz", "file", capa.features.file.Import("IsWow64Process"), True), - ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True), - ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True), - ("mimikatz", "file", capa.features.file.Import("#11"), False), - ("mimikatz", "file", capa.features.file.Import("#nope"), False), - ("mimikatz", "file", capa.features.file.Import("nope"), False), - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), - ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True), - ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True), - # function/characteristic(loop) - ("mimikatz", "function=0x401517", capa.features.common.Characteristic("loop"), True), - ("mimikatz", "function=0x401000", capa.features.common.Characteristic("loop"), False), - # bb/characteristic(tight loop) - ("mimikatz", "function=0x402EC4", capa.features.common.Characteristic("tight loop"), True), - ("mimikatz", "function=0x401000", capa.features.common.Characteristic("tight loop"), False), - # bb/characteristic(stack string) - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("stack string"), True), - ("mimikatz", "function=0x401000", capa.features.common.Characteristic("stack string"), False), - # bb/characteristic(tight loop) - ("mimikatz", "function=0x402EC4,bb=0x402F8E", 
capa.features.common.Characteristic("tight loop"), True), - ("mimikatz", "function=0x401000,bb=0x401000", capa.features.common.Characteristic("tight loop"), False), - # insn/mnemonic - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("push"), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("movzx"), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("xor"), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("in"), False), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("out"), False), - # insn/operand.number - ("mimikatz", "function=0x40105D,bb=0x401073", capa.features.insn.OperandNumber(1, 0xFF), True), - ("mimikatz", "function=0x40105D,bb=0x401073", capa.features.insn.OperandNumber(0, 0xFF), False), - # insn/operand.offset - ("mimikatz", "function=0x40105D,bb=0x4010B0", capa.features.insn.OperandOffset(0, 4), True), - ("mimikatz", "function=0x40105D,bb=0x4010B0", capa.features.insn.OperandOffset(1, 4), False), - # insn/number - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True), - ("mimikatz", "function=0x401000", capa.features.insn.Number(0x0), True), - # insn/number: stack adjustments - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), - # insn/number: negative - ("mimikatz", "function=0x401553", capa.features.insn.Number(0xFFFFFFFF), True), - ("mimikatz", "function=0x43e543", capa.features.insn.Number(0xFFFFFFF0), True), - # insn/offset - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True), - # insn/offset, issue #276 - ("64d9f", "function=0x10001510,bb=0x100015B0", capa.features.insn.Offset(0x4000), 
True), - # insn/offset: stack references - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False), - # insn/offset: negative - # 0x4012b4 MOVZX ECX, [EAX+0xFFFFFFFFFFFFFFFF] - ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), - # 0x4012b8 MOVZX EAX, [EAX+0xFFFFFFFFFFFFFFFE] - ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), - # - # insn/offset from mnemonic: add - # - # should not be considered, too big for an offset: - # .text:00401D85 81 C1 00 00 00 80 add ecx, 80000000h - ("mimikatz", "function=0x401D64,bb=0x401D73,insn=0x401D85", capa.features.insn.Offset(0x80000000), False), - # should not be considered, relative to stack: - # .text:00401CF6 83 C4 10 add esp, 10h - ("mimikatz", "function=0x401CC7,bb=0x401CDE,insn=0x401CF6", capa.features.insn.Offset(0x10), False), - # yes, this is also a offset (imagine eax is a pointer): - # .text:0040223C 83 C0 04 add eax, 4 - ("mimikatz", "function=0x402203,bb=0x402221,insn=0x40223C", capa.features.insn.Offset(0x4), True), - # - # insn/number from mnemonic: lea - # - # should not be considered, lea operand invalid encoding - # .text:00471EE6 8D 1C 81 lea ebx, [ecx+eax*4] - ("mimikatz", "function=0x471EAB,bb=0x471ED8,insn=0x471EE6", capa.features.insn.Number(0x4), False), - # should not be considered, lea operand invalid encoding - # .text:004717B1 8D 4C 31 D0 lea ecx, [ecx+esi-30h] - ("mimikatz", "function=0x47153B,bb=0x4717AB,insn=0x4717B1", capa.features.insn.Number(-0x30), False), - # yes, this is also a number (imagine ebx is zero): - # .text:004018C0 8D 4B 02 lea ecx, [ebx+2] - ("mimikatz", "function=0x401873,bb=0x4018B2,insn=0x4018C0", capa.features.insn.Number(0x2), True), - # insn/api - # not extracting dll anymore - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), False), - ("mimikatz", "function=0x403BAC", 
capa.features.insn.API("advapi32.CryptAcquireContext"), False), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), False), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), False), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), False), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptImportKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptDestroyKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False), - # insn/api: thunk - # not extracting dll anymore - ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False), - ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True), - # insn/api: x64 - ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True), - # insn/api: x64 thunk - ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True), - # insn/api: x64 nested thunk - ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True), - # insn/api: call via jmp - ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True), - ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True), - # insn/api: resolve indirect calls - # not extracting dll anymore - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), False), - ("c91887...", "function=0x401A77", 
capa.features.insn.API("kernel32.SetHandleInformation"), False), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), False), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), False), - ("c91887...", "function=0x401A77", capa.features.insn.API("CreatePipe"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("SetHandleInformation"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("CloseHandle"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("WriteFile"), True), - # insn/string - ("mimikatz", "function=0x40105D", capa.features.common.String("SCardControl"), True), - ("mimikatz", "function=0x40105D", capa.features.common.String("SCardTransmit"), True), - ("mimikatz", "function=0x40105D", capa.features.common.String("ACR > "), True), - ("mimikatz", "function=0x40105D", capa.features.common.String("nope"), False), - ("773290...", "function=0x140001140", capa.features.common.String(r"%s:\\OfficePackagesForWDAG"), True), - # overlapping string, see #1271 - ("294b8d...", "function=0x404970,bb=0x404970,insn=0x40499F", capa.features.common.String("\r\n\x00:ht"), False), - # insn/regex - ("pma16-01", "function=0x4021B0", capa.features.common.Regex("HTTP/1.0"), True), - ("pma16-01", "function=0x402F40", capa.features.common.Regex("www.practicalmalwareanalysis.com"), True), - ("pma16-01", "function=0x402F40", capa.features.common.Substring("practicalmalwareanalysis.com"), True), - # insn/string, pointer to string - ("mimikatz", "function=0x44EDEF", capa.features.common.String("INPUTEVENT"), True), - # insn/string, direct memory reference - ("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True), - # insn/bytes - ("mimikatz", "function=0x401517", capa.features.common.Bytes(bytes.fromhex("CA3B0E000000F8AF47")), True), - ("mimikatz", "function=0x404414", capa.features.common.Bytes(bytes.fromhex("0180000040EA4700")), True), - # don't 
extract byte features for obvious strings - ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False), - ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False), - ("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR > ".encode("utf-16le")), False), - ("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False), - # push offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction - ("mimikatz", "function=0x401000", capa.features.common.Bytes(bytes.fromhex("FDFF59F647")), False), - # IDA features included byte sequences read from invalid memory, fixed in #409 - ("mimikatz", "function=0x44570F", capa.features.common.Bytes(bytes.fromhex("FF" * 256)), False), - # insn/bytes, pointer to string bytes - ("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False), - # insn/characteristic(nzxor) - ("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True), - ("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False), - # insn/characteristic(nzxor): no security cookies - ("mimikatz", "function=0x46D534", capa.features.common.Characteristic("nzxor"), False), - # insn/characteristic(nzxor): xorps - # viv needs fixup to recognize function, see above - ("mimikatz", "function=0x410dfc", capa.features.common.Characteristic("nzxor"), True), - # insn/characteristic(peb access) - ("kernel32-64", "function=0x1800017D0", capa.features.common.Characteristic("peb access"), True), - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("peb access"), False), - # insn/characteristic(gs access) - ("kernel32-64", "function=0x180001068", capa.features.common.Characteristic("gs access"), True), - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("gs access"), False), - # 
insn/characteristic(cross section flow) - ("a1982...", "function=0x4014D0", capa.features.common.Characteristic("cross section flow"), True), - # insn/characteristic(cross section flow): imports don't count - ("kernel32-64", "function=0x180001068", capa.features.common.Characteristic("cross section flow"), False), - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("cross section flow"), False), - # insn/characteristic(recursive call) - ("mimikatz", "function=0x40640e", capa.features.common.Characteristic("recursive call"), True), - # before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386 - ("mimikatz", "function=0x4175FF", capa.features.common.Characteristic("recursive call"), False), - # insn/characteristic(indirect call) - ("mimikatz", "function=0x4175FF", capa.features.common.Characteristic("indirect call"), True), - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("indirect call"), False), - # insn/characteristic(calls from) - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("calls from"), True), - ("mimikatz", "function=0x4702FD", capa.features.common.Characteristic("calls from"), False), - # function/characteristic(calls to) - ("mimikatz", "function=0x40105D", capa.features.common.Characteristic("calls to"), True), - # function/characteristic(forwarded export) - ("ea2876", "file", capa.features.common.Characteristic("forwarded export"), True), - # before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386 - ("mimikatz", "function=0x456BB9", capa.features.common.Characteristic("calls to"), False), - # file/function-name - ("pma16-01", "file", capa.features.file.FunctionName("__aulldiv"), True), - # os & format & arch - ("pma16-01", "file", OS(OS_WINDOWS), True), - ("pma16-01", "file", OS(OS_LINUX), False), - ("mimikatz", "file", OS(OS_WINDOWS), True), - ("pma16-01", "function=0x401100", 
OS(OS_WINDOWS), True), - ("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True), - ("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True), - ("pma16-01", "file", Arch(ARCH_I386), True), - ("pma16-01", "file", Arch(ARCH_AMD64), False), - ("mimikatz", "file", Arch(ARCH_I386), True), - ("pma16-01", "function=0x401100", Arch(ARCH_I386), True), - ("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True), - ("mimikatz", "function=0x40105D", Arch(ARCH_I386), True), - ("pma16-01", "file", Format(FORMAT_PE), True), - ("pma16-01", "file", Format(FORMAT_ELF), False), - ("mimikatz", "file", Format(FORMAT_PE), True), - # format is also a global feature - ("pma16-01", "function=0x401100", Format(FORMAT_PE), True), - ("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True), - # elf support - ("7351f.elf", "file", OS(OS_LINUX), True), - ("7351f.elf", "file", OS(OS_WINDOWS), False), - ("7351f.elf", "file", Format(FORMAT_ELF), True), - ("7351f.elf", "file", Format(FORMAT_PE), False), - ("7351f.elf", "file", Arch(ARCH_I386), False), - ("7351f.elf", "file", Arch(ARCH_AMD64), True), - ("7351f.elf", "function=0x408753", capa.features.common.String("/dev/null"), True), - ("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True), - ("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True), - ("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. 
- key=lambda t: (t[0], t[1]), -) +FEATURE_PRESENCE_TESTS, FEATURE_SYMTAB_FUNC_TESTS = _load_feature_tests() -# this list should be merged into the one above (FEATURE_PRESENSE_TESTS) -# once the debug symbol functionality has been added to all backends -FEATURE_SYMTAB_FUNC_TESTS = [ - ( - "2bf18d", - "function=0x4027b3,bb=0x402861,insn=0x40286d", - capa.features.insn.API("__GI_connect"), - True, - ), - ( - "2bf18d", - "function=0x4027b3,bb=0x402861,insn=0x40286d", - capa.features.insn.API("connect"), - True, - ), - ( - "2bf18d", - "function=0x4027b3,bb=0x402861,insn=0x40286d", - capa.features.insn.API("__libc_connect"), - True, - ), - ( - "2bf18d", - "function=0x4088a4", - capa.features.file.FunctionName("__GI_connect"), - True, - ), - ( - "2bf18d", - "function=0x4088a4", - capa.features.file.FunctionName("connect"), - True, - ), - ( - "2bf18d", - "function=0x4088a4", - capa.features.file.FunctionName("__libc_connect"), - True, - ), -] FEATURE_PRESENCE_TESTS_DOTNET = sorted( [ @@ -1150,66 +949,206 @@ def parametrize(params, values, **kwargs): ("b9f5b", "file", Arch(ARCH_AMD64), False), ("mixed-mode-64", "file", Arch(ARCH_AMD64), True), ("mixed-mode-64", "file", Arch(ARCH_I386), False), - ("mixed-mode-64", "file", capa.features.common.Characteristic("mixed mode"), True), - ("hello-world", "file", capa.features.common.Characteristic("mixed mode"), False), + ( + "mixed-mode-64", + "file", + capa.features.common.Characteristic("mixed mode"), + True, + ), + ( + "hello-world", + "file", + capa.features.common.Characteristic("mixed mode"), + False, + ), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_PE), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), - ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True), - ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::ctor"), True), - ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::cctor"), False), + ( + "hello-world", + "file", + 
capa.features.file.FunctionName("HelloWorld::Main"), + True, + ), + ( + "hello-world", + "file", + capa.features.file.FunctionName("HelloWorld::ctor"), + True, + ), + ( + "hello-world", + "file", + capa.features.file.FunctionName("HelloWorld::cctor"), + False, + ), ("hello-world", "file", capa.features.common.String("Hello World!"), True), ("hello-world", "file", capa.features.common.Class("HelloWorld"), True), ("hello-world", "file", capa.features.common.Class("System.Console"), True), - ("hello-world", "file", capa.features.common.Namespace("System.Diagnostics"), True), - ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), - ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), - ("hello-world", "function=0x250, bb=0x250, insn=0x257", capa.features.common.Class("System.Console"), True), - ("hello-world", "function=0x250, bb=0x250, insn=0x257", capa.features.common.Namespace("System"), True), - ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), - ("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True), - ("_1c444", "file", capa.features.common.String(r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall"), True), + ( + "hello-world", + "file", + capa.features.common.Namespace("System.Diagnostics"), + True, + ), + ( + "hello-world", + "function=0x250", + capa.features.common.String("Hello World!"), + True, + ), + ( + "hello-world", + "function=0x250, bb=0x250, insn=0x252", + capa.features.common.String("Hello World!"), + True, + ), + ( + "hello-world", + "function=0x250, bb=0x250, insn=0x257", + capa.features.common.Class("System.Console"), + True, + ), + ( + "hello-world", + "function=0x250, bb=0x250, insn=0x257", + capa.features.common.Namespace("System"), + True, + ), + ( + "hello-world", + "function=0x250", + capa.features.insn.API("System.Console::WriteLine"), + True, + ), + ( + "hello-world", + 
"file", + capa.features.file.Import("System.Console::WriteLine"), + True, + ), + ( + "_1c444", + "file", + capa.features.common.String(r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall"), + True, + ), ("_1c444", "file", capa.features.common.String("get_IsAlive"), True), - ("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True), + ( + "_1c444", + "file", + capa.features.file.Import("gdi32.CreateCompatibleBitmap"), + True, + ), ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), - ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False), + ( + "_1c444", + "file", + capa.features.file.Import("gdi32::CreateCompatibleBitmap"), + False, + ), ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), # not extracting dll anymore - ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), False), + ( + "_1c444", + "function=0x1F68", + capa.features.insn.API("user32.GetWindowDC"), + False, + ), ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), - ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), True), - ("_1c444", "token=0x6000018", capa.features.common.Characteristic("calls to"), False), - ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls from"), True), - ("_1c444", "token=0x600000F", capa.features.common.Characteristic("calls from"), False), + ( + "_1c444", + "token=0x600001D", + capa.features.common.Characteristic("calls to"), + True, + ), + ( + "_1c444", + "token=0x6000018", + capa.features.common.Characteristic("calls to"), + False, + ), + ( + "_1c444", + "token=0x600001D", + capa.features.common.Characteristic("calls from"), + True, + ), + ( + "_1c444", + "token=0x600000F", + capa.features.common.Characteristic("calls from"), + False, + ), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), 
False), - ("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method + ( + "_692f", + "token=0x6000004", + capa.features.insn.API("System.Linq.Enumerable::First"), + True, + ), # generic method ( "_692f", "token=0x6000004", capa.features.insn.Property("System.Linq.Enumerable::First"), False, ), # generic method - ("_692f", "token=0x6000004", capa.features.common.Namespace("System.Linq"), True), # generic method - ("_692f", "token=0x6000004", capa.features.common.Class("System.Linq.Enumerable"), True), # generic method - ("_1c444", "token=0x6000020", capa.features.common.Namespace("Reqss"), True), # ldftn - ("_1c444", "token=0x6000020", capa.features.common.Class("Reqss.Reqss"), True), # ldftn + ( + "_692f", + "token=0x6000004", + capa.features.common.Namespace("System.Linq"), + True, + ), # generic method + ( + "_692f", + "token=0x6000004", + capa.features.common.Class("System.Linq.Enumerable"), + True, + ), # generic method + ( + "_1c444", + "token=0x6000020", + capa.features.common.Namespace("Reqss"), + True, + ), # ldftn + ( + "_1c444", + "token=0x6000020", + capa.features.common.Class("Reqss.Reqss"), + True, + ), # ldftn ( "_1c444", "function=0x1F59, bb=0x1F59, insn=0x1F5B", capa.features.common.Characteristic("unmanaged call"), True, ), - ("_1c444", "function=0x2544", capa.features.common.Characteristic("unmanaged call"), False), + ( + "_1c444", + "function=0x2544", + capa.features.common.Characteristic("unmanaged call"), + False, + ), # same as above but using token instead of function - ("_1c444", "token=0x6000088", capa.features.common.Characteristic("unmanaged call"), False), + ( + "_1c444", + "token=0x6000088", + capa.features.common.Characteristic("unmanaged call"), + False, + ), ( "_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("System.Drawing.Image::FromHbitmap"), True, ), - ("_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("FromHbitmap"), False), 
+ ( + "_1c444", + "function=0x1F68, bb=0x1F68, insn=0x1FF9", + capa.features.insn.API("FromHbitmap"), + False, + ), ( "_1c444", "token=0x600002B", @@ -1232,7 +1171,8 @@ def parametrize(params, values, **kwargs): "_1c444", "token=0x6000081", capa.features.insn.Property( - "System.Diagnostics.ProcessStartInfo::UseShellExecute", access=FeatureAccess.WRITE + "System.Diagnostics.ProcessStartInfo::UseShellExecute", + access=FeatureAccess.WRITE, ), # MemberRef property access True, ), @@ -1240,7 +1180,8 @@ def parametrize(params, values, **kwargs): "_1c444", "token=0x6000081", capa.features.insn.Property( - "System.Diagnostics.ProcessStartInfo::WorkingDirectory", access=FeatureAccess.WRITE + "System.Diagnostics.ProcessStartInfo::WorkingDirectory", + access=FeatureAccess.WRITE, ), # MemberRef property access True, ), @@ -1248,7 +1189,8 @@ def parametrize(params, values, **kwargs): "_1c444", "token=0x6000081", capa.features.insn.Property( - "System.Diagnostics.ProcessStartInfo::FileName", access=FeatureAccess.WRITE + "System.Diagnostics.ProcessStartInfo::FileName", + access=FeatureAccess.WRITE, ), # MemberRef property access True, ), @@ -1312,7 +1254,8 @@ def parametrize(params, values, **kwargs): "_692f", "token=0x6000006", capa.features.insn.Property( - "System.Management.Automation.PowerShell::Streams", access=FeatureAccess.READ + "System.Management.Automation.PowerShell::Streams", + access=FeatureAccess.READ, ), # MemberRef property access False, ), @@ -1361,7 +1304,8 @@ def parametrize(params, values, **kwargs): "_039a6", "token=0x6000023", capa.features.insn.Property( - "System.Runtime.CompilerServices.AsyncTaskMethodBuilder::Task", access=FeatureAccess.READ + "System.Runtime.CompilerServices.AsyncTaskMethodBuilder::Task", + access=FeatureAccess.READ, ), # MemberRef method False, ), @@ -1488,7 +1432,12 @@ def parametrize(params, values, **kwargs): FEATURE_BINJA_DATABASE_TESTS = sorted( [ # insn/regex - ("pma16-01_binja_db", "function=0x4021B0", 
capa.features.common.Regex("HTTP/1.0"), True), + ( + "pma16-01_binja_db", + "function=0x4021B0", + capa.features.common.Regex("HTTP/1.0"), + True, + ), ( "pma16-01_binja_db", "function=0x402F40", @@ -1501,7 +1450,12 @@ def parametrize(params, values, **kwargs): capa.features.common.Substring("practicalmalwareanalysis.com"), True, ), - ("pma16-01_binja_db", "file", capa.features.file.FunctionName("__aulldiv"), True), + ( + "pma16-01_binja_db", + "file", + capa.features.file.FunctionName("__aulldiv"), + True, + ), # os & format & arch ("pma16-01_binja_db", "file", OS(OS_WINDOWS), True), ("pma16-01_binja_db", "file", OS(OS_LINUX), False), @@ -1524,10 +1478,30 @@ def parametrize(params, values, **kwargs): FEATURE_COUNT_TESTS = [ ("mimikatz", "function=0x40E5C2", capa.features.basicblock.BasicBlock(), 7), - ("mimikatz", "function=0x4702FD", capa.features.common.Characteristic("calls from"), 0), - ("mimikatz", "function=0x40E5C2", capa.features.common.Characteristic("calls from"), 3), - ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("calls to"), 0), - ("mimikatz", "function=0x40B1F1", capa.features.common.Characteristic("calls to"), 3), + ( + "mimikatz", + "function=0x4702FD", + capa.features.common.Characteristic("calls from"), + 0, + ), + ( + "mimikatz", + "function=0x40E5C2", + capa.features.common.Characteristic("calls from"), + 3, + ), + ( + "mimikatz", + "function=0x4556E5", + capa.features.common.Characteristic("calls to"), + 0, + ), + ( + "mimikatz", + "function=0x40B1F1", + capa.features.common.Characteristic("calls to"), + 3, + ), ] @@ -1539,8 +1513,18 @@ def parametrize(params, values, **kwargs): FEATURE_COUNT_TESTS_GHIDRA = [ # Ghidra may render functions as labels, as well as provide differing amounts of call references - ("mimikatz", "function=0x4702FD", capa.features.common.Characteristic("calls from"), 0), - ("mimikatz", "function=0x401bf1", capa.features.common.Characteristic("calls to"), 2), + ( + "mimikatz", + 
"function=0x4702FD", + capa.features.common.Characteristic("calls from"), + 0, + ), + ( + "mimikatz", + "function=0x401bf1", + capa.features.common.Characteristic("calls to"), + 2, + ), ("mimikatz", "function=0x401000", capa.features.basicblock.BasicBlock(), 3), ] diff --git a/tests/fixtures/feature-presence.json b/tests/fixtures/feature-presence.json new file mode 100644 index 000000000..ccc7d0264 --- /dev/null +++ b/tests/fixtures/feature-presence.json @@ -0,0 +1,1084 @@ +{ + "files": [ + { + "key": "mimikatz", + "path": "data/mimikatz.exe_" + }, + { + "key": "kernel32", + "path": "data/kernel32.dll_" + }, + { + "key": "kernel32-64", + "path": "data/kernel32-64.dll_" + }, + { + "key": "pma12-04", + "path": "data/Practical Malware Analysis Lab 12-04.exe_" + }, + { + "key": "pma16-01", + "path": "data/Practical Malware Analysis Lab 16-01.exe_" + }, + { + "key": "7351f.elf", + "path": "data/7351f8a40c5450557b24622417fc478d.elf_" + }, + { + "key": "al-khaser x64", + "path": "data/al-khaser_x64.exe_" + }, + { + "key": "64d9f", + "path": "data/64d9f7d96b99467f36e22fada623c3bb.dll_" + }, + { + "key": "79abd", + "path": "data/79abd17391adc6251ecdc58d13d76baf.dll_" + }, + { + "key": "946a9", + "path": "data/946a99f36a46d335dec080d9a4371940.dll_" + }, + { + "key": "773290", + "path": "data/773290480d5445f11d3dc1b800728966.exe_" + }, + { + "key": "294b8d", + "path": "data/294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_" + }, + { + "key": "a1982", + "path": "data/a198216798ca38f280dc413f8c57f2c2.exe_" + }, + { + "key": "c91887", + "path": "data/c91887d861d9bd4a5872249b641bc9f9.exe_" + }, + { + "key": "2bf18d", + "path": "data/2bf18d0403677378adad9001b1243211.elf_" + }, + { + "key": "2d3edc", + "path": "data/2d3edc218a90f03089cc01715a9f047f.exe_" + }, + { + "key": "ea2876", + "path": "data/ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" + }, + { + "key": "pma01-01.frz", + "path": "fixtures/freeze/Practical Malware Analysis Lab 
01-01.dll_.frz" + }, + { + "key": "009c2377.frz", + "path": "fixtures/freeze/009c2377b67997b0da1579f4bbc822c1.exe_.frz" + }, + { + "key": "055da8e6.frz", + "path": "fixtures/freeze/055da8e6ccfe5a9380231ea04b850e18.elf_.frz" + }, + { + "key": "034b7231.frz", + "path": "fixtures/freeze/034b7231a49387604e81a5a5d2fe7e08f6982c418a28b719d2faace3c312ebb5.exe_.frz" + } + ], + "features": [ + { + "file": "pma12-04", + "location": "file", + "feature": "characteristic: embedded pe", + "expected": true, + "explanation": "embedded PE file in resource section", + "marks": [ + { + "backend": "idalib", + "mark": "skip", + "reason": "Embedded PE is in .rsrc section at file offset 0x4060, which IDA doesn't load by default" + }, + { + "backend": "freeze", + "mark": "skip", + "reason": "Embedded PE is in .rsrc section at file offset 0x4060, which freeze doesn't handle correctly" + } + ] + }, + { + "file": "2d3edc", + "location": "file", + "feature": "characteristic: embedded pe", + "expected": true, + "explanation": "embedded PE file at file scope using file offset addresses", + "marks": [ + { + "backend": "freeze", + "mark": "skip", + "reason": "Python capa has bug extracting embedded PE files at absolute offsets" + } + ], + "comment": "Embedded PE at file offset 0x7FB0. Note: vivisect freeze has 0x4091b0 but that's a virtual address mislabeled as file offset." 
+ }, + { + "file": "mimikatz", + "location": "file", + "feature": "string: SCardControl", + "expected": true, + "explanation": "basic UTF-16LE string" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "string: ACR > ", + "expected": true, + "explanation": "UTF-16LE encoded strings with unusual characters and trailing spaces" + }, + { + "file": "pma12-04", + "location": "file", + "feature": "string: winlogon.exe", + "expected": true, + "explanation": "basic ASCII string" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "string: nope", + "expected": false, + "explanation": "non-existant string" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "section: .text", + "expected": true, + "explanation": "basic section name" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "section: .nope", + "expected": false, + "explanation": "non-existant section" + }, + { + "file": "kernel32", + "location": "file", + "feature": "export: BaseThreadInitThunk", + "expected": true, + "explanation": "basic export name" + }, + { + "file": "kernel32", + "location": "file", + "feature": "export: nope", + "expected": false, + "explanation": "non-existant export" + }, + { + "file": "ea2876", + "location": "file", + "feature": "export: vresion.GetFileVersionInfoA", + "expected": true, + "explanation": "forwarded export" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "import: advapi32.CryptSetHashParam", + "expected": true, + "explanation": "import with DLL prefix" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "import: CryptSetHashParam", + "expected": true, + "explanation": "import with no DLL prefix" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "import: cabinet.#11", + "expected": true, + "explanation": "import by ordinal" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "import: #11", + "expected": false, + "explanation": "non-existant ordinal import" + 
}, + { + "file": "mimikatz", + "location": "file", + "feature": "import: #nope", + "expected": false, + "explanation": "non-existant ordinal import" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "import: nope", + "expected": false, + "explanation": "non-existant import" + }, + { + "file": "mimikatz", + "location": "function=0x401517", + "feature": "characteristic: loop", + "expected": true, + "explanation": "loop" + }, + { + "file": "mimikatz", + "location": "function=0x401000", + "feature": "characteristic: loop", + "expected": false, + "explanation": "non-existant loop" + }, + { + "file": "mimikatz", + "location": "function=0x402EC4", + "feature": "characteristic: tight loop", + "expected": true, + "explanation": "tight-loop" + }, + { + "file": "mimikatz", + "location": "function=0x401000", + "feature": "characteristic: tight loop", + "expected": false, + "explanation": "non-existant tight-loop" + }, + { + "file": "mimikatz", + "location": "function=0x402EC4,bb=0x402F8E", + "feature": "characteristic: tight loop", + "expected": true, + "explanation": "tight-loop at basic block scope" + }, + { + "file": "mimikatz", + "location": "function=0x401000,bb=0x401000", + "feature": "characteristic: tight loop", + "expected": false, + "explanation": "non-existant tight-loop at basic block scope" + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "characteristic: stack string", + "expected": true, + "explanation": "stack string (but capa doesn't extract it as a string yet)" + }, + { + "file": "mimikatz", + "location": "function=0x401000", + "feature": "characteristic: stack string", + "expected": false, + "explanation": "non-existant stack string" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "mnemonic: push", + "explanation": "basic mnemonic", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "mnemonic: in", + "expected": false, + "explanation": 
"non-existant mnemonic" + }, + { + "file": "mimikatz", + "location": "function=0x40105D,bb=0x401073,insn=0x401073", + "feature": "number: 0xFF", + "expected": true, + "explanation": "number" + }, + { + "file": "mimikatz", + "location": "function=0x40105D,bb=0x401073,insn=0x401073", + "feature": "operand[1].number: 0xFF", + "expected": true, + "explanation": "mov eax, 0FFh; instruction operand number" + }, + { + "file": "mimikatz", + "location": "function=0x40105D,bb=0x401073,insn=0x401073", + "feature": "operand[0].number: 0xFF", + "expected": false, + "explanation": "mov eax, 0FFh; non-existant instruction operand number" + }, + { + "file": "mimikatz", + "location": "function=0x40105D,bb=0x4010B0,insn=0x4010B4", + "feature": "operand[0].offset: 4", + "expected": true, + "explanation": "cmp [esi+4], ebx; instruction operand offset" + }, + { + "file": "mimikatz", + "location": "function=0x40105D,bb=0x4010B0,insn=0x4010B4", + "feature": "operand[1].offset: 4", + "expected": false, + "explanation": "cmp [esi+4], ebx; non-existant instruction operand offset" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "number: 0xFF", + "expected": true, + "explanation": "small number" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "number: 0x3136B0", + "expected": true, + "explanation": "large number" + }, + { + "file": "mimikatz", + "location": "function=0x401000", + "feature": "number: 0x0", + "expected": true, + "explanation": "zero number" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "number: 0xC", + "expected": false, + "explanation": "non-existant number" + }, + { + "file": "mimikatz", + "location": "function=0x401553", + "feature": "number: 0xFFFFFFFF", + "expected": true, + "explanation": "max u32 number" + }, + { + "file": "mimikatz", + "location": "function=0x43e543", + "feature": "number: 0xFFFFFFF0", + "expected": true, + "explanation": "large u32 number" + }, + { + "file": 
"mimikatz", + "location": "function=0x40105D", + "feature": "offset: 0x0", + "explanation": "cmp [esi], ebx; zero offset", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "offset: 0x4", + "explanation": "cmp [esi+4], ebx; simple offset", + "expected": true + }, + { + "file": "64d9f", + "location": "function=0x10001510,bb=0x100015B0", + "feature": "offset: 0x4000", + "expected": true, + "explanation": "regression test for issue #276" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "offset: 0x8", + "expected": false, + "explanation": "no instruction in the function references [reg+8]" + }, + { + "file": "mimikatz", + "location": "function=0x4011FB", + "feature": "offset: -0x1", + "expected": true, + "explanation": "movzx ecx, [eax-1]; negative offset" + }, + { + "file": "mimikatz", + "location": "function=0x4011FB", + "feature": "offset: -0x2", + "expected": true, + "explanation": "cmp [eax-2], cx; negative offset -2" + }, + { + "file": "mimikatz", + "location": "function=0x4011FB", + "feature": "number: -0x2", + "expected": false, + "explanation": "cmp [eax-2], cx; negative offset shouldn't emit a number too" + }, + { + "file": "mimikatz", + "location": "function=0x401D64,bb=0x401D73,insn=0x401D85", + "feature": "offset: 0x80000000", + "expected": false, + "explanation": "add ecx, 80000000h; too-large immediate should not be considered an offset" + }, + { + "file": "mimikatz", + "location": "function=0x401CC7,bb=0x401CDE,insn=0x401CF6", + "feature": "offset: 0x10", + "expected": false, + "explanation": "add esp, 10h; stack-relative ADD should not be considered an offset" + }, + { + "file": "mimikatz", + "location": "function=0x402203,bb=0x402221,insn=0x40223C", + "feature": "offset: 0x4", + "expected": true, + "explanation": "add eax, 4; non-stack register ADD should emit an offset feature, treating eax as a pointer" + }, + { + "file": "mimikatz", + "location": 
"function=0x471EAB,bb=0x471ED8,insn=0x471EE6", + "feature": "number: 0x4", + "expected": false, + "explanation": "lea ebx, [ecx+eax*4]; should not emit Number feature for the scale" + }, + { + "file": "mimikatz", + "location": "function=0x47153B,bb=0x4717AB,insn=0x4717B1", + "feature": "number: -0x30", + "expected": false, + "explanation": "lea ecx, [ecx+esi-30h]; should not emit Number feature for the displacement" + }, + { + "file": "mimikatz", + "location": "function=0x401873,bb=0x4018B2,insn=0x4018C0", + "feature": "number: 0x2", + "expected": true, + "explanation": "lea ecx, [ebx+2]; should emit Number feature, treating ebx as zero" + }, + { + "file": "mimikatz", + "location": "function=0x403BAC", + "feature": "api: CryptAcquireContextW", + "expected": true, + "explanation": "basic API feature with trailing W" + }, + { + "file": "mimikatz", + "location": "function=0x403BAC", + "feature": "api: CryptAcquireContext", + "expected": true, + "explanation": "basic API feature with stripped W" + }, + { + "file": "mimikatz", + "location": "function=0x403BAC", + "feature": "api: Nope", + "expected": false, + "explanation": "non-existent API" + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "api: LsaQueryInformationPolicy", + "expected": true + }, + { + "file": "kernel32-64", + "location": "function=0x180001010", + "feature": "api: RtlVirtualUnwind", + "expected": true, + "marks": [ + { + "backend": "idalib", + "mark": "skip", + "reason": "IDA identifies 0x180001010 as lib function and skips it" + } + ] + }, + { + "file": "kernel32-64", + "location": "function=0x1800202B0", + "feature": "api: RtlCaptureContext", + "expected": true, + "explanation": "API called via thunk", + "marks": [ + { + "backend": "idalib", + "mark": "skip", + "reason": "IDA identifies 0x1800202B0 as lib function _report_gsfailure" + }, + { + "backend": "freeze", + "mark": "skip", + "reason": "IDA skipping lib functions prevents freeze from detecting this API" + } + ] + 
}, + { + "file": "al-khaser x64", + "location": "function=0x14004B4F0", + "feature": "api: __vcrt_GetModuleHandle", + "expected": true, + "explanation": "API called via nested thunks", + "marks": [ + { + "backend": "idalib", + "mark": "skip", + "reason": "IDA identifies this as lib function GetPdbDll" + }, + { + "backend": "freeze", + "mark": "skip", + "reason": "IDA skipping lib functions prevents freeze from detecting this API" + } + ] + }, + { + "file": "mimikatz", + "location": "function=0x40B3C6", + "feature": "api: LocalFree", + "expected": true, + "explanation": "tail call to API via jmp" + }, + { + "file": "c91887", + "location": "function=0x40156F", + "feature": "api: CloseClipboard", + "expected": true, + "explanation": "tail call to API via jmp" + }, + { + "file": "c91887", + "location": "function=0x401A77", + "feature": "api: CreatePipe", + "expected": true, + "explanation": "API is present" + }, + { + "file": "c91887", + "location": "function=0x401A77", + "feature": "api: kernel32.CreatePipe", + "expected": true, + "explanation": "API is present, and DLL name is ignored" + }, + { + "file": "c91887", + "location": "function=0x401A77", + "feature": "api: CreatePipe", + "expected": true, + "explanation": "API resolved from call to GetProcAddress" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "string: SCardControl", + "expected": true, + "explanation": "basic string" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "string: ACR > ", + "expected": true, + "explanation": "basic string with trailing whitespace" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "string: nope", + "expected": false, + "explanation": "basic string not present" + }, + { + "file": "773290", + "location": "function=0x140001140", + "feature": "string: %s:\\\\OfficePackagesForWDAG", + "expected": true, + "explanation": "string with escaping characters" + }, + { + "file": "294b8d", + "location": 
"function=0x404970,bb=0x404970,insn=0x40499F", + "feature": "string: \r\n\u0000:ht", + "expected": false, + "explanation": "regression test for issue #1271: should not extract overlapping string spanning a NUL byte" + }, + { + "file": "pma16-01", + "location": "function=0x4021B0", + "feature": "substring: HTTP/1.0", + "expected": true, + "explanation": "basic substring" + }, + { + "file": "pma16-01", + "location": "function=0x402F40", + "feature": "string: /www.practicalmalwareanalysis.com/", + "expected": true, + "explanation": "basic regex" + }, + { + "file": "pma16-01", + "location": "function=0x402F40", + "feature": "substring: practicalmalwareanalysis.com", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x44EDEF", + "feature": "string: INPUTEVENT", + "expected": true, + "explanation": "string referenced via a pointer" + }, + { + "file": "mimikatz", + "location": "function=0x46D6CE", + "feature": "string: (null)", + "expected": true, + "explanation": "string referenced via direct memory reference" + }, + { + "file": "mimikatz", + "location": "function=0x401517", + "feature": "bytes: CA 3B 0E 00 00 00 F8 AF 47", + "expected": true, + "explanation": "basic bytes" + }, + { + "file": "mimikatz", + "location": "function=0x404414", + "feature": "bytes: 01 80 00 00 40 EA 47 00", + "expected": true, + "explanation": "basic bytes, which are a pointer" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "bytes: 53 00 43 00 61 00 72 00 64 00 43 00 6F 00 6E 00 74 00 72 00 6F 00 6C 00", + "expected": false, + "explanation": "should not extract bytes feature for an obvious string (here: UTF-16LE 'SCardControl')" + }, + { + "file": "mimikatz", + "location": "function=0x401000", + "feature": "bytes: FD FF 59 F6 47", + "expected": false, + "explanation": "push offset aAcsAcr1220 ('ACS...') where ACS == 41 00 43 00 happens to be a valid pointer to the middle of an instruction; should not be misinterpreted as bytes feature" + 
}, + { + "file": "mimikatz", + "location": "function=0x44570F", + "feature": "bytes: FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF FF", + "expected": false, + "explanation": "regression test for issue #409: should not extract bytes feature from byte sequences read from invalid memory" + }, + { + "file": "mimikatz", + "location": "function=0x44EDEF", + "feature": "bytes: 49 00 4E 00 50 00 55 00 54 00 45 00 56 00 45 00 4E 00 54 00", + "expected": false, + "explanation": "should not extract bytes feature when instruction references it as a pointer to string bytes (here: UTF-16LE 'INPUTEVENT')" + }, + { + "file": "mimikatz", + "location": "function=0x410DFC", + "feature": "characteristic: nzxor", + "expected": true, + "explanation": "should extract nzxor characteristic, including from xorps SSE instructions" + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "characteristic: nzxor", + "expected": false, + "explanation": "non-existant nzxor" + }, + { + "file": "mimikatz", + "location": "function=0x46D534", + "feature": "characteristic: nzxor", + "expected": false, + "explanation": "should not extract nzxor characteristic for security cookie xors" + }, + { + "file": "kernel32-64", + "location": "function=0x1800017D0", + "feature": 
"characteristic: peb access", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "characteristic: peb access", + "expected": false + }, + { + "file": "kernel32-64", + "location": "function=0x180001068", + "feature": "characteristic: gs access", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "characteristic: gs access", + "expected": false + }, + { + "file": "mimikatz", + "location": "function=0x410DFC,bb=0x410F05,insn=0x410F0B", + "feature": "characteristic: nzxor", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x410DFC,bb=0x410F05,insn=0x410F12", + "feature": "characteristic: nzxor", + "expected": true + }, + { + "file": "kernel32-64", + "location": "function=0x1800017D0,bb=0x1800018AD,insn=0x1800018AD", + "feature": "characteristic: peb access", + "expected": true + }, + { + "file": "kernel32-64", + "location": "function=0x180001068,bb=0x18000118D,insn=0x180001197", + "feature": "characteristic: gs access", + "expected": true + }, + { + "file": "kernel32-64", + "location": "function=0x180001068,bb=0x180001269,insn=0x18000127F", + "feature": "characteristic: gs access", + "expected": true + }, + { + "file": "kernel32", + "location": "function=0x7DD70E00,bb=0x7DD70E00,insn=0x7DD70E05", + "feature": "characteristic: fs access", + "expected": true + }, + { + "file": "kernel32", + "location": "function=0x7DD70E00,bb=0x7DD70E25,insn=0x7DD70E2D", + "feature": "characteristic: fs access", + "expected": true + }, + { + "file": "kernel32", + "location": "function=0x7DD70E00,bb=0x7DD70FCB,insn=0x7DD70FCB", + "feature": "characteristic: fs access", + "expected": true + }, + { + "file": "a1982", + "location": "function=0x4014D0", + "feature": "characteristic: cross section flow", + "expected": true + }, + { + "file": "kernel32-64", + "location": "function=0x180001068", + "feature": "characteristic: cross section flow", + "expected": false, + 
"explanation": "should not extract cross section flow characteristic for control transfers to imports" + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "characteristic: cross section flow", + "expected": false + }, + { + "file": "mimikatz", + "location": "function=0x40640e", + "feature": "characteristic: recursive call", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x4175FF", + "feature": "characteristic: recursive call", + "expected": false, + "explanation": "issue #386: 0x4175FF makes indirect calls (via dword_4B821C) but never calls itself, directly or via callback" + }, + { + "file": "mimikatz", + "location": "function=0x4175FF", + "feature": "characteristic: indirect call", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "characteristic: indirect call", + "expected": false + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "characteristic: calls from", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x4702FD", + "feature": "characteristic: calls from", + "expected": false + }, + { + "file": "mimikatz", + "location": "function=0x40105D", + "feature": "characteristic: calls to", + "expected": true + }, + { + "file": "ea2876", + "location": "file", + "feature": "characteristic: forwarded export", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x456BB9", + "feature": "characteristic: calls to", + "expected": false, + "explanation": "issue #386: 0x456BB9 is only referenced from a function-pointer table at 0x475834, never via a direct call instruction" + }, + { + "file": "mimikatz", + "location": "function=0x40105D,bb=0x401089,insn=0x40108E", + "feature": "characteristic: calls from", + "expected": true + }, + { + "file": "mimikatz", + "location": "function=0x4175FF,bb=0x41761B,insn=0x417620", + "feature": "characteristic: indirect call", + "expected": true + }, + { + "file": 
"pma16-01", + "location": "file", + "feature": "function-name: __aulldiv", + "expected": true, + "explanation": "recognize function name via FLIRT signatures" + }, + { + "file": "pma16-01", + "location": "file", + "feature": "os: windows", + "expected": true + }, + { + "file": "pma16-01", + "location": "file", + "feature": "os: linux", + "expected": false + }, + { + "file": "mimikatz", + "location": "file", + "feature": "os: windows", + "expected": true + }, + { + "file": "pma16-01", + "location": "function=0x401100", + "feature": "os: windows", + "expected": true, + "explanation": "OS available at function scope" + }, + { + "file": "pma16-01", + "location": "function=0x401100,bb=0x401130", + "feature": "os: windows", + "expected": true, + "explanation": "OS available at basic block scope" + }, + { + "file": "pma16-01", + "location": "file", + "feature": "arch: i386", + "expected": true + }, + { + "file": "pma16-01", + "location": "file", + "feature": "arch: amd64", + "expected": false + }, + { + "file": "mimikatz", + "location": "file", + "feature": "arch: i386", + "expected": true + }, + { + "file": "pma16-01", + "location": "function=0x401100", + "feature": "arch: i386", + "expected": true, + "explanation": "arch available at function scope" + }, + { + "file": "pma16-01", + "location": "function=0x401100,bb=0x401130", + "feature": "arch: i386", + "expected": true, + "explanation": "arch available at basic blockscope" + }, + { + "file": "pma16-01", + "location": "file", + "feature": "format: pe", + "expected": true + }, + { + "file": "pma16-01", + "location": "file", + "feature": "format: elf", + "expected": false + }, + { + "file": "mimikatz", + "location": "file", + "feature": "format: pe", + "expected": true + }, + { + "file": "pma16-01", + "location": "function=0x401100", + "feature": "format: pe", + "expected": true, + "explanation": "format available at function scope" + }, + { + "file": "7351f.elf", + "location": "file", + "feature": "os: linux", + 
"expected": true + }, + { + "file": "7351f.elf", + "location": "file", + "feature": "os: windows", + "expected": false + }, + { + "file": "7351f.elf", + "location": "file", + "feature": "format: elf", + "expected": true + }, + { + "file": "7351f.elf", + "location": "file", + "feature": "format: pe", + "expected": false + }, + { + "file": "7351f.elf", + "location": "file", + "feature": "arch: i386", + "expected": false + }, + { + "file": "7351f.elf", + "location": "file", + "feature": "arch: amd64", + "expected": true + }, + { + "file": "7351f.elf", + "location": "function=0x408753", + "feature": "string: /dev/null", + "expected": true + }, + { + "file": "7351f.elf", + "location": "function=0x408753,bb=0x408781", + "feature": "api: open", + "expected": true, + "explanation": "API from ELF import" + }, + { + "file": "79abd", + "location": "function=0x10002385,bb=0x10002385", + "feature": "characteristic: call $+5", + "expected": true + }, + { + "file": "946a9", + "location": "function=0x10001510,bb=0x100015c0", + "feature": "characteristic: call $+5", + "expected": true + }, + { + "file": "2bf18d", + "location": "function=0x4027b3,bb=0x402861,insn=0x40286d", + "feature": "api: __GI_connect", + "expected": true, + "explanation": "API from symbol table alternative name" + }, + { + "file": "2bf18d", + "location": "function=0x4027b3,bb=0x402861,insn=0x40286d", + "feature": "api: connect", + "expected": true, + "explanation": "API from symbol table alternative name" + }, + { + "file": "2bf18d", + "location": "function=0x4027b3,bb=0x402861,insn=0x40286d", + "feature": "api: __libc_connect", + "expected": true, + "explanation": "API from symbol table alternative name" + }, + { + "file": "2bf18d", + "location": "function=0x4088a4", + "feature": "function-name: __GI_connect", + "expected": true, + "explanation": "function name from symbol table alternative name" + }, + { + "file": "2bf18d", + "location": "function=0x4088a4", + "feature": "function-name: connect", + 
"expected": true, + "explanation": "function name from symbol table alternative name" + }, + { + "file": "2bf18d", + "location": "function=0x4088a4", + "feature": "function-name: __libc_connect", + "expected": true, + "explanation": "function name from symbol table alternative name" + }, + { + "file": "mimikatz", + "location": "function=0x401000,bb=0x401000", + "feature": "basic blocks: x", + "expected": true, + "explanation": "basic block feature emitted" + }, + { + "file": "mimikatz", + "location": "file", + "feature": "basic blocks: 1", + "expected": false, + "explanation": "non-existant basic block feature" + } + ] +} From 8280c9b392b523740a5bbd204c3d7530bc94ba9e Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 15 Apr 2026 11:16:06 +0200 Subject: [PATCH 3/6] tests: migrate to data-driven fixtures for feature counts --- tests/fixtures.py | 84 ++++++++++------------------ tests/fixtures/feature-presence.json | 61 ++++++++++++++++++++ 2 files changed, 89 insertions(+), 56 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 44005f141..a2dff5979 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -70,28 +70,47 @@ def parse_feature_string(s: str) -> Feature | ceng.Range | ceng.Statement: FEATURE_MARKS: dict[tuple[str, str, str], list[dict]] = {} -def _load_feature_tests() -> tuple[list[tuple], list[tuple]]: +def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[tuple]]: with (CD / "fixtures" / "feature-presence.json").open("r") as f: data = json.load(f) presence_tests = [] symtab_tests = [] + count_tests = [] + ghidra_count_tests = [] for entry in data["features"]: - feature = parse_feature_string(entry["feature"]) - test_tuple = (entry["file"], entry["location"], feature, entry["expected"]) + feature_str = entry["feature"] + tags = entry.get("tags", []) if "marks" in entry: - FEATURE_MARKS[(entry["file"], entry["location"], entry["feature"])] = entry["marks"] + FEATURE_MARKS[(entry["file"], 
entry["location"], feature_str)] = entry["marks"] - if "symtab" in entry.get("tags", []): - symtab_tests.append(test_tuple) + if feature_str.startswith("count("): + key, _, value_str = feature_str.partition(": ") + count = int(value_str) + range_obj = capa.rules.build_feature(key, count, initial_description=None) + inner_feature = range_obj.child + test_tuple = (entry["file"], entry["location"], inner_feature, count) + + if "ghidra" in tags: + ghidra_count_tests.append(test_tuple) + else: + count_tests.append(test_tuple) else: - presence_tests.append(test_tuple) + feature = parse_feature_string(feature_str) + test_tuple = (entry["file"], entry["location"], feature, entry["expected"]) + + if "symtab" in tags: + symtab_tests.append(test_tuple) + else: + presence_tests.append(test_tuple) presence_tests.sort(key=lambda t: (t[0], t[1])) symtab_tests.sort(key=lambda t: (t[0], t[1])) - return presence_tests, symtab_tests + count_tests.sort(key=lambda t: (t[0], t[1])) + ghidra_count_tests.sort(key=lambda t: (t[0], t[1])) + return presence_tests, symtab_tests, count_tests, ghidra_count_tests @lru_cache(maxsize=1) @@ -940,7 +959,7 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -FEATURE_PRESENCE_TESTS, FEATURE_SYMTAB_FUNC_TESTS = _load_feature_tests() +FEATURE_PRESENCE_TESTS, FEATURE_SYMTAB_FUNC_TESTS, FEATURE_COUNT_TESTS, FEATURE_COUNT_TESTS_GHIDRA = _load_feature_tests() FEATURE_PRESENCE_TESTS_DOTNET = sorted( @@ -1476,59 +1495,12 @@ def parametrize(params, values, **kwargs): ) -FEATURE_COUNT_TESTS = [ - ("mimikatz", "function=0x40E5C2", capa.features.basicblock.BasicBlock(), 7), - ( - "mimikatz", - "function=0x4702FD", - capa.features.common.Characteristic("calls from"), - 0, - ), - ( - "mimikatz", - "function=0x40E5C2", - capa.features.common.Characteristic("calls from"), - 3, - ), - ( - "mimikatz", - "function=0x4556E5", - capa.features.common.Characteristic("calls to"), - 0, - ), - ( - "mimikatz", - 
"function=0x40B1F1", - capa.features.common.Characteristic("calls to"), - 3, - ), -] - - FEATURE_COUNT_TESTS_DOTNET = [ ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), 1), ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls from"), 9), ] -FEATURE_COUNT_TESTS_GHIDRA = [ - # Ghidra may render functions as labels, as well as provide differing amounts of call references - ( - "mimikatz", - "function=0x4702FD", - capa.features.common.Characteristic("calls from"), - 0, - ), - ( - "mimikatz", - "function=0x401bf1", - capa.features.common.Characteristic("calls to"), - 2, - ), - ("mimikatz", "function=0x401000", capa.features.basicblock.BasicBlock(), 3), -] - - def do_test_feature_presence(get_extractor, sample, scope, feature, expected): extractor = get_extractor(sample) features = scope(extractor) diff --git a/tests/fixtures/feature-presence.json b/tests/fixtures/feature-presence.json index ccc7d0264..4f64f1489 100644 --- a/tests/fixtures/feature-presence.json +++ b/tests/fixtures/feature-presence.json @@ -1079,6 +1079,67 @@ "feature": "basic blocks: 1", "expected": false, "explanation": "non-existant basic block feature" + }, + + { + "file": "mimikatz", + "location": "function=0x40E5C2", + "feature": "count(basic blocks): 7", + "expected": true, + "explanation": "7 basic blocks in function" + }, + { + "file": "mimikatz", + "location": "function=0x4702FD", + "feature": "count(characteristic(calls from)): 0", + "expected": true, + "explanation": "function has no calls" + }, + { + "file": "mimikatz", + "location": "function=0x40E5C2", + "feature": "count(characteristic(calls from)): 3", + "expected": true, + "explanation": "function has 3 calls" + }, + { + "file": "mimikatz", + "location": "function=0x4556E5", + "feature": "count(characteristic(calls to)): 0", + "expected": true, + "explanation": "function has no callers" + }, + { + "file": "mimikatz", + "location": "function=0x40B1F1", + "feature": 
"count(characteristic(calls to)): 3", + "expected": true, + "explanation": "function has 3 callers" + }, + + { + "file": "mimikatz", + "location": "function=0x4702FD", + "feature": "count(characteristic(calls from)): 0", + "expected": true, + "tags": ["ghidra"], + "explanation": "Ghidra: function has no calls" + }, + { + "file": "mimikatz", + "location": "function=0x401bf1", + "feature": "count(characteristic(calls to)): 2", + "expected": true, + "tags": ["ghidra"], + "explanation": "Ghidra: function has 2 callers" + }, + { + "file": "mimikatz", + "location": "function=0x401000", + "feature": "count(basic blocks): 3", + "expected": true, + "tags": ["ghidra"], + "explanation": "Ghidra: 3 basic blocks in function" } ] } From 88d3e24c8d6f490cfff2569fc5d37052744c9644 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 15 Apr 2026 11:29:48 +0200 Subject: [PATCH 4/6] tests: migrate to data-driven fixtures for .NET features Moves FEATURE_PRESENCE_TESTS_DOTNET and FEATURE_COUNT_TESTS_DOTNET from hardcoded Python lists in tests/fixtures.py into the shared tests/fixtures/feature-presence.json data file, tagged with "dotnet". 
--- tests/fixtures.py | 514 +------------------- tests/fixtures/feature-presence.json | 687 +++++++++++++++++++++++++++ 2 files changed, 712 insertions(+), 489 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index a2dff5979..09ba12b88 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -70,7 +70,7 @@ def parse_feature_string(s: str) -> Feature | ceng.Range | ceng.Statement: FEATURE_MARKS: dict[tuple[str, str, str], list[dict]] = {} -def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[tuple]]: +def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[tuple], list[tuple], list[tuple]]: with (CD / "fixtures" / "feature-presence.json").open("r") as f: data = json.load(f) @@ -78,6 +78,8 @@ def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[t symtab_tests = [] count_tests = [] ghidra_count_tests = [] + dotnet_presence_tests = [] + dotnet_count_tests = [] for entry in data["features"]: feature_str = entry["feature"] @@ -95,6 +97,8 @@ def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[t if "ghidra" in tags: ghidra_count_tests.append(test_tuple) + elif "dotnet" in tags: + dotnet_count_tests.append(test_tuple) else: count_tests.append(test_tuple) else: @@ -103,6 +107,8 @@ def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[t if "symtab" in tags: symtab_tests.append(test_tuple) + elif "dotnet" in tags: + dotnet_presence_tests.append(test_tuple) else: presence_tests.append(test_tuple) @@ -110,7 +116,16 @@ def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[t symtab_tests.sort(key=lambda t: (t[0], t[1])) count_tests.sort(key=lambda t: (t[0], t[1])) ghidra_count_tests.sort(key=lambda t: (t[0], t[1])) - return presence_tests, symtab_tests, count_tests, ghidra_count_tests + dotnet_presence_tests.sort(key=lambda t: (t[0], t[1])) + dotnet_count_tests.sort(key=lambda t: (t[0], t[1])) + return ( + 
presence_tests, + symtab_tests, + count_tests, + ghidra_count_tests, + dotnet_presence_tests, + dotnet_count_tests, + ) @lru_cache(maxsize=1) @@ -959,488 +974,15 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -FEATURE_PRESENCE_TESTS, FEATURE_SYMTAB_FUNC_TESTS, FEATURE_COUNT_TESTS, FEATURE_COUNT_TESTS_GHIDRA = _load_feature_tests() - +( + FEATURE_PRESENCE_TESTS, + FEATURE_SYMTAB_FUNC_TESTS, + FEATURE_COUNT_TESTS, + FEATURE_COUNT_TESTS_GHIDRA, + FEATURE_PRESENCE_TESTS_DOTNET, + FEATURE_COUNT_TESTS_DOTNET, +) = _load_feature_tests() -FEATURE_PRESENCE_TESTS_DOTNET = sorted( - [ - ("b9f5b", "file", Arch(ARCH_I386), True), - ("b9f5b", "file", Arch(ARCH_AMD64), False), - ("mixed-mode-64", "file", Arch(ARCH_AMD64), True), - ("mixed-mode-64", "file", Arch(ARCH_I386), False), - ( - "mixed-mode-64", - "file", - capa.features.common.Characteristic("mixed mode"), - True, - ), - ( - "hello-world", - "file", - capa.features.common.Characteristic("mixed mode"), - False, - ), - ("b9f5b", "file", OS(OS_ANY), True), - ("b9f5b", "file", Format(FORMAT_PE), True), - ("b9f5b", "file", Format(FORMAT_DOTNET), True), - ( - "hello-world", - "file", - capa.features.file.FunctionName("HelloWorld::Main"), - True, - ), - ( - "hello-world", - "file", - capa.features.file.FunctionName("HelloWorld::ctor"), - True, - ), - ( - "hello-world", - "file", - capa.features.file.FunctionName("HelloWorld::cctor"), - False, - ), - ("hello-world", "file", capa.features.common.String("Hello World!"), True), - ("hello-world", "file", capa.features.common.Class("HelloWorld"), True), - ("hello-world", "file", capa.features.common.Class("System.Console"), True), - ( - "hello-world", - "file", - capa.features.common.Namespace("System.Diagnostics"), - True, - ), - ( - "hello-world", - "function=0x250", - capa.features.common.String("Hello World!"), - True, - ), - ( - "hello-world", - "function=0x250, bb=0x250, insn=0x252", - 
capa.features.common.String("Hello World!"), - True, - ), - ( - "hello-world", - "function=0x250, bb=0x250, insn=0x257", - capa.features.common.Class("System.Console"), - True, - ), - ( - "hello-world", - "function=0x250, bb=0x250, insn=0x257", - capa.features.common.Namespace("System"), - True, - ), - ( - "hello-world", - "function=0x250", - capa.features.insn.API("System.Console::WriteLine"), - True, - ), - ( - "hello-world", - "file", - capa.features.file.Import("System.Console::WriteLine"), - True, - ), - ( - "_1c444", - "file", - capa.features.common.String(r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall"), - True, - ), - ("_1c444", "file", capa.features.common.String("get_IsAlive"), True), - ( - "_1c444", - "file", - capa.features.file.Import("gdi32.CreateCompatibleBitmap"), - True, - ), - ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), - ( - "_1c444", - "file", - capa.features.file.Import("gdi32::CreateCompatibleBitmap"), - False, - ), - ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), - # not extracting dll anymore - ( - "_1c444", - "function=0x1F68", - capa.features.insn.API("user32.GetWindowDC"), - False, - ), - ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), - ( - "_1c444", - "token=0x600001D", - capa.features.common.Characteristic("calls to"), - True, - ), - ( - "_1c444", - "token=0x6000018", - capa.features.common.Characteristic("calls to"), - False, - ), - ( - "_1c444", - "token=0x600001D", - capa.features.common.Characteristic("calls from"), - True, - ), - ( - "_1c444", - "token=0x600000F", - capa.features.common.Characteristic("calls from"), - False, - ), - ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), - ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), - ( - "_692f", - "token=0x6000004", - capa.features.insn.API("System.Linq.Enumerable::First"), - True, - ), # generic method - ( - "_692f", - "token=0x6000004", - 
capa.features.insn.Property("System.Linq.Enumerable::First"), - False, - ), # generic method - ( - "_692f", - "token=0x6000004", - capa.features.common.Namespace("System.Linq"), - True, - ), # generic method - ( - "_692f", - "token=0x6000004", - capa.features.common.Class("System.Linq.Enumerable"), - True, - ), # generic method - ( - "_1c444", - "token=0x6000020", - capa.features.common.Namespace("Reqss"), - True, - ), # ldftn - ( - "_1c444", - "token=0x6000020", - capa.features.common.Class("Reqss.Reqss"), - True, - ), # ldftn - ( - "_1c444", - "function=0x1F59, bb=0x1F59, insn=0x1F5B", - capa.features.common.Characteristic("unmanaged call"), - True, - ), - ( - "_1c444", - "function=0x2544", - capa.features.common.Characteristic("unmanaged call"), - False, - ), - # same as above but using token instead of function - ( - "_1c444", - "token=0x6000088", - capa.features.common.Characteristic("unmanaged call"), - False, - ), - ( - "_1c444", - "function=0x1F68, bb=0x1F68, insn=0x1FF9", - capa.features.insn.API("System.Drawing.Image::FromHbitmap"), - True, - ), - ( - "_1c444", - "function=0x1F68, bb=0x1F68, insn=0x1FF9", - capa.features.insn.API("FromHbitmap"), - False, - ), - ( - "_1c444", - "token=0x600002B", - capa.features.insn.Property("System.IO.FileInfo::Length", access=FeatureAccess.READ), - True, - ), # MemberRef property access - ( - "_1c444", - "token=0x600002B", - capa.features.insn.Property("System.IO.FileInfo::Length"), - True, - ), # MemberRef property access - ( - "_1c444", - "token=0x6000081", - capa.features.insn.API("System.Diagnostics.Process::Start"), - True, - ), # MemberRef property access - ( - "_1c444", - "token=0x6000081", - capa.features.insn.Property( - "System.Diagnostics.ProcessStartInfo::UseShellExecute", - access=FeatureAccess.WRITE, - ), # MemberRef property access - True, - ), - ( - "_1c444", - "token=0x6000081", - capa.features.insn.Property( - "System.Diagnostics.ProcessStartInfo::WorkingDirectory", - access=FeatureAccess.WRITE, - ), # 
MemberRef property access - True, - ), - ( - "_1c444", - "token=0x6000081", - capa.features.insn.Property( - "System.Diagnostics.ProcessStartInfo::FileName", - access=FeatureAccess.WRITE, - ), # MemberRef property access - True, - ), - ( - "_1c444", - "token=0x6000087", - capa.features.insn.Property( - "Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE - ), # Field property access - True, - ), - ( - "_1c444", - "token=0x600008A", - capa.features.insn.Property( - "Sockets.MySocket::isConnected", access=FeatureAccess.WRITE - ), # Field property access - True, - ), - ( - "_1c444", - "token=0x600008A", - capa.features.common.Class("Sockets.MySocket"), # Field property access - True, - ), - ( - "_1c444", - "token=0x600008A", - capa.features.common.Namespace("Sockets"), # Field property access - True, - ), - ( - "_1c444", - "token=0x600008A", - capa.features.insn.Property( - "Sockets.MySocket::onConnected", access=FeatureAccess.READ - ), # Field property access - True, - ), - ( - "_0953c", - "token=0x6000004", - capa.features.insn.Property( - "System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ - ), # MemberRef property access - True, - ), - ( - "_0953c", - "token=0x6000004", - capa.features.common.Class("System.Diagnostics.Debugger"), # MemberRef property access - True, - ), - ( - "_0953c", - "token=0x6000004", - capa.features.common.Namespace("System.Diagnostics"), # MemberRef property access - True, - ), - ( - "_692f", - "token=0x6000006", - capa.features.insn.Property( - "System.Management.Automation.PowerShell::Streams", - access=FeatureAccess.READ, - ), # MemberRef property access - False, - ), - ( - "_387f15", - "token=0x600009E", - capa.features.insn.Property( - "Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE::geoplugin_countryCode", - access=FeatureAccess.READ, - ), # MethodDef property access - True, - ), - ( - "_387f15", - "token=0x600009E", - capa.features.common.Class( - 
"Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE" - ), # MethodDef property access - True, - ), - ( - "_387f15", - "token=0x600009E", - capa.features.common.Namespace("Modulo"), # MethodDef property access - True, - ), - ( - "_039a6", - "token=0x6000007", - capa.features.insn.API("System.Reflection.Assembly::Load"), - True, - ), - ( - "_039a6", - "token=0x600001D", - capa.features.insn.Property("StagelessHollow.Arac::Marka", access=FeatureAccess.READ), # MethodDef method - True, - ), - ( - "_039a6", - "token=0x600001C", - capa.features.insn.Property("StagelessHollow.Arac::Marka", access=FeatureAccess.READ), # MethodDef method - False, - ), - ( - "_039a6", - "token=0x6000023", - capa.features.insn.Property( - "System.Runtime.CompilerServices.AsyncTaskMethodBuilder::Task", - access=FeatureAccess.READ, - ), # MemberRef method - False, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer0"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer1"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer0/myclass_inner0_0"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer0/myclass_inner0_1"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_0"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_1"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_0/myclass_inner_inner"), - True, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("myclass_inner_inner"), - False, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("myclass_inner1_0"), - False, - ), - ( - "nested_typedef", - "file", - 
capa.features.common.Class("myclass_inner1_1"), - False, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("myclass_inner0_0"), - False, - ), - ( - "nested_typedef", - "file", - capa.features.common.Class("myclass_inner0_1"), - False, - ), - ( - "nested_typeref", - "file", - capa.features.file.Import("Android.OS.Build/VERSION::SdkInt"), - True, - ), - ( - "nested_typeref", - "file", - capa.features.file.Import("Android.Media.Image/Plane::Buffer"), - True, - ), - ( - "nested_typeref", - "file", - capa.features.file.Import("Android.Provider.Telephony/Sent/Sent::ContentUri"), - True, - ), - ( - "nested_typeref", - "file", - capa.features.file.Import("Android.OS.Build::SdkInt"), - False, - ), - ( - "nested_typeref", - "file", - capa.features.file.Import("Plane::Buffer"), - False, - ), - ( - "nested_typeref", - "file", - capa.features.file.Import("Sent::ContentUri"), - False, - ), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), -) FEATURE_PRESENCE_TESTS_IDA = [ # file/imports @@ -1495,12 +1037,6 @@ def parametrize(params, values, **kwargs): ) -FEATURE_COUNT_TESTS_DOTNET = [ - ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), 1), - ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls from"), 9), -] - - def do_test_feature_presence(get_extractor, sample, scope, feature, expected): extractor = get_extractor(sample) features = scope(extractor) diff --git a/tests/fixtures/feature-presence.json b/tests/fixtures/feature-presence.json index 4f64f1489..a768f392a 100644 --- a/tests/fixtures/feature-presence.json +++ b/tests/fixtures/feature-presence.json @@ -83,6 +83,47 @@ { "key": "034b7231.frz", "path": "fixtures/freeze/034b7231a49387604e81a5a5d2fe7e08f6982c418a28b719d2faace3c312ebb5.exe_.frz" + }, + + { + "key": "b9f5b", + "path": "data/b9f5bd514485fb06da39beff051b9fdc.exe_" + }, + { + "key": "mixed-mode-64", + "path": 
"data/dotnet/dnfile-testfiles/mixed-mode/ModuleCode/bin/ModuleCode_amd64.exe" + }, + { + "key": "hello-world", + "path": "data/dotnet/dnfile-testfiles/hello-world/hello-world.exe" + }, + { + "key": "_1c444", + "path": "data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_" + }, + { + "key": "_692f", + "path": "data/dotnet/692f7fd6d198e804d6af98eb9e390d61.exe_" + }, + { + "key": "_0953c", + "path": "data/0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_" + }, + { + "key": "_039a6", + "path": "data/039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_" + }, + { + "key": "_387f15", + "path": "data/dotnet/387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + }, + { + "key": "nested_typedef", + "path": "data/dotnet/dd9098ff91717f4906afe9dafdfa2f52.exe_" + }, + { + "key": "nested_typeref", + "path": "data/dotnet/2c7d60f77812607dec5085973ff76cea.dll_" } ], "features": [ @@ -1140,6 +1181,652 @@ "expected": true, "tags": ["ghidra"], "explanation": "Ghidra: 3 basic blocks in function" + }, + + { + "file": "b9f5b", + "location": "file", + "feature": "arch: i386", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "b9f5b", + "location": "file", + "feature": "arch: amd64", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "mixed-mode-64", + "location": "file", + "feature": "arch: amd64", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "mixed-mode-64", + "location": "file", + "feature": "arch: i386", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "mixed-mode-64", + "location": "file", + "feature": "characteristic: mixed mode", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "characteristic: mixed mode", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "b9f5b", + "location": "file", + "feature": "os: any", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "b9f5b", + "location": "file", + "feature": 
"format: pe", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "b9f5b", + "location": "file", + "feature": "format: dotnet", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "function-name: HelloWorld::Main", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "function-name: HelloWorld::ctor", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "function-name: HelloWorld::cctor", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "string: Hello World!", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "class: HelloWorld", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "class: System.Console", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "file", + "feature": "namespace: System.Diagnostics", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "function=0x250", + "feature": "string: Hello World!", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "function=0x250,bb=0x250,insn=0x252", + "feature": "string: Hello World!", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "function=0x250,bb=0x250,insn=0x257", + "feature": "class: System.Console", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "function=0x250,bb=0x250,insn=0x257", + "feature": "namespace: System", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": "function=0x250", + "feature": "api: System.Console::WriteLine", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "hello-world", + "location": 
"file", + "feature": "import: System.Console::WriteLine", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "file", + "feature": "string: SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "file", + "feature": "string: get_IsAlive", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "file", + "feature": "import: gdi32.CreateCompatibleBitmap", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "file", + "feature": "import: CreateCompatibleBitmap", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "file", + "feature": "import: gdi32::CreateCompatibleBitmap", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "function=0x1F68", + "feature": "api: GetWindowDC", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "function=0x1F68", + "feature": "number: 0xCC0020", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x600001D", + "feature": "characteristic: calls to", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x6000018", + "feature": "characteristic: calls to", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x600001D", + "feature": "characteristic: calls from", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x600000F", + "feature": "characteristic: calls from", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "function=0x1F68", + "feature": "number: 0x0", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "function=0x1F68", + "feature": "number: 0x1", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "_692f", + "location": 
"token=0x6000004", + "feature": "api: System.Linq.Enumerable::First", + "expected": true, + "tags": ["dotnet"], + "explanation": "generic method" + }, + { + "file": "_692f", + "location": "token=0x6000004", + "feature": "property: System.Linq.Enumerable::First", + "expected": false, + "tags": ["dotnet"], + "explanation": "generic method" + }, + { + "file": "_692f", + "location": "token=0x6000004", + "feature": "namespace: System.Linq", + "expected": true, + "tags": ["dotnet"], + "explanation": "generic method" + }, + { + "file": "_692f", + "location": "token=0x6000004", + "feature": "class: System.Linq.Enumerable", + "expected": true, + "tags": ["dotnet"], + "explanation": "generic method" + }, + { + "file": "_1c444", + "location": "token=0x6000020", + "feature": "namespace: Reqss", + "expected": true, + "tags": ["dotnet"], + "explanation": "ldftn" + }, + { + "file": "_1c444", + "location": "token=0x6000020", + "feature": "class: Reqss.Reqss", + "expected": true, + "tags": ["dotnet"], + "explanation": "ldftn" + }, + { + "file": "_1c444", + "location": "function=0x1F59,bb=0x1F59,insn=0x1F5B", + "feature": "characteristic: unmanaged call", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "function=0x2544", + "feature": "characteristic: unmanaged call", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x6000088", + "feature": "characteristic: unmanaged call", + "expected": false, + "tags": ["dotnet"], + "explanation": "same as above but using token instead of function" + }, + { + "file": "_1c444", + "location": "function=0x1F68,bb=0x1F68,insn=0x1FF9", + "feature": "api: System.Drawing.Image::FromHbitmap", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "function=0x1F68,bb=0x1F68,insn=0x1FF9", + "feature": "api: FromHbitmap", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x600002B", + "feature": "property/read: 
System.IO.FileInfo::Length", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_1c444", + "location": "token=0x600002B", + "feature": "property: System.IO.FileInfo::Length", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_1c444", + "location": "token=0x6000081", + "feature": "api: System.Diagnostics.Process::Start", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_1c444", + "location": "token=0x6000081", + "feature": "property/write: System.Diagnostics.ProcessStartInfo::UseShellExecute", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_1c444", + "location": "token=0x6000081", + "feature": "property/write: System.Diagnostics.ProcessStartInfo::WorkingDirectory", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_1c444", + "location": "token=0x6000081", + "feature": "property/write: System.Diagnostics.ProcessStartInfo::FileName", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_1c444", + "location": "token=0x6000087", + "feature": "property/write: Sockets.MySocket::reConnectionDelay", + "expected": true, + "tags": ["dotnet"], + "explanation": "Field property access" + }, + { + "file": "_1c444", + "location": "token=0x600008A", + "feature": "property/write: Sockets.MySocket::isConnected", + "expected": true, + "tags": ["dotnet"], + "explanation": "Field property access" + }, + { + "file": "_1c444", + "location": "token=0x600008A", + "feature": "class: Sockets.MySocket", + "expected": true, + "tags": ["dotnet"], + "explanation": "Field property access" + }, + { + "file": "_1c444", + "location": "token=0x600008A", + "feature": "namespace: Sockets", + "expected": true, + "tags": ["dotnet"], + "explanation": "Field property 
access" + }, + { + "file": "_1c444", + "location": "token=0x600008A", + "feature": "property/read: Sockets.MySocket::onConnected", + "expected": true, + "tags": ["dotnet"], + "explanation": "Field property access" + }, + { + "file": "_0953c", + "location": "token=0x6000004", + "feature": "property/read: System.Diagnostics.Debugger::IsAttached", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_0953c", + "location": "token=0x6000004", + "feature": "class: System.Diagnostics.Debugger", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_0953c", + "location": "token=0x6000004", + "feature": "namespace: System.Diagnostics", + "expected": true, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_692f", + "location": "token=0x6000006", + "feature": "property/read: System.Management.Automation.PowerShell::Streams", + "expected": false, + "tags": ["dotnet"], + "explanation": "MemberRef property access" + }, + { + "file": "_387f15", + "location": "token=0x600009E", + "feature": "property/read: Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE::geoplugin_countryCode", + "expected": true, + "tags": ["dotnet"], + "explanation": "MethodDef property access" + }, + { + "file": "_387f15", + "location": "token=0x600009E", + "feature": "class: Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE", + "expected": true, + "tags": ["dotnet"], + "explanation": "MethodDef property access" + }, + { + "file": "_387f15", + "location": "token=0x600009E", + "feature": "namespace: Modulo", + "expected": true, + "tags": ["dotnet"], + "explanation": "MethodDef property access" + }, + { + "file": "_039a6", + "location": "token=0x6000007", + "feature": "api: System.Reflection.Assembly::Load", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_039a6", + "location": "token=0x600001D", + "feature": 
"property/read: StagelessHollow.Arac::Marka", + "expected": true, + "tags": ["dotnet"], + "explanation": "MethodDef method" + }, + { + "file": "_039a6", + "location": "token=0x600001C", + "feature": "property/read: StagelessHollow.Arac::Marka", + "expected": false, + "tags": ["dotnet"], + "explanation": "MethodDef method" + }, + { + "file": "_039a6", + "location": "token=0x6000023", + "feature": "property/read: System.Runtime.CompilerServices.AsyncTaskMethodBuilder::Task", + "expected": false, + "tags": ["dotnet"], + "explanation": "MemberRef method" + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer0", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer1", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer0/myclass_inner0_0", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer0/myclass_inner0_1", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer1/myclass_inner1_0", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer1/myclass_inner1_1", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: mynamespace.myclass_outer1/myclass_inner1_0/myclass_inner_inner", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: myclass_inner_inner", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: myclass_inner1_0", + "expected": false, + "tags": ["dotnet"] + 
}, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: myclass_inner1_1", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: myclass_inner0_0", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "nested_typedef", + "location": "file", + "feature": "class: myclass_inner0_1", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "nested_typeref", + "location": "file", + "feature": "import: Android.OS.Build/VERSION::SdkInt", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typeref", + "location": "file", + "feature": "import: Android.Media.Image/Plane::Buffer", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typeref", + "location": "file", + "feature": "import: Android.Provider.Telephony/Sent/Sent::ContentUri", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "nested_typeref", + "location": "file", + "feature": "import: Android.OS.Build::SdkInt", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "nested_typeref", + "location": "file", + "feature": "import: Plane::Buffer", + "expected": false, + "tags": ["dotnet"] + }, + { + "file": "nested_typeref", + "location": "file", + "feature": "import: Sent::ContentUri", + "expected": false, + "tags": ["dotnet"] + }, + + { + "file": "_1c444", + "location": "token=0x600001D", + "feature": "count(characteristic(calls to)): 1", + "expected": true, + "tags": ["dotnet"] + }, + { + "file": "_1c444", + "location": "token=0x600001D", + "feature": "count(characteristic(calls from)): 9", + "expected": true, + "tags": ["dotnet"] } ] } From ad6aeb0cc2a40a59c11ec974838e45ec414ef972 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 15 Apr 2026 12:13:08 +0200 Subject: [PATCH 5/6] tests: add data-driven fixtures for ELF features --- tests/fixtures.py | 4 +++ tests/fixtures/feature-presence.json | 54 ++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 
deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 09ba12b88..2eae0794e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -581,6 +581,10 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_" elif name == "7351f.elf": return CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_" + elif name == "055da8e6.elf": + return CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_" + elif name == "bb38149.elf": + return CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_" elif name.startswith("79abd"): return CD / "data" / "79abd17391adc6251ecdc58d13d76baf.dll_" elif name.startswith("946a9"): diff --git a/tests/fixtures/feature-presence.json b/tests/fixtures/feature-presence.json index a768f392a..765718ed3 100644 --- a/tests/fixtures/feature-presence.json +++ b/tests/fixtures/feature-presence.json @@ -24,6 +24,14 @@ "key": "7351f.elf", "path": "data/7351f8a40c5450557b24622417fc478d.elf_" }, + { + "key": "055da8e6.elf", + "path": "data/055da8e6ccfe5a9380231ea04b850e18.elf_" + }, + { + "key": "bb38149.elf", + "path": "data/bb38149ff4b5c95722b83f24ca27a42b.elf_" + }, { "key": "al-khaser x64", "path": "data/al-khaser_x64.exe_" @@ -662,9 +670,16 @@ { "file": "pma16-01", "location": "function=0x402F40", - "feature": "string: /www.practicalmalwareanalysis.com/", + "feature": "string: /PRACTICALmalwareANALYSIS/i", "expected": true, - "explanation": "basic regex" + "explanation": "case-insensitive regex" + }, + { + "file": "pma16-01", + "location": "function=0x402F40", + "feature": "string: /www.*/", + "expected": true, + "explanation": "simple regex prefix match" }, { "file": "pma16-01", @@ -1053,6 +1068,41 @@ "expected": true, "explanation": "API from ELF import" }, + { + "file": "055da8e6.elf", + "location": "file", + "feature": "import: puts", + "expected": true, + "explanation": "ELF import promoted from elffile feature tests" + }, + { + "file": "055da8e6.elf", + 
"location": "file", + "feature": "section: .text", + "expected": true, + "explanation": "ELF section promoted from primary presence fixture" + }, + { + "file": "bb38149.elf", + "location": "file", + "feature": "import: __android_log_print", + "expected": true, + "explanation": "stripped ELF import promoted from elffile feature tests" + }, + { + "file": "bb38149.elf", + "location": "file", + "feature": "export: Java_o_ac_a", + "expected": true, + "explanation": "stripped ELF export promoted from elffile feature tests" + }, + { + "file": "bb38149.elf", + "location": "file", + "feature": "section: .dynamic", + "expected": true, + "explanation": "stripped ELF section promoted into the shared presence fixture" + }, { "file": "79abd", "location": "function=0x10002385,bb=0x10002385", From b68984e5146e1b0a04b4f08a24d71c32e3fccb58 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 15 Apr 2026 13:51:56 +0200 Subject: [PATCH 6/6] tests: consolidate feature test fixtures and runners --- tests/fixtures.py | 732 +++++++++++++----- tests/fixtures/features/README.md | 169 ++++ tests/fixtures/features/binexport.json | 517 +++++++++++++ tests/fixtures/features/binja-db.json | 91 +++ tests/fixtures/features/cape.json | 221 ++++++ tests/fixtures/features/drakvuf.json | 129 +++ .../static.json} | 684 +++++++++++++--- tests/fixtures/features/vmray.json | 233 ++++++ tests/test_binexport_features.py | 415 +--------- tests/test_binja_features.py | 50 +- tests/test_cape_features.py | 109 +-- tests/test_dnfile_features.py | 21 +- tests/test_dotnetfile_features.py | 29 +- tests/test_drakvuf_features.py | 89 +-- tests/test_ghidra_features.py | 43 +- tests/test_idalib_features.py | 66 +- tests/test_pefile_features.py | 33 +- tests/test_viv_features.py | 22 +- tests/test_vmray_features.py | 127 +-- 19 files changed, 2632 insertions(+), 1148 deletions(-) create mode 100644 tests/fixtures/features/README.md create mode 100644 tests/fixtures/features/binexport.json create mode 100644 
tests/fixtures/features/binja-db.json create mode 100644 tests/fixtures/features/cape.json create mode 100644 tests/fixtures/features/drakvuf.json rename tests/fixtures/{feature-presence.json => features/static.json} (83%) create mode 100644 tests/fixtures/features/vmray.json diff --git a/tests/fixtures.py b/tests/fixtures.py index 2eae0794e..4b4dbda4a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -12,127 +12,276 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections +import contextlib import json import logging -import contextlib -import collections -from pathlib import Path +from dataclasses import dataclass, field from functools import lru_cache +from pathlib import Path +from typing import Callable, Optional, Union import pytest -import capa.rules import capa.engine as ceng -import capa.loader +import capa.features.basicblock +import capa.features.common import capa.features.file import capa.features.insn -import capa.features.common -import capa.features.basicblock +import capa.loader +import capa.rules +from capa.features.address import Address from capa.features.common import ( + ARCH_AMD64, + ARCH_I386, + FORMAT_AUTO, + FORMAT_DOTNET, + FORMAT_ELF, + FORMAT_PE, OS, OS_ANY, OS_AUTO, OS_LINUX, - ARCH_I386, - FORMAT_PE, - ARCH_AMD64, - FORMAT_ELF, OS_WINDOWS, - FORMAT_AUTO, - FORMAT_DOTNET, Arch, - Format, Feature, FeatureAccess, + Format, ) -from capa.features.address import Address from capa.features.extractors.base_extractor import ( BBHandle, CallHandle, + FunctionHandle, InsnHandle, - ThreadHandle, ProcessHandle, - FunctionHandle, + ThreadHandle, ) from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor logger = logging.getLogger(__name__) CD = Path(__file__).resolve().parent +FIXTURE_MANIFEST_DIR = CD / "fixtures" / "features" DOTNET_DIR = CD / "data" / "dotnet" DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles" def parse_feature_string(s: str) -> 
Feature | ceng.Range | ceng.Statement: - key, _, value = s.partition(": ") - return capa.rules.build_feature(key, value, initial_description=None) - - -FEATURE_MARKS: dict[tuple[str, str, str], list[dict]] = {} + """ + parse a fixture feature string into a Feature, Range, or Statement. + count(...) fixtures have a string integer value in the JSON + (e.g. `count(basic blocks): 7`). translate that to an int so + `build_feature` returns a Range rather than raising on an + unrecognized range expression. + """ + key, _, value = s.partition(": ") + initial_value: str | int = value + if key.startswith("count(") and key.endswith(")"): + try: + initial_value = int(value) + except ValueError: + # leave as string so that `build_feature` can handle + # "N or more"/"N or fewer"/"(N, M)" range expressions. + initial_value = value + return capa.rules.build_feature(key, initial_value, initial_description=None) + + +# scope-kind tags are derived from the fixture location and inserted +# into the fixture's tag set. backends that only support a subset of +# scopes (e.g., pefile is file-only) can exclude the others via tags. +SCOPE_KIND_TAGS: frozenset[str] = frozenset( + { + "file", + "function", + "basic-block", + "instruction", + "process", + "thread", + "call", + } +) -def _load_feature_tests() -> tuple[list[tuple], list[tuple], list[tuple], list[tuple], list[tuple], list[tuple]]: - with (CD / "fixtures" / "feature-presence.json").open("r") as f: - data = json.load(f) +# feature-type tags are derived from the fixture feature string's key +# and inserted into the fixture's tag set. backends that don't support +# a feature type (e.g., pefile has no function-name features) can +# exclude by tag rather than by Python class. +# +# values come from `capa.rules.parse_feature` keys so the tag names +# align with the textual rule syntax. 
+FEATURE_TYPE_TAGS: frozenset[str] = frozenset( + { + "api", + "string", + "substring", + "bytes", + "number", + "offset", + "mnemonic", + "basic blocks", + "characteristic", + "export", + "import", + "section", + "match", + "function-name", + "os", + "format", + "arch", + "class", + "namespace", + "property", + # operand[N].X is collapsed to operand.X (index-independent) + "operand.number", + "operand.offset", + } +) - presence_tests = [] - symtab_tests = [] - count_tests = [] - ghidra_count_tests = [] - dotnet_presence_tests = [] - dotnet_count_tests = [] +# known fixture tags used for backend selection. +# merged tags that are not listed here will fail collection, to catch typos. +KNOWN_FIXTURE_TAGS: frozenset[str] = ( + frozenset( + { + "static", + "dynamic", + "dotnet", + "elf", + "flirt", + "symtab", + "ghidra", + "binja-db", + "binexport", + "aarch64", + "cape", + "drakvuf", + "vmray", + } + ) + | SCOPE_KIND_TAGS + | FEATURE_TYPE_TAGS +) - for entry in data["features"]: - feature_str = entry["feature"] - tags = entry.get("tags", []) - if "marks" in entry: - FEATURE_MARKS[(entry["file"], entry["location"], feature_str)] = entry["marks"] +def get_scope_kind(location: str) -> str: + """ + classify a fixture location string into a scope kind. - if feature_str.startswith("count("): - key, _, value_str = feature_str.partition(": ") - count = int(value_str) - range_obj = capa.rules.build_feature(key, count, initial_description=None) - inner_feature = range_obj.child - test_tuple = (entry["file"], entry["location"], inner_feature, count) + reuses the same location grammar handled by `resolve_scope()`. 
+ """ + if location == "file": + return "file" + if "insn=" in location: + return "instruction" + if "bb=" in location: + return "basic-block" + if "call=" in location: + return "call" + if "thread=" in location: + return "thread" + if "process=" in location: + return "process" + if location.startswith(("function", "token")): + return "function" + raise ValueError(f"unexpected scope location: {location}") + + +def get_feature_type_tag(feature_str: str) -> str: + """ + extract the feature-type tag from a fixture feature string. + + examples: + `api: CryptSetHashParam` -> `api` + `function-name: __aulldiv` -> `function-name` + `count(basic blocks): 7` -> `basic blocks` + `count(mnemonic(mov)): 3` -> `mnemonic` + `count(characteristic(nzxor))` -> `characteristic` + `operand[1].number: 0xFF` -> `operand.number` + `property/read: Foo.Bar` -> `property` + """ + if feature_str.startswith("count("): + # find the matching close-paren for the outer `count(` so that + # nested parens and colons inside the argument (e.g. strings with + # `:` or `(`) don't confuse a naive partition. + depth = 0 + for i, c in enumerate(feature_str): + if c == "(": + depth += 1 + elif c == ")": + depth -= 1 + if depth == 0: + inner = feature_str[len("count(") : i] + # collapse nested arg: `mnemonic(mov)` -> `mnemonic` + inner, _, _ = inner.partition("(") + return _normalize_feature_key(inner.strip()) + raise ValueError(f"unbalanced parentheses in feature string: {feature_str!r}") + key, _, _ = feature_str.partition(":") + return _normalize_feature_key(key.strip()) + + +def _normalize_feature_key(key: str) -> str: + # collapse `operand[N].X` -> `operand.X` so the tag is index-independent + if key.startswith("operand[") and "]." 
in key: + _, _, suffix = key.partition("].") + return f"operand.{suffix}" + # collapse `property/read` and `property/write` -> `property` + if key.startswith("property/"): + return "property" + return key + + +@dataclass(frozen=True) +class FixtureMark: + backend: str + mark: str + reason: str + + +@dataclass(frozen=True) +class FixtureFile: + key: str + path: Path + tags: frozenset[str] = frozenset() + + +@dataclass(frozen=True) +class FeatureFixture: + sample_key: str + sample_path: Path + location: str + scope_kind: str + statement: Union[Feature, ceng.Range, ceng.Statement] + expected: bool = True + tags: frozenset[str] = frozenset() + marks: tuple[FixtureMark, ...] = () + explanation: Optional[str] = None + comment: Optional[str] = None + + +@dataclass(frozen=True) +class BackendFeaturePolicy: + name: str + get_extractor: Callable[[Path], object] + include_tags: frozenset[str] = field(default_factory=frozenset) + exclude_tags: frozenset[str] = field(default_factory=frozenset) + + def __post_init__(self): + object.__setattr__(self, "include_tags", frozenset(self.include_tags)) + object.__setattr__(self, "exclude_tags", frozenset(self.exclude_tags)) - if "ghidra" in tags: - ghidra_count_tests.append(test_tuple) - elif "dotnet" in tags: - dotnet_count_tests.append(test_tuple) - else: - count_tests.append(test_tuple) - else: - feature = parse_feature_string(feature_str) - test_tuple = (entry["file"], entry["location"], feature, entry["expected"]) - if "symtab" in tags: - symtab_tests.append(test_tuple) - elif "dotnet" in tags: - dotnet_presence_tests.append(test_tuple) - else: - presence_tests.append(test_tuple) - - presence_tests.sort(key=lambda t: (t[0], t[1])) - symtab_tests.sort(key=lambda t: (t[0], t[1])) - count_tests.sort(key=lambda t: (t[0], t[1])) - ghidra_count_tests.sort(key=lambda t: (t[0], t[1])) - dotnet_presence_tests.sort(key=lambda t: (t[0], t[1])) - dotnet_count_tests.sort(key=lambda t: (t[0], t[1])) - return ( - presence_tests, - symtab_tests, - 
count_tests, - ghidra_count_tests, - dotnet_presence_tests, - dotnet_count_tests, - ) +@lru_cache(maxsize=1) +def _load_feature_fixture_manifests() -> tuple[tuple[Path, dict], ...]: + manifests = [] + for path in sorted(FIXTURE_MANIFEST_DIR.glob("*.json")): + with path.open("r") as f: + manifests.append((path, json.load(f))) + if not manifests: + raise ValueError(f"no fixture manifests found in {FIXTURE_MANIFEST_DIR}") + return tuple(manifests) @lru_cache(maxsize=1) def _load_fixture_file_paths() -> dict[str, Path]: - with (CD / "fixtures" / "feature-presence.json").open("r") as f: - data = json.load(f) - return {entry["key"]: CD / entry["path"] for entry in data["files"]} + return {key: file.path for key, file in load_feature_fixture_files().items()} def get_fixture_file_path(key: str) -> Path: @@ -142,27 +291,218 @@ def get_fixture_file_path(key: str) -> Path: return paths[key] -def apply_backend_marks(backend: str, sample_key: str, feature: Feature): - """Apply skip/xfail marks from fixtures for a specific backend. +@lru_cache(maxsize=1) +def load_feature_fixture_files() -> dict[str, FixtureFile]: + """ + load the combined `files` tables from `tests/fixtures/features/*.json`. + + file entries may include a `tags` list that will be inherited + by feature fixtures that reference the file. 
+ """ + files: dict[str, FixtureFile] = {} + file_sources: dict[str, Path] = {} + for manifest_path, data in _load_feature_fixture_manifests(): + for entry in data["files"]: + key = entry["key"] + if key in files: + raise ValueError( + f"duplicate fixture file key {key!r} in {file_sources[key]} and {manifest_path}" + ) + + tags = frozenset(entry.get("tags", [])) + unknown = tags - KNOWN_FIXTURE_TAGS + if unknown: + raise ValueError( + f"unknown fixture tag(s) on file {key!r} in {manifest_path}: {sorted(unknown)}" + ) + files[key] = FixtureFile( + key=key, + path=CD / entry["path"], + tags=tags, + ) + file_sources[key] = manifest_path + return files + - Args: - backend: backend name matching marks in fixtures (e.g. "idalib", "freeze") - sample_key: the file key from fixtures (e.g. "mimikatz", "pma12-04") - feature: the parsed Feature object to match against +@lru_cache(maxsize=1) +def load_feature_fixtures() -> tuple[FeatureFixture, ...]: """ - for (mk, _ml, mf), marks in FEATURE_MARKS.items(): - if mk != sample_key: - continue - if parse_feature_string(mf) != feature: + load the full list of feature fixtures from `tests/fixtures/features/*.json`. + + merges file-level tags into feature-level tags, validates tags against + the known registry, parses the statement (including `count(...)`), and + defaults `expected` to True. 
+ """ + files = load_feature_fixture_files() + fixtures_: list[FeatureFixture] = [] + for manifest_path, data in _load_feature_fixture_manifests(): + for entry in data["features"]: + key = entry["file"] + if key not in files: + raise ValueError( + f"unknown fixture file key referenced by feature in {manifest_path}: {key!r}" + ) + file = files[key] + + feature_str: str = entry["feature"] + feature_tags = frozenset(entry.get("tags", [])) + merged_tags = file.tags | feature_tags + unknown = merged_tags - KNOWN_FIXTURE_TAGS + if unknown: + raise ValueError( + f"unknown fixture tag(s) on feature {feature_str!r} for file {key!r} in {manifest_path}: {sorted(unknown)}" + ) + + location = entry["location"] + statement = parse_feature_string(feature_str) + scope_kind = get_scope_kind(location) + feature_type_tag = get_feature_type_tag(feature_str) + # scope-kind and feature-type tags are auto-derived so that + # backend policies can include/exclude scopes and feature types + # purely via `include_tags`/`exclude_tags`. they're drawn from + # the known-tag registry so no re-validation is needed here. 
+ merged_tags = merged_tags | {scope_kind, feature_type_tag} + expected = entry.get("expected", True) + marks = tuple( + FixtureMark(backend=m["backend"], mark=m["mark"], reason=m["reason"]) + for m in entry.get("marks", []) + ) + + fixtures_.append( + FeatureFixture( + sample_key=key, + sample_path=file.path, + location=location, + scope_kind=scope_kind, + statement=statement, + expected=expected, + tags=merged_tags, + marks=marks, + explanation=entry.get("explanation"), + comment=entry.get("comment"), + ) + ) + + fixtures_.sort(key=lambda f: (f.sample_key, f.location)) + return tuple(fixtures_) + + +@dataclass(frozen=True) +class FixtureSelectionSummary: + total: int + selected: int + excluded: int + excluded_by_tag: dict[str, int] + + +def _fixture_is_included(policy: BackendFeaturePolicy, fixture: FeatureFixture) -> bool: + """decide whether a fixture is selected by a policy.""" + if policy.include_tags and not (fixture.tags & policy.include_tags): + return False + if fixture.tags & policy.exclude_tags: + return False + return True + + +def select_feature_fixtures(policy: BackendFeaturePolicy) -> list[FeatureFixture]: + """ + select fixtures matching a backend policy. + + rules (applied in order): + 1. start from all fixtures + 2. if `include_tags` is non-empty, keep fixtures whose tags intersect it + 3. drop fixtures whose tags intersect `exclude_tags` + + scope kinds and feature types are exposed as auto-derived tags, so + a policy can restrict scope or feature type via `exclude_tags` too. + """ + return [f for f in load_feature_fixtures() if _fixture_is_included(policy, f)] + + +def summarize_feature_selection( + policy: BackendFeaturePolicy, +) -> FixtureSelectionSummary: + """ + summarize the effect of a policy's fixture selection. + + useful for debug output and maintenance scripts. 
+ """ + all_fixtures = load_feature_fixtures() + excluded_by_tag: dict[str, int] = collections.defaultdict(int) + selected = 0 + for fixture in all_fixtures: + if _fixture_is_included(policy, fixture): + selected += 1 continue - for m in marks: - if m["backend"] != backend: + for tag in sorted(fixture.tags): + excluded_by_tag[tag] += 1 + return FixtureSelectionSummary( + total=len(all_fixtures), + selected=selected, + excluded=len(all_fixtures) - selected, + excluded_by_tag=dict(excluded_by_tag), + ) + + +def _fixture_test_id(fixture: FeatureFixture) -> str: + """ + build a readable pytest parameter id for a fixture. + + mirrors the legacy `make_test_id` shape: sample-location-statement-expected. + """ + return "-".join( + [ + fixture.sample_key, + fixture.location, + str(fixture.statement), + str(fixture.expected), + ] + ) + + +def parametrize_backend_feature_fixtures(policy: BackendFeaturePolicy): + """ + build a pytest parametrize decorator for a backend's selected fixtures. + + applies JSON marks matching `policy.name` to the parameter set, so + backend-specific skip/xfail behavior stays in the JSON data file. + """ + selected = select_feature_fixtures(policy) + params = [] + for fixture in selected: + marks = [] + for mark in fixture.marks: + if mark.backend != policy.name: continue - if m["mark"] == "skip": - pytest.skip(m["reason"]) - elif m["mark"] == "xfail": - pytest.xfail(m["reason"]) - return + if mark.mark == "skip": + marks.append(pytest.mark.skip(reason=mark.reason)) + elif mark.mark == "xfail": + marks.append(pytest.mark.xfail(reason=mark.reason)) + else: + raise ValueError( + f"unknown mark {mark.mark!r} for backend {policy.name!r}" + ) + params.append(pytest.param(fixture, marks=marks, id=_fixture_test_id(fixture))) + return pytest.mark.parametrize("feature_fixture", params) + + +def run_feature_fixture(policy: BackendFeaturePolicy, fixture: FeatureFixture) -> None: + """ + generic runner that evaluates a feature fixture against a backend. 
+ + handles both plain features and `count(...)` statements via one + `evaluate` path, comparing the boolean result to `fixture.expected`. + """ + extractor = policy.get_extractor(fixture.sample_path) + scope = resolve_scope(fixture.location) + features = scope(extractor) + result = fixture.statement.evaluate(features) + actual = bool(result) + if fixture.expected: + msg = f"{fixture.statement} should match in {fixture.location}" + else: + msg = f"{fixture.statement} should not match in {fixture.location}" + assert actual == fixture.expected, msg @contextlib.contextmanager @@ -206,8 +546,8 @@ def xfail(condition, reason=None): # need to limit cache size so GitHub Actions doesn't run out of memory, see #545 @lru_cache(maxsize=1) def get_viv_extractor(path: Path): - import capa.main import capa.features.extractors.viv.extractor + import capa.main sigpaths = [ CD / "data" / "sigs" / "test_aulldiv.pat", @@ -224,7 +564,9 @@ def get_viv_extractor(path: Path): else: vw = capa.loader.get_workspace(path, FORMAT_AUTO, sigpaths=sigpaths) vw.saveWorkspace() - extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, OS_AUTO) + extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor( + vw, path, OS_AUTO + ) fixup_viv(path, extractor) return extractor @@ -318,8 +660,8 @@ def get_idalib_extractor(path: Path): if not idalib.load_idalib(): raise RuntimeError("failed to load IDA idalib module.") - import idapro import ida_auto + import idapro import capa.features.extractors.ida.extractor @@ -359,8 +701,8 @@ def fixup_idalib(path: Path, extractor): """ IDA fixups to overcome differences between backends """ - import idaapi import ida_funcs + import idaapi def remove_library_id_flag(fva): f = idaapi.get_func(fva) @@ -378,8 +720,8 @@ def remove_library_id_flag(fva): @lru_cache(maxsize=1) def get_cape_extractor(path): - from capa.helpers import load_json_from_path from capa.features.extractors.cape.extractor import CapeExtractor + from 
capa.helpers import load_json_from_path report = load_json_from_path(path) @@ -388,8 +730,8 @@ def get_cape_extractor(path): @lru_cache(maxsize=1) def get_drakvuf_extractor(path): - from capa.helpers import load_jsonl_from_path from capa.features.extractors.drakvuf.extractor import DrakvufExtractor + from capa.helpers import load_jsonl_from_path report = load_jsonl_from_path(path) @@ -445,10 +787,14 @@ def get_binexport_extractor(path): be2 = capa.features.extractors.binexport2.get_binexport2(path) search_paths = [CD / "data", CD / "data" / "aarch64"] - path = capa.features.extractors.binexport2.get_sample_from_binexport2(path, be2, search_paths) + path = capa.features.extractors.binexport2.get_sample_from_binexport2( + path, be2, search_paths + ) buf = path.read_bytes() - return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf) + return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor( + be2, buf + ) def extract_global_features(extractor): @@ -531,6 +877,13 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> dict[Feature, set[Ad # note: to reduce the testing time it's recommended to reuse already existing test samples, if possible def get_data_path_by_name(name) -> Path: + # prefer the fixture manifest registry; fall back to the legacy hard-coded + # branches below for any keys not yet migrated. 
+ lookup_key = name[:-3] if name.endswith("...") else name + json_paths = _load_fixture_file_paths() + if lookup_key in json_paths: + return json_paths[lookup_key] + if name == "mimikatz": return CD / "data" / "mimikatz.exe_" elif name == "kernel32": @@ -554,7 +907,11 @@ def get_data_path_by_name(name) -> Path: elif name == "al-khaser x64": return CD / "data" / "al-khaser_x64.exe_" elif name.startswith("39c05"): - return CD / "data" / "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_" + return ( + CD + / "data" + / "39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_" + ) elif name.startswith("499c2"): return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32" elif name.startswith("9324d"): @@ -578,7 +935,11 @@ def get_data_path_by_name(name) -> Path: elif name.startswith("77329"): return CD / "data" / "773290480d5445f11d3dc1b800728966.exe_" elif name.startswith("3b13b"): - return CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_" + return ( + CD + / "data" + / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_" + ) elif name == "7351f.elf": return CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_" elif name == "055da8e6.elf": @@ -594,25 +955,54 @@ def get_data_path_by_name(name) -> Path: elif name.startswith("b9f5b"): return CD / "data" / "b9f5bd514485fb06da39beff051b9fdc.exe_" elif name.startswith("mixed-mode-64"): - return DNFILE_TESTFILES / "mixed-mode" / "ModuleCode" / "bin" / "ModuleCode_amd64.exe" + return ( + DNFILE_TESTFILES + / "mixed-mode" + / "ModuleCode" + / "bin" + / "ModuleCode_amd64.exe" + ) elif name.startswith("hello-world"): return DNFILE_TESTFILES / "hello-world" / "hello-world.exe" elif name.startswith("_1c444"): return DOTNET_DIR / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_" elif name.startswith("_387f15"): - return DOTNET_DIR / "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + return ( + DOTNET_DIR + / 
"387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + ) elif name.startswith("_692f"): return DOTNET_DIR / "692f7fd6d198e804d6af98eb9e390d61.exe_" elif name.startswith("_0953c"): - return CD / "data" / "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_" + return ( + CD + / "data" + / "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_" + ) elif name.startswith("_039a6"): - return CD / "data" / "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_" + return ( + CD + / "data" + / "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_" + ) elif name.startswith("b5f052"): - return CD / "data" / "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_" + return ( + CD + / "data" + / "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_" + ) elif name.startswith("bf7a9c"): - return CD / "data" / "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_" + return ( + CD + / "data" + / "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_" + ) elif name.startswith("294b8d"): - return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_" + return ( + CD + / "data" + / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_" + ) elif name.startswith("2bf18d"): return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_" elif name.startswith("2d3edc"): @@ -668,11 +1058,23 @@ def get_data_path_by_name(name) -> Path: / "eb12873c0ce3e9ea109c2a447956cbd10ca2c3e86936e526b2c6e28764999f21_min_archive.zip" ) elif name.startswith("ea2876"): - return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" + return ( + CD + / "data" + / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" + ) elif name.startswith("1038a2"): - return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_" + return ( + CD + / 
"data" + / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_" + ) elif name.startswith("3da7c"): - return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_" + return ( + CD + / "data" + / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_" + ) elif name.startswith("nested_typedef"): return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_" elif name.startswith("nested_typeref"): @@ -818,7 +1220,9 @@ def get_basic_block(extractor, fh: FunctionHandle, va: int) -> BBHandle: raise ValueError("basic block not found") -def get_instruction(extractor, fh: FunctionHandle, bbh: BBHandle, va: int) -> InsnHandle: +def get_instruction( + extractor, fh: FunctionHandle, bbh: BBHandle, va: int +) -> InsnHandle: for ih in extractor.get_instructions(fh, bbh): if isinstance(extractor, DnfileFeatureExtractor): addr = ih.inner.offset @@ -978,65 +1382,28 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -( - FEATURE_PRESENCE_TESTS, - FEATURE_SYMTAB_FUNC_TESTS, - FEATURE_COUNT_TESTS, - FEATURE_COUNT_TESTS_GHIDRA, - FEATURE_PRESENCE_TESTS_DOTNET, - FEATURE_COUNT_TESTS_DOTNET, -) = _load_feature_tests() - - -FEATURE_PRESENCE_TESTS_IDA = [ - # file/imports - # IDA can recover more names of APIs imported by ordinal - ("mimikatz", "file", capa.features.file.Import("cabinet.FCIAddFile"), True), -] - -FEATURE_BINJA_DATABASE_TESTS = sorted( - [ - # insn/regex - ( - "pma16-01_binja_db", - "function=0x4021B0", - capa.features.common.Regex("HTTP/1.0"), - True, - ), - ( - "pma16-01_binja_db", - "function=0x402F40", - capa.features.common.Regex("www.practicalmalwareanalysis.com"), - True, - ), - ( - "pma16-01_binja_db", - "function=0x402F40", - capa.features.common.Substring("practicalmalwareanalysis.com"), - True, - ), - ( - "pma16-01_binja_db", - "file", - capa.features.file.FunctionName("__aulldiv"), - True, - ), - # os & format & arch - 
("pma16-01_binja_db", "file", OS(OS_WINDOWS), True), - ("pma16-01_binja_db", "file", OS(OS_LINUX), False), - ("pma16-01_binja_db", "function=0x404356", OS(OS_WINDOWS), True), - ("pma16-01_binja_db", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True), - ("pma16-01_binja_db", "file", Arch(ARCH_I386), True), - ("pma16-01_binja_db", "file", Arch(ARCH_AMD64), False), - ("pma16-01_binja_db", "function=0x404356", Arch(ARCH_I386), True), - ("pma16-01_binja_db", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True), - ("pma16-01_binja_db", "file", Format(FORMAT_PE), True), - ("pma16-01_binja_db", "file", Format(FORMAT_ELF), False), - # format is also a global feature - ("pma16-01_binja_db", "function=0x404356", Format(FORMAT_PE), True), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. +# legacy tuple-of-tuples lists still needed by `test_binexport_features.py`, +# which rewrites a mimikatz sample path to its `.ghidra.BinExport` counterpart +# at test time. +# +# built from the new `load_feature_fixtures()` so the JSON manifests remain the +# single source of truth for fixture data. 
+FEATURE_PRESENCE_TESTS: list[tuple] = sorted( + ( + (f.sample_key, f.location, f.statement, f.expected) + for f in load_feature_fixtures() + if not isinstance(f.statement, ceng.Range) + and not (f.tags & frozenset({"dotnet", "symtab"})) + ), + key=lambda t: (t[0], t[1]), +) + +FEATURE_COUNT_TESTS_GHIDRA: list[tuple] = sorted( + ( + (f.sample_key, f.location, f.statement.child, f.statement.min) + for f in load_feature_fixtures() + if isinstance(f.statement, ceng.Range) and "ghidra" in f.tags + ), key=lambda t: (t[0], t[1]), ) @@ -1182,20 +1549,26 @@ def get_result_doc(path: Path): @pytest.fixture def pma0101_rd(): # python -m capa.main tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ --json > tests/data/rd/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_.json - return get_result_doc(CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json") + return get_result_doc( + CD / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json" + ) @pytest.fixture def dotnet_1c444e_rd(): # .NET sample # python -m capa.main tests/data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_ --json > tests/data/rd/1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json - return get_result_doc(CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json") + return get_result_doc( + CD / "data" / "rd" / "1c444ebeba24dcba8628b7dfe5fec7c6.exe_.json" + ) @pytest.fixture def a3f3bbc_rd(): # python -m capa.main tests/data/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_ --json > tests/data/rd/3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json - return get_result_doc(CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json") + return get_result_doc( + CD / "data" / "rd" / "3f3bbcf8fd90bdcdcdc5494314ed4225.exe_.json" + ) @pytest.fixture @@ -1213,7 +1586,9 @@ def al_khaserx64_rd(): @pytest.fixture def a076114_rd(): # python -m capa.main tests/data/0761142efbda6c4b1e801223de723578.dll_ --json > tests/data/rd/0761142efbda6c4b1e801223de723578.dll_.json - return get_result_doc(CD / "data" / "rd" / 
"0761142efbda6c4b1e801223de723578.dll_.json") + return get_result_doc( + CD / "data" / "rd" / "0761142efbda6c4b1e801223de723578.dll_.json" + ) @pytest.fixture @@ -1221,5 +1596,8 @@ def dynamic_a0000a6_rd(): # python -m capa.main tests/data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json --json > tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json # gzip tests/data/rd/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json return get_result_doc( - CD / "data" / "rd" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz" + CD + / "data" + / "rd" + / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz" ) diff --git a/tests/fixtures/features/README.md b/tests/fixtures/features/README.md new file mode 100644 index 000000000..748a26e84 --- /dev/null +++ b/tests/fixtures/features/README.md @@ -0,0 +1,169 @@ +# backend feature fixtures + +This spec describes how contributors should add and consume backend feature fixtures. + +## Scope + +This spec covers feature-fixture tests only. It does not cover extractor helper tests, CLI smoke tests, or other bespoke tests. + +## Source of truth + +Feature fixtures live in these JSON manifests under `tests/fixtures/features/`: + +- `static.json` +- `binja-db.json` +- `binexport.json` +- `cape.json` +- `drakvuf.json` +- `vmray.json` + +Each manifest contains: + +- a `files` list that maps fixture keys to sample paths +- a `features` list that describes feature assertions + +The loader reads all of these manifests and combines them into one fixture set. + +A backend feature test should not maintain its own private list of feature fixtures if the same information can be expressed in these JSON manifests. 
+ +## Fixture shape + +Each feature fixture specifies: + +- the sample key +- the location within the sample +- the feature or statement to evaluate +- optional tags +- optional backend marks +- optional `expected: false` + +If `expected` is omitted, it means `true`. + +This applies to ordinary feature assertions and `count(...)` assertions. + +Examples: + +```json +{ + "file": "pma16-01", + "location": "file", + "feature": "format: pe" +} +``` + +```json +{ + "file": "mimikatz", + "location": "function=0x40E5C2", + "feature": "count(basic blocks): 7" +} +``` + +```json +{ + "file": "mimikatz", + "location": "function=0x401000", + "feature": "characteristic: loop", + "expected": false +} +``` + +## Tags + +Tags are used to describe fixture requirements or sample properties that backends may need for selection. + +Examples include: + +- `dotnet` +- `elf` +- `dynamic` +- `flirt` +- `symtab` +- `binja-db` +- `binexport` +- `aarch64` + +Tags may appear on file entries or feature entries. File tags are inherited by their features. + +Tags should not duplicate information that can already be derived from: + +- the location string +- the parsed feature type + +Unknown tags should fail collection. + +## Backend selection + +Backends consume one shared fixture list and select the fixtures they support. + +Large backends should prefer exclusion-based selection. This means new fixtures run by default unless they are explicitly out of scope. + +Examples: + +- `viv` excludes `.NET` +- `ghidra` excludes `.NET` +- `binja` excludes `.NET` +- `idalib` excludes `.NET` + +Small-surface backends may use inclusion-based selection where that is clearer. + +Examples: + +- `dnfile` includes `.NET` +- `dotnetfile` includes `.NET` + +Backends may also restrict supported scopes or feature types. 
+ +## Backend test file shape + +A backend feature test file should normally have: + +- one backend policy object +- one feature-test entry point that consumes shared fixtures + +For example: + +```python +import fixtures + +BACKEND = fixtures.BackendFeaturePolicy( + name="viv", + get_extractor=fixtures.get_viv_extractor, + exclude_tags={"dotnet"}, +) + + +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_viv_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) +``` + +Module-level availability checks are still allowed. Runtime-specific hooks are allowed only when they depend on the installed backend or tool version and cannot be represented declaratively in the fixture manifests. + +## Known bugs and marks + +Known backend bugs should be represented in the fixture manifests through backend-specific marks. + +Backends should not usually edit the shared JSON manifests just to avoid a fixture. They should prefer selecting or excluding fixtures through backend policy. + +The main reason to keep marks in JSON is to record known exceptions such as: + +- a backend-specific `xfail` +- a backend-specific `skip` + +## Expected contributor workflow + +When adding a new feature test: + +1. add the sample path to the appropriate JSON manifest `files` list if it is not already present +2. add the feature fixture to that manifest `features` list +3. add tags only when they express a real requirement or sample property +4. omit `expected` unless the expected result is `false` +5. use JSON marks only for known backend bugs + +When adding a new backend: + +1. create one backend feature test file +2. define one backend policy describing extractor and exclusions +3. use the shared feature runner +4. 
add runtime hooks only if the environment or installed tool version requires them diff --git a/tests/fixtures/features/binexport.json b/tests/fixtures/features/binexport.json new file mode 100644 index 000000000..a9067a56e --- /dev/null +++ b/tests/fixtures/features/binexport.json @@ -0,0 +1,517 @@ +{ + "files": [ + { + "key": "687e79.ghidra.be2", + "path": "data/binexport2/687e79cde5b0ced75ac229465835054931f9ec438816f2827a8be5f3bd474929.elf_.ghidra.BinExport", + "tags": [ + "binexport", + "elf", + "aarch64" + ] + }, + { + "key": "d1e650.ghidra.be2", + "path": "data/binexport2/d1e6506964edbfffb08c0dd32e1486b11fbced7a4bd870ffe79f110298f0efb8.elf_.ghidra.BinExport", + "tags": [ + "binexport", + "elf", + "aarch64" + ] + } + ], + "features": [ + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "string: AppDataService start", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "string: nope", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "section: .text", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "section: .nope", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "export: android::clearDir", + "expected": true, + "marks": [ + { + "backend": "binexport", + "mark": "xfail", + "reason": "name demangling is not implemented" + } + ] + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "export: nope", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "import: fopen", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "import: exit", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "import: _ZN7android10IInterfaceD0Ev", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "import: 
nope", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1056c0", + "feature": "characteristic: loop", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1075c0", + "feature": "characteristic: loop", + "expected": false + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x114af4", + "feature": "characteristic: tight loop", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x118F1C", + "feature": "characteristic: tight loop", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x11464c", + "feature": "characteristic: tight loop", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x0", + "feature": "characteristic: stack string", + "expected": true, + "marks": [ + { + "backend": "binexport", + "mark": "xfail", + "reason": "stack string detection not implemented yet for binexport" + } + ] + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "mnemonic: stp", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "mnemonic: adrp", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "mnemonic: bl", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "mnemonic: in", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "mnemonic: adrl", + "expected": false + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x11451c", + "feature": "number: 0x10", + "expected": false, + "comment": "00114524 add x29,sp,#0x10" + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105128", + "feature": "number: 0xE0", + "expected": false, + "comment": "00105128 sub sp,sp,#0xE0" + }, + { + "file": "687e79.ghidra.be2", + "location": 
"function=0x105128,bb=0x1051e4", + "feature": "operand[1].number: 0xFFFFFFFF", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588,bb=0x107588", + "feature": "operand[1].number: 0x8", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588,bb=0x107588,insn=0x1075a4", + "feature": "operand[1].number: 0x8", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105128,bb=0x105450", + "feature": "operand[2].offset: 0x10", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x124854,bb=0x1248AC,insn=0x1248B4", + "feature": "operand[2].offset: -0x48", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x13347c,bb=0x133548,insn=0x133554", + "feature": "operand[2].offset: 0x20", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105C88", + "feature": "number: 0xF000", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1057f8,bb=0x1057f8", + "feature": "number: 0xFFFFFFFFFFFFFFFF", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1066e0,bb=0x1068c4", + "feature": "number: 0xFFFFFFFF", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105128,bb=0x105450", + "feature": "offset: 0x10", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x13347c,bb=0x133548,insn=0x133554", + "feature": "offset: 0x20", + "expected": false, + "comment": "ldp x29,x30,[sp, #0x20]" + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x1183e0,bb=0x11849c,insn=0x1184b0", + "feature": "offset: 0x8", + "expected": true, + "comment": "stp x20,x0,[x19, #0x8]" + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x138688,bb=0x138994,insn=0x1389a8", + "feature": "offset: 0x8", + "expected": true, + "comment": "str xzr,[x8, #0x8]!" 
+ }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x138688,bb=0x138978,insn=0x138984", + "feature": "offset: 0x8", + "expected": true, + "comment": "ldr x9,[x8, #0x8]!" + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x11451c", + "feature": "offset: 0x20", + "expected": false, + "comment": "ldr x19,[sp], #0x20" + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x138a9c,bb=0x138b00,insn=0x138b00", + "feature": "offset: 0x1", + "expected": true, + "comment": "ldrb w9,[x8, #0x1]" + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x124854,bb=0x1248AC,insn=0x1248B4", + "feature": "offset: -0x48", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105128,bb=0x105128,insn=0x10514c", + "feature": "offset: 0x8", + "expected": true, + "comment": "0010514c add x23,param_1,#0x8" + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105c88", + "feature": "api: memset", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105c88", + "feature": "api: Nope", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "string: AppDataService start", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1075c0", + "feature": "string: AppDataService", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "string: nope", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x106d58", + "feature": "string: /data/misc/wifi/wpa_supplicant.conf", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105c88", + "feature": "string: /innerRename/", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x106d58", + "feature": "string: /\\/data\\/misc/", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": 
"function=0x106d58", + "feature": "substring: /data/misc", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x1165a4", + "feature": "bytes: E4 05 B8 93 70 BA 6B 41 9C D7 92 52 75 BF 6F CC 1E 83 60 CC", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1057f8", + "feature": "bytes: 2F 00 73 00 79 00 73 00 74 00 65 00 6D 00 2F 00 78 00 62 00 69 00 6E 00 2F 00 62 00 75 00 73 00 79 00 62 00 6F 00 78 00", + "expected": false, + "comment": "don't extract byte features for obvious strings" + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x114af4", + "feature": "characteristic: nzxor", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x117988", + "feature": "characteristic: nzxor", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105b38", + "feature": "characteristic: recursive call", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x106530", + "feature": "characteristic: recursive call", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x118620", + "feature": "characteristic: indirect call", + "expected": true + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x118500", + "feature": "characteristic: indirect call", + "expected": false + }, + { + "file": "d1e650.ghidra.be2", + "location": "function=0x11451c", + "feature": "characteristic: indirect call", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x105080", + "feature": "characteristic: calls from", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1070e8", + "feature": "characteristic: calls from", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1075c0", + "feature": "characteristic: calls to", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + 
"feature": "function-name: __libc_init", + "expected": true, + "marks": [ + { + "backend": "binexport", + "mark": "xfail", + "reason": "TODO should this be a function-name?" + } + ] + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "os: android", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "os: linux", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "os: windows", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "os: android", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1075c0,bb=0x1076c0", + "feature": "os: android", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "arch: i386", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "arch: amd64", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "arch: aarch64", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "arch: aarch64", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x1075c0,bb=0x1076c0", + "feature": "arch: aarch64", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "format: elf", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "file", + "feature": "format: pe", + "expected": false + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "format: elf", + "expected": true + }, + { + "file": "687e79.ghidra.be2", + "location": "function=0x107588", + "feature": "format: pe", + "expected": false + } + ] +} diff --git a/tests/fixtures/features/binja-db.json b/tests/fixtures/features/binja-db.json new file mode 100644 index 000000000..dfcb30f71 --- /dev/null +++ 
b/tests/fixtures/features/binja-db.json @@ -0,0 +1,91 @@ +{ + "files": [ + { + "key": "pma16-01_binja_db", + "path": "data/Practical Malware Analysis Lab 16-01.exe_.bndb", + "tags": [ + "binja-db" + ] + } + ], + "features": [ + { + "file": "pma16-01_binja_db", + "location": "function=0x4021B0", + "feature": "string: /HTTP/1.0/" + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x402F40", + "feature": "string: /www.practicalmalwareanalysis.com/" + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x402F40", + "feature": "substring: practicalmalwareanalysis.com" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "function-name: __aulldiv" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "os: windows" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "os: linux", + "expected": false + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x404356", + "feature": "os: windows" + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x404356,bb=0x4043B9", + "feature": "os: windows" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "arch: i386" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "arch: amd64", + "expected": false + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x404356", + "feature": "arch: i386" + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x404356,bb=0x4043B9", + "feature": "arch: i386" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "format: pe" + }, + { + "file": "pma16-01_binja_db", + "location": "file", + "feature": "format: elf", + "expected": false + }, + { + "file": "pma16-01_binja_db", + "location": "function=0x404356", + "feature": "format: pe" + } + ] +} diff --git a/tests/fixtures/features/cape.json b/tests/fixtures/features/cape.json new file mode 100644 index 000000000..eff2e0388 --- /dev/null +++ 
b/tests/fixtures/features/cape.json @@ -0,0 +1,221 @@ +{ + "files": [ + { + "key": "0000a657", + "path": "data/dynamic/cape/v2.2/0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz", + "tags": [ + "dynamic", + "cape" + ] + }, + { + "key": "d46900", + "path": "data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz", + "tags": [ + "dynamic", + "cape" + ] + } + ], + "features": [ + { + "file": "0000a657", + "location": "file", + "feature": "string: T_Ba?.BcRJa" + }, + { + "file": "0000a657", + "location": "file", + "feature": "string: GetNamedPipeClientSessionId" + }, + { + "file": "0000a657", + "location": "file", + "feature": "string: nope", + "expected": false + }, + { + "file": "0000a657", + "location": "file", + "feature": "section: .rdata" + }, + { + "file": "0000a657", + "location": "file", + "feature": "section: .nope", + "expected": false + }, + { + "file": "0000a657", + "location": "file", + "feature": "import: NdrSimpleTypeUnmarshall" + }, + { + "file": "0000a657", + "location": "file", + "feature": "import: Nope", + "expected": false + }, + { + "file": "0000a657", + "location": "file", + "feature": "export: Nope", + "expected": false + }, + { + "file": "0000a657", + "location": "process=(1180:3052)", + "feature": "string: C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe" + }, + { + "file": "0000a657", + "location": "process=(1180:3052)", + "feature": "string: nope", + "expected": false + }, + { + "file": "0000a657", + "location": "process=(2900:2852),thread=2904", + "feature": "api: RegQueryValueExA" + }, + { + "file": "0000a657", + "location": "process=(2900:2852),thread=2904", + "feature": "api: RegQueryValueEx" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "api: NtQueryValueKey" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "api: GetActiveWindow", + "expected": false + }, + { + 
"file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "number: 0xEC" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "number: 110173", + "expected": false + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "string: SetThreadUILanguage" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "string: nope", + "expected": false + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804,call=56", + "feature": "api: NtQueryValueKey" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804,call=1958", + "feature": "api: nope", + "expected": false + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(string(T_Ba?.BcRJa)): 1" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(string(GetNamedPipeClientSessionId)): 1" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(string(nope)): 0" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(section(.rdata)): 1" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(section(.nope)): 0" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(import(NdrSimpleTypeUnmarshall)): 1" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(import(Nope)): 0" + }, + { + "file": "0000a657", + "location": "file", + "feature": "count(export(Nope)): 0" + }, + { + "file": "0000a657", + "location": "process=(1180:3052)", + "feature": "count(string(C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe)): 2" + }, + { + "file": "0000a657", + "location": "process=(1180:3052)", + "feature": "count(string(nope)): 0" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "count(api(NtQueryValueKey)): 7" + }, + { + "file": "0000a657", + "location": 
"process=(2852:3052),thread=2804", + "feature": "count(api(GetActiveWindow)): 0" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "count(number(0xEC)): 1" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "count(number(110173)): 0" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "count(string(SetThreadUILanguage)): 1" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804", + "feature": "count(string(nope)): 0" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804,call=56", + "feature": "count(api(NtQueryValueKey)): 1" + }, + { + "file": "0000a657", + "location": "process=(2852:3052),thread=2804,call=1958", + "feature": "count(api(nope)): 0" + } + ] +} diff --git a/tests/fixtures/features/drakvuf.json b/tests/fixtures/features/drakvuf.json new file mode 100644 index 000000000..433b20306 --- /dev/null +++ b/tests/fixtures/features/drakvuf.json @@ -0,0 +1,129 @@ +{ + "files": [ + { + "key": "93b2d1-drakvuf", + "path": "data/dynamic/drakvuf/93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795.log.gz", + "tags": [ + "dynamic", + "drakvuf" + ] + } + ], + "features": [ + { + "file": "93b2d1-drakvuf", + "location": "file", + "feature": "string: \\Program Files\\WindowsApps\\does_not_exist", + "expected": false + }, + { + "file": "93b2d1-drakvuf", + "location": "file", + "feature": "import: SetUnhandledExceptionFilter" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592", + "feature": "api: LdrLoadDll" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592", + "feature": "api: DoesNotExist", + "expected": false + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=4716,call=17", + "feature": "api: CreateWindowExW" + }, + { + "file": "93b2d1-drakvuf", + "location": 
"process=(3564:4852),thread=4716,call=17", + "feature": "api: CreateWindowEx" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "api: LdrLoadDll" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "api: DoesNotExist", + "expected": false + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "string: 0x667e2beb40:\"api-ms-win-core-fibers-l1-1-1\"" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "string: non_existant", + "expected": false + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "number: 0x801" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "number: 0x10101010101", + "expected": false + }, + { + "file": "93b2d1-drakvuf", + "location": "file", + "feature": "count(string(\\Program Files\\WindowsApps\\does_not_exist)): 0" + }, + { + "file": "93b2d1-drakvuf", + "location": "file", + "feature": "count(import(SetUnhandledExceptionFilter)): 1" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592", + "feature": "count(api(LdrLoadDll)): 9" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592", + "feature": "count(api(DoesNotExist)): 0" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "count(api(LdrLoadDll)): 1" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "count(api(DoesNotExist)): 0" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "count(string(0x667e2beb40:\"api-ms-win-core-fibers-l1-1-1\")): 1" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": 
"count(string(non_existant)): 0" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "count(number(0x801)): 1" + }, + { + "file": "93b2d1-drakvuf", + "location": "process=(3564:4852),thread=6592,call=1", + "feature": "count(number(0x10101010101)): 0" + } + ] +} diff --git a/tests/fixtures/feature-presence.json b/tests/fixtures/features/static.json similarity index 83% rename from tests/fixtures/feature-presence.json rename to tests/fixtures/features/static.json index 765718ed3..669e9ea9a 100644 --- a/tests/fixtures/feature-presence.json +++ b/tests/fixtures/features/static.json @@ -2,79 +2,142 @@ "files": [ { "key": "mimikatz", - "path": "data/mimikatz.exe_" + "path": "data/mimikatz.exe_", + "tags": [ + "static" + ] }, { "key": "kernel32", - "path": "data/kernel32.dll_" + "path": "data/kernel32.dll_", + "tags": [ + "static" + ] }, { "key": "kernel32-64", - "path": "data/kernel32-64.dll_" + "path": "data/kernel32-64.dll_", + "tags": [ + "static" + ] }, { "key": "pma12-04", - "path": "data/Practical Malware Analysis Lab 12-04.exe_" + "path": "data/Practical Malware Analysis Lab 12-04.exe_", + "tags": [ + "static" + ] }, { "key": "pma16-01", - "path": "data/Practical Malware Analysis Lab 16-01.exe_" + "path": "data/Practical Malware Analysis Lab 16-01.exe_", + "tags": [ + "static" + ] }, { "key": "7351f.elf", - "path": "data/7351f8a40c5450557b24622417fc478d.elf_" + "path": "data/7351f8a40c5450557b24622417fc478d.elf_", + "tags": [ + "elf", + "static" + ] }, { "key": "055da8e6.elf", - "path": "data/055da8e6ccfe5a9380231ea04b850e18.elf_" + "path": "data/055da8e6ccfe5a9380231ea04b850e18.elf_", + "tags": [ + "elf", + "static" + ] }, { "key": "bb38149.elf", - "path": "data/bb38149ff4b5c95722b83f24ca27a42b.elf_" + "path": "data/bb38149ff4b5c95722b83f24ca27a42b.elf_", + "tags": [ + "elf", + "static" + ] }, { "key": "al-khaser x64", - "path": "data/al-khaser_x64.exe_" + "path": "data/al-khaser_x64.exe_", + "tags": [ + 
"static" + ] }, { "key": "64d9f", - "path": "data/64d9f7d96b99467f36e22fada623c3bb.dll_" + "path": "data/64d9f7d96b99467f36e22fada623c3bb.dll_", + "tags": [ + "static" + ] }, { "key": "79abd", - "path": "data/79abd17391adc6251ecdc58d13d76baf.dll_" + "path": "data/79abd17391adc6251ecdc58d13d76baf.dll_", + "tags": [ + "static" + ] }, { "key": "946a9", - "path": "data/946a99f36a46d335dec080d9a4371940.dll_" + "path": "data/946a99f36a46d335dec080d9a4371940.dll_", + "tags": [ + "static" + ] }, { "key": "773290", - "path": "data/773290480d5445f11d3dc1b800728966.exe_" + "path": "data/773290480d5445f11d3dc1b800728966.exe_", + "tags": [ + "static" + ] }, { "key": "294b8d", - "path": "data/294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_" + "path": "data/294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_", + "tags": [ + "elf", + "static" + ] }, { "key": "a1982", - "path": "data/a198216798ca38f280dc413f8c57f2c2.exe_" + "path": "data/a198216798ca38f280dc413f8c57f2c2.exe_", + "tags": [ + "static" + ] }, { "key": "c91887", - "path": "data/c91887d861d9bd4a5872249b641bc9f9.exe_" + "path": "data/c91887d861d9bd4a5872249b641bc9f9.exe_", + "tags": [ + "static" + ] }, { "key": "2bf18d", - "path": "data/2bf18d0403677378adad9001b1243211.elf_" + "path": "data/2bf18d0403677378adad9001b1243211.elf_", + "tags": [ + "elf", + "static", + "symtab" + ] }, { "key": "2d3edc", - "path": "data/2d3edc218a90f03089cc01715a9f047f.exe_" + "path": "data/2d3edc218a90f03089cc01715a9f047f.exe_", + "tags": [ + "static" + ] }, { "key": "ea2876", - "path": "data/ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" + "path": "data/ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_", + "tags": [ + "static" + ] }, { "key": "pma01-01.frz", @@ -92,46 +155,203 @@ "key": "034b7231.frz", "path": "fixtures/freeze/034b7231a49387604e81a5a5d2fe7e08f6982c418a28b719d2faace3c312ebb5.exe_.frz" }, - { "key": "b9f5b", - "path": 
"data/b9f5bd514485fb06da39beff051b9fdc.exe_" + "path": "data/b9f5bd514485fb06da39beff051b9fdc.exe_", + "tags": [ + "static" + ] }, { "key": "mixed-mode-64", - "path": "data/dotnet/dnfile-testfiles/mixed-mode/ModuleCode/bin/ModuleCode_amd64.exe" + "path": "data/dotnet/dnfile-testfiles/mixed-mode/ModuleCode/bin/ModuleCode_amd64.exe", + "tags": [ + "static" + ] }, { "key": "hello-world", - "path": "data/dotnet/dnfile-testfiles/hello-world/hello-world.exe" + "path": "data/dotnet/dnfile-testfiles/hello-world/hello-world.exe", + "tags": [ + "static" + ] }, { "key": "_1c444", - "path": "data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_" + "path": "data/dotnet/1c444ebeba24dcba8628b7dfe5fec7c6.exe_", + "tags": [ + "static" + ] }, { "key": "_692f", - "path": "data/dotnet/692f7fd6d198e804d6af98eb9e390d61.exe_" + "path": "data/dotnet/692f7fd6d198e804d6af98eb9e390d61.exe_", + "tags": [ + "static" + ] }, { "key": "_0953c", - "path": "data/0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_" + "path": "data/0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_", + "tags": [ + "static" + ] }, { "key": "_039a6", - "path": "data/039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_" + "path": "data/039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_", + "tags": [ + "static" + ] }, { "key": "_387f15", - "path": "data/dotnet/387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + "path": "data/dotnet/387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_", + "tags": [ + "static" + ] }, { "key": "nested_typedef", - "path": "data/dotnet/dd9098ff91717f4906afe9dafdfa2f52.exe_" + "path": "data/dotnet/dd9098ff91717f4906afe9dafdfa2f52.exe_", + "tags": [ + "static" + ] }, { "key": "nested_typeref", - "path": "data/dotnet/2c7d60f77812607dec5085973ff76cea.dll_" + "path": "data/dotnet/2c7d60f77812607dec5085973ff76cea.dll_", + "tags": [ + "static" + ] + }, + { + "key": "pma01-01", + "path": 
"data/Practical Malware Analysis Lab 01-01.dll_", + "tags": [ + "static" + ] + }, + { + "key": "pma01-01-rd", + "path": "data/rd/Practical Malware Analysis Lab 01-01.dll_.json" + }, + { + "key": "pma21-01", + "path": "data/Practical Malware Analysis Lab 21-01.exe_", + "tags": [ + "static" + ] + }, + { + "key": "al-khaser x86", + "path": "data/al-khaser_x86.exe_", + "tags": [ + "static" + ] + }, + { + "key": "39c05", + "path": "data/39c05b15e9834ac93f206bc114d0a00c357c888db567ba8f5345da0529cbed41.dll_", + "tags": [ + "static" + ] + }, + { + "key": "499c2", + "path": "data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32", + "tags": [ + "static" + ] + }, + { + "key": "9324d", + "path": "data/9324d1a8ae37a36ae560c37448c9705a.exe_", + "tags": [ + "static" + ] + }, + { + "key": "395eb", + "path": "data/395eb0ddd99d2c9e37b6d0b73485ee9c.exe_", + "tags": [ + "static" + ] + }, + { + "key": "a933a", + "path": "data/a933a1a402775cfa94b6bee0963f4b46.dll_", + "tags": [ + "static" + ] + }, + { + "key": "bfb9b", + "path": "data/bfb9b5391a13d0afd787e87ab90f14f5.dll_", + "tags": [ + "static" + ] + }, + { + "key": "82bf6", + "path": "data/82BF6347ACF15E5D883715DC289D8A2B.exe_", + "tags": [ + "static" + ] + }, + { + "key": "pingtaest", + "path": "data/ping_t\u00e4st.exe_", + "tags": [ + "static" + ] + }, + { + "key": "3b13b", + "path": "data/3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_", + "tags": [ + "static" + ] + }, + { + "key": "2f7f5f", + "path": "data/2f7f5fb5de175e770d7eae87666f9831.elf_", + "tags": [ + "elf", + "static" + ] + }, + { + "key": "b5f052", + "path": "data/b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_", + "tags": [ + "elf", + "static" + ] + }, + { + "key": "bf7a9c", + "path": "data/bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_", + "tags": [ + "elf", + "static" + ] + }, + { + "key": "1038a2", + "path": "data/1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_", + "tags": [ + "elf", + 
"static" + ] + }, + { + "key": "3da7c", + "path": "data/3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_", + "tags": [ + "elf", + "static" + ] } ], "features": [ @@ -1171,13 +1391,19 @@ "expected": false, "explanation": "non-existant basic block feature" }, - { "file": "mimikatz", "location": "function=0x40E5C2", "feature": "count(basic blocks): 7", "expected": true, - "explanation": "7 basic blocks in function" + "explanation": "7 basic blocks in function", + "marks": [ + { + "backend": "ghidra", + "mark": "xfail", + "reason": "Ghidra identifies different function boundaries; see ghidra-tagged count variant" + } + ] }, { "file": "mimikatz", @@ -1191,29 +1417,51 @@ "location": "function=0x40E5C2", "feature": "count(characteristic(calls from)): 3", "expected": true, - "explanation": "function has 3 calls" + "explanation": "function has 3 calls", + "marks": [ + { + "backend": "ghidra", + "mark": "xfail", + "reason": "Ghidra identifies different function boundaries; see ghidra-tagged count variant" + } + ] }, { "file": "mimikatz", "location": "function=0x4556E5", "feature": "count(characteristic(calls to)): 0", "expected": true, - "explanation": "function has no callers" + "explanation": "function has no callers", + "marks": [ + { + "backend": "ghidra", + "mark": "xfail", + "reason": "Ghidra identifies different function boundaries; see ghidra-tagged count variant" + } + ] }, { "file": "mimikatz", "location": "function=0x40B1F1", "feature": "count(characteristic(calls to)): 3", "expected": true, - "explanation": "function has 3 callers" + "explanation": "function has 3 callers", + "marks": [ + { + "backend": "ghidra", + "mark": "xfail", + "reason": "Ghidra identifies different function boundaries; see ghidra-tagged count variant" + } + ] }, - { "file": "mimikatz", "location": "function=0x4702FD", "feature": "count(characteristic(calls from)): 0", "expected": true, - "tags": ["ghidra"], + "tags": [ + "ghidra" + ], "explanation": "Ghidra: function 
has no calls" }, { @@ -1221,7 +1469,9 @@ "location": "function=0x401bf1", "feature": "count(characteristic(calls to)): 2", "expected": true, - "tags": ["ghidra"], + "tags": [ + "ghidra" + ], "explanation": "Ghidra: function has 2 callers" }, { @@ -1229,261 +1479,334 @@ "location": "function=0x401000", "feature": "count(basic blocks): 3", "expected": true, - "tags": ["ghidra"], + "tags": [ + "ghidra" + ], "explanation": "Ghidra: 3 basic blocks in function" }, - { "file": "b9f5b", "location": "file", "feature": "arch: i386", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "b9f5b", "location": "file", "feature": "arch: amd64", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "mixed-mode-64", "location": "file", "feature": "arch: amd64", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "mixed-mode-64", "location": "file", "feature": "arch: i386", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "mixed-mode-64", "location": "file", "feature": "characteristic: mixed mode", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "characteristic: mixed mode", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "b9f5b", "location": "file", "feature": "os: any", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "b9f5b", "location": "file", "feature": "format: pe", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "b9f5b", "location": "file", "feature": "format: dotnet", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "function-name: HelloWorld::Main", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "function-name: HelloWorld::ctor", "expected": true, 
- "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "function-name: HelloWorld::cctor", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "string: Hello World!", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "class: HelloWorld", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "class: System.Console", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "namespace: System.Diagnostics", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "function=0x250", "feature": "string: Hello World!", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "function=0x250,bb=0x250,insn=0x252", "feature": "string: Hello World!", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "function=0x250,bb=0x250,insn=0x257", "feature": "class: System.Console", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "function=0x250,bb=0x250,insn=0x257", "feature": "namespace: System", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "function=0x250", "feature": "api: System.Console::WriteLine", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "hello-world", "location": "file", "feature": "import: System.Console::WriteLine", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "file", "feature": "string: SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall", "expected": true, - "tags": ["dotnet"] + 
"tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "file", "feature": "string: get_IsAlive", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "file", "feature": "import: gdi32.CreateCompatibleBitmap", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "file", "feature": "import: CreateCompatibleBitmap", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "file", "feature": "import: gdi32::CreateCompatibleBitmap", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "function=0x1F68", "feature": "api: GetWindowDC", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "function=0x1F68", "feature": "number: 0xCC0020", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x600001D", "feature": "characteristic: calls to", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x6000018", "feature": "characteristic: calls to", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x600001D", "feature": "characteristic: calls from", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x600000F", "feature": "characteristic: calls from", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "function=0x1F68", "feature": "number: 0x0", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "function=0x1F68", "feature": "number: 0x1", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_692f", "location": "token=0x6000004", "feature": "api: System.Linq.Enumerable::First", "expected": 
true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "generic method" }, { @@ -1491,7 +1814,9 @@ "location": "token=0x6000004", "feature": "property: System.Linq.Enumerable::First", "expected": false, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "generic method" }, { @@ -1499,7 +1824,9 @@ "location": "token=0x6000004", "feature": "namespace: System.Linq", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "generic method" }, { @@ -1507,7 +1834,9 @@ "location": "token=0x6000004", "feature": "class: System.Linq.Enumerable", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "generic method" }, { @@ -1515,7 +1844,9 @@ "location": "token=0x6000020", "feature": "namespace: Reqss", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "ldftn" }, { @@ -1523,7 +1854,9 @@ "location": "token=0x6000020", "feature": "class: Reqss.Reqss", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "ldftn" }, { @@ -1531,21 +1864,27 @@ "location": "function=0x1F59,bb=0x1F59,insn=0x1F5B", "feature": "characteristic: unmanaged call", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "function=0x2544", "feature": "characteristic: unmanaged call", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x6000088", "feature": "characteristic: unmanaged call", "expected": false, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "same as above but using token instead of function" }, { @@ -1553,21 +1892,27 @@ "location": "function=0x1F68,bb=0x1F68,insn=0x1FF9", "feature": "api: System.Drawing.Image::FromHbitmap", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "function=0x1F68,bb=0x1F68,insn=0x1FF9", "feature": "api: FromHbitmap", "expected": false, - "tags": 
["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x600002B", "feature": "property/read: System.IO.FileInfo::Length", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1575,7 +1920,9 @@ "location": "token=0x600002B", "feature": "property: System.IO.FileInfo::Length", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1583,7 +1930,9 @@ "location": "token=0x6000081", "feature": "api: System.Diagnostics.Process::Start", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1591,7 +1940,9 @@ "location": "token=0x6000081", "feature": "property/write: System.Diagnostics.ProcessStartInfo::UseShellExecute", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1599,7 +1950,9 @@ "location": "token=0x6000081", "feature": "property/write: System.Diagnostics.ProcessStartInfo::WorkingDirectory", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1607,7 +1960,9 @@ "location": "token=0x6000081", "feature": "property/write: System.Diagnostics.ProcessStartInfo::FileName", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1615,7 +1970,9 @@ "location": "token=0x6000087", "feature": "property/write: Sockets.MySocket::reConnectionDelay", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "Field property access" }, { @@ -1623,7 +1980,9 @@ "location": "token=0x600008A", "feature": "property/write: Sockets.MySocket::isConnected", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "Field property access" }, { @@ -1631,7 +1990,9 @@ "location": "token=0x600008A", "feature": "class: 
Sockets.MySocket", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "Field property access" }, { @@ -1639,7 +2000,9 @@ "location": "token=0x600008A", "feature": "namespace: Sockets", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "Field property access" }, { @@ -1647,7 +2010,9 @@ "location": "token=0x600008A", "feature": "property/read: Sockets.MySocket::onConnected", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "Field property access" }, { @@ -1655,7 +2020,9 @@ "location": "token=0x6000004", "feature": "property/read: System.Diagnostics.Debugger::IsAttached", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1663,7 +2030,9 @@ "location": "token=0x6000004", "feature": "class: System.Diagnostics.Debugger", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1671,7 +2040,9 @@ "location": "token=0x6000004", "feature": "namespace: System.Diagnostics", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1679,7 +2050,9 @@ "location": "token=0x6000006", "feature": "property/read: System.Management.Automation.PowerShell::Streams", "expected": false, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef property access" }, { @@ -1687,7 +2060,9 @@ "location": "token=0x600009E", "feature": "property/read: Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE::geoplugin_countryCode", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MethodDef property access" }, { @@ -1695,7 +2070,9 @@ "location": "token=0x600009E", "feature": "class: Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MethodDef 
property access" }, { @@ -1703,7 +2080,9 @@ "location": "token=0x600009E", "feature": "namespace: Modulo", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MethodDef property access" }, { @@ -1711,14 +2090,18 @@ "location": "token=0x6000007", "feature": "api: System.Reflection.Assembly::Load", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_039a6", "location": "token=0x600001D", "feature": "property/read: StagelessHollow.Arac::Marka", "expected": true, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MethodDef method" }, { @@ -1726,7 +2109,9 @@ "location": "token=0x600001C", "feature": "property/read: StagelessHollow.Arac::Marka", "expected": false, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MethodDef method" }, { @@ -1734,7 +2119,9 @@ "location": "token=0x6000023", "feature": "property/read: System.Runtime.CompilerServices.AsyncTaskMethodBuilder::Task", "expected": false, - "tags": ["dotnet"], + "tags": [ + "dotnet" + ], "explanation": "MemberRef method" }, { @@ -1742,141 +2129,180 @@ "location": "file", "feature": "class: mynamespace.myclass_outer0", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: mynamespace.myclass_outer1", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: mynamespace.myclass_outer0/myclass_inner0_0", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: mynamespace.myclass_outer0/myclass_inner0_1", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: mynamespace.myclass_outer1/myclass_inner1_0", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": 
"file", "feature": "class: mynamespace.myclass_outer1/myclass_inner1_1", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: mynamespace.myclass_outer1/myclass_inner1_0/myclass_inner_inner", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: myclass_inner_inner", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: myclass_inner1_0", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: myclass_inner1_1", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: myclass_inner0_0", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typedef", "location": "file", "feature": "class: myclass_inner0_1", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typeref", "location": "file", "feature": "import: Android.OS.Build/VERSION::SdkInt", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typeref", "location": "file", "feature": "import: Android.Media.Image/Plane::Buffer", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typeref", "location": "file", "feature": "import: Android.Provider.Telephony/Sent/Sent::ContentUri", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typeref", "location": "file", "feature": "import: Android.OS.Build::SdkInt", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "nested_typeref", "location": "file", "feature": "import: Plane::Buffer", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": 
"nested_typeref", "location": "file", "feature": "import: Sent::ContentUri", "expected": false, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, - { "file": "_1c444", "location": "token=0x600001D", "feature": "count(characteristic(calls to)): 1", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] }, { "file": "_1c444", "location": "token=0x600001D", "feature": "count(characteristic(calls from)): 9", "expected": true, - "tags": ["dotnet"] + "tags": [ + "dotnet" + ] } ] } diff --git a/tests/fixtures/features/vmray.json b/tests/fixtures/features/vmray.json new file mode 100644 index 000000000..ae1149714 --- /dev/null +++ b/tests/fixtures/features/vmray.json @@ -0,0 +1,233 @@ +{ + "files": [ + { + "key": "93b2d1-vmray", + "path": "data/dynamic/vmray/93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795_min_archive.zip", + "tags": [ + "dynamic", + "vmray" + ] + }, + { + "key": "2f8a79-vmray", + "path": "data/dynamic/vmray/2f8a79b12a7a989ac7e5f6ec65050036588a92e65aeb6841e08dc228ff0e21b4_min_archive.zip", + "tags": [ + "dynamic", + "vmray" + ] + }, + { + "key": "eb1287-vmray", + "path": "data/dynamic/vmray/eb12873c0ce3e9ea109c2a447956cbd10ca2c3e86936e526b2c6e28764999f21_min_archive.zip", + "tags": [ + "dynamic", + "vmray" + ] + } + ], + "features": [ + { + "file": "93b2d1-vmray", + "location": "file", + "feature": "string: api.%x%x.%s" + }, + { + "file": "93b2d1-vmray", + "location": "file", + "feature": "string: \\Program Files\\WindowsApps\\does_not_exist", + "expected": false + }, + { + "file": "93b2d1-vmray", + "location": "file", + "feature": "import: GetAddrInfoW" + }, + { + "file": "93b2d1-vmray", + "location": "file", + "feature": "import: GetAddrInfo" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2180", + "feature": "api: LoadLibraryExA" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2180", + "feature": "api: LoadLibraryEx" + }, + { + "file": "93b2d1-vmray", + "location": 
"process=(2176:0),thread=2420", + "feature": "api: GetAddrInfoW" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420", + "feature": "api: GetAddrInfo" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420", + "feature": "api: DoesNotExist", + "expected": false + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=2361", + "feature": "api: GetAddrInfoW" + }, + { + "file": "eb1287-vmray", + "location": "process=(4968:0),thread=5992,call=10981", + "feature": "api: CreateMutexW" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=10323", + "feature": "string: raw.githubusercontent.com" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2180,call=267", + "feature": "string: C:\\Users\\WhuOXYsD\\Desktop\\filename.exe", + "comment": "backslashes in paths; see #2428" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2180,call=267", + "feature": "string: C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe", + "expected": false + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "string: Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "string: Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Policies\\\\System", + "expected": false + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=2358", + "feature": "number: 0x1000", + "comment": "VirtualAlloc(4096, 4)" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=2358", + "feature": "number: 0x4" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "number: 0x80000001", + "comment": "RegOpenKeyExW(Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System, 0, 131078); see #2" + }, + 
{ + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "number: 0x0" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "number: 0x20006" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2397", + "feature": "number: 0x80000001", + "comment": "RegOpenKeyExW call 2397 (same parameters)" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2397", + "feature": "number: 0x0" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2397", + "feature": "number: 0x20006" + }, + { + "file": "93b2d1-vmray", + "location": "file", + "feature": "count(import(GetAddrInfoW)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420", + "feature": "count(api(free)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420", + "feature": "count(api(GetAddrInfoW)): 5" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=2345", + "feature": "count(api(free)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=2345", + "feature": "count(api(GetAddrInfoW)): 0" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=2361", + "feature": "count(api(GetAddrInfoW)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=10323", + "feature": "count(string(raw.githubusercontent.com)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=10323", + "feature": "count(string(non_existant)): 0" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=10315", + "feature": "count(number(0x1000)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2420,call=10315", + "feature": "count(number(0x4)): 1" + }, + { + "file": "93b2d1-vmray", + "location": 
"process=(2176:0),thread=2420,call=10315", + "feature": "count(number(0x194)): 0" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "count(number(0x80000001)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "count(number(0x0)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "count(number(0x20006)): 1" + }, + { + "file": "93b2d1-vmray", + "location": "process=(2176:0),thread=2204,call=2395", + "feature": "count(number(0xf423f)): 0" + } + ] +} diff --git a/tests/test_binexport_features.py b/tests/test_binexport_features.py index 2695230c0..07ab217d7 100644 --- a/tests/test_binexport_features.py +++ b/tests/test_binexport_features.py @@ -14,408 +14,21 @@ from typing import cast -import pytest import fixtures +import pytest -import capa.features.file -import capa.features.insn import capa.features.common -import capa.features.basicblock -from capa.features.common import ( - OS, - OS_LINUX, - ARCH_I386, - FORMAT_PE, - ARCH_AMD64, - FORMAT_ELF, - OS_ANDROID, - OS_WINDOWS, - ARCH_AARCH64, - Arch, - Format, -) -FEATURE_PRESENCE_TESTS_BE2_ELF_AARCH64 = sorted( - [ - # file/string - ( - "687e79.ghidra.be2", - "file", - capa.features.common.String("AppDataService start"), - True, - ), - ("687e79.ghidra.be2", "file", capa.features.common.String("nope"), False), - # file/sections - ("687e79.ghidra.be2", "file", capa.features.file.Section(".text"), True), - ("687e79.ghidra.be2", "file", capa.features.file.Section(".nope"), False), - # file/exports - ( - "687e79.ghidra.be2", - "file", - capa.features.file.Export("android::clearDir"), - "xfail: name demangling is not implemented", - ), - ("687e79.ghidra.be2", "file", capa.features.file.Export("nope"), False), - # file/imports - ("687e79.ghidra.be2", "file", capa.features.file.Import("fopen"), True), - ("687e79.ghidra.be2", "file", 
capa.features.file.Import("exit"), True), - ( - "687e79.ghidra.be2", - "file", - capa.features.file.Import("_ZN7android10IInterfaceD0Ev"), - True, - ), - ("687e79.ghidra.be2", "file", capa.features.file.Import("nope"), False), - # function/characteristic(loop) - ( - "687e79.ghidra.be2", - "function=0x1056c0", - capa.features.common.Characteristic("loop"), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x1075c0", - capa.features.common.Characteristic("loop"), - False, - ), - # bb/characteristic(tight loop) - ( - "d1e650.ghidra.be2", - "function=0x114af4", - capa.features.common.Characteristic("tight loop"), - True, - ), - ( - "d1e650.ghidra.be2", - "function=0x118F1C", - capa.features.common.Characteristic("tight loop"), - True, - ), - ( - "d1e650.ghidra.be2", - "function=0x11464c", - capa.features.common.Characteristic("tight loop"), - False, - ), - # bb/characteristic(stack string) - ( - "687e79.ghidra.be2", - "function=0x0", - capa.features.common.Characteristic("stack string"), - "xfail: not implemented yet", - ), - ( - "687e79.ghidra.be2", - "function=0x0", - capa.features.common.Characteristic("stack string"), - "xfail: not implemented yet", - ), - # insn/mnemonic - ("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("stp"), True), - ("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("adrp"), True), - ("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("bl"), True), - ("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("in"), False), - ("687e79.ghidra.be2", "function=0x107588", capa.features.insn.Mnemonic("adrl"), False), - # insn/number - # 00114524 add x29,sp,#0x10 - ( - "d1e650.ghidra.be2", - "function=0x11451c", - capa.features.insn.Number(0x10), - False, - ), - # 00105128 sub sp,sp,#0xE0 - ( - "687e79.ghidra.be2", - "function=0x105128", - capa.features.insn.Number(0xE0), - False, - ), - # insn/operand.number - ( - "687e79.ghidra.be2", - "function=0x105128,bb=0x1051e4", - 
capa.features.insn.OperandNumber(1, 0xFFFFFFFF), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x107588,bb=0x107588", - capa.features.insn.OperandNumber(1, 0x8), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x107588,bb=0x107588,insn=0x1075a4", - capa.features.insn.OperandNumber(1, 0x8), - True, - ), - # insn/operand.offset - ( - "687e79.ghidra.be2", - "function=0x105128,bb=0x105450", - capa.features.insn.OperandOffset(2, 0x10), - True, - ), - ( - "d1e650.ghidra.be2", - "function=0x124854,bb=0x1248AC,insn=0x1248B4", - capa.features.insn.OperandOffset(2, -0x48), - True, - ), - ( - "d1e650.ghidra.be2", - "function=0x13347c,bb=0x133548,insn=0x133554", - capa.features.insn.OperandOffset(2, 0x20), - False, - ), - ("687e79.ghidra.be2", "function=0x105C88", capa.features.insn.Number(0xF000), True), - # insn/number: negative - ( - "687e79.ghidra.be2", - "function=0x1057f8,bb=0x1057f8", - capa.features.insn.Number(0xFFFFFFFFFFFFFFFF), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x1057f8,bb=0x1057f8", - capa.features.insn.Number(0xFFFFFFFFFFFFFFFF), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x1066e0,bb=0x1068c4", - capa.features.insn.Number(0xFFFFFFFF), - True, - ), - # insn/offset - ( - "687e79.ghidra.be2", - "function=0x105128,bb=0x105450", - capa.features.insn.Offset(0x10), - True, - ), - # ldp x29,x30,[sp, #0x20] - ( - "d1e650.ghidra.be2", - "function=0x13347c,bb=0x133548,insn=0x133554", - capa.features.insn.Offset(0x20), - False, - ), - # stp x20,x0,[x19, #0x8] - ( - "d1e650.ghidra.be2", - "function=0x1183e0,bb=0x11849c,insn=0x1184b0", - capa.features.insn.Offset(0x8), - True, - ), - # str xzr,[x8, #0x8]! - ( - "d1e650.ghidra.be2", - "function=0x138688,bb=0x138994,insn=0x1389a8", - capa.features.insn.Offset(0x8), - True, - ), - # ldr x9,[x8, #0x8]! 
- ( - "d1e650.ghidra.be2", - "function=0x138688,bb=0x138978,insn=0x138984", - capa.features.insn.Offset(0x8), - True, - ), - # ldr x19,[sp], #0x20 - ( - "d1e650.ghidra.be2", - "function=0x11451c", - capa.features.insn.Offset(0x20), - False, - ), - # ldrb w9,[x8, #0x1] - ( - "d1e650.ghidra.be2", - "function=0x138a9c,bb=0x138b00,insn=0x138b00", - capa.features.insn.Offset(0x1), - True, - ), - # insn/offset: negative - ( - "d1e650.ghidra.be2", - "function=0x124854,bb=0x1248AC,insn=0x1248B4", - capa.features.insn.Offset(-0x48), - True, - ), - # insn/offset from mnemonic: add - # 0010514c add x23,param_1,#0x8 - ( - "687e79.ghidra.be2", - "function=0x105128,bb=0x105128,insn=0x10514c", - capa.features.insn.Offset(0x8), - True, - ), - # insn/api - # not extracting dll name - ("687e79.ghidra.be2", "function=0x105c88", capa.features.insn.API("memset"), True), - ("687e79.ghidra.be2", "function=0x105c88", capa.features.insn.API("Nope"), False), - # insn/string - ( - "687e79.ghidra.be2", - "function=0x107588", - capa.features.common.String("AppDataService start"), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x1075c0", - capa.features.common.String("AppDataService"), - True, - ), - ("687e79.ghidra.be2", "function=0x107588", capa.features.common.String("nope"), False), - ( - "687e79.ghidra.be2", - "function=0x106d58", - capa.features.common.String("/data/misc/wifi/wpa_supplicant.conf"), - True, - ), - # insn/regex - ( - "687e79.ghidra.be2", - "function=0x105c88", - capa.features.common.Regex("innerRename"), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x106d58", - capa.features.common.Regex("/data/misc"), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x106d58", - capa.features.common.Substring("/data/misc"), - True, - ), - # insn/bytes - ( - "d1e650.ghidra.be2", - "function=0x1165a4", - capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")), - True, - ), - # # don't extract byte features for obvious strings - ( - 
"687e79.ghidra.be2", - "function=0x1057f8", - capa.features.common.Bytes("/system/xbin/busybox".encode("utf-16le")), - False, - ), - # insn/characteristic(nzxor) - ( - "d1e650.ghidra.be2", - "function=0x114af4", - capa.features.common.Characteristic("nzxor"), - True, - ), - ( - "d1e650.ghidra.be2", - "function=0x117988", - capa.features.common.Characteristic("nzxor"), - True, - ), - # # insn/characteristic(cross section flow) - # ("a1982...", "function=0x4014D0", capa.features.common.Characteristic("cross section flow"), True), - # # insn/characteristic(cross section flow): imports don't count - # ("mimikatz", "function=0x4556E5", capa.features.common.Characteristic("cross section flow"), False), - # insn/characteristic(recursive call) - ( - "687e79.ghidra.be2", - "function=0x105b38", - capa.features.common.Characteristic("recursive call"), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x106530", - capa.features.common.Characteristic("recursive call"), - True, - ), - # insn/characteristic(indirect call) - ("d1e650.ghidra.be2", "function=0x118620", capa.features.common.Characteristic("indirect call"), True), - ( - "d1e650.ghidra.be2", - "function=0x118500", - capa.features.common.Characteristic("indirect call"), - False, - ), - ("d1e650.ghidra.be2", "function=0x118620", capa.features.common.Characteristic("indirect call"), True), - ( - "d1e650.ghidra.be2", - "function=0x11451c", - capa.features.common.Characteristic("indirect call"), - True, - ), - # insn/characteristic(calls from) - ( - "687e79.ghidra.be2", - "function=0x105080", - capa.features.common.Characteristic("calls from"), - True, - ), - ( - "687e79.ghidra.be2", - "function=0x1070e8", - capa.features.common.Characteristic("calls from"), - False, - ), - # function/characteristic(calls to) - ( - "687e79.ghidra.be2", - "function=0x1075c0", - capa.features.common.Characteristic("calls to"), - True, - ), - # file/function-name - ( - "687e79.ghidra.be2", - "file", - 
capa.features.file.FunctionName("__libc_init"), - "xfail: TODO should this be a function-name?", - ), - # os & format & arch - ("687e79.ghidra.be2", "file", OS(OS_ANDROID), True), - ("687e79.ghidra.be2", "file", OS(OS_LINUX), False), - ("687e79.ghidra.be2", "file", OS(OS_WINDOWS), False), - # os & format & arch are also global features - ("687e79.ghidra.be2", "function=0x107588", OS(OS_ANDROID), True), - ("687e79.ghidra.be2", "function=0x1075c0,bb=0x1076c0", OS(OS_ANDROID), True), - ("687e79.ghidra.be2", "file", Arch(ARCH_I386), False), - ("687e79.ghidra.be2", "file", Arch(ARCH_AMD64), False), - ("687e79.ghidra.be2", "file", Arch(ARCH_AARCH64), True), - ("687e79.ghidra.be2", "function=0x107588", Arch(ARCH_AARCH64), True), - ("687e79.ghidra.be2", "function=0x1075c0,bb=0x1076c0", Arch(ARCH_AARCH64), True), - ("687e79.ghidra.be2", "file", Format(FORMAT_ELF), True), - ("687e79.ghidra.be2", "file", Format(FORMAT_PE), False), - ("687e79.ghidra.be2", "function=0x107588", Format(FORMAT_ELF), True), - ("687e79.ghidra.be2", "function=0x107588", Format(FORMAT_PE), False), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. 
- key=lambda t: (t[0], t[1]), +BACKEND = fixtures.BackendFeaturePolicy( + name="binexport", + get_extractor=fixtures.get_binexport_extractor, + include_tags={"binexport"}, ) -@fixtures.parametrize( - "sample,scope,feature,expected", - FEATURE_PRESENCE_TESTS_BE2_ELF_AARCH64, - indirect=["sample", "scope"], -) -def test_binexport_features_elf_aarch64(sample, scope, feature, expected): - if not isinstance(expected, bool): - # (for now) xfails indicates using string like: "xfail: not implemented yet" - pytest.xfail(expected) - fixtures.do_test_feature_presence(fixtures.get_binexport_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_binexport_features_elf_aarch64(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) @fixtures.parametrize( @@ -427,12 +40,16 @@ def test_binexport_features_pe_x86(sample, scope, feature, expected): if "mimikatz.exe_" not in sample.name: pytest.skip("for now only testing mimikatz.exe_ Ghidra BinExport file") - if isinstance(feature, capa.features.common.Characteristic) and "stack string" in cast(str, feature.value): + if isinstance( + feature, capa.features.common.Characteristic + ) and "stack string" in cast(str, feature.value): pytest.skip("for now only testing basic features") sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport") assert sample.exists() - fixtures.do_test_feature_presence(fixtures.get_binexport_extractor, sample, scope, feature, expected) + fixtures.do_test_feature_presence( + fixtures.get_binexport_extractor, sample, scope, feature, expected + ) @fixtures.parametrize( @@ -445,4 +62,6 @@ def test_binexport_feature_counts_ghidra(sample, scope, feature, expected): pytest.skip("for now only testing mimikatz.exe_ Ghidra BinExport file") sample = sample.parent / "binexport2" / (sample.name + ".ghidra.BinExport") assert sample.exists() - fixtures.do_test_feature_count(fixtures.get_binexport_extractor, sample, scope, feature, 
expected) + fixtures.do_test_feature_count( + fixtures.get_binexport_extractor, sample, scope, feature, expected + ) diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index c97a8d26c..122a60229 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -15,12 +15,11 @@ import logging from pathlib import Path -import pytest import fixtures +import pytest -import capa.main -import capa.features.file import capa.features.common +import capa.main logger = logging.getLogger(__file__) @@ -33,41 +32,50 @@ try: binaryninja.load(source=b"\x90") except RuntimeError: - logger.warning("Binary Ninja license is not valid, provide via $BN_LICENSE or license.dat") + logger.warning( + "Binary Ninja license is not valid, provide via $BN_LICENSE or license.dat" + ) else: binja_present = True except ImportError: pass -@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS + fixtures.FEATURE_BINJA_DATABASE_TESTS, - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="binja", + # binja also loads .bndb database files natively, so include `binja-db` + # alongside the regular static-binary fixtures. 
+ get_extractor=fixtures.get_binja_extractor, + include_tags={"static", "binja-db"}, + exclude_tags={"dotnet", "ghidra"}, ) -def test_binja_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected) -@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], +@pytest.mark.skipif( + binja_present is False, + reason="Skip binja tests if the binaryninja Python API is not installed", ) -def test_binja_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_binja_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_binja_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) -@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") +@pytest.mark.skipif( + binja_present is False, + reason="Skip binja tests if the binaryninja Python API is not installed", +) def test_standalone_binja_backend(): CD = Path(__file__).resolve().parent - test_path = CD / ".." / "tests" / "data" / "Practical Malware Analysis Lab 01-01.exe_" + test_path = ( + CD / ".." 
/ "tests" / "data" / "Practical Malware Analysis Lab 01-01.exe_" + ) assert capa.main.main([str(test_path), "-b", capa.main.BACKEND_BINJA]) == 0 -@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") +@pytest.mark.skipif( + binja_present is False, + reason="Skip binja tests if the binaryninja Python API is not installed", +) def test_binja_version(): version = binaryninja.core_version_info() assert version.major == 5 and version.minor == 2 diff --git a/tests/test_cape_features.py b/tests/test_cape_features.py index d3cc4bdd6..226f1cf04 100644 --- a/tests/test_cape_features.py +++ b/tests/test_cape_features.py @@ -15,108 +15,13 @@ import fixtures -import capa.main -import capa.features.file -import capa.features.insn -import capa.features.common -import capa.features.basicblock - -DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted( - [ - # file/string - ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), True), - ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), True), - ("0000a657", "file", capa.features.common.String("nope"), False), - # file/sections - ("0000a657", "file", capa.features.file.Section(".rdata"), True), - ("0000a657", "file", capa.features.file.Section(".nope"), False), - # file/imports - ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), True), - ("0000a657", "file", capa.features.file.Import("Nope"), False), - # file/exports - ("0000a657", "file", capa.features.file.Export("Nope"), False), - # process/environment variables - ( - "0000a657", - "process=(1180:3052)", - capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"), - True, - ), - ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False), - # thread/api calls - ("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueExA"), True), - ("0000a657", 
"process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueEx"), True), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False), - # thread/number call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), True), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), False), - # thread/string call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), True), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), False), - ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), True), - ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), False), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. 
- key=lambda t: (t[0], t[1]), -) - -DYNAMIC_CAPE_FEATURE_COUNT_TESTS = sorted( - # TODO(yelhamer): use the same sample for testing CAPE and DRAKVUF extractors - # https://github.com/mandiant/capa/issues/2180 - [ - # file/string - ("0000a657", "file", capa.features.common.String("T_Ba?.BcRJa"), 1), - ("0000a657", "file", capa.features.common.String("GetNamedPipeClientSessionId"), 1), - ("0000a657", "file", capa.features.common.String("nope"), 0), - # file/sections - ("0000a657", "file", capa.features.file.Section(".rdata"), 1), - ("0000a657", "file", capa.features.file.Section(".nope"), 0), - # file/imports - ("0000a657", "file", capa.features.file.Import("NdrSimpleTypeUnmarshall"), 1), - ("0000a657", "file", capa.features.file.Import("Nope"), 0), - # file/exports - ("0000a657", "file", capa.features.file.Export("Nope"), 0), - # process/environment variables - ( - "0000a657", - "process=(1180:3052)", - capa.features.common.String("C:\\Users\\comp\\AppData\\Roaming\\Microsoft\\Jxoqwnx\\jxoqwn.exe"), - 2, - ), - ("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), 0), - # thread/api calls - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), 7), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), 0), - # thread/number call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(0x000000EC), 1), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.Number(110173), 0), - # thread/string call argument - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("SetThreadUILanguage"), 1), - ("0000a657", "process=(2852:3052),thread=2804", capa.features.common.String("nope"), 0), - ("0000a657", "process=(2852:3052),thread=2804,call=56", capa.features.insn.API("NtQueryValueKey"), 1), - ("0000a657", "process=(2852:3052),thread=2804,call=1958", capa.features.insn.API("nope"), 0), - ], - # order tests by (file, item) - 
# so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), +BACKEND = fixtures.BackendFeaturePolicy( + name="cape", + get_extractor=fixtures.get_cape_extractor, + include_tags={"cape"}, ) -@fixtures.parametrize( - "sample,scope,feature,expected", - DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS, - indirect=["sample", "scope"], -) -def test_cape_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_cape_extractor, sample, scope, feature, expected) - - -@fixtures.parametrize( - "sample,scope,feature,expected", - DYNAMIC_CAPE_FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], -) -def test_cape_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_cape_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_cape_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) diff --git a/tests/test_dnfile_features.py b/tests/test_dnfile_features.py index 1916c542a..89f1fe7c9 100644 --- a/tests/test_dnfile_features.py +++ b/tests/test_dnfile_features.py @@ -14,20 +14,13 @@ import fixtures - -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS_DOTNET, - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="dnfile", + get_extractor=fixtures.get_dnfile_extractor, + include_tags={"dotnet"}, ) -def test_dnfile_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected) -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_COUNT_TESTS_DOTNET, - indirect=["sample", "scope"], -) -def test_dnfile_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_dnfile_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_dnfile_features(feature_fixture): + 
fixtures.run_feature_fixture(BACKEND, feature_fixture) diff --git a/tests/test_dotnetfile_features.py b/tests/test_dotnetfile_features.py index 2479c4a4c..abd67bccd 100644 --- a/tests/test_dotnetfile_features.py +++ b/tests/test_dotnetfile_features.py @@ -12,25 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import fixtures -import capa.features.file - - -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS_DOTNET, - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="dotnetfile", + get_extractor=fixtures.get_dotnetfile_extractor, + include_tags={"dotnet"}, + exclude_tags={ + # dotnetfile is a file-scope extractor; drop non-file scopes + "function", + "basic-block", + "instruction", + # and drop feature types dotnetfile doesn't produce + "function-name", + }, ) -def test_dotnetfile_features(sample, scope, feature, expected): - if scope.__name__ != "file": - pytest.xfail("dotnetfile only extracts file scope features") - if isinstance(feature, capa.features.file.FunctionName): - pytest.xfail("dotnetfile doesn't extract function names") - fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_dotnetfile_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) @fixtures.parametrize( diff --git a/tests/test_drakvuf_features.py b/tests/test_drakvuf_features.py index 9f5115ae3..3cb1e9af7 100644 --- a/tests/test_drakvuf_features.py +++ b/tests/test_drakvuf_features.py @@ -15,88 +15,13 @@ import fixtures -import capa.main -import capa.features.file -import capa.features.insn -import capa.features.common - -DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted( - [ - ("93b2d1-drakvuf", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), - # file/imports - 
("93b2d1-drakvuf", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), True), - # thread/api calls - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), - # call/api - ("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowExW"), True), - ("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowEx"), True), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), True), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), False), - # call/string argument - ( - "93b2d1-drakvuf", - "process=(3564:4852),thread=6592,call=1", - capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), - True, - ), - ( - "93b2d1-drakvuf", - "process=(3564:4852),thread=6592,call=1", - capa.features.common.String("non_existant"), - False, - ), - # call/number argument - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x801), True), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x010101010101), False), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. 
- key=lambda t: (t[0], t[1]), -) - -DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS = sorted( - [ - ("93b2d1-drakvuf", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), - # file/imports - ("93b2d1-drakvuf", "file", capa.features.file.Import("SetUnhandledExceptionFilter"), 1), - # thread/api calls - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), 9), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False), - # call/api - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), 1), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), 0), - # call/string argument - ( - "93b2d1-drakvuf", - "process=(3564:4852),thread=6592,call=1", - capa.features.common.String('0x667e2beb40:"api-ms-win-core-fibers-l1-1-1"'), - 1, - ), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.common.String("non_existant"), 0), - # call/number argument - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x801), 1), - ("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.Number(0x010101010101), 0), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. 
- key=lambda t: (t[0], t[1]), +BACKEND = fixtures.BackendFeaturePolicy( + name="drakvuf", + get_extractor=fixtures.get_drakvuf_extractor, + include_tags={"drakvuf"}, ) -@fixtures.parametrize( - "sample,scope,feature,expected", - DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS, - indirect=["sample", "scope"], -) -def test_drakvuf_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_drakvuf_extractor, sample, scope, feature, expected) - - -@fixtures.parametrize( - "sample,scope,feature,expected", - DYNAMIC_DRAKVUF_FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], -) -def test_drakvuf_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_drakvuf_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_drakvuf_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) diff --git a/tests/test_ghidra_features.py b/tests/test_ghidra_features.py index 58f03d022..6b9fd7a1e 100644 --- a/tests/test_ghidra_features.py +++ b/tests/test_ghidra_features.py @@ -11,42 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os import importlib.util +import os -import pytest import fixtures +import pytest -import capa.features.common - -ghidra_present = importlib.util.find_spec("pyghidra") is not None and "GHIDRA_INSTALL_DIR" in os.environ +ghidra_present = ( + importlib.util.find_spec("pyghidra") is not None + and "GHIDRA_INSTALL_DIR" in os.environ +) -@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - [ - ( - pytest.param( - *t, - marks=pytest.mark.xfail( - reason="specific to Vivisect and basic blocks do not align with Ghidra's analysis" - ), - ) - if t[0] == "294b8d..." 
and t[2] == capa.features.common.String("\r\n\x00:ht") - else t - ) - for t in fixtures.FEATURE_PRESENCE_TESTS - ], - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="ghidra", + get_extractor=fixtures.get_ghidra_extractor, + include_tags={"static"}, + exclude_tags={"dotnet"}, ) -def test_ghidra_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_ghidra_extractor, sample, scope, feature, expected) @pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", fixtures.FEATURE_COUNT_TESTS_GHIDRA, indirect=["sample", "scope"] -) -def test_ghidra_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_ghidra_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_ghidra_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index 8604b94e3..7cd4ee14a 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -12,39 +12,56 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from pathlib import Path -import pytest import fixtures +import pytest import capa.features.extractors.ida.idalib +from capa.features.common import Characteristic from capa.features.file import FunctionName from capa.features.insn import API -from capa.features.common import Characteristic logger = logging.getLogger(__name__) idalib_present = capa.features.extractors.ida.idalib.has_idalib() if idalib_present: try: - import idapro # noqa: F401 [imported but unused] import ida_kernwin + import idapro # noqa: F401 [imported but unused] kernel_version: str = ida_kernwin.get_kernel_version() except ImportError: idalib_present = False kernel_version = "0.0" +else: + kernel_version = "0.0" -@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="idalib", + get_extractor=fixtures.get_idalib_extractor, + include_tags={"static"}, + exclude_tags={"dotnet", "ghidra"}, ) -def test_idalib_features(sample: Path, scope, feature, expected): - if kernel_version in {"9.0", "9.1"} and sample.name.startswith("2bf18d"): - if isinstance(feature, (API, FunctionName)) and feature.value == "__libc_connect": + + +@pytest.mark.skipif( + idalib_present is False, + reason="Skip idalib tests if the idalib Python API is not installed", +) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_idalib_features(feature_fixture): + # apply runtime-conditional xfails for specific IDA versions. + # version-specific behavior stays in the test body because it + # depends on the installed IDA, not on the fixture data. 
+ sample_name = feature_fixture.sample_path.name + statement = feature_fixture.statement + + if kernel_version in {"9.0", "9.1"} and sample_name.startswith("2bf18d"): + if ( + isinstance(statement, (API, FunctionName)) + and statement.value == "__libc_connect" + ): # see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3674146335 # # > i confirmed that there were changes in 9.2 related to the ELF loader handling names, @@ -52,35 +69,20 @@ def test_idalib_features(sample: Path, scope, feature, expected): # > prevented this name from surfacing. pytest.xfail(f"IDA {kernel_version} does not extract all ELF symbols") - if kernel_version in {"9.0"} and sample.name.startswith("Practical Malware Analysis Lab 12-04.exe_"): - if isinstance(feature, Characteristic) and feature.value == "embedded pe": + if kernel_version in {"9.0"} and sample_name.startswith( + "Practical Malware Analysis Lab 12-04.exe_" + ): + if isinstance(statement, Characteristic) and statement.value == "embedded pe": # see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3667086165 # # idalib for IDA 9.0 doesn't support argv arguments, so we can't ask that resources are loaded pytest.xfail("idalib 9.0 does not support loading resource segments") try: - fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected) + fixtures.run_feature_fixture(BACKEND, feature_fixture) finally: - logger.debug("closing database...") import idapro - idapro.close_database(save=False) - logger.debug("closed database.") - - -@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], -) -def test_idalib_feature_counts(sample, scope, feature, expected): - try: - fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected) - finally: 
logger.debug("closing database...") - import idapro - idapro.close_database(save=False) logger.debug("closed database.") diff --git a/tests/test_pefile_features.py b/tests/test_pefile_features.py index ba2c18e46..c331d449b 100644 --- a/tests/test_pefile_features.py +++ b/tests/test_pefile_features.py @@ -12,24 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import fixtures -import capa.features.file - - -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="pefile", + get_extractor=fixtures.get_pefile_extractor, + include_tags={"static"}, + exclude_tags={ + "dotnet", + "elf", + # pefile is a file-scope extractor; drop non-file scopes + "function", + "basic-block", + "instruction", + # and drop feature types pefile doesn't produce + "function-name", + }, ) -def test_pefile_features(sample, scope, feature, expected): - if scope.__name__ != "file": - pytest.xfail("pefile only extracts file scope features") - if isinstance(feature, capa.features.file.FunctionName): - pytest.xfail("pefile doesn't extract function names") - if ".elf" in sample.name: - pytest.xfail("pefile doesn't handle ELF files") - fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_pefile_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index d07a0a086..d0873ce53 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -14,20 +14,14 @@ import fixtures - -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, - indirect=["sample", "scope"], +BACKEND = fixtures.BackendFeaturePolicy( + name="viv", + 
get_extractor=fixtures.get_viv_extractor, + include_tags={"static"}, + exclude_tags={"dotnet", "ghidra"}, ) -def test_viv_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_viv_extractor, sample, scope, feature, expected) -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], -) -def test_viv_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_viv_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_viv_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) diff --git a/tests/test_vmray_features.py b/tests/test_vmray_features.py index bf035b83a..a5e23df6b 100644 --- a/tests/test_vmray_features.py +++ b/tests/test_vmray_features.py @@ -15,129 +15,16 @@ import fixtures -import capa.main -import capa.features.file -import capa.features.insn -import capa.features.common - -DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS = sorted( - [ - ("93b2d1-vmray", "file", capa.features.common.String("api.%x%x.%s"), True), - ("93b2d1-vmray", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False), - # file/imports - ("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), True), - ("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfo"), True), - # thread/api calls - ("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryExA"), True), - ("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryEx"), True), - ("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), True), - ("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfo"), True), - ("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("DoesNotExist"), False), - # call/api - ("93b2d1-vmray", 
"process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), True), - ("eb1287-vmray", "process=(4968:0),thread=5992,call=10981", capa.features.insn.API("CreateMutexW"), True), - # call/string argument - ( - "93b2d1-vmray", - "process=(2176:0),thread=2420,call=10323", - capa.features.common.String("raw.githubusercontent.com"), - True, - ), - # backslashes in paths; see #2428 - ( - "93b2d1-vmray", - "process=(2176:0),thread=2180,call=267", - capa.features.common.String("C:\\Users\\WhuOXYsD\\Desktop\\filename.exe"), - True, - ), - ( - "93b2d1-vmray", - "process=(2176:0),thread=2180,call=267", - capa.features.common.String("C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe"), - False, - ), - ( - "93b2d1-vmray", - "process=(2176:0),thread=2204,call=2395", - capa.features.common.String("Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System"), - True, - ), - ( - "93b2d1-vmray", - "process=(2176:0),thread=2204,call=2395", - capa.features.common.String("Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Policies\\\\System"), - False, - ), - # call/number argument - # VirtualAlloc(4096, 4) - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4096), True), - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4), True), - # call/number argument - registry API parameters (issue #2) - # RegOpenKeyExW(Software\Microsoft\Windows\CurrentVersion\Policies\System, 0, 131078) - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), True), - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), True), - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), True), - # RegOpenKeyExW call 2397 (same parameters) - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(2147483649), True), - ("93b2d1-vmray", 
"process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(0), True), - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2397", capa.features.insn.Number(131078), True), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), -) - -DYNAMIC_VMRAY_FEATURE_COUNT_TESTS = sorted( - [ - # file/imports - ("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), 1), - # thread/api calls - ("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("free"), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), 5), - # call/api - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("free"), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("GetAddrInfoW"), 0), - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), 1), - # call/string argument - ( - "93b2d1-vmray", - "process=(2176:0),thread=2420,call=10323", - capa.features.common.String("raw.githubusercontent.com"), - 1, - ), - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=10323", capa.features.common.String("non_existant"), 0), - # call/number argument - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4096), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(404), 0), - # call/number argument - registry API parameters (issue #2) - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(2147483649), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(0), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", capa.features.insn.Number(131078), 1), - ("93b2d1-vmray", "process=(2176:0),thread=2204,call=2395", 
capa.features.insn.Number(999999), 0), - ], - # order tests by (file, item) - # so that our LRU cache is most effective. - key=lambda t: (t[0], t[1]), +BACKEND = fixtures.BackendFeaturePolicy( + name="vmray", + get_extractor=fixtures.get_vmray_extractor, + include_tags={"vmray"}, ) -@fixtures.parametrize( - "sample,scope,feature,expected", - DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS, - indirect=["sample", "scope"], -) -def test_vmray_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_vmray_extractor, sample, scope, feature, expected) - - -@fixtures.parametrize( - "sample,scope,feature,expected", - DYNAMIC_VMRAY_FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], -) -def test_vmray_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_vmray_extractor, sample, scope, feature, expected) +@fixtures.parametrize_backend_feature_fixtures(BACKEND) +def test_vmray_features(feature_fixture): + fixtures.run_feature_fixture(BACKEND, feature_fixture) def test_vmray_processes():