Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .cursor/environment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"install": "bash .cursor/scripts/install-python-dev-tools.sh",
"start": "bash .cursor/scripts/startup-path.sh"
}
13 changes: 13 additions & 0 deletions .cursor/scripts/install-python-dev-tools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -euo pipefail

# Install Python dev dependencies into user site-packages.
python3 -m pip install --user -r requirements-dev.txt

# Expose user-installed tool entrypoints from a stable HOME/bin directory.
mkdir -p "${HOME}/bin"
for tool in pytest ruff mypy; do
if [ -x "${HOME}/.local/bin/${tool}" ]; then
ln -sf "${HOME}/.local/bin/${tool}" "${HOME}/bin/${tool}"
fi
done
13 changes: 13 additions & 0 deletions .cursor/scripts/startup-path.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -euo pipefail

path_export='export PATH="$HOME/.local/bin:$HOME/bin:$PATH"'

for profile in "${HOME}/.bashrc" "${HOME}/.profile"; do
touch "${profile}"
if ! grep -Fq "${path_export}" "${profile}"; then
printf '\n%s\n' "${path_export}" >> "${profile}"
fi
done

export PATH="${HOME}/.local/bin:${HOME}/bin:${PATH}"
10 changes: 8 additions & 2 deletions src/dgf_common/code_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re

_FENCED_CODE_PATTERN = re.compile(r"```(?:c|C|cpp|c\+\+)?\s*(.*?)```", re.DOTALL)
_FENCED_C_CPP_PATTERN = re.compile(r"```(?:c|C|cpp|c\+\+)\s*(.*?)```", re.DOTALL)
_FENCED_UNTAGGED_PATTERN = re.compile(r"```[ \t]*\n(.*?)```", re.DOTALL)


def extract_c_code_block(raw_text):
Expand All @@ -11,7 +12,12 @@ def extract_c_code_block(raw_text):
if raw_text is None:
return ""

match = _FENCED_CODE_PATTERN.search(raw_text)
match = _FENCED_C_CPP_PATTERN.search(raw_text)
if match:
return match.group(1).strip()

# Backward-compatible fallback for unlabeled fenced blocks.
match = _FENCED_UNTAGGED_PATTERN.search(raw_text)
if match:
return match.group(1).strip()
return raw_text.strip()
6 changes: 6 additions & 0 deletions src/dgf_feedback/branch_coverage_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def collect_branch_coverage(self, binary_path, work_dir):
except subprocess.CalledProcessError as exc:
LOGGER.warning("Failed to merge profile data: %s", exc.stderr)
return {}, 0.0
except OSError as exc:
LOGGER.warning("Failed to execute profile merge command %s: %s", self.profdata, exc)
return {}, 0.0

export_cmd = [
self.cov, "export",
Expand All @@ -50,6 +53,9 @@ def collect_branch_coverage(self, binary_path, work_dir):
except subprocess.CalledProcessError as exc:
LOGGER.warning("Failed to export coverage json: %s", exc.stderr)
return {}, 0.0
except OSError as exc:
LOGGER.warning("Failed to execute coverage export command %s: %s", self.cov, exc)
return {}, 0.0
except json.JSONDecodeError:
LOGGER.warning("Invalid coverage JSON output from llvm-cov")
return {}, 0.0
Expand Down
3 changes: 2 additions & 1 deletion src/dgf_feedback/feedback_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def run_iteration(self, num_samples=5, base_num_funcs=5):
for i in range(num_samples):
# === 动态选取 APIs ===
candidate_apis = self.api_manager.sample_api_combination(base_num_funcs)
mutated_apis = self.mutator.mutate(candidate_apis)
parent_apis = history_api_combos[-1] if history_api_combos else None
mutated_apis = self.mutator.mutate(candidate_apis, parents=parent_apis)
history_api_combos.append(mutated_apis)

# 记录 prompt 使用次数
Expand Down
7 changes: 5 additions & 2 deletions src/dgf_feedback/prompt_mutator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,15 @@ def crossover(self, parent_apis1, parent_apis2):
return merged

def mutate(self, current_apis, parents=None):
mode = random.choice(["insert", "replace", "crossover"])
modes = ["insert", "replace"]
if parents:
modes.append("crossover")
mode = random.choice(modes)
if mode == "insert":
return self.insert(current_apis)
elif mode == "replace":
return self.replace(current_apis)
elif mode == "crossover" and parents is not None:
elif mode == "crossover":
return self.crossover(current_apis, parents)
else:
return current_apis # 保底返回
26 changes: 26 additions & 0 deletions src/dgf_feedback/test_feedback.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dgf_feedback.prompt_mutator as prompt_mutator_module
from dgf_feedback.api_manager import APIManager
from dgf_feedback.prompt_mutator import PromptMutator
from dgf_feedback.sample_filter import SampleFilter
Expand Down Expand Up @@ -39,3 +40,28 @@ def test_prompt_mutator_insert_replace_crossover():

crossed = mutator.crossover(["A", "B"], ["B", "C"])
assert set(crossed) == {"A", "B", "C"}


def test_prompt_mutator_without_parents_does_not_offer_crossover(monkeypatch):
manager = APIManager(["A", "B", "C"])
mutator = PromptMutator(manager)
seen_modes = []

def fake_choice(options):
seen_modes.extend(options)
return "insert"

monkeypatch.setattr(prompt_mutator_module.random, "choice", fake_choice)
mutated = mutator.mutate(["A"], parents=None)

assert "crossover" not in seen_modes
assert "A" in mutated


def test_prompt_mutator_crossover_with_parent(monkeypatch):
manager = APIManager(["A", "B", "C"])
mutator = PromptMutator(manager)

monkeypatch.setattr(prompt_mutator_module.random, "choice", lambda options: "crossover")
mutated = mutator.mutate(["A"], parents=["B", "C"])
assert set(mutated) == {"A", "B", "C"}
9 changes: 8 additions & 1 deletion src/dgf_header_parser/ast_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,16 @@ def parse(self, header_file):
tu = index.parse(header_file, args=args)
return tu

def extract(self, tu):
def extract(self, tu, source_file=None):
functions, structs, typedefs, enums = [], [], [], []
target_file = os.path.realpath(source_file or tu.spelling)
for node in tu.cursor.get_children():
location_file = getattr(getattr(node, "location", None), "file", None)
location_name = getattr(location_file, "name", None)
if not location_name:
continue
if os.path.realpath(location_name) != target_file:
continue
kind = node.kind
if kind == cindex.CursorKind.FUNCTION_DECL:
functions.append(self.extract_function(node))
Expand Down
2 changes: 1 addition & 1 deletion src/dgf_header_parser/constraint_inferencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def infer_constraints(self):
for file_entry in self.api_data:
for func in file_entry["result"]["functions"]:
func_name = func["name"]
constraints[func_name] = []
constraints.setdefault(func_name, [])

for param in func["parameters"]:
pname = param["name"].lower()
Expand Down
11 changes: 10 additions & 1 deletion src/dgf_header_parser/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,28 @@

def extract_all_api(header_dir, include_dirs):
headers = collect_header_files(header_dir)
if not headers:
LOGGER.warning("No header files found under %s", header_dir)
return []
parser = ASTParser(include_dirs)

all_results = []
failed = 0
for h in tqdm(headers, desc="Parsing Headers"):
try:
tu = parser.parse(h)
result = parser.extract(tu)
result = parser.extract(tu, source_file=h)
all_results.append({
"file": h,
"result": result
})
except Exception as e:
LOGGER.warning("Error parsing %s: %s", h, e)
failed += 1

LOGGER.info("Header extraction finished: %d succeeded, %d failed", len(all_results), failed)
if not all_results:
raise RuntimeError("Failed to parse any header file.")

return all_results

Expand Down
35 changes: 35 additions & 0 deletions src/dgf_header_parser/test_constraint_inferencer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dgf_header_parser.constraint_inferencer import ConstraintInferencer


def test_infer_constraints_keeps_entries_from_same_function_name():
api_data = [
{
"file": "a.h",
"result": {
"functions": [
{
"name": "dup_func",
"result_type": "void",
"parameters": [{"name": "input_len", "type": "size_t"}],
}
]
},
},
{
"file": "b.h",
"result": {
"functions": [
{
"name": "dup_func",
"result_type": "void",
"parameters": [{"name": "file_path", "type": "const char *"}],
}
]
},
},
]

constraints = ConstraintInferencer(api_data).infer_constraints()
assert "dup_func" in constraints
params = {item["param"] for item in constraints["dup_func"]}
assert {"input_len", "file_path"}.issubset(params)
15 changes: 13 additions & 2 deletions src/dgf_prompt_generator/llm_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ def __init__(self, api_key=None, base_url=None, model=None, temperature=None):
final_temperature = os.getenv("OPENAI_TEMPERATURE", "0.2")
if local_config is not None and temperature is None:
final_temperature = getattr(local_config, "TEMPERATURE", final_temperature)
final_temperature = float(final_temperature)
try:
final_temperature = float(final_temperature)
except (TypeError, ValueError):
LOGGER.warning("Invalid OPENAI_TEMPERATURE=%r, fallback to 0.2", final_temperature)
final_temperature = 0.2

self.client = openai.OpenAI(
api_key=final_api_key,
Expand All @@ -58,4 +62,11 @@ def generate_code(self, prompt):
temperature=self.temperature
)
LOGGER.debug("LLM generation finished using model=%s", self.model)
return response.choices[0].message.content
choices = getattr(response, "choices", None)
if not choices:
raise ValueError("LLM response has no choices.")
message = getattr(choices[0], "message", None)
content = getattr(message, "content", None)
if not isinstance(content, str) or not content.strip():
raise ValueError("LLM response has empty content.")
return content
34 changes: 24 additions & 10 deletions src/dgf_prompt_generator/prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ def __init__(self, api_info_json, system_includes=None, api_prefixes=None):
with open(api_info_json, "r") as f:
self.api_data = json.load(f)

self.system_includes = system_includes or [
"stdint.h",
"stddef.h",
"stdio.h",
"stdlib.h",
"string.h",
]
if system_includes is None:
self.system_includes = [
"stdint.h",
"stddef.h",
"stdio.h",
"stdlib.h",
"string.h",
]
else:
self.system_includes = system_includes
self.api_prefixes = api_prefixes or []

# 约束推导初始化
Expand Down Expand Up @@ -59,16 +62,27 @@ def _generate_prompt_from_funcs(self, selected_funcs):

Please implement the LLVMFuzzerTestOneInput function that uses these APIs.

void LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {{
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {{
// Your implementation here
return 0;
}}"""
return prompt

def get_api_signatures(self, num_funcs=5):
functions = []
for file_entry in self.api_data:
functions.extend(file_entry["result"]["functions"])
selected_funcs = random.sample(functions, min(num_funcs, len(functions)))
for func in file_entry["result"]["functions"]:
name = func["name"]
if not self.api_prefixes or any(name.startswith(prefix) for prefix in self.api_prefixes):
functions.append(func)
if not functions:
return []
try:
safe_num_funcs = int(num_funcs)
except (TypeError, ValueError):
safe_num_funcs = 0
safe_num_funcs = max(0, safe_num_funcs)
selected_funcs = random.sample(functions, min(safe_num_funcs, len(functions)))
return selected_funcs

def format_func_signature(self, func):
Expand Down
61 changes: 61 additions & 0 deletions src/dgf_prompt_generator/test_prompt_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,64 @@ def test_prompt_template_filters_prefix_and_generates_signature(tmp_path):
prompt = template.generate_prompt_from_api_list(["cJSON_AddObjectToObject"])
assert "#include <cJSON.h>" in prompt
assert "cJSON_AddObjectToObject" in prompt


def test_prompt_template_generate_prompt_uses_int_signature_and_prefix_filter(tmp_path):
api_json = tmp_path / "api.json"
api_json.write_text(
json.dumps(
[
{
"file": "x.h",
"result": {
"functions": [
{
"name": "cJSON_Parse",
"result_type": "int",
"parameters": [],
},
{
"name": "OtherFunc",
"result_type": "void",
"parameters": [],
},
]
},
}
]
)
)

template = PromptTemplate(str(api_json), api_prefixes=["cJSON"])
prompt = template.generate_prompt(num_funcs=5)
assert "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)" in prompt
assert "return 0;" in prompt
assert "cJSON_Parse" in prompt
assert "OtherFunc" not in prompt


def test_prompt_template_allows_empty_system_includes_and_negative_num_funcs(tmp_path):
api_json = tmp_path / "api.json"
api_json.write_text(
json.dumps(
[
{
"file": "x.h",
"result": {
"functions": [
{
"name": "OnlyFunc",
"result_type": "void",
"parameters": [],
}
]
},
}
]
)
)

template = PromptTemplate(str(api_json), system_includes=[])
prompt = template.generate_prompt(num_funcs=-5)
assert "#include <stdint.h>" not in prompt
assert template.get_api_signatures(num_funcs=-5) == []
3 changes: 3 additions & 0 deletions src/dgf_validator/fuzzer_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ def run_libfuzzer(self, binary_path, work_dir):
except subprocess.CalledProcessError:
LOGGER.warning("Fuzzing crash detected for %s", binary_path)
return False
except OSError as exc:
LOGGER.warning("Failed to execute fuzzer binary %s: %s", binary_path, exc)
return False
3 changes: 3 additions & 0 deletions src/dgf_validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def validate_source(self, src_file, include_dirs=None, max_retry=3):
continue
else:
return False, None
except OSError as e:
LOGGER.warning("Failed to execute compiler %s: %s", self.clang, e)
return False, None

return False, None

Expand Down
Loading