Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
black
tree-sitter>=0.20.0,<0.22.0
transformers
torch
tiktoken
replicate
openai
google-generativeai
tqdm
networkx
streamlit
botocore
boto3
black
anthropic
mypy
types-networkx
types-tqdm
boto3-stubs[essential]
7 changes: 4 additions & 3 deletions src/llmtool/dfbscan/intra_dataflow_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from os import path
import json
import time
from typing import List, Set, Optional, Dict
from typing import List, Set, Optional, Dict, Union
from llmtool.LLM_utils import *
from llmtool.LLM_tool import *
from memory.syntactic.function import *
Expand Down Expand Up @@ -149,7 +149,7 @@ def _parse_response(
r"Line:\s*([^;]+);"
)

current_path = None
current_path: Optional[Dict[str, Union[str, list]]] = None
for line in response.splitlines():
line = line.strip().lstrip("-").strip()
if not line:
Expand All @@ -176,7 +176,8 @@ def _parse_response(
"index": detail_match.group(4).strip(),
"line": detail_match.group(5).strip(),
}
current_path["propagation_details"].append(detail)
if isinstance(current_path["propagation_details"], list):
current_path["propagation_details"].append(detail)

elif current_path is not None:
paths.append(current_path)
Expand Down
43 changes: 23 additions & 20 deletions src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
" 2. Function Invocations: Call sites where SRC is passed as an argument.",
" 3. Return Statements: Points where the function returns, possibly propagating SRC.",
" 4. Parameter Assignments: Assignments where SRC is assigned to a parameter or an object field that is accessible outside the function.",
" 5. Non local variable assignment: Assignments where SRC is assigned to a predefined non local variable.",
" 5. Non local variable assignment: Assignments where SRC is assigned to a predefined non local variable that is accessible outside the function.",
"- Step 2: Identify all execution paths relevant to the key points found in Step 1. For each path:",
" - Identify every potential execution path;",
" - Verify whether the key points are executed along each path;",
Expand Down Expand Up @@ -69,39 +69,39 @@
"- Type: Argument; Name: a; Function: updateValue; Index: 0; Line: 8; Dependency: SRC (p) is assigned to 'a', then passed as an argument to updateValue;",
"- Type: Return; Name: a; Function: None; Index: 0; Line: 10; Dependency: SRC (p) is assigned to 'a', then returned.",
"",
"Example 2: Propagation via Parameter Passing and Return",
"Example 2: Propagation via Nonlocal Variable assignment",
"User:",
"Now I will give you a target function with the source point 'src' at line 1:",
"```javascript",
"function modifyValue(src, flag) {",
" if (flag > 0) {",
" flag = src; // source value: src",
" } else if (flag == 0) {",
" return src; // return statement",
"function update(src, count) {",
" if (count > 0) {",
" item = src;",
" }",
" return -1; // Default return value ",
" return item;",
"}",
"```",
"",
"Consider the following as non local variables:",
"- item at line 3",
"",
"Where does the source variable 'src' at line 1 propagate within this function?",
"System:",
"Explanation:",
"Step 1: Identify SRC and its alias;",
"SRC: The variable src is defined at line 1;",
"Step 2: Identify key points and execution paths:",
"Path 1 (flag > 0): src is assigned to flag at line 3, making it accessible outside the function if flag is referenced after the call;",
"Path 2 (flag == 0): src is returned at line 5, propagating to the caller;",
"Path 3 (flag < 0): Function returns -1, so SRC does not propagate in this path;",
"Path 1 (count > 0): src is used to modify the value of item at line 3, which is later returned at line 5;",
"Path 2 (count <= 0): the value of item is directly returned at line 5 without using src;",
"Step 3: Simulate the execution paths:",
"Path 1: When flag > 0, src is assigned to flag, allowing potential propagation outside the function through the parameter reference;",
"Path 2: When flag == 0, src is returned to the caller;",
"Path 3: When flag < 0, src does not propagate, as the function returns -1;",
"In Path 1, propagation is from src -> Nonlocal variable assignment at item = src -> return item;",
"In Path 2, propagation is from src -> return item;",
"Answer:",
"Path 1: Lines 1 -> 3;",
"- Type: Parameter; Name: flag; Function: None; Index: 1; Line: 3; Dependency: SRC (src) is assigned to parameter 'flag', which may be referenced by the caller;",
"Path 1: Lines 1 -> 3 -> 5;",
"- Type: Nonlocal; Name: item; Function: None; Index: None; Line: 3; Dependency: SRC (src) is used to update the value of item, allowing potential propagation outside the function;",
"- Type: Return; Name: item; Function: None; Index: 0; Line: 5; Dependency: the item nonlocal variable, which is updated to the value of src, is returned to the caller;",
"Path 2: Lines 1 -> 5;",
"- Type: Return; Name: src; Function: None; Index: 0; Line: 5; Dependency: SRC (src) is returned to the caller;",
"Path 3: Lines 1 -> 6;",
"- No propagation; Dependency: Default return value -1 is unrelated to SRC."
"- No propagation; Dependency: The value of the item nonlocal variable is directly returned to the caller;",
""
],
"question_template": "- Where does the source <SRC_NAME> at line <SRC_LINE> in this function propagate?",
"answer_format_cot": [
Expand All @@ -114,6 +114,7 @@
" - For parameter propagation: 'Type: Parameter; Name: {parameter name}; Function: None; Index: {parameter index}; Line: {assignment line number}; Dependency: {summary of dependency from SRC to parameter}';",
" - For sink propagation: 'Type: Sink; Name: {sink name}; Function: None; Index: None; Line: {sink statement line number}; Dependency: {summary of dependency from SRC to sink}';",
" - For non local variable assignment: 'Type: Nonlocal; Name: {non local name}; Function: None; Index: None; Line: {assignment statement line number}; Dependency: {summary of dependency from SRC to assignment}';",
" Note: Each bulletpoint under the path bulletpoint represent an identified key point along the execution path. Each key point information must be in a single line, and do not provide any key point information that does not adhere to one of the five types listed above.",
"(4) If there is no propagation along a path, provide a brief explanation of why SRC does not propagate in that path as follows:",
"- Path <Path Number>: <Execution Path>;",
" - No propagation; Dependency: {reason for no propagation};",
Expand All @@ -128,8 +129,10 @@
"Here are the Function call sites and return statements within the function, which can be used in Step 1;\n",
"<CALL_STATEMENTS>\n",
"<RETURN_VALUES>\n",
"<NONLOCAL_VALUES>",
"<NONLOCAL_VALUES>\n",
"Now, please answer the following question:\n<QUESTION>\n",
"Your response should strictly follow the format:\n<ANSWER>\n"
]
}


29 changes: 14 additions & 15 deletions src/tstool/analyzer/Javascript_TS_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,44 +22,43 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
:param tree: Parsed syntax tree
"""
scope_stack: List[int] = []
scope_id: int = 0

def search(root: Node) -> None:
nonlocal scope_id

for child in root.children:
if child.type == "statement_block":
if len(scope_stack) > 0:
self.scope_env[scope_stack[-1]][1].add(scope_id)
self.scope_env[scope_stack[-1]][1].add(self.current_scope_id)

self.scope_env[scope_id] = (child, set())
self.scope_root_to_scope_id[child] = scope_id
scope_stack.append(scope_id)
self.scope_env[self.current_scope_id] = (child, set())
self.scope_root_to_scope_id[child] = self.current_scope_id
scope_stack.append(self.current_scope_id)

if child.parent:
if child.parent.type == "function_declaration":
self.function_root_to_scope_id[child.parent] = scope_id
self.function_root_to_scope_id[child.parent] = (
self.current_scope_id
)
elif (
child.parent.type == "arrow_function"
or child.parent.type == "function_expression"
):
if child.parent.parent:
self.function_root_to_scope_id[child.parent.parent] = (
scope_id
self.current_scope_id
)

scope_id += 1
self.current_scope_id += 1
search(child)
scope_stack.pop()
else:
search(child)

return

self.scope_env[scope_id] = (tree.root_node, set())
self.scope_root_to_scope_id[tree.root_node] = scope_id
scope_stack.append(scope_id)
scope_id += 1
self.scope_env[self.current_scope_id] = (tree.root_node, set())
self.scope_root_to_scope_id[tree.root_node] = self.current_scope_id
scope_stack.append(self.current_scope_id)
self.current_scope_id += 1
search(tree.root_node)
return

Expand Down Expand Up @@ -147,7 +146,7 @@ def extract_nonlocal_info(self) -> None:
)

for candidate_node in identifiers_per_scope[child_scope_id]:
if candidate_node:
if not candidate_node:
continue

# Name mismatch
Expand Down
2 changes: 2 additions & 0 deletions src/tstool/analyzer/TS_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ def __init__(
self.scope_env: Dict[int, Tuple[Node, Set[int]]] = {}
self.api_env: Dict[int, API] = {}

self.current_scope_id: int = 0

# Dictionary storing mapping from the root node of the scope to its scope id
self.scope_root_to_scope_id: Dict[Node, int] = {}

Expand Down