From 805a0f09863391930c548e32a5e4671a00aa327e Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Thu, 28 Aug 2025 16:23:12 -0400
Subject: [PATCH 01/23] Added javascript NPD support

---
 src/agent/dfbscan.py                          |   5 +
 src/llmtool/LLM_utils.py                      |   2 +-
 .../dfbscan/intra_dataflow_analyzer.json      | 127 +++++++
 .../Javascript/dfbscan/path_validator.json    |  93 +++++
 src/repoaudit.py                              |   8 +
 src/run_repoaudit.sh                          |   6 +-
 src/tstool/analyzer/Javascript_TS_analyzer.py | 336 ++++++++++++++++++
 src/tstool/analyzer/TS_analyzer.py            |  10 +-
 .../Javascript/Javascript_NPD_extractor.py    |  41 +++
 .../dfbscan_extractor/Javascript/__init__.py  |   0
 .../Python/Python_NPD_extractor.py            |   2 -
 11 files changed, 623 insertions(+), 7 deletions(-)
 create mode 100644 src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
 create mode 100644 src/prompt/Javascript/dfbscan/path_validator.json
 create mode 100644 src/tstool/analyzer/Javascript_TS_analyzer.py
 create mode 100644 src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
 create mode 100644 src/tstool/dfbscan_extractor/Javascript/__init__.py

diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index d49073a..027fe0a 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -19,6 +19,7 @@
 from tstool.dfbscan_extractor.Cpp.Cpp_UAF_extractor import *
 from tstool.dfbscan_extractor.Java.Java_NPD_extractor import *
 from tstool.dfbscan_extractor.Python.Python_NPD_extractor import *
+from tstool.dfbscan_extractor.Javascript.Javascript_NPD_extractor import *
 from tstool.dfbscan_extractor.Go.Go_NPD_extractor import *
 
 from llmtool.LLM_utils import *
@@ -109,9 +110,13 @@ def __obtain_extractor(self) -> DFBScanExtractor:
         elif self.language == "Python":
             if self.bug_type == "NPD":
                 return Python_NPD_Extractor(self.ts_analyzer)
+        elif self.language == "Javascript":
+            if self.bug_type == "NPD":
+                return Javascript_NPD_Extractor(self.ts_analyzer)
         elif self.language == "Go":
             if self.bug_type == "NPD":
                 return Go_NPD_Extractor(self.ts_analyzer)
+        
         raise NotImplementedError(
             f"Unsupported bug type: {self.bug_type} in {self.language}"
         )
diff --git a/src/llmtool/LLM_utils.py b/src/llmtool/LLM_utils.py
index 843c2db..4976710 100644
--- a/src/llmtool/LLM_utils.py
+++ b/src/llmtool/LLM_utils.py
@@ -92,7 +92,7 @@ def run_with_timeout(self, func, timeout):
 
     def infer_with_gemini(self, message: str) -> str:
         """Infer using the Gemini model from Google Generative AI"""
-        gemini_model = genai.GenerativeModel("gemini-pro")
+        gemini_model = genai.GenerativeModel(self.online_model_name)
 
         def call_api():
             message_with_role = self.systemRole + "\n" + message
diff --git a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
new file mode 100644
index 0000000..740a16c
--- /dev/null
+++ b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
@@ -0,0 +1,127 @@
+{
+  "model_role_name": "Intra-procedural Data Flow Analyzer",
+  "user_role_name": "Intra-procedural Data Flow Analyzer",
+  "system_role": "You are a Javascript programmer and very good at analyzing Javascript code. Particularly, you excel at understanding individual Javascript functions and their data flow relationships.",
+  "task": "Given a specific source variable/parameter/expression (denoted as SRC) at a specific line (denoted as L1), analyze the execution flows of the given function and determine the variables to which SRC can propagate.",
+  "analysis_rules": [
+    "The key principle for answering this question is to extract all execution paths related to SRC and simulate the function's execution along each path to determine where SRC propagates. In Javascript, SRC can propagate to four possible locations:",
+    "1. Function Calls: SRC propagates to a call site where it is passed as an argument to a callee function within the current function.",
+    "2. Return Statements: SRC propagates to a return statement, returning a value to the caller of the current function.",
+    "3. Function Parameters: SRC propagates to a parameter of the current function and can be referenced in the caller function, since objects are passed by reference.",
+    "4. Sink variables: SRC reaches one of the predefined sink variables provided in the input.",
+    "If SRC is stored into an object referenced by a function parameter, it can propagate beyond the function scope after the function exits, because the caller and the callee share the same object reference. For example, if function goo passes an object base to its callee function foo, and function foo(obj) { obj.field = SRC; }, then the caller function goo can access SRC through base.field after the call.",
+    "To conduct the analysis, follow these three steps:",
+    "",
+    "- Step 1: Identify SRC and its aliases within the current function. Extract key points, including:",
+    "  1. Sink Statements: Where SRC is used or assigned to predefined sink variables.",
+    "  2. Function Invocations: Call sites where SRC is passed as an argument.",
+    "  3. Return Statements: Points where the function returns, possibly propagating SRC.",
+    "  4. Parameter Assignments: Assignments where SRC is assigned to a parameter or an object field that is accessible outside the function.",
+    "- Step 2: Identify all execution paths relevant to the key points found in Step 1. For each path:",
+    "  - Identify every potential execution path;",
+    "  - Verify whether the key points are executed along each path;",
+    "  - Expand execution paths affected by conditional branches (if-else, switch), loops, and exception-handling blocks.",
+    "- Step 3: For each execution path extracted in Step 2, simulate function execution line by line and determine where SRC propagates based on the four possible propagation locations.",
+    "",
+    "Additional guidelines:",
+    "- Different propagation paths can exist due to control flow constructs (if-else, loops); identify all execution paths before analysis;",
+    "- Expand the first iteration of loops to analyze nested execution paths;",
+    "- Treat each conditional branch (if, switch) as a separate execution path;",
+    "- Expand nested conditions and loops to ensure all paths are analyzed."
+  ],
+  "analysis_examples": [
+    "Example 1: Propagation via Sink, Function call, and Return",
+    "User:",
+    "Now I will give you a target function with the source point 'p' at line 1:",
+    "```javascript",
+    "function process(p) {",
+    "    let a = p;            // SRC: a",
+    "    if (a > 0) {",
+    "        let b = a + 1;",
+    "        logValue(a);      // Sink value: a",
+    "    } else {",
+    "        let c = a - 1;",
+    "        updateValue(a);   // Function call",
+    "    }",
+    "    return a;              // Return statement: 'a' is returned",
+    "}",
+    "```",
+    "Where does the source variable 'p' at line 1 propagate within this function?",
+    "System:",
+    "Explanation:",
+    "Step 1: Identify SRC and its alias;",
+    "SRC: The variable p at line 1 is assigned to variable a;",
+    "Step 2: Identify key points and execution paths:",
+    "Path 1 (if branch): When a > 0, variable a is used in the sink statement at line 5 (logValue(a)) and returned at line 10;",
+    "Path 2 (else branch): When a <= 0, variable a is passed as an argument to updateValue at line 8 and then returned at line 10;",
+    "Step 3: Simulate each execution path:",
+    "In Path 1, propagation is from p -> a -> sink at logValue(a) and p -> a -> return a;",
+    "In Path 2, propagation is from p -> a -> updateValue(a) and p -> a -> return a;",
+    "Answer:",
+    "Path 1: Lines 1 -> 2 -> 4 -> 5 -> 10;",
+    "- Type: Sink; Name: a; Function: None; Index: None; Line: 5; Dependency: SRC (p) is assigned to 'a', then used in logValue;",
+    "- Type: Return; Name: a; Function: None; Index: 0; Line: 10; Dependency: SRC (p) is assigned to 'a', then returned.",
+    "Path 2: Lines 1 -> 2 -> 6 -> 8 -> 10;",
+    "- Type: Argument; Name: a; Function: updateValue; Index: 0; Line: 8; Dependency: SRC (p) is assigned to 'a', then passed as an argument to updateValue;",
+    "- Type: Return; Name: a; Function: None; Index: 0; Line: 10; Dependency: SRC (p) is assigned to 'a', then returned.",
+    "",
+    "Example 2: Propagation via Parameter Passing and Return",
+    "User:",
+    "Now I will give you a target function with the source point 'src' at line 1:",
+    "```javascript",
+    "function modifyValue(src, flag) {",
+    "    if (flag > 0) {",
+    "        flag = src;   // source value: src",
+    "    } else if (flag == 0) {",
+    "        return src;         // return statement",
+    "    }",
+    "    return -1;              // Default return value ",
+    "}",
+    "```",
+    "Where does the source variable 'src' at line 1 propagate within this function?",
+    "System:",
+    "Explanation:",
+    "Step 1: Identify SRC and its alias;",
+    "SRC: The variable src is defined at line 1;",
+    "Step 2: Identify key points and execution paths:",
+    "Path 1 (flag > 0): src is assigned to flag at line 3, making it accessible outside the function if flag is referenced after the call;",
+    "Path 2 (flag == 0): src is returned at line 5, propagating to the caller;",
+    "Path 3 (flag < 0): Function returns -1, so SRC does not propagate in this path;",
+    "Step 3: Simulate the execution paths:",
+    "Path 1: When flag > 0, src is assigned to flag, allowing potential propagation outside the function through the parameter reference;",
+    "Path 2: When flag == 0, src is returned to the caller;",
+    "Path 3: When flag < 0, src does not propagate, as the function returns -1;",
+    "Answer:",
+    "Path 1: Lines 1 -> 3;",
+    "- Type: Parameter; Name: flag; Function: None; Index: 1; Line: 3; Dependency: SRC (src) is assigned to parameter 'flag', which may be referenced by the caller;",
+    "Path 2: Lines 1 -> 5;",
+    "- Type: Return; Name: src; Function: None; Index: 0; Line: 5; Dependency: SRC (src) is returned to the caller;",
+    "Path 3: Lines 1 -> 7;",
+    "- No propagation; Dependency: Default return value -1 is unrelated to SRC."
+  ],
+  "question_template": "- Where does the source variable <SRC_NAME> at line <SRC_LINE> in this function propagate?",
+  "answer_format_cot": [
+    "(1) First, provide a detailed step-by-step reasoning process, following the explanation format used in the examples;",
+    "(2) Once the reasoning is complete, begin the final answer section with 'Answer:';",
+    "(3) For each execution path, list the propagation details using the following format:",
+    "- Path <Path Number>: <Execution Path>;",
+    "    - For a function argument propagation: 'Type: Argument; Name: {argument name}; Function: {callee function name}; Index: {argument index}; Line: {call site line number}; Dependency: {summary of dependency from SRC to argument}';",
+    "    - For a return propagation: 'Type: Return; Name: {return name}; Function: None; Index: {return value index}; Line: {return statement line number}; Dependency: {summary of dependency from SRC to return value}';",
+    "    - For parameter propagation: 'Type: Parameter; Name: {parameter name}; Function: None; Index: {parameter index}; Line: {assignment line number}; Dependency: {summary of dependency from SRC to parameter}';",
+    "    - For sink propagation: 'Type: Sink; Name: {sink name}; Function: None; Index: None; Line: {sink statement line number}; Dependency: {summary of dependency from SRC to sink}';",
+    "(4) If there is no propagation along a path, provide a brief explanation of why SRC does not propagate in that path as follows:",
+    "- Path <Path Number>: <Execution Path>;",
+    "    - No propagation; Dependency: {reason for no propagation};",
+    "(5) Remember: All the indexes start from 0 instead of 1. If there is only one return value, the index is 0."
+  ],
+  "meta_prompts": [
+    "Now I will give you a target function with the source point `<SRC_NAME>` at line <SRC_LINE>: \n```\n<FUNCTION>\n``` \n\n",
+    "You may see the following statements as potential sink points. Identify which of these are related to SRC and its aliases;\n",
+    "<SINK_VALUES>\n",
+    "Here are the Function call sites and return statements within the function, which can be used in Step 1;\n",
+    "<CALL_STATEMENTS>\n",
+    "<RETURN_VALUES>\n",
+    "Now, please answer the following question:\n<QUESTION>\n",
+    "Your response should strictly follow the format:\n<ANSWER>\n"
+  ]
+}
diff --git a/src/prompt/Javascript/dfbscan/path_validator.json b/src/prompt/Javascript/dfbscan/path_validator.json
new file mode 100644
index 0000000..a46d22f
--- /dev/null
+++ b/src/prompt/Javascript/dfbscan/path_validator.json
@@ -0,0 +1,93 @@
+{
+  "model_role_name": "Path Validator",
+  "user_role_name": "Path Validator",
+  "system_role": "You are a Javascript programmer and very good at analyzing Javascript code. In particular, you are skilled at understanding how data flows across multiple functions.",
+  "task": "You will be provided with an interprocedural data-flow path along with a specified <BUG_TYPE>. Your task is to decide whether the given propagation path is reachable – that is, whether its path condition is satisfiable. For example, for NPD (null-pointer dereference) detection, if the dereferenced object is guarded by a branch condition such as 'p !== null', then the path should be deemed unreachable.",
+  "analysis_rules": [
+    "Keep the following guidelines in mind:",
+    "- If the source in the first function flows to the sink in the last function without any interference, then the path is reachable and your answer should be Yes.",
+    "- For NPD detection, if the source value is modified or its null/undefined state is verified (for example, via an explicit check like 'p !== null') before reaching the sink, then the path is unreachable and you should answer No.",
+    "- If a function exits or returns before the sink or other propagation sites (such as function calls) are reached, the path is unreachable; answer No in such cases.",
+    "- Analyze conditions within each function: infer the outcome of branch statements and then verify whether the conditions across different sub-paths conflict. If conflicts exist, the overall path is unreachable.",
+    "- Consider the values of relevant variables; if those values contradict the necessary branch conditions for triggering the bug, the path is unreachable and you should answer No.",
+    "In summary, assess the conditions in every sub-path, check for conflicts, and decide whether the entire propagation path is reachable."
+  ],
+  "question_template": [
+    "When these functions are executed, does the following data-flow propagation path cause the <BUG_TYPE> bug?",
+    "```",
+    "<PATH>",
+    "```",
+    "Provide your detailed explanation for this propagation path:",
+    "<EXPLANATION>",
+    ""
+  ],
+  "analysis_examples": [
+    "Example 1:",
+    "User:",
+    "Here is the Javascript program:",
+    "```javascript",
+    "function getArray(length) {",
+    "    let array = null;",
+    "    if (length > 0) {",
+    "        array = new Array(length);",
+    "    }",
+    "    return array;",
+    "}",
+    "",
+    "function getElement(array, index) {",
+    "    return array[index];",
+    "}",
+    "```",
+    "Does the following propagation path cause the NPD bug?",
+    "Propagation Path: 'array' at line 2 in getArray --> 'array' used at line 2 in getElement",
+    "Explanation: In getArray, if length <= 0, array remains null and is returned. In getElement, a null array would trigger a TypeError (null dereference) when accessed at line 2 of getElement. However, when length > 0, the array is non-null. Since the conditions for array being null and non-null conflict, this propagation path is unreachable and does not cause the NPD bug.",
+    "Answer: No.",
+    "",
+    "Example 2:",
+    "User:",
+    "Here is the Javascript program:",
+    "```javascript",
+    "function foo(obj) {",
+    "    if (obj === null) {",
+    "        return null;",
+    "    }",
+    "    return obj;",
+    "}",
+    "",
+    "function bar() {",
+    "    const myObj = foo(null);",
+    "    myObj.toString();",
+    "}",
+    "```",
+    "Does the following propagation path cause the NPD bug?",
+    "Parameter 'obj' in foo --> foo returns null --> myObj assigned null in bar, which then gets dereferenced causing a method call on null",
+    "Explanation: The function foo returns null when passed a null input. In bar, this leads to myObj being null, which in turn causes a TypeError when calling toString(). As there is no conflicting branch condition preventing this case, the propagation path is reachable and causes the NPD bug.",
+    "Answer: Yes."
+  ],
+  "additional_fact": [
+    "Additional details may include whether specific lines fall within if-statements and the corresponding line numbers for those conditions.",
+    "For each line in the provided path, follow this reasoning:",
+    "- Indicate whether line {line_number} is inside the 'true' or 'else' branch of an if-statement.",
+    "- State whether, given the variable values, the branch condition will always be evaluated as true, always as false, or is indeterminate.",
+    "- Conclude whether line {line_number} is reachable.",
+    "After analyzing each line, decide if the overall path's condition is satisfiable (reachable) or not."
+  ],
+  "answer_format": [
+    "(1) In the first line, provide your detailed reasoning and explanation.",
+    "(2) In the second line, simply state Yes or No.",
+    "Example:",
+    "Explanation: {Your detailed explanation.}",
+    "Answer: Yes"
+  ],
+  "meta_prompts": [
+    "Now I will provide you with the program:",
+    "```",
+    "<PROGRAM>",
+    "```",
+    "Please answer the following question:",
+    "<QUESTION>",
+    "Your answer should follow this format:",
+    "<ANSWER>",
+    "Remember: Do not assume the behavior or return values of external methods not provided in the program. Only evaluate the conditions present in the given code."
+  ]
+}
diff --git a/src/repoaudit.py b/src/repoaudit.py
index 24d3639..042e6ff 100644
--- a/src/repoaudit.py
+++ b/src/repoaudit.py
@@ -10,6 +10,7 @@
 from tstool.analyzer.Go_TS_analyzer import *
 from tstool.analyzer.Java_TS_analyzer import *
 from tstool.analyzer.Python_TS_analyzer import *
+from tstool.analyzer.Javascript_TS_analyzer import *
 
 from typing import List
 
@@ -17,6 +18,7 @@
     "Cpp": ["MLK", "NPD", "UAF"],
     "Java": ["NPD"],
     "Python": ["NPD"],
+    "Javascript": ["NPD"],
     "Go": ["NPD"],
 }
 
@@ -59,6 +61,8 @@ def __init__(
             suffixs = ["java"]
         elif self.language == "Python":
             suffixs = ["py"]
+        elif self.language == "Javascript":
+            suffixs = ["js", "jsx"]
         else:
             raise ValueError("Invalid language setting")
 
@@ -82,6 +86,10 @@ def __init__(
             self.ts_analyzer = Python_TSAnalyzer(
                 self.code_in_files, self.language, self.max_symbolic_workers
             )
+        elif self.language == "Javascript":
+            self.ts_analyzer = Javascript_TSAnalyzer(
+                self.code_in_files, self.language, self.max_symbolic_workers
+            )
         return
 
     def start_repo_auditing(self) -> None:
diff --git a/src/run_repoaudit.sh b/src/run_repoaudit.sh
index fbbc8bf..f0c6e92 100755
--- a/src/run_repoaudit.sh
+++ b/src/run_repoaudit.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
 SCAN_TYPE=$1
-LANGUAGE=Python
-MODEL=claude-3.7
+LANGUAGE=Javascript
+# MODEL=claude-3.7
+# MODEL=o3-mini
+MODEL=gemini-2.5-flash
 BUG_TYPE=NPD
 PROJECT=toy
 
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
new file mode 100644
index 0000000..510e6a2
--- /dev/null
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -0,0 +1,336 @@
+import sys
+from os import path
+from typing import List, Tuple, Dict, Set
+import tree_sitter
+
+sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
+
+from .TS_analyzer import *
+from memory.syntactic.function import *
+from memory.syntactic.value import *
+
+
+class Javascript_TSAnalyzer(TSAnalyzer):
+    """
+    TSAnalyzer for Javascript source files using tree-sitter.
+    Implements Javascript-specific parsing and analysis.
+    """
+
+    def extract_function_info(
+        self, file_path: str, source_code: str, tree: tree_sitter.Tree
+    ) -> None:
+        """
+        Parse the function information in a source file.
+        :param file_path: The path of the source file.
+        :param source_code: The content of the source file.
+        :param tree: The parse tree of the source file.
+        """
+        all_function_header_nodes = find_nodes_by_type(
+            tree.root_node, "function_declaration"
+        )
+        all_variable_declarator_nodes = find_nodes_by_type(
+            tree.root_node, "variable_declarator"
+        )
+        
+        for node in all_function_header_nodes:
+            function_name = ""
+            for sub_node in node.children:
+                if sub_node.type == "identifier":
+                    function_name = source_code[sub_node.start_byte : sub_node.end_byte]
+                    break
+
+            if function_name == "":
+                continue
+
+            start_line_number = source_code[: node.start_byte].count("\n") + 1
+            end_line_number = source_code[: node.end_byte].count("\n") + 1
+            function_id = len(self.functionRawDataDic) + 1
+
+            self.functionRawDataDic[function_id] = (
+                function_name,
+                start_line_number,
+                end_line_number,
+                node,
+            )
+            self.functionToFile[function_id] = file_path
+
+            if function_name not in self.functionNameToId:
+                self.functionNameToId[function_name] = set([])
+            self.functionNameToId[function_name].add(function_id)
+            
+        for node in all_variable_declarator_nodes:
+            name_node = node.child_by_field_name("name")
+            value_node = node.child_by_field_name("value")
+
+            if not name_node or not value_node:
+                continue
+
+            if value_node.type != "arrow_function":
+                continue
+            
+            function_name = source_code[name_node.start_byte : name_node.end_byte]
+            start_line = source_code[:node.start_byte].count("\n") + 1
+            end_line = source_code[:node.end_byte].count("\n") + 1
+            function_id = len(self.functionRawDataDic) + 1
+
+            self.functionRawDataDic[function_id] = (
+                function_name, start_line, end_line, node
+            )
+            self.functionToFile[function_id] = file_path
+            self.functionNameToId.setdefault(function_name, set()).add(function_id)
+        
+        return
+
+    def extract_global_info(
+        self, file_path: str, source_code: str, tree: tree_sitter.Tree
+    ) -> None:
+        """
+        Parse global variable information from a Javascript source file.
+        For Javascript, this may include module-level variables.
+        Currently not implemented.
+        """
+        # TODO: Add global variable analysis if needed.
+        return
+
+    def get_callee_name_at_call_site(
+        self, node: tree_sitter.Node, source_code: str
+    ) -> str:
+        """
+        Get the callee name at the call site.
+        :param node: the node of the call site
+        :param source_code: the content of the file
+        """
+        function_name = ""
+        for sub_node in node.children:
+            if sub_node.type == "identifier":
+                function_name = source_code[sub_node.start_byte : sub_node.end_byte]
+                break
+            if sub_node.type == "member_expression":
+                for sub_sub_node in sub_node.children:
+                    if sub_sub_node.type == "identifier":
+                        function_name = source_code[
+                            sub_sub_node.start_byte : sub_sub_node.end_byte
+                        ]
+                break
+        return function_name
+
+    def get_callsites_by_callee_name(
+        self, current_function: Function, callee_name: str
+    ) -> List[tree_sitter.Node]:
+        """
+        Find the call sites by the callee function name.
+        :param current_function: the function to be analyzed
+        :param callee_name: the callee function name
+        """
+        results = []
+        file_content = self.code_in_files[current_function.file_path]
+        call_site_nodes = find_nodes_by_type(
+            current_function.parse_tree_root_node, "call_expression"
+        )
+        for call_site in call_site_nodes:
+            if (
+                self.get_callee_name_at_call_site(call_site, file_content)
+                == callee_name
+            ):
+                results.append(call_site)
+        return results
+
+    def get_arguments_at_callsite(
+        self, current_function: Function, call_site_node: tree_sitter.Node
+    ) -> Set[Value]:
+        """
+        Get arguments from a call site in a function.
+        :param current_function: the function to be analyzed
+        :param call_site_node: the node of the call site
+        :return: the arguments
+        """
+        arguments: Set[Value] = set([])
+        file_name = current_function.file_path
+        source_code = self.code_in_files[file_name]
+        for sub_node in call_site_node.children:
+            if sub_node.type == "arguments":
+                arg_list = sub_node.children[1:-1]
+                for element in arg_list:
+                    if element.type != ",":
+                        line_number = source_code[: element.start_byte].count("\n") + 1
+                        arguments.add(
+                            Value(
+                                source_code[element.start_byte : element.end_byte],
+                                line_number,
+                                ValueLabel.ARG,
+                                file_name,
+                                len(arguments),
+                            )
+                        )
+        return arguments
+
+    def get_parameters_in_single_function(
+        self, current_function: Function
+    ) -> Set[Value]:
+        """
+        Find the parameters of a function.
+        :param current_function: The function to be analyzed.
+        :return: A set of parameters as values
+        """
+        if current_function.paras is not None:
+            return current_function.paras
+        current_function.paras = set([])
+        file_content = self.code_in_files[current_function.file_path]
+        parameters = find_nodes_by_type(
+            current_function.parse_tree_root_node, "formal_parameters"
+        )
+
+        index = 0
+        for parameter_node in parameters:
+            parameter_name = ""
+            for sub_node in parameter_node.children:
+                for sub_sub_node in find_nodes_by_type(sub_node, "identifier"):
+                    parameter_name = file_content[
+                        sub_sub_node.start_byte : sub_sub_node.end_byte
+                    ]
+                    if parameter_name != "" and parameter_name != "self":
+                        line_number = (
+                            file_content[: sub_node.start_byte].count("\n") + 1
+                        )
+                        current_function.paras.add(
+                            Value(
+                                parameter_name,
+                                line_number,
+                                ValueLabel.PARA,
+                                current_function.file_path,
+                                index,
+                            )
+                        )
+                        index += 1
+        return current_function.paras
+
+    def get_return_values_in_single_function(
+        self, current_function: Function
+    ) -> Set[Value]:
+        """
+        Find the return values of a Javascript function
+        :param current_function: The function to be analyzed.
+        :return: A set of return values
+        """
+        if current_function.retvals is not None:
+            return current_function.retvals
+
+        current_function.retvals = set([])
+        file_content = self.code_in_files[current_function.file_path]
+        retnodes = find_nodes_by_type(
+            current_function.parse_tree_root_node, "return_statement"
+        )
+        for retnode in retnodes:
+            line_number = file_content[: retnode.start_byte].count("\n") + 1
+            restmts_str = file_content[retnode.start_byte : retnode.end_byte]
+            returned_value = restmts_str.replace("return", "").strip()
+            current_function.retvals.add(
+                Value(
+                    returned_value,
+                    line_number,
+                    ValueLabel.RET,
+                    current_function.file_path,
+                    0,
+                )
+            )
+        return current_function.retvals
+
+    def get_if_statements(
+        self, function: Function, source_code: str
+    ) -> Dict[Tuple, Tuple]:
+        """
+        Identify if-statements in the Javascript function.
+        This is a simplified analysis for illustrative purposes.
+        """
+        if_statement_nodes = find_nodes_by_type(
+            function.parse_tree_root_node, "if_statement"
+        )
+        if_statements = {}
+        for if_node in if_statement_nodes:
+            condition_str = ""
+            condition_start_line = 0
+            condition_end_line = 0
+            true_branch_start_line = 0
+            true_branch_end_line = 0
+            else_branch_start_line = 0
+            else_branch_end_line = 0
+
+            block_num = 0
+            for sub_target in if_node.children:
+                if sub_target.type == "parenthesized_expression":
+                    condition_start_line = (
+                        source_code[: sub_target.start_byte].count("\n") + 1
+                    )
+                    condition_end_line = (
+                        source_code[: sub_target.end_byte].count("\n") + 1
+                    )
+                    condition_str = source_code[
+                        sub_target.start_byte : sub_target.end_byte
+                    ]
+                if sub_target.type == "statement_block":
+                    lower_lines = []
+                    upper_lines = []
+                    for sub_sub in sub_target.children:
+                        if sub_sub.type not in {"{", "}"}:
+                            lower_lines.append(
+                                source_code[: sub_sub.start_byte].count("\n") + 1
+                            )
+                            upper_lines.append(
+                                source_code[: sub_sub.end_byte].count("\n") + 1
+                            )
+                    if lower_lines and upper_lines:
+                        if block_num == 0:
+                            true_branch_start_line = min(lower_lines)
+                            true_branch_end_line = max(upper_lines)
+                            block_num += 1
+                        elif block_num == 1:
+                            else_branch_start_line = min(lower_lines)
+                            else_branch_end_line = max(upper_lines)
+                            block_num += 1
+                if sub_target.type == "expression_statement":
+                    true_branch_start_line = (
+                        source_code[: sub_target.start_byte].count("\n") + 1
+                    )
+                    true_branch_end_line = (
+                        source_code[: sub_target.end_byte].count("\n") + 1
+                    )
+
+            if_statement_start_line = source_code[: if_node.start_byte].count("\n") + 1
+            if_statement_end_line = source_code[: if_node.end_byte].count("\n") + 1
+            line_scope = (if_statement_start_line, if_statement_end_line)
+            info = (
+                condition_start_line,
+                condition_end_line,
+                condition_str,
+                (true_branch_start_line, true_branch_end_line),
+                (else_branch_start_line, else_branch_end_line),
+            )
+            if_statements[line_scope] = info
+        return if_statements
+
+    def get_loop_statements(
+        self, function: Function, source_code: str
+    ) -> Dict[Tuple, Tuple]:
+        """
+        Identify loop statements (for, for-in, and while) in the Javascript function.
+        """
+        loops = {}
+        loop_nodes = find_nodes_by_type(function.parse_tree_root_node, "for_statement")
+        loop_nodes.extend(
+            find_nodes_by_type(function.parse_tree_root_node, "for_in_statement")
+        )
+        loop_nodes.extend(
+            find_nodes_by_type(function.parse_tree_root_node, "while_statement")
+        )
+        for node in loop_nodes:
+            start_line = source_code[: node.start_byte].count("\n") + 1
+            end_line = source_code[: node.end_byte].count("\n") + 1
+            # Simplified header and body analysis.
+            loops[(start_line, end_line)] = (
+                start_line,
+                start_line,
+                "",
+                start_line,
+                end_line,
+            )
+        return loops
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index 31118ab..dcafd26 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -156,6 +156,8 @@ def __init__(
             self.language = Language(str(language_path), "java")
         elif language_name == "Python":
             self.language = Language(str(language_path), "python")
+        elif language_name == "Javascript":
+            self.language = Language(str(language_path), "javascript")
         elif language_name == "Go":
             self.language = Language(str(language_path), "go")
         else:
@@ -354,7 +356,11 @@ def extract_call_graph_edges(self, current_function: Function) -> None:
         file_content = self.fileContentDic[file_name]
 
         call_node_type = None
-        if self.language_name == "C" or self.language_name == "Cpp":
+        if (
+            self.language_name == "C"
+            or self.language_name == "Cpp"
+            or self.language_name == "Javascript"
+        ):
             call_node_type = "call_expression"
         elif self.language_name == "Java":
             call_node_type = "method_invocation"
@@ -367,7 +373,7 @@ def extract_call_graph_edges(self, current_function: Function) -> None:
 
         all_call_sites = find_nodes_by_type(
             current_function.parse_tree_root_node, call_node_type
-        )
+        )        
         function_call_sites = []
         api_call_sites = []
 
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
new file mode 100644
index 0000000..2ee656d
--- /dev/null
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -0,0 +1,41 @@
+from tstool.analyzer.TS_analyzer import *
+from tstool.analyzer.Javascript_TS_analyzer import *
+from ..dfbscan_extractor import *
+
+
+class Javascript_NPD_Extractor(DFBScanExtractor):
+    def extract_sources(self, function: Function) -> List[Value]:
+        root_node = function.parse_tree_root_node
+        source_code = self.ts_analyzer.code_in_files[function.file_path]
+        file_path = function.file_path
+        null_value_nodes = find_nodes_by_type(root_node, "null")
+        null_value_nodes.extend(find_nodes_by_type(root_node, "undefined"))
+
+        sources = []
+        for node in null_value_nodes:
+            line_number = source_code[: node.start_byte].count("\n") + 1
+            name = source_code[node.start_byte : node.end_byte]
+            sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
+            
+        return sources
+
+    def extract_sinks(self, function: Function) -> List[Value]:
+        """
+        Extract the sinks that can cause the null pointer dereferences from Javascript programs.
+        :param: function: Function object.
+        :return: List of sink values
+        """
+        root_node = function.parse_tree_root_node
+        source_code = self.ts_analyzer.code_in_files[function.file_path]
+        file_path = function.file_path
+
+        nodes = find_nodes_by_type(root_node, "member_expression")
+        nodes.extend(find_nodes_by_type(root_node, "subscript_expression"))
+        sinks = []
+
+        for node in nodes:
+            first_child = node.children[0]
+            line_number = source_code[: first_child.start_byte].count("\n") + 1
+            name = source_code[first_child.start_byte : first_child.end_byte]
+            sinks.append(Value(name, line_number, ValueLabel.SINK, file_path, -1))
+        return sinks
diff --git a/src/tstool/dfbscan_extractor/Javascript/__init__.py b/src/tstool/dfbscan_extractor/Javascript/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
index c59562d..caca262 100644
--- a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
@@ -1,8 +1,6 @@
 from tstool.analyzer.TS_analyzer import *
 from tstool.analyzer.Python_TS_analyzer import *
 from ..dfbscan_extractor import *
-import tree_sitter
-import argparse
 
 
 class Python_NPD_Extractor(DFBScanExtractor):

From 024a2a76f5dbb4ac483cddb873ac3fd7824ccf5b Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Thu, 28 Aug 2025 16:30:29 -0400
Subject: [PATCH 02/23] Modified build.py to build javascript tree sitter and
 added javascript test cases

---
 benchmark/Javascript/toy/NPD/case01.js | 17 +++++++++++++++++
 benchmark/Javascript/toy/NPD/case02.js |  7 +++++++
 benchmark/Javascript/toy/NPD/case03.js | 11 +++++++++++
 benchmark/Javascript/toy/NPD/case04.js | 17 +++++++++++++++++
 benchmark/Javascript/toy/NPD/case05.js | 20 ++++++++++++++++++++
 lib/build.py                           |  7 +++++++
 6 files changed, 79 insertions(+)
 create mode 100644 benchmark/Javascript/toy/NPD/case01.js
 create mode 100644 benchmark/Javascript/toy/NPD/case02.js
 create mode 100644 benchmark/Javascript/toy/NPD/case03.js
 create mode 100644 benchmark/Javascript/toy/NPD/case04.js
 create mode 100644 benchmark/Javascript/toy/NPD/case05.js

diff --git a/benchmark/Javascript/toy/NPD/case01.js b/benchmark/Javascript/toy/NPD/case01.js
new file mode 100644
index 0000000..e7251ef
--- /dev/null
+++ b/benchmark/Javascript/toy/NPD/case01.js
@@ -0,0 +1,17 @@
+function hello() {
+    let output = [];
+
+    for (let i = 0; i < 5; i++) {
+        output.push(null);
+    }
+    return output;
+}
+
+function hello2() {
+    let output = hello();
+    for (let i = 0; i < 4; i++) {
+        output[i] = i.toString();
+    }
+    return output[4].length;
+}
+
diff --git a/benchmark/Javascript/toy/NPD/case02.js b/benchmark/Javascript/toy/NPD/case02.js
new file mode 100644
index 0000000..393be53
--- /dev/null
+++ b/benchmark/Javascript/toy/NPD/case02.js
@@ -0,0 +1,7 @@
+function getLength(value) {
+    return value.length;
+}
+
+const print = () => {
+    console.log(getLength(null));
+}
diff --git a/benchmark/Javascript/toy/NPD/case03.js b/benchmark/Javascript/toy/NPD/case03.js
new file mode 100644
index 0000000..b620a91
--- /dev/null
+++ b/benchmark/Javascript/toy/NPD/case03.js
@@ -0,0 +1,11 @@
+function getLength2(value) {
+    if (!value) {
+        return 0;
+    }
+    return value.length;
+}
+
+const print2 = () => {
+    let a = getLength2(null);
+    console.log();
+}
\ No newline at end of file
diff --git a/benchmark/Javascript/toy/NPD/case04.js b/benchmark/Javascript/toy/NPD/case04.js
new file mode 100644
index 0000000..5af5d93
--- /dev/null
+++ b/benchmark/Javascript/toy/NPD/case04.js
@@ -0,0 +1,17 @@
+function hello3() {
+    let output = [];
+
+    for (let i = 0; i < 5; i++) {
+        output.push(null);
+    }
+    return output;
+}
+
+function hello4() {
+    let output = hello3();
+    for (let i = 0; i < 4; i++) {
+        output[i] = i.toString();
+    }
+    return output[4] ? output[4].length : 0;
+}
+
diff --git a/benchmark/Javascript/toy/NPD/case05.js b/benchmark/Javascript/toy/NPD/case05.js
new file mode 100644
index 0000000..0ef2d26
--- /dev/null
+++ b/benchmark/Javascript/toy/NPD/case05.js
@@ -0,0 +1,20 @@
+function hello5() {
+    let output = [];
+
+    for (let i = 0; i < 5; i++) {
+        output.push(null);
+    }
+    return output;
+}
+
+function hello6() {
+    let output = hello5();
+    for (let i = 0; i < 4; i++) {
+        output[i] = i.toString();
+    }
+    if (output[4] !== null && output[4] !== undefined) {
+        return output[4].length;
+    }
+    return 0;
+}
+
diff --git a/lib/build.py b/lib/build.py
index bf7940e..7f59bfd 100644
--- a/lib/build.py
+++ b/lib/build.py
@@ -25,6 +25,12 @@
     os.system(
         f'git clone https://github.com/tree-sitter/tree-sitter-python.git {cwd / "vendor/tree-sitter-python"}'
     )
+    
+if not (cwd / "vendor/tree-sitter-javascript/grammar.js").exists():
+    os.system(
+        f'git clone https://github.com/tree-sitter/tree-sitter-javascript.git {cwd / "vendor/tree-sitter-javascript"}'
+    )
+
 
 if not (cwd / "vendor/tree-sitter-go/grammar.js").exists():
     os.system(
@@ -41,6 +47,7 @@
         str(cwd / "vendor/tree-sitter-cpp"),
         str(cwd / "vendor/tree-sitter-java"), 
         str(cwd / "vendor/tree-sitter-python"), 
+        str(cwd / "vendor/tree-sitter-javascript"),
         str(cwd / "vendor/tree-sitter-go"), 
     ],
 )

From 4ff5057439a48b10a0ceb1059b9d5e825080a70d Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 29 Aug 2025 18:05:00 -0400
Subject: [PATCH 03/23] Added delete operator and call expression as source and
 sink in javascript NPD extraction

---
 benchmark/Javascript/toy/NPD/case02.js        | 12 ++++++--
 benchmark/Javascript/toy/NPD/case05.js        | 29 +++++++++----------
 src/run_repoaudit.sh                          |  2 +-
 .../Javascript/Javascript_NPD_extractor.py    | 15 ++++++++--
 .../dfbscan_extractor/dfbscan_extractor.py    |  1 +
 5 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/benchmark/Javascript/toy/NPD/case02.js b/benchmark/Javascript/toy/NPD/case02.js
index 393be53..9003b3d 100644
--- a/benchmark/Javascript/toy/NPD/case02.js
+++ b/benchmark/Javascript/toy/NPD/case02.js
@@ -1,7 +1,13 @@
-function getLength(value) {
-    return value.length;
+function func_generator(value) {
+    let fn = null;
+    if (value % 3 == 0) {
+        fn = console.log;
+    } else if (value % 3 == 1) {
+        fn = console.error;
+    }
+    return fn;
 }
 
 const print = () => {
-    console.log(getLength(null));
+    func_generator(8)("Hello world!");
 }
diff --git a/benchmark/Javascript/toy/NPD/case05.js b/benchmark/Javascript/toy/NPD/case05.js
index 0ef2d26..f97f1df 100644
--- a/benchmark/Javascript/toy/NPD/case05.js
+++ b/benchmark/Javascript/toy/NPD/case05.js
@@ -1,20 +1,17 @@
-function hello5() {
-    let output = [];
-
-    for (let i = 0; i < 5; i++) {
-        output.push(null);
-    }
-    return output;
+function func(value) {
+    return func2(value);
 }
 
-function hello6() {
-    let output = hello5();
-    for (let i = 0; i < 4; i++) {
-        output[i] = i.toString();
-    }
-    if (output[4] !== null && output[4] !== undefined) {
-        return output[4].length;
-    }
-    return 0;
+function func2(value) {
+    console.log(+value.prop);
+    delete value.prop;
+    return value;
 }
 
+const printprop = () => {
+	let d = {
+        prop: "1"
+    };
+    d = func(d);
+    console.log(d.prop.length);
+}
\ No newline at end of file
diff --git a/src/run_repoaudit.sh b/src/run_repoaudit.sh
index f0c6e92..5bab301 100755
--- a/src/run_repoaudit.sh
+++ b/src/run_repoaudit.sh
@@ -3,7 +3,7 @@ SCAN_TYPE=$1
 LANGUAGE=Javascript
 # MODEL=claude-3.7
 # MODEL=o3-mini
-MODEL=gemini-2.5-flash
+MODEL=gemini-2.0-flash
 BUG_TYPE=NPD
 PROJECT=toy
 
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 2ee656d..0dc2fd4 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -10,13 +10,23 @@ def extract_sources(self, function: Function) -> List[Value]:
         file_path = function.file_path
         null_value_nodes = find_nodes_by_type(root_node, "null")
         null_value_nodes.extend(find_nodes_by_type(root_node, "undefined"))
-
+        unary_expressions = find_nodes_by_type(root_node, "unary_expression")
+        
         sources = []
+        
+        for unary_expression in unary_expressions:
+            operator = unary_expression.child(0)
+            if operator is not None and operator.type == "delete":
+                line_number = source_code[: unary_expression.start_byte].count("\n") + 1
+                name = source_code[unary_expression.start_byte : unary_expression.end_byte]
+                sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
+
+        
         for node in null_value_nodes:
             line_number = source_code[: node.start_byte].count("\n") + 1
             name = source_code[node.start_byte : node.end_byte]
             sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
-            
+        
         return sources
 
     def extract_sinks(self, function: Function) -> List[Value]:
@@ -31,6 +41,7 @@ def extract_sinks(self, function: Function) -> List[Value]:
 
         nodes = find_nodes_by_type(root_node, "member_expression")
         nodes.extend(find_nodes_by_type(root_node, "subscript_expression"))
+        nodes.extend(find_nodes_by_type(root_node, "call_expression"))
         sinks = []
 
         for node in nodes:
diff --git a/src/tstool/dfbscan_extractor/dfbscan_extractor.py b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
index d300cdb..d225ecd 100644
--- a/src/tstool/dfbscan_extractor/dfbscan_extractor.py
+++ b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
@@ -34,6 +34,7 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
             function_root_node = function.parse_tree_root_node
             self.sources.extend(self.extract_sources(function))
             self.sinks.extend(self.extract_sinks(function))
+        
         return self.sources, self.sinks
 
     @abstractmethod

From 062cd1e399d43b3b2606f33cfb8bc22e1934b1d1 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Wed, 3 Sep 2025 21:06:22 -0400
Subject: [PATCH 04/23] Update buggy path computation to exclude sinks of
 already defined function calls

---
 .gitignore                                    |  1 +
 benchmark/Javascript/toy/NPD/case01.js        | 21 +++++++------------
 benchmark/Javascript/toy/NPD/case02.js        |  1 +
 src/agent/dfbscan.py                          | 15 ++++++++++---
 .../Javascript/dfbscan/path_validator.json    |  1 +
 5 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1e8dd82..31fc4d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,4 @@ cython_debug/
 #.idea/
 
 testcases/**
+.vscode
\ No newline at end of file
diff --git a/benchmark/Javascript/toy/NPD/case01.js b/benchmark/Javascript/toy/NPD/case01.js
index e7251ef..eda676e 100644
--- a/benchmark/Javascript/toy/NPD/case01.js
+++ b/benchmark/Javascript/toy/NPD/case01.js
@@ -1,17 +1,10 @@
-function hello() {
-    let output = [];
-
-    for (let i = 0; i < 5; i++) {
-        output.push(null);
-    }
-    return output;
+function test2_process(data) {
+    let value = data[0];
+    return value;
 }
+    
 
-function hello2() {
-    let output = hello();
-    for (let i = 0; i < 4; i++) {
-        output[i] = i.toString();
-    }
-    return output[4].length;
+function test2_caller() {
+    let data = null;
+    return test2_process(data)
 }
-
diff --git a/benchmark/Javascript/toy/NPD/case02.js b/benchmark/Javascript/toy/NPD/case02.js
index 9003b3d..f4d20ba 100644
--- a/benchmark/Javascript/toy/NPD/case02.js
+++ b/benchmark/Javascript/toy/NPD/case02.js
@@ -10,4 +10,5 @@ function func_generator(value) {
 
 const print = () => {
     func_generator(8)("Hello world!");
+    console.log("Done");
 }
diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 027fe0a..daac04e 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -362,9 +362,18 @@ def __collect_potential_buggy_paths(
                     if value.label == ValueLabel.SINK:
                         # For NPD-style bug types
                         if self.is_reachable:
-                            self.state.update_potential_buggy_paths(
-                                src_value, path_with_unknown_status + [value]
-                            )
+                            
+                            # Check whether the sink is a call to a predefined function
+                            is_defined_function = False
+                            for func in self.ts_analyzer.function_env.values():
+                                if value.name == func.function_name:
+                                    is_defined_function = True
+                                    break
+                            
+                            if not is_defined_function:
+                                self.state.update_potential_buggy_paths(
+                                    src_value, path_with_unknown_status + [value]
+                                )
                     elif value.label in {
                         ValueLabel.PARA,
                         ValueLabel.RET,
diff --git a/src/prompt/Javascript/dfbscan/path_validator.json b/src/prompt/Javascript/dfbscan/path_validator.json
index a46d22f..ae80658 100644
--- a/src/prompt/Javascript/dfbscan/path_validator.json
+++ b/src/prompt/Javascript/dfbscan/path_validator.json
@@ -8,6 +8,7 @@
     "- If the source in the first function flows to the sink in the last function without any interference, then the path is reachable and your answer should be Yes.",
     "- For NPD detection, if the source value is modified or its null/undefined state is verified (for example, via an explicit check like 'p !== null') before reaching the sink, then the path is unreachable and you should answer No.",
     "- If a function exits or returns before the sink or other propagation sites (such as function calls) are reached, the path is unreachable; answer No in such cases.",
+    "- If a sink is a call to an object or a function that is builtin in Javascript or defined in the scope, then the path is unreachable; answer No in such cases.",
     "- Analyze conditions within each function: infer the outcome of branch statements and then verify whether the conditions across different sub-paths conflict. If conflicts exist, the overall path is unreachable.",
     "- Consider the values of relevant variables; if those values contradict the necessary branch conditions for triggering the bug, the path is unreachable and you should answer No.",
     "In summary, assess the conditions in every sub-path, check for conflicts, and decide whether the entire propagation path is reachable."

From 6d6a83ea3c13b997f39f207666920930980096bf Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 5 Sep 2025 10:23:30 -0400
Subject: [PATCH 05/23] Improved prompt and removed code duplicates in path
 validator input

---
 benchmark/Javascript/toy/NPD/case06.js           | 16 ++++++++++++++++
 src/llmtool/dfbscan/path_validator.py            |  9 +++++++--
 .../dfbscan/intra_dataflow_analyzer.json         |  4 +++-
 3 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 benchmark/Javascript/toy/NPD/case06.js

diff --git a/benchmark/Javascript/toy/NPD/case06.js b/benchmark/Javascript/toy/NPD/case06.js
new file mode 100644
index 0000000..420a328
--- /dev/null
+++ b/benchmark/Javascript/toy/NPD/case06.js
@@ -0,0 +1,16 @@
+function process_data(myobj) {
+    const inner_processing = (myobj) => {
+        delete myobj.func;
+        return myobj;
+    }
+    myobj.func("Hello");
+    myobj = inner_processing(myobj);
+    myobj.func("Hello");
+}
+
+function main() {
+    let myobj = {
+        func: console.log
+    };
+    process_data(myobj)
+}
\ No newline at end of file
diff --git a/src/llmtool/dfbscan/path_validator.py b/src/llmtool/dfbscan/path_validator.py
index 069edfc..08a8694 100644
--- a/src/llmtool/dfbscan/path_validator.py
+++ b/src/llmtool/dfbscan/path_validator.py
@@ -86,11 +86,16 @@ def _get_prompt(self, input: LLMToolInput) -> str:
             value_lines.append(value_line)
         prompt = prompt.replace("<PATH>", "\n".join(value_lines))
         prompt = prompt.replace("<BUG_TYPE>", input.bug_type)
+        
+        functions: Set[Function] = set()
+        for func in input.values_to_functions.values():
+            if func is not None:
+                functions.add(func)
 
         program = "\n".join(
             [
-                "```\n" + func.lined_code + "\n```\n" if func is not None else "\n"
-                for func in input.values_to_functions.values()
+                "```\n" + func.lined_code + "\n```\n"
+                for func in functions
             ]
         )
         prompt = prompt.replace("<PROGRAM>", program)
diff --git a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
index 740a16c..074e484 100644
--- a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
+++ b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
@@ -24,7 +24,8 @@
     "- Step 3: For each execution path extracted in Step 2, simulate function execution line by line and determine where SRC propagates based on the four possible propagation locations.",
     "",
     "Additional guidelines:",
-    "- Different propagation paths can exist due to control flow constructs (if-else, loops); identify all execution paths before analysis;",
+    "- Different propagation paths can exist due to control flow constructs (if-else, loops, race conditions); identify all execution paths before analysis;",
+    "- If there are potential race conditions, for example, calling an async function without await, differentiate the control paths to consider the race condition;",
     "- Expand the first iteration of loops to analyze nested execution paths;",
     "- Treat each conditional branch (if, switch) as a separate execution path;",
     "- Expand nested conditions and loops to ensure all paths are analyzed."
@@ -112,6 +113,7 @@
     "(4) If there is no propagation along a path, provide a brief explanation of why SRC does not propagate in that path as follows:",
     "- Path <Path Number>: <Execution Path>;",
     "    - No propagation; Dependency: {reason for no propagation};",
+    "(5) Each Execution Path should start with the word \"Lines\", with each line number separated by \" -> \" and ended with a semicolon.",
     "(5) Remember: All the indexes start from 0 instead of 1. If there is only one return value, the index is 0."
   ],
   "meta_prompts": [

From 9b2d6fb139e2242d04297b5f25c8e8c556131e1c Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 5 Sep 2025 11:50:06 -0400
Subject: [PATCH 06/23] Added known javascript builtin that could return null
 for NPD source detection and tested auditing against microlight.js

---
 .gitmodules                                   |   3 +
 benchmark/Javascript/microlight/LICENSE       |  22 ++
 benchmark/Javascript/microlight/README.md     |  15 ++
 benchmark/Javascript/microlight/bower.json    |  30 +++
 benchmark/Javascript/microlight/microlight.js | 210 ++++++++++++++++++
 benchmark/Javascript/microlight/package.json  |  27 +++
 src/tstool/analyzer/Javascript_TS_analyzer.py |   2 +-
 .../Javascript/Javascript_NPD_extractor.py    |  35 ++-
 8 files changed, 342 insertions(+), 2 deletions(-)
 create mode 100644 benchmark/Javascript/microlight/LICENSE
 create mode 100644 benchmark/Javascript/microlight/README.md
 create mode 100644 benchmark/Javascript/microlight/bower.json
 create mode 100644 benchmark/Javascript/microlight/microlight.js
 create mode 100644 benchmark/Javascript/microlight/package.json

diff --git a/.gitmodules b/.gitmodules
index 43367c1..9b98d5f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "benchmark/Go/sally"]
 	path = benchmark/Go/sally
 	url = https://github.com/uber-go/sally.git
+[submodule "benchmark/Javascript/toy/NPD/microlight"]
+	path = benchmark/Javascript/toy/NPD/microlight
+	url = https://github.com/asvd/microlight.git
diff --git a/benchmark/Javascript/microlight/LICENSE b/benchmark/Javascript/microlight/LICENSE
new file mode 100644
index 0000000..b204ed1
--- /dev/null
+++ b/benchmark/Javascript/microlight/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 asvd
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/benchmark/Javascript/microlight/README.md b/benchmark/Javascript/microlight/README.md
new file mode 100644
index 0000000..6e815f5
--- /dev/null
+++ b/benchmark/Javascript/microlight/README.md
@@ -0,0 +1,15 @@
+microlight.js
+=============
+
+*microlight.js* is a tiny library (2.2k minified) which improves
+ readability of code snippets by highlighting, for any programming
+ language, without attaching additional language packages or styles:
+
+![preview](http://asvd.github.io/microlight/microlight-preview-big.png)
+
+For demos and usage guide, refer to https://asvd.github.io/microlight
+
+--
+
+Follow me on twitter: https://twitter.com/asvd0
+
diff --git a/benchmark/Javascript/microlight/bower.json b/benchmark/Javascript/microlight/bower.json
new file mode 100644
index 0000000..b89fc78
--- /dev/null
+++ b/benchmark/Javascript/microlight/bower.json
@@ -0,0 +1,30 @@
+{
+  "name": "microlight",
+  "version": "0.0.7",
+  "homepage": "https://github.com/asvd/microlight",
+  "authors": [
+    "Dmitry Prokashev <heliosframework@gmail.com>"
+  ],
+  "description": "highlights code in any language",
+  "main": "microlight.js",
+  "moduleType": [
+    "amd",
+    "globals"
+  ],
+  "keywords": [
+    "syntax",
+    "highlight",
+    "highlighting",
+    "source",
+    "code",
+    "source code",
+    "snippet",
+    "code snippet"
+  ],
+  "license": "MIT",
+  "ignore": [
+    "**/.*",
+    "node_modules",
+    "bower_components"
+  ]
+}
diff --git a/benchmark/Javascript/microlight/microlight.js b/benchmark/Javascript/microlight/microlight.js
new file mode 100644
index 0000000..5aa193a
--- /dev/null
+++ b/benchmark/Javascript/microlight/microlight.js
@@ -0,0 +1,210 @@
+/**
+ * @fileoverview microlight - syntax highlightning library
+ * @version 0.0.7
+ *
+ * @license MIT, see http://github.com/asvd/microlight
+ * @copyright 2016 asvd <heliosframework@gmail.com>
+ *
+ * Code structure aims at minimizing the compressed library size
+ */
+
+
+(function (root, factory) {
+    if (typeof define === 'function' && define.amd) {
+        define(['exports'], factory);
+    } else if (typeof exports !== 'undefined') {
+        factory(exports);
+    } else {
+        factory((root.microlight = {}));
+    }
+}(this, function (exports) {
+    // for better compression
+    var _window       = window,
+        _document     = document,
+        appendChild   = 'appendChild',
+        test          = 'test',
+        // style and color templates
+        textShadow    = ';text-shadow:',
+        opacity       = 'opacity:.',
+        _0px_0px      = ' 0px 0px ',
+        _3px_0px_5    = '3px 0px 5',
+        brace         = ')',
+
+        i,
+        microlighted,
+        el;  // current microlighted element to run through
+
+
+    
+    var reset = function(cls) {
+        // nodes to highlight
+        microlighted = _document.getElementsByClassName(cls||'microlight');
+
+        for (i = 0; el = microlighted[i++];) {
+            var text  = el.textContent,
+                pos   = 0,       // current position
+                next1 = text[0], // next character
+                chr   = 1,       // current character
+                prev1,           // previous character
+                prev2,           // the one before the previous
+                token =          // current token content
+                el.innerHTML = '',  // (and cleaning the node)
+                
+                // current token type:
+                //  0: anything else (whitespaces / newlines)
+                //  1: operator or brace
+                //  2: closing braces (after which '/' is division not regex)
+                //  3: (key)word
+                //  4: regex
+                //  5: string starting with "
+                //  6: string starting with '
+                //  7: xml comment  <!-- -->
+                //  8: multiline comment /* */
+                //  9: single-line comment starting with two slashes //
+                // 10: single-line comment starting with hash #
+                tokenType = 0,
+
+                // kept to determine between regex and division
+                lastTokenType,
+                // flag determining if token is multi-character
+                multichar,
+                node,
+
+                // calculating the colors for the style templates
+                colorArr = /(\d*\, \d*\, \d*)(, ([.\d]*))?/g.exec(
+                    _window.getComputedStyle(el).color
+                ),
+                pxColor = 'px rgba('+colorArr[1]+',',
+                alpha = colorArr[3]||1;
+
+            // running through characters and highlighting
+            while (prev2 = prev1,
+                   // escaping if needed (with except for comments)
+                   // pervious character will not be therefore
+                   // recognized as a token finalize condition
+                   prev1 = tokenType < 7 && prev1 == '\\' ? 1 : chr
+            ) {
+                chr = next1;
+                next1=text[++pos];
+                multichar = token.length > 1;
+
+                // checking if current token should be finalized
+                if (!chr  || // end of content
+                    // types 9-10 (single-line comments) end with a
+                    // newline
+                    (tokenType > 8 && chr == '\n') ||
+                    [ // finalize conditions for other token types
+                        // 0: whitespaces
+                        /\S/[test](chr),  // merged together
+                        // 1: operators
+                        1,                // consist of a single character
+                        // 2: braces
+                        1,                // consist of a single character
+                        // 3: (key)word
+                        !/[$\w]/[test](chr),
+                        // 4: regex
+                        (prev1 == '/' || prev1 == '\n') && multichar,
+                        // 5: string with "
+                        prev1 == '"' && multichar,
+                        // 6: string with '
+                        prev1 == "'" && multichar,
+                        // 7: xml comment
+                        text[pos-4]+prev2+prev1 == '-->',
+                        // 8: multiline comment
+                        prev2+prev1 == '*/'
+                    ][tokenType]
+                ) {
+                    // appending the token to the result
+                    if (token) {
+                        // remapping token type into style
+                        // (some types are highlighted similarly)
+                        el[appendChild](
+                            node = _document.createElement('span')
+                        ).setAttribute('style', [
+                            // 0: not formatted
+                            '',
+                            // 1: keywords
+                            textShadow + _0px_0px+9+pxColor + alpha * .7 + '),' +
+                                         _0px_0px+2+pxColor + alpha * .4 + brace,
+                            // 2: punctuation
+                            opacity + 6 +
+                            textShadow + _0px_0px+7+pxColor + alpha / 4 + '),' +
+                                         _0px_0px+3+pxColor + alpha / 4 + brace,
+                            // 3: strings and regexps
+                            opacity + 7 +
+                            textShadow + _3px_0px_5+pxColor + alpha / 5 + '),-' +
+                                         _3px_0px_5+pxColor + alpha / 5 + brace,
+                            // 4: comments
+                            'font-style:italic;'+
+                            opacity + 5 +
+                            textShadow + _3px_0px_5+pxColor + alpha / 4 + '),-' +
+                                         _3px_0px_5+pxColor + alpha / 4 + brace
+                        ][
+                            // not formatted
+                            !tokenType ? 0 :
+                            // punctuation
+                            tokenType < 3 ? 2 :
+                            // comments
+                            tokenType > 6 ? 4 :
+                            // regex and strings
+                            tokenType > 3 ? 3 :
+                            // otherwise tokenType == 3, (key)word
+                            // (1 if regexp matches, 0 otherwise)
+                            + /^(a(bstract|lias|nd|rguments|rray|s(m|sert)?|uto)|b(ase|egin|ool(ean)?|reak|yte)|c(ase|atch|har|hecked|lass|lone|ompl|onst|ontinue)|de(bugger|cimal|clare|f(ault|er)?|init|l(egate|ete)?)|do|double|e(cho|ls?if|lse(if)?|nd|nsure|num|vent|x(cept|ec|p(licit|ort)|te(nds|nsion|rn)))|f(allthrough|alse|inal(ly)?|ixed|loat|or(each)?|riend|rom|unc(tion)?)|global|goto|guard|i(f|mp(lements|licit|ort)|n(it|clude(_once)?|line|out|stanceof|t(erface|ernal)?)?|s)|l(ambda|et|ock|ong)|m(icrolight|odule|utable)|NaN|n(amespace|ative|ext|ew|il|ot|ull)|o(bject|perator|r|ut|verride)|p(ackage|arams|rivate|rotected|rotocol|ublic)|r(aise|e(adonly|do|f|gister|peat|quire(_once)?|scue|strict|try|turn))|s(byte|ealed|elf|hort|igned|izeof|tatic|tring|truct|ubscript|uper|ynchronized|witch)|t(emplate|hen|his|hrows?|ransient|rue|ry|ype(alias|def|id|name|of))|u(n(checked|def(ined)?|ion|less|signed|til)|se|sing)|v(ar|irtual|oid|olatile)|w(char_t|hen|here|hile|ith)|xor|yield)$/[test](token)
+                        ]);
+
+                        node[appendChild](_document.createTextNode(token));
+                    }
+
+                    // saving the previous token type
+                    // (skipping whitespaces and comments)
+                    lastTokenType =
+                        (tokenType && tokenType < 7) ?
+                        tokenType : lastTokenType;
+
+                    // initializing a new token
+                    token = '';
+
+                    // determining the new token type (going up the
+                    // list until matching a token type start
+                    // condition)
+                    tokenType = 11;
+                    while (![
+                        1,                   //  0: whitespace
+                                             //  1: operator or braces
+                        /[\/{}[(\-+*=<>:;|\\.,?!&@~]/[test](chr),
+                        /[\])]/[test](chr),  //  2: closing brace
+                        /[$\w]/[test](chr),  //  3: (key)word
+                        chr == '/' &&        //  4: regex
+                            // previous token was an
+                            // opening brace or an
+                            // operator (otherwise
+                            // division, not a regex)
+                            (lastTokenType < 2) &&
+                            // workaround for xml
+                            // closing tags
+                            prev1 != '<',
+                        chr == '"',          //  5: string with "
+                        chr == "'",          //  6: string with '
+                                             //  7: xml comment
+                        chr+next1+text[pos+1]+text[pos+2] == '<!--',
+                        chr+next1 == '/*',   //  8: multiline comment
+                        chr+next1 == '//',   //  9: single-line comment
+                        chr == '#'           // 10: hash-style comment
+                    ][--tokenType]);
+                }
+
+                token += chr;
+            }
+        }
+    }
+
+    exports.reset = reset;
+
+    if (_document.readyState == 'complete') {
+        reset();
+    } else {
+        _window.addEventListener('load', function(){reset()}, 0);
+    }
+}));
+
diff --git a/benchmark/Javascript/microlight/package.json b/benchmark/Javascript/microlight/package.json
new file mode 100644
index 0000000..9006e24
--- /dev/null
+++ b/benchmark/Javascript/microlight/package.json
@@ -0,0 +1,27 @@
+{
+  "author": "Dmitry Prokashev <heliosframework@gmail.com>",
+  "name": "microlight",
+  "description": "highlights code in any language",
+  "version": "0.0.7",
+  "keywords": [
+    "syntax",
+    "highlight",
+    "highlighting",
+    "source",
+    "code",
+    "source code",
+    "snippet",
+    "code snippet"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/asvd/microlight.git"
+  },
+  "main": "microlight.js",
+  "dependencies": {},
+  "devDependencies": {},
+  "optionalDependencies": {},
+  "engines": {
+    "node": "*"
+  }
+}
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 510e6a2..089f699 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -65,7 +65,7 @@ def extract_function_info(
             if not name_node or not value_node:
                 continue
 
-            if value_node.type != "arrow_function":
+            if value_node.type != "arrow_function" and value_node.type != "function_expression":
                 continue
             
             function_name = source_code[name_node.start_byte : name_node.end_byte]
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 0dc2fd4..8a1cb93 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -4,6 +4,25 @@
 
 
 class Javascript_NPD_Extractor(DFBScanExtractor):
+    BUILTIN_NULLABLE_METHODS = {
+        b"exec",
+        b"match",
+        b"matchAll",
+        b"getElementById",
+        b"querySelector",
+        b"querySelectorAll",
+        b"getElementsByClassName",
+        b"getElementsByTagName",
+        b"getAttribute",
+        b"find",
+        b"findIndex",
+        b"pop",
+        b"shift",
+        b"get",
+        b"getOwnPropertyDescriptor",
+        b"stringify",
+    }
+
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
@@ -11,9 +30,24 @@ def extract_sources(self, function: Function) -> List[Value]:
         null_value_nodes = find_nodes_by_type(root_node, "null")
         null_value_nodes.extend(find_nodes_by_type(root_node, "undefined"))
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
+        call_expressions = find_nodes_by_type(root_node, "call_expression")
         
         sources = []
         
+        for call_expression in call_expressions:
+            member_expression = call_expression.child(0)
+            if member_expression is None or member_expression.type != "member_expression":
+                continue
+
+            property_identifier = member_expression.child(2)
+            if property_identifier is None or property_identifier.type != "property_identifier":
+                continue
+
+            if property_identifier.text in self.BUILTIN_NULLABLE_METHODS:
+                line_number = source_code[: property_identifier.start_byte].count("\n") + 1
+                name = source_code[property_identifier.start_byte : property_identifier.end_byte]
+                sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
+        
         for unary_expression in unary_expressions:
             operator = unary_expression.child(0)
             if operator is not None and operator.type == "delete":
@@ -21,7 +55,6 @@ def extract_sources(self, function: Function) -> List[Value]:
                 name = source_code[unary_expression.start_byte : unary_expression.end_byte]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
-        
         for node in null_value_nodes:
             line_number = source_code[: node.start_byte].count("\n") + 1
             name = source_code[node.start_byte : node.end_byte]

From a8a85d136a463826ab10c31dd0ba2e50afbc0a3a Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 5 Sep 2025 11:56:20 -0400
Subject: [PATCH 07/23] Updated microlight.js submodule

---
 .gitmodules                                   |   4 +-
 benchmark/Javascript/microlight               |   1 +
 benchmark/Javascript/microlight/LICENSE       |  22 --
 benchmark/Javascript/microlight/README.md     |  15 --
 benchmark/Javascript/microlight/bower.json    |  30 ---
 benchmark/Javascript/microlight/microlight.js | 210 ------------------
 benchmark/Javascript/microlight/package.json  |  27 ---
 7 files changed, 3 insertions(+), 306 deletions(-)
 create mode 160000 benchmark/Javascript/microlight
 delete mode 100644 benchmark/Javascript/microlight/LICENSE
 delete mode 100644 benchmark/Javascript/microlight/README.md
 delete mode 100644 benchmark/Javascript/microlight/bower.json
 delete mode 100644 benchmark/Javascript/microlight/microlight.js
 delete mode 100644 benchmark/Javascript/microlight/package.json

diff --git a/.gitmodules b/.gitmodules
index 9b98d5f..a8da027 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,6 +10,6 @@
 [submodule "benchmark/Go/sally"]
 	path = benchmark/Go/sally
 	url = https://github.com/uber-go/sally.git
-[submodule "benchmark/Javascript/toy/NPD/microlight"]
-	path = benchmark/Javascript/toy/NPD/microlight
+[submodule "benchmark/Javascript/microlight"]
+	path = benchmark/Javascript/microlight
 	url = https://github.com/asvd/microlight.git
diff --git a/benchmark/Javascript/microlight b/benchmark/Javascript/microlight
new file mode 160000
index 0000000..8a627ec
--- /dev/null
+++ b/benchmark/Javascript/microlight
@@ -0,0 +1 @@
+Subproject commit 8a627ecc1ed37e82d2f48c08204923d4734127b1
diff --git a/benchmark/Javascript/microlight/LICENSE b/benchmark/Javascript/microlight/LICENSE
deleted file mode 100644
index b204ed1..0000000
--- a/benchmark/Javascript/microlight/LICENSE
+++ /dev/null
@@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2016 asvd
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
diff --git a/benchmark/Javascript/microlight/README.md b/benchmark/Javascript/microlight/README.md
deleted file mode 100644
index 6e815f5..0000000
--- a/benchmark/Javascript/microlight/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-microlight.js
-=============
-
-*microlight.js* is a tiny library (2.2k minified) which improves
- readability of code snippets by highlighting, for any programming
- language, without attaching additional language packages or styles:
-
-![preview](http://asvd.github.io/microlight/microlight-preview-big.png)
-
-For demos and usage guide, refer to https://asvd.github.io/microlight
-
---
-
-Follow me on twitter: https://twitter.com/asvd0
-
diff --git a/benchmark/Javascript/microlight/bower.json b/benchmark/Javascript/microlight/bower.json
deleted file mode 100644
index b89fc78..0000000
--- a/benchmark/Javascript/microlight/bower.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
-  "name": "microlight",
-  "version": "0.0.7",
-  "homepage": "https://github.com/asvd/microlight",
-  "authors": [
-    "Dmitry Prokashev <heliosframework@gmail.com>"
-  ],
-  "description": "highlights code in any language",
-  "main": "microlight.js",
-  "moduleType": [
-    "amd",
-    "globals"
-  ],
-  "keywords": [
-    "syntax",
-    "highlight",
-    "highlighting",
-    "source",
-    "code",
-    "source code",
-    "snippet",
-    "code snippet"
-  ],
-  "license": "MIT",
-  "ignore": [
-    "**/.*",
-    "node_modules",
-    "bower_components"
-  ]
-}
diff --git a/benchmark/Javascript/microlight/microlight.js b/benchmark/Javascript/microlight/microlight.js
deleted file mode 100644
index 5aa193a..0000000
--- a/benchmark/Javascript/microlight/microlight.js
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * @fileoverview microlight - syntax highlightning library
- * @version 0.0.7
- *
- * @license MIT, see http://github.com/asvd/microlight
- * @copyright 2016 asvd <heliosframework@gmail.com>
- *
- * Code structure aims at minimizing the compressed library size
- */
-
-
-(function (root, factory) {
-    if (typeof define === 'function' && define.amd) {
-        define(['exports'], factory);
-    } else if (typeof exports !== 'undefined') {
-        factory(exports);
-    } else {
-        factory((root.microlight = {}));
-    }
-}(this, function (exports) {
-    // for better compression
-    var _window       = window,
-        _document     = document,
-        appendChild   = 'appendChild',
-        test          = 'test',
-        // style and color templates
-        textShadow    = ';text-shadow:',
-        opacity       = 'opacity:.',
-        _0px_0px      = ' 0px 0px ',
-        _3px_0px_5    = '3px 0px 5',
-        brace         = ')',
-
-        i,
-        microlighted,
-        el;  // current microlighted element to run through
-
-
-    
-    var reset = function(cls) {
-        // nodes to highlight
-        microlighted = _document.getElementsByClassName(cls||'microlight');
-
-        for (i = 0; el = microlighted[i++];) {
-            var text  = el.textContent,
-                pos   = 0,       // current position
-                next1 = text[0], // next character
-                chr   = 1,       // current character
-                prev1,           // previous character
-                prev2,           // the one before the previous
-                token =          // current token content
-                el.innerHTML = '',  // (and cleaning the node)
-                
-                // current token type:
-                //  0: anything else (whitespaces / newlines)
-                //  1: operator or brace
-                //  2: closing braces (after which '/' is division not regex)
-                //  3: (key)word
-                //  4: regex
-                //  5: string starting with "
-                //  6: string starting with '
-                //  7: xml comment  <!-- -->
-                //  8: multiline comment /* */
-                //  9: single-line comment starting with two slashes //
-                // 10: single-line comment starting with hash #
-                tokenType = 0,
-
-                // kept to determine between regex and division
-                lastTokenType,
-                // flag determining if token is multi-character
-                multichar,
-                node,
-
-                // calculating the colors for the style templates
-                colorArr = /(\d*\, \d*\, \d*)(, ([.\d]*))?/g.exec(
-                    _window.getComputedStyle(el).color
-                ),
-                pxColor = 'px rgba('+colorArr[1]+',',
-                alpha = colorArr[3]||1;
-
-            // running through characters and highlighting
-            while (prev2 = prev1,
-                   // escaping if needed (with except for comments)
-                   // pervious character will not be therefore
-                   // recognized as a token finalize condition
-                   prev1 = tokenType < 7 && prev1 == '\\' ? 1 : chr
-            ) {
-                chr = next1;
-                next1=text[++pos];
-                multichar = token.length > 1;
-
-                // checking if current token should be finalized
-                if (!chr  || // end of content
-                    // types 9-10 (single-line comments) end with a
-                    // newline
-                    (tokenType > 8 && chr == '\n') ||
-                    [ // finalize conditions for other token types
-                        // 0: whitespaces
-                        /\S/[test](chr),  // merged together
-                        // 1: operators
-                        1,                // consist of a single character
-                        // 2: braces
-                        1,                // consist of a single character
-                        // 3: (key)word
-                        !/[$\w]/[test](chr),
-                        // 4: regex
-                        (prev1 == '/' || prev1 == '\n') && multichar,
-                        // 5: string with "
-                        prev1 == '"' && multichar,
-                        // 6: string with '
-                        prev1 == "'" && multichar,
-                        // 7: xml comment
-                        text[pos-4]+prev2+prev1 == '-->',
-                        // 8: multiline comment
-                        prev2+prev1 == '*/'
-                    ][tokenType]
-                ) {
-                    // appending the token to the result
-                    if (token) {
-                        // remapping token type into style
-                        // (some types are highlighted similarly)
-                        el[appendChild](
-                            node = _document.createElement('span')
-                        ).setAttribute('style', [
-                            // 0: not formatted
-                            '',
-                            // 1: keywords
-                            textShadow + _0px_0px+9+pxColor + alpha * .7 + '),' +
-                                         _0px_0px+2+pxColor + alpha * .4 + brace,
-                            // 2: punctuation
-                            opacity + 6 +
-                            textShadow + _0px_0px+7+pxColor + alpha / 4 + '),' +
-                                         _0px_0px+3+pxColor + alpha / 4 + brace,
-                            // 3: strings and regexps
-                            opacity + 7 +
-                            textShadow + _3px_0px_5+pxColor + alpha / 5 + '),-' +
-                                         _3px_0px_5+pxColor + alpha / 5 + brace,
-                            // 4: comments
-                            'font-style:italic;'+
-                            opacity + 5 +
-                            textShadow + _3px_0px_5+pxColor + alpha / 4 + '),-' +
-                                         _3px_0px_5+pxColor + alpha / 4 + brace
-                        ][
-                            // not formatted
-                            !tokenType ? 0 :
-                            // punctuation
-                            tokenType < 3 ? 2 :
-                            // comments
-                            tokenType > 6 ? 4 :
-                            // regex and strings
-                            tokenType > 3 ? 3 :
-                            // otherwise tokenType == 3, (key)word
-                            // (1 if regexp matches, 0 otherwise)
-                            + /^(a(bstract|lias|nd|rguments|rray|s(m|sert)?|uto)|b(ase|egin|ool(ean)?|reak|yte)|c(ase|atch|har|hecked|lass|lone|ompl|onst|ontinue)|de(bugger|cimal|clare|f(ault|er)?|init|l(egate|ete)?)|do|double|e(cho|ls?if|lse(if)?|nd|nsure|num|vent|x(cept|ec|p(licit|ort)|te(nds|nsion|rn)))|f(allthrough|alse|inal(ly)?|ixed|loat|or(each)?|riend|rom|unc(tion)?)|global|goto|guard|i(f|mp(lements|licit|ort)|n(it|clude(_once)?|line|out|stanceof|t(erface|ernal)?)?|s)|l(ambda|et|ock|ong)|m(icrolight|odule|utable)|NaN|n(amespace|ative|ext|ew|il|ot|ull)|o(bject|perator|r|ut|verride)|p(ackage|arams|rivate|rotected|rotocol|ublic)|r(aise|e(adonly|do|f|gister|peat|quire(_once)?|scue|strict|try|turn))|s(byte|ealed|elf|hort|igned|izeof|tatic|tring|truct|ubscript|uper|ynchronized|witch)|t(emplate|hen|his|hrows?|ransient|rue|ry|ype(alias|def|id|name|of))|u(n(checked|def(ined)?|ion|less|signed|til)|se|sing)|v(ar|irtual|oid|olatile)|w(char_t|hen|here|hile|ith)|xor|yield)$/[test](token)
-                        ]);
-
-                        node[appendChild](_document.createTextNode(token));
-                    }
-
-                    // saving the previous token type
-                    // (skipping whitespaces and comments)
-                    lastTokenType =
-                        (tokenType && tokenType < 7) ?
-                        tokenType : lastTokenType;
-
-                    // initializing a new token
-                    token = '';
-
-                    // determining the new token type (going up the
-                    // list until matching a token type start
-                    // condition)
-                    tokenType = 11;
-                    while (![
-                        1,                   //  0: whitespace
-                                             //  1: operator or braces
-                        /[\/{}[(\-+*=<>:;|\\.,?!&@~]/[test](chr),
-                        /[\])]/[test](chr),  //  2: closing brace
-                        /[$\w]/[test](chr),  //  3: (key)word
-                        chr == '/' &&        //  4: regex
-                            // previous token was an
-                            // opening brace or an
-                            // operator (otherwise
-                            // division, not a regex)
-                            (lastTokenType < 2) &&
-                            // workaround for xml
-                            // closing tags
-                            prev1 != '<',
-                        chr == '"',          //  5: string with "
-                        chr == "'",          //  6: string with '
-                                             //  7: xml comment
-                        chr+next1+text[pos+1]+text[pos+2] == '<!--',
-                        chr+next1 == '/*',   //  8: multiline comment
-                        chr+next1 == '//',   //  9: single-line comment
-                        chr == '#'           // 10: hash-style comment
-                    ][--tokenType]);
-                }
-
-                token += chr;
-            }
-        }
-    }
-
-    exports.reset = reset;
-
-    if (_document.readyState == 'complete') {
-        reset();
-    } else {
-        _window.addEventListener('load', function(){reset()}, 0);
-    }
-}));
-
diff --git a/benchmark/Javascript/microlight/package.json b/benchmark/Javascript/microlight/package.json
deleted file mode 100644
index 9006e24..0000000
--- a/benchmark/Javascript/microlight/package.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "author": "Dmitry Prokashev <heliosframework@gmail.com>",
-  "name": "microlight",
-  "description": "highlights code in any language",
-  "version": "0.0.7",
-  "keywords": [
-    "syntax",
-    "highlight",
-    "highlighting",
-    "source",
-    "code",
-    "source code",
-    "snippet",
-    "code snippet"
-  ],
-  "repository": {
-    "type": "git",
-    "url": "git://github.com/asvd/microlight.git"
-  },
-  "main": "microlight.js",
-  "dependencies": {},
-  "devDependencies": {},
-  "optionalDependencies": {},
-  "engines": {
-    "node": "*"
-  }
-}

From 3886973f5d157e28a153b83d35848d06a9128c43 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Sun, 7 Sep 2025 12:29:13 -0400
Subject: [PATCH 08/23] Added global variable extraction for javascript

---
 src/tstool/analyzer/Javascript_TS_analyzer.py | 22 +++++-
 src/tstool/analyzer/TS_analyzer.py            | 25 ++++++-
 .../Cpp/Cpp_NPD_extractor.py                  |  6 ++
 .../dfbscan_extractor/Go/Go_NPD_extractor.py  |  6 ++
 .../Javascript/Javascript_NPD_extractor.py    | 72 ++++++++++++++-----
 .../Python/Python_NPD_extractor.py            |  6 ++
 .../dfbscan_extractor/dfbscan_extractor.py    | 29 +++++++-
 7 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 089f699..dcfde29 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -89,7 +89,27 @@ def extract_global_info(
         For Javascript, this may include module-level variables.
         Currently not implemented.
         """
-        # TODO: Add global variable analysis if needed.
+        # Record every top-level `let`/`const` binding whose value is not a function.
+        for child in tree.root_node.children:
+            if child.type != "lexical_declaration":
+                continue
+            # One statement may declare several names: `const a = f(), b = null;`
+            for declarator_node in child.named_children:
+                if declarator_node.type != "variable_declarator":
+                    continue
+                name_node = declarator_node.child_by_field_name("name")
+                value_node = declarator_node.child_by_field_name("value")
+                if not name_node or not value_node:
+                    continue
+                # Function-valued bindings are skipped; they are not data globals.
+                if value_node.type in ("arrow_function", "function_expression"):
+                    continue
+
+                global_name = source_code[name_node.start_byte : name_node.end_byte]
+                line = source_code[:name_node.start_byte].count("\n") + 1
+                global_id = len(self.globalsRawDataDic) + 1
+                self.globalsRawDataDic[global_id] = (global_name, line, declarator_node)
+                self.globalsToFile[global_id] = file_path
         return
 
     def get_callee_name_at_call_site(
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index dcafd26..e0bc473 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -170,8 +170,11 @@ def __init__(
         self.functionToFile: Dict[int, str] = {}
         self.fileContentDic: Dict[str, str] = {}
         self.glb_var_map: Dict[str, str] = {}  # global var info
-
+        self.globalsRawDataDic: Dict[int, Tuple[str, int, Node]] = {}  # global id -> (name, line, declarator node)
+        self.globalsToFile: Dict[int, str] = {}  # global id -> path of the declaring file
+        
         self.function_env: Dict[int, Function] = {}
+        self.globals_env = {}
         self.api_env: Dict[int, API] = {}
 
         # Results of call graph analysis
@@ -231,6 +234,7 @@ def parse_project(self) -> None:
         """
         Parse all project files using tree-sitter.
         """
+        # Parses files in the project
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.max_symbolic_workers_num
         ) as executor:
@@ -249,6 +253,7 @@ def parse_project(self) -> None:
                 pbar.update(1)
             pbar.close()
 
+        # Analyzes extracted functions
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.max_symbolic_workers_num
         ) as executor:
@@ -267,6 +272,24 @@ def parse_project(self) -> None:
                 self.function_env[func_id] = current_function
                 pbar.update(1)
             pbar.close()
+            
+            
+        # Analyzes extracted global variables
+        pbar = tqdm(total=len(self.globalsRawDataDic), desc="Analyzing Global Variables")
+        for global_id, global_var_tuple in self.globalsRawDataDic.items():
+            name = global_var_tuple[0]
+            line = global_var_tuple[1]
+            value = Value(
+                name=name,
+                line_number=line,
+                label=ValueLabel.GLOBAL,
+                file=self.globalsToFile[global_id]
+            )
+            
+            self.globals_env[global_id] = value
+            pbar.update(1)
+        pbar.close()
+            
         return
 
     def analyze_call_graph(self) -> None:
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
index b7fe94f..fbf5ec5 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
@@ -6,6 +6,12 @@
 
 
 class Cpp_NPD_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        return False  # this extractor never reports a global declarator as a source
+        
+    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        return False  # this extractor never reports a global declarator as a sink
+    
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
index 93a666b..4501f96 100644
--- a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
@@ -6,6 +6,12 @@
 
 
 class Go_NPD_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        return False  # this extractor never reports a global declarator as a source
+        
+    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        return False  # this extractor never reports a global declarator as a sink
+    
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 8a1cb93..e5e1928 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -4,31 +4,65 @@
 
 
 class Javascript_NPD_Extractor(DFBScanExtractor):
+    NULLISH_VALUES = {
+        "null",
+        "undefined"
+    }
     BUILTIN_NULLABLE_METHODS = {
-        b"exec",
-        b"match",
-        b"matchAll",
-        b"getElementById",
-        b"querySelector",
-        b"querySelectorAll",
-        b"getElementsByClassName",
-        b"getElementsByTagName",
-        b"getAttribute",
-        b"find",
-        b"findIndex",
-        b"pop",
-        b"shift",
-        b"get",
-        b"getOwnPropertyDescriptor",
-        b"stringify",
+        "exec",
+        "match",
+        "matchAll",
+        "getElementById",
+        "querySelector",
+        "querySelectorAll",
+        "getElementsByClassName",
+        "getElementsByTagName",
+        "getAttribute",
+        "find",
+        "findIndex",
+        "pop",
+        "shift",
+        "get",
+        "getOwnPropertyDescriptor",
+        "stringify",
     }
+    
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        """Return True if the declarator's initializer may be null or undefined."""
+        # child(2) is expected to be the initializer; it is None for `let x;`.
+        target_node = global_declarator_node.child(2)
+        if target_node is None:
+            return False
+        if target_node.type in self.NULLISH_VALUES:
+            return True
+        if target_node.type != "call_expression":
+            return False
+        # Only `receiver.method(...)` calls to a listed nullable builtin qualify.
+        callee = target_node.child(0)
+        if callee is None or callee.type != "member_expression":
+            return False
+        prop = callee.child(2)
+        if prop is None or prop.type != "property_identifier" or prop.text is None:
+            return False
+        return prop.text.decode() in self.BUILTIN_NULLABLE_METHODS
+        
+    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        """Return True if the initializer is a member access, subscript, or call."""
+        # child(2) is expected to be the initializer; it is None for `let x;`.
+        target_node = global_declarator_node.child(2)
+        if target_node is None:
+            return False
+        return target_node.type in {"member_expression", "subscript_expression", "call_expression"}
 
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
         file_path = function.file_path
-        null_value_nodes = find_nodes_by_type(root_node, "null")
-        null_value_nodes.extend(find_nodes_by_type(root_node, "undefined"))
+        null_value_nodes = []
+        
+        for nullish_value in self.NULLISH_VALUES:
+            null_value_nodes.extend(find_nodes_by_type(root_node, nullish_value))
+        
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
         call_expressions = find_nodes_by_type(root_node, "call_expression")
         
@@ -43,7 +77,7 @@ def extract_sources(self, function: Function) -> List[Value]:
             if property_identifier is None or property_identifier.type != "property_identifier":
                 continue
 
-            if property_identifier.text in self.BUILTIN_NULLABLE_METHODS:
+            if property_identifier.text.decode() in self.BUILTIN_NULLABLE_METHODS:
                 line_number = source_code[: property_identifier.start_byte].count("\n") + 1
                 name = source_code[property_identifier.start_byte : property_identifier.end_byte]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
diff --git a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
index caca262..a4c5e0e 100644
--- a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
@@ -4,6 +4,12 @@
 
 
 class Python_NPD_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        return False
+        
+    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        return False
+    
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/dfbscan_extractor.py b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
index d225ecd..2ca0c17 100644
--- a/src/tstool/dfbscan_extractor/dfbscan_extractor.py
+++ b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
@@ -24,7 +24,11 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
         """
         Start the source/sink extraction process.
         """
-        pbar = tqdm(total=len(self.ts_analyzer.function_env), desc="Parsing files")
+        pbar = tqdm(
+            total=len(self.ts_analyzer.function_env)
+            + len(self.ts_analyzer.globals_env),
+            desc="Parsing files",
+        )
         for function_id in self.ts_analyzer.function_env:
             pbar.update(1)
             function: Function = self.ts_analyzer.function_env[function_id]
@@ -34,9 +38,30 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
             function_root_node = function.parse_tree_root_node
             self.sources.extend(self.extract_sources(function))
             self.sinks.extend(self.extract_sinks(function))
-        
+
+        for global_id, global_var in self.ts_analyzer.globals_env.items():
+            pbar.update(1)
+            node = self.ts_analyzer.globalsRawDataDic[global_id][2]
+            if self.is_global_source(node):
+                global_var.label = ValueLabel.SRC
+                self.sources.append(global_var)
+
+            if self.is_global_sink(node):
+                global_var.label = ValueLabel.SINK
+                self.sinks.append(global_var)
+
+        pbar.close()
+
         return self.sources, self.sinks
 
+    @abstractmethod
+    def is_global_source(self, global_var: Tree) -> bool:
+        pass
+
+    @abstractmethod
+    def is_global_sink(self, global_var: Tree) -> bool:
+        pass
+
     @abstractmethod
     def extract_sources(self, function: Function) -> List[Value]:
         """

From b8f50451dc10513c7098f50cc804200f8bfe8e56 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Sat, 20 Sep 2025 20:28:36 -0400
Subject: [PATCH 09/23] Added JavaScript global variable NPD analysis

---
 benchmark/Javascript/toy/NPD/case01.js        |   5 +
 src/agent/dfbscan.py                          | 158 +++++++++++++++++-
 src/llmtool/dfbscan/path_validator.py         |  23 ++-
 src/tstool/analyzer/Javascript_TS_analyzer.py |  29 +++-
 src/tstool/analyzer/TS_analyzer.py            |  61 ++++++-
 .../Javascript/Javascript_NPD_extractor.py    |  97 +++++++----
 .../dfbscan_extractor/dfbscan_extractor.py    |   6 +-
 7 files changed, 315 insertions(+), 64 deletions(-)

diff --git a/benchmark/Javascript/toy/NPD/case01.js b/benchmark/Javascript/toy/NPD/case01.js
index eda676e..6fc2c93 100644
--- a/benchmark/Javascript/toy/NPD/case01.js
+++ b/benchmark/Javascript/toy/NPD/case01.js
@@ -1,5 +1,10 @@
+var myname = "daniel";
+myname = null;
+
 function test2_process(data) {
+    let current = myname;
     let value = data[0];
+    console.log(current.length)
     return value;
 }
     
diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index daac04e..52afb74 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -116,7 +116,7 @@ def __obtain_extractor(self) -> DFBScanExtractor:
         elif self.language == "Go":
             if self.bug_type == "NPD":
                 return Go_NPD_Extractor(self.ts_analyzer)
-        
+
         raise NotImplementedError(
             f"Unsupported bug type: {self.bug_type} in {self.language}"
         )
@@ -362,14 +362,14 @@ def __collect_potential_buggy_paths(
                     if value.label == ValueLabel.SINK:
                         # For NPD-style bug types
                         if self.is_reachable:
-                            
+
                             # Checks if the sink is a called to a predefined function
                             is_defined_function = False
                             for func in self.ts_analyzer.function_env.values():
                                 if value.name == func.function_name:
                                     is_defined_function = True
                                     break
-                            
+
                             if not is_defined_function:
                                 self.state.update_potential_buggy_paths(
                                     src_value, path_with_unknown_status + [value]
@@ -568,6 +568,9 @@ def start_scan(self) -> None:
         # Total number of source values
         total_src_values = len(self.src_values)
 
+        for global_value in self.ts_analyzer.globals_env.values():
+            self.__process_global_value(global_value)
+
         # Process each source value in parallel with a progress bar
         with tqdm(
             total=total_src_values, desc="Processing Source Values", unit="src"
@@ -599,6 +602,148 @@ def start_scan(self) -> None:
             self.logger.print_console(log_file)
         return
 
+    def __process_global_value(self, global_value):
+        worklist = []
+        reference_in_funcs = self.ts_analyzer.get_function_global_value_reference(
+            global_value
+        )
+        if len(reference_in_funcs) == 0:
+            return
+        initial_context = CallContext(False)
+
+        for func, global_references in reference_in_funcs.items():
+            for global_reference in global_references:
+                worklist.append((global_reference, func, initial_context))
+
+        # TODO: test intra dataflow analyzer for globals
+        while len(worklist) > 0:
+            (start_value, start_function, call_context) = worklist.pop(0)
+            if len(call_context.context) > self.call_depth:
+                continue
+
+            # Construct the input for intra-procedural data-flow analysis
+            sinks_in_function = self.__obtain_extractor().extract_sinks(start_function)
+            sink_values = [
+                (sink.name, sink.line_number - start_function.start_line_number + 1)
+                for sink in sinks_in_function
+            ]
+
+            call_statements = []
+            for call_site_node in start_function.function_call_site_nodes:
+                file_content = self.ts_analyzer.code_in_files[start_function.file_path]
+                call_site_line_number = (
+                    file_content[: call_site_node.start_byte].count("\n") + 1
+                )
+                call_site_name = file_content[
+                    call_site_node.start_byte : call_site_node.end_byte
+                ]
+                call_statements.append((call_site_name, call_site_line_number))
+
+            ret_values = [
+                (ret.name, ret.line_number - start_function.start_line_number + 1)
+                for ret in (
+                    start_function.retvals if start_function.retvals is not None else []
+                )
+            ]
+
+            df_input = IntraDataFlowAnalyzerInput(
+                start_function, start_value, sink_values, call_statements, ret_values
+            )
+
+            # Invoke the intra-procedural data-flow analysis
+            df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)
+
+            if df_output is None:
+                continue
+
+            for path_index in range(len(df_output.reachable_values)):
+                reachable_values_in_single_path = set([])
+                for value in df_output.reachable_values[path_index]:
+                    reachable_values_in_single_path.add((value, call_context))
+                self.state.update_reachable_values_per_path(
+                    (start_value, call_context), reachable_values_in_single_path
+                )
+
+                delta_worklist = self.__update_worklist(
+                    df_input, df_output, call_context, path_index
+                )
+                worklist.extend(delta_worklist)
+
+        if global_value.label != ValueLabel.SRC:
+            return
+
+        found_potential_buggy_paths = False
+        for func, global_references in reference_in_funcs.items():
+            for global_reference in global_references:
+                self.__collect_potential_buggy_paths(
+                    global_reference, (global_reference, CallContext(False))
+                )
+
+                if global_reference in self.state.potential_buggy_paths:
+                    found_potential_buggy_paths = True
+
+        # If no potential buggy paths are found, return early
+        if not found_potential_buggy_paths:
+            return
+
+        for start_value, buggy_paths in self.state.potential_buggy_paths.items():
+            for buggy_path in buggy_paths.values():
+                values_to_functions = {
+                    value: self.ts_analyzer.get_function_from_localvalue(value)
+                    for value in buggy_path
+                }
+
+                program_root = None
+                functions: Set[Function] = set()
+                for func in values_to_functions.values():
+                    if func is not None:
+                        functions.add(func)
+                    if program_root is None:
+                        program_root = func.parse_tree_root_node.parent
+
+                relevant_global_exprs = (
+                    self.ts_analyzer.get_global_expressions_by_identifier(
+                        global_value.name, program_root
+                    )
+                )
+
+                if self.state.check_existence(start_value, functions):
+                    continue
+
+                pv_input = PathValidatorInput(
+                    self.bug_type,
+                    buggy_path,
+                    values_to_functions,
+                    relevant_global_exprs,
+                )
+                pv_output = self.path_validator.invoke(pv_input, PathValidatorOutput)
+
+                if pv_output is None:
+                    continue
+
+                if pv_output.is_reachable:
+                    relevant_functions = {}
+                    for value in buggy_path:
+                        function = self.ts_analyzer.get_function_from_localvalue(value)
+                        if function is not None:
+                            relevant_functions[function.function_id] = function
+
+                    bug_report = BugReport(
+                        self.bug_type,
+                        start_value,
+                        relevant_functions,
+                        pv_output.explanation_str,
+                    )
+                    self.state.update_bug_report(bug_report)
+                    bug_report_dict = {
+                        bug_report_id: bug.to_dict()
+                        for bug_report_id, bug in self.state.bug_reports.items()
+                    }
+
+                    bug_info_file_path = self.res_dir_path + "/detect_info.json"
+                    with open(bug_info_file_path, "w") as f:
+                        json.dump(bug_report_dict, f, indent=4)
+
     def __process_src_value(self, src_value: Value) -> None:
         worklist = []
         src_function = self.ts_analyzer.get_function_from_localvalue(src_value)
@@ -710,10 +855,9 @@ def __process_src_value(self, src_value: Value) -> None:
                     for bug_report_id, bug in self.state.bug_reports.items()
                 }
 
-                with open(
-                    self.res_dir_path + "/detect_info.json", "w"
-                ) as bug_info_file:
-                    json.dump(bug_report_dict, bug_info_file, indent=4)
+                bug_info_file_path = self.res_dir_path + "/detect_info.json"
+                with open(bug_info_file_path, "w") as f:
+                    json.dump(bug_report_dict, f, indent=4)
         return
 
     def get_agent_state(self) -> DFBScanState:
diff --git a/src/llmtool/dfbscan/path_validator.py b/src/llmtool/dfbscan/path_validator.py
index 08a8694..e36e161 100644
--- a/src/llmtool/dfbscan/path_validator.py
+++ b/src/llmtool/dfbscan/path_validator.py
@@ -16,10 +16,12 @@ def __init__(
         bug_type: str,
         values: List[Value],
         values_to_functions: Dict[Value, Optional[Function]],
+        relevant_global_exprs: List[Node] = [],
     ) -> None:
         self.bug_type = bug_type
         self.values = values
         self.values_to_functions = values_to_functions
+        self.relevant_global_exprs = relevant_global_exprs
         return
 
     def __hash__(self) -> int:
@@ -86,17 +88,26 @@ def _get_prompt(self, input: LLMToolInput) -> str:
             value_lines.append(value_line)
         prompt = prompt.replace("<PATH>", "\n".join(value_lines))
         prompt = prompt.replace("<BUG_TYPE>", input.bug_type)
-        
+
         functions: Set[Function] = set()
         for func in input.values_to_functions.values():
             if func is not None:
                 functions.add(func)
 
-        program = "\n".join(
-            [
-                "```\n" + func.lined_code + "\n```\n"
-                for func in functions
-            ]
+        program = "\n"
+        if len(input.relevant_global_exprs) > 0:
+            program = (
+                "\n".join(
+                    [
+                        "```\n" + expr.text.decode() + "\n```\n"
+                        for expr in input.relevant_global_exprs
+                    ]
+                )
+                + "\n"
+            )
+
+        program += "\n".join(
+            ["```\n" + func.lined_code + "\n```\n" for func in functions]
         )
         prompt = prompt.replace("<PROGRAM>", program)
         return prompt
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index dcfde29..7bd826f 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -89,8 +89,12 @@ def extract_global_info(
         For Javascript, this may include module-level variables.
         Currently not implemented.
         """
+        declaration_types = [
+            "lexical_declaration",
+            "variable_declaration"
+        ]
         for child in tree.root_node.children:
-            if child.type != "lexical_declaration":
+            if child.type not in declaration_types:
                 continue
             
             declarator_node = child.child(1)
@@ -107,7 +111,7 @@ def extract_global_info(
                 global_name = source_code[name_node.start_byte : name_node.end_byte]
                 line = source_code[:name_node.start_byte].count("\n") + 1
                 global_id = len(self.globalsRawDataDic) + 1
-                self.globalsRawDataDic[global_id] = (global_name, line, declarator_node)
+                self.globalsRawDataDic[global_id] = (global_name, line, child)
                 self.globalsToFile[global_id] = file_path
                 
         return
@@ -354,3 +358,24 @@ def get_loop_statements(
                 end_line,
             )
         return loops
+    
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        output_nodes = []
+        children = program_root.children
+        global_expression_types = [
+            "variable_declaration",
+            "lexical_declaration",
+            "expression_statement"
+        ]
+        
+        for child in children:
+            if child.type not in global_expression_types:
+                continue
+            
+            if find_nodes_by_type(child, "identifier")[0].text.decode() == identifier:
+                output_nodes.append(child)
+        
+        return output_nodes
+
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index e0bc473..babb448 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -172,7 +172,7 @@ def __init__(
         self.glb_var_map: Dict[str, str] = {}  # global var info
         self.globalsRawDataDic: Dict[str, Tuple[str, int, Node]] = {}
         self.globalsToFile: Dict[int, str] = {}
-        
+
         self.function_env: Dict[int, Function] = {}
         self.globals_env = {}
         self.api_env: Dict[int, API] = {}
@@ -272,10 +272,11 @@ def parse_project(self) -> None:
                 self.function_env[func_id] = current_function
                 pbar.update(1)
             pbar.close()
-            
-            
+
         # Analyzes extracted global variables
-        pbar = tqdm(total=len(self.globalsRawDataDic), desc="Analyzing Global Variables")
+        pbar = tqdm(
+            total=len(self.globalsRawDataDic), desc="Analyzing Global Variables"
+        )
         for global_id, global_var_tuple in self.globalsRawDataDic.items():
             name = global_var_tuple[0]
             line = global_var_tuple[1]
@@ -283,13 +284,13 @@ def parse_project(self) -> None:
                 name=name,
                 line_number=line,
                 label=ValueLabel.GLOBAL,
-                file=self.globalsToFile[global_id]
+                file=self.globalsToFile[global_id],
             )
-            
+
             self.globals_env[global_id] = value
             pbar.update(1)
         pbar.close()
-            
+
         return
 
     def analyze_call_graph(self) -> None:
@@ -396,7 +397,7 @@ def extract_call_graph_edges(self, current_function: Function) -> None:
 
         all_call_sites = find_nodes_by_type(
             current_function.parse_tree_root_node, call_node_type
-        )        
+        )
         function_call_sites = []
         api_call_sites = []
 
@@ -790,6 +791,50 @@ def get_node_by_line_number(self, line_number: int) -> List[Tuple[str, Node]]:
                     code_node_list.append((function.function_code, node))
         return code_node_list
 
+    def get_function_global_value_reference(
+        self, global_value: Value
+    ) -> Dict[Function, List[Value]]:
+        """
+        Find references to a given global value in all functions
+        belonging to the same source file.
+
+        Args:
+            global_value: The global Value to search for.
+
+        Returns:
+            A dictionary mapping each Function to a list of Value
+            references where the global is used.
+        """
+        file_name = global_value.file
+        references: Dict[Function, List[Value]] = {}
+
+        for _, function in self.function_env.items():
+            if function.file_path != file_name:
+                continue
+
+            identifiers = find_nodes_by_type(
+                function.parse_tree_root_node, "identifier"
+            )
+            for identifier in identifiers:
+                if global_value.name == identifier.text.decode():
+                    line_number = identifier.start_point[0] + 1
+                    ref_value = Value(
+                        global_value.name,
+                        line_number,
+                        ValueLabel.GLOBAL,
+                        function.file_path,
+                        -1,
+                    )
+                    references.setdefault(function, []).append(ref_value)
+
+        return references
+    
+    @abstractmethod
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        pass
+        
     def get_function_from_localvalue(self, value: Value) -> Optional[Function]:
         """
         Retrieve the function corresponding to a local value.
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index e5e1928..0753db9 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -4,10 +4,7 @@
 
 
 class Javascript_NPD_Extractor(DFBScanExtractor):
-    NULLISH_VALUES = {
-        "null",
-        "undefined"
-    }
+    NULLISH_VALUES = {"null", "undefined"}
     BUILTIN_NULLABLE_METHODS = {
         "exec",
         "match",
@@ -26,32 +23,48 @@ class Javascript_NPD_Extractor(DFBScanExtractor):
         "getOwnPropertyDescriptor",
         "stringify",
     }
-    
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
-        target_node = global_declarator_node.child(2)
-        if target_node.type in self.NULLISH_VALUES:
-            return True
-        
-        if target_node.type == "call_expression":
-            member_expression = target_node.child(0)
-            if member_expression is None or member_expression.type != "member_expression":
-                return False
 
-            property_identifier = member_expression.child(2)
-            if property_identifier is None or property_identifier.type != "property_identifier":
-                return False
+    def is_global_source(self, global_declaration_node: Tree) -> bool:
+        global_name = global_declaration_node.child(1).child_by_field_name("name").text
+        sibling = global_declaration_node.next_sibling
 
-            if property_identifier.text.decode() in self.BUILTIN_NULLABLE_METHODS:
+        while sibling is not None:
+            if sibling.type != "expression_statement":
+                sibling = sibling.next_sibling
+                continue
+
+            expr = sibling.child(0)
+            if expr.type != "assignment_expression":
+                sibling = sibling.next_sibling
+                continue
+
+            if expr.child(0).text != global_name:
+                sibling = sibling.next_sibling
+                continue
+
+            value_node = expr.child(2)
+            value_type = value_node.type
+
+            # Nullish constant (e.g. null/undefined)
+            if value_type in self.NULLISH_VALUES:
                 return True
-            
-        return False
-        
-    def is_global_sink(self, global_declarator_node: Tree) -> bool:
-        target_node = global_declarator_node.child(2)
 
-        if target_node.type in {"member_expression", "subscript_expression", "call_expression"}:
-            return True
+            # Possible call expression check
+            if value_type == "call_expression":
+                member_expr = value_node.child(0)
+                if member_expr is not None and member_expr.type == "member_expression":
+                    prop_id = member_expr.child(2)
+                    if (
+                        prop_id is not None
+                        and prop_id.type == "property_identifier"
+                        and prop_id.text.decode() in self.BUILTIN_NULLABLE_METHODS
+                    ):
+                        return True
 
+            return False
+        return False
+
+    def is_global_sink(self, global_declarator_node: Tree) -> bool:
         return False
 
     def extract_sources(self, function: Function) -> List[Value]:
@@ -59,41 +72,53 @@ def extract_sources(self, function: Function) -> List[Value]:
         source_code = self.ts_analyzer.code_in_files[function.file_path]
         file_path = function.file_path
         null_value_nodes = []
-        
+
         for nullish_value in self.NULLISH_VALUES:
             null_value_nodes.extend(find_nodes_by_type(root_node, nullish_value))
-        
+
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
         call_expressions = find_nodes_by_type(root_node, "call_expression")
-        
+
         sources = []
-        
+
         for call_expression in call_expressions:
             member_expression = call_expression.child(0)
-            if member_expression is None or member_expression.type != "member_expression":
+            if (
+                member_expression is None
+                or member_expression.type != "member_expression"
+            ):
                 continue
 
             property_identifier = member_expression.child(2)
-            if property_identifier is None or property_identifier.type != "property_identifier":
+            if (
+                property_identifier is None
+                or property_identifier.type != "property_identifier"
+            ):
                 continue
 
             if property_identifier.text.decode() in self.BUILTIN_NULLABLE_METHODS:
-                line_number = source_code[: property_identifier.start_byte].count("\n") + 1
-                name = source_code[property_identifier.start_byte : property_identifier.end_byte]
+                line_number = (
+                    source_code[: property_identifier.start_byte].count("\n") + 1
+                )
+                name = source_code[
+                    property_identifier.start_byte : property_identifier.end_byte
+                ]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
-        
+
         for unary_expression in unary_expressions:
             operator = unary_expression.child(0)
             if operator is not None and operator.type == "delete":
                 line_number = source_code[: unary_expression.start_byte].count("\n") + 1
-                name = source_code[unary_expression.start_byte : unary_expression.end_byte]
+                name = source_code[
+                    unary_expression.start_byte : unary_expression.end_byte
+                ]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
         for node in null_value_nodes:
             line_number = source_code[: node.start_byte].count("\n") + 1
             name = source_code[node.start_byte : node.end_byte]
             sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
-        
+
         return sources
 
     def extract_sinks(self, function: Function) -> List[Value]:
diff --git a/src/tstool/dfbscan_extractor/dfbscan_extractor.py b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
index 2ca0c17..39519cc 100644
--- a/src/tstool/dfbscan_extractor/dfbscan_extractor.py
+++ b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
@@ -44,11 +44,7 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
             node = self.ts_analyzer.globalsRawDataDic[global_id][2]
             if self.is_global_source(node):
                 global_var.label = ValueLabel.SRC
-                self.sources.append(global_var)
-
-            if self.is_global_sink(node):
-                global_var.label = ValueLabel.SINK
-                self.sinks.append(global_var)
+                self.ts_analyzer.globals_env[global_id] = global_var
 
         pbar.close()
 

From b00c51348c5717537fd4e9174e21728e01bc4211 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Tue, 23 Sep 2025 22:03:16 -0400
Subject: [PATCH 10/23] Briefly documented DFBScanAgent and slightly improved
 prompt for intra-procedural data-flow analysis for JavaScript

---
 src/agent/dfbscan.py                          | 117 ++++++++----------
 src/llmtool/dfbscan/path_validator.py         |   2 +-
 .../dfbscan/intra_dataflow_analyzer.json      |   2 +-
 .../Javascript/dfbscan/path_validator.json    |   2 -
 4 files changed, 54 insertions(+), 69 deletions(-)

diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 52afb74..43caf69 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -603,25 +603,34 @@ def start_scan(self) -> None:
         return
 
     def __process_global_value(self, global_value):
+        """
+        Perform data-flow analysis starting from a global variable.
+
+        1. Finds all functions referencing the global variable.
+        2. Runs intra-procedural data-flow analysis to discover reachable values.
+        3. If the global is marked as a source (SRC), collects potential buggy paths and
+        reports them if confirmed.
+        """
         worklist = []
         reference_in_funcs = self.ts_analyzer.get_function_global_value_reference(
             global_value
         )
         if len(reference_in_funcs) == 0:
             return
+
         initial_context = CallContext(False)
 
+        # Seed worklist with all function references to the global.
         for func, global_references in reference_in_funcs.items():
             for global_reference in global_references:
                 worklist.append((global_reference, func, initial_context))
 
-        # TODO: test intra dataflow analyzer for globals
-        while len(worklist) > 0:
-            (start_value, start_function, call_context) = worklist.pop(0)
+        # Worklist-driven intra-procedural analysis
+        while worklist:
+            start_value, start_function, call_context = worklist.pop(0)
             if len(call_context.context) > self.call_depth:
                 continue
 
-            # Construct the input for intra-procedural data-flow analysis
             sinks_in_function = self.__obtain_extractor().extract_sinks(start_function)
             sink_values = [
                 (sink.name, sink.line_number - start_function.start_line_number + 1)
@@ -641,34 +650,30 @@ def __process_global_value(self, global_value):
 
             ret_values = [
                 (ret.name, ret.line_number - start_function.start_line_number + 1)
-                for ret in (
-                    start_function.retvals if start_function.retvals is not None else []
-                )
+                for ret in (start_function.retvals if start_function.retvals else [])
             ]
 
             df_input = IntraDataFlowAnalyzerInput(
                 start_function, start_value, sink_values, call_statements, ret_values
             )
-
-            # Invoke the intra-procedural data-flow analysis
             df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)
-
             if df_output is None:
                 continue
 
             for path_index in range(len(df_output.reachable_values)):
-                reachable_values_in_single_path = set([])
-                for value in df_output.reachable_values[path_index]:
-                    reachable_values_in_single_path.add((value, call_context))
+                reachable_values_in_single_path = {
+                    (value, call_context)
+                    for value in df_output.reachable_values[path_index]
+                }
                 self.state.update_reachable_values_per_path(
                     (start_value, call_context), reachable_values_in_single_path
                 )
-
                 delta_worklist = self.__update_worklist(
                     df_input, df_output, call_context, path_index
                 )
                 worklist.extend(delta_worklist)
 
+        # Only proceed with bug-path checks if this global is a source
         if global_value.label != ValueLabel.SRC:
             return
 
@@ -678,14 +683,13 @@ def __process_global_value(self, global_value):
                 self.__collect_potential_buggy_paths(
                     global_reference, (global_reference, CallContext(False))
                 )
-
                 if global_reference in self.state.potential_buggy_paths:
                     found_potential_buggy_paths = True
 
-        # If no potential buggy paths are found, return early
         if not found_potential_buggy_paths:
             return
 
+        # Validate each potential buggy path
         for start_value, buggy_paths in self.state.potential_buggy_paths.items():
             for buggy_path in buggy_paths.values():
                 values_to_functions = {
@@ -694,11 +698,11 @@ def __process_global_value(self, global_value):
                 }
 
                 program_root = None
-                functions: Set[Function] = set()
+                functions = set()
                 for func in values_to_functions.values():
-                    if func is not None:
+                    if func:
                         functions.add(func)
-                    if program_root is None:
+                    if program_root is None and func:
                         program_root = func.parse_tree_root_node.parent
 
                 relevant_global_exprs = (
@@ -717,15 +721,11 @@ def __process_global_value(self, global_value):
                     relevant_global_exprs,
                 )
                 pv_output = self.path_validator.invoke(pv_input, PathValidatorOutput)
-
-                if pv_output is None:
-                    continue
-
-                if pv_output.is_reachable:
+                if pv_output and pv_output.is_reachable:
                     relevant_functions = {}
                     for value in buggy_path:
                         function = self.ts_analyzer.get_function_from_localvalue(value)
-                        if function is not None:
+                        if function:
                             relevant_functions[function.function_id] = function
 
                     bug_report = BugReport(
@@ -735,29 +735,36 @@ def __process_global_value(self, global_value):
                         pv_output.explanation_str,
                     )
                     self.state.update_bug_report(bug_report)
+
                     bug_report_dict = {
                         bug_report_id: bug.to_dict()
                         for bug_report_id, bug in self.state.bug_reports.items()
                     }
-
-                    bug_info_file_path = self.res_dir_path + "/detect_info.json"
-                    with open(bug_info_file_path, "w") as f:
+                    with open(self.res_dir_path + "/detect_info.json", "w") as f:
                         json.dump(bug_report_dict, f, indent=4)
 
     def __process_src_value(self, src_value: Value) -> None:
+        """
+        Perform data-flow analysis starting from a local source value.
+
+        1. Locates the function containing the source.
+        2. Performs intra-procedural data-flow analysis to find reachable values.
+        3. Collects and validates potential buggy paths, creating bug reports
+        when confirmed.
+        """
         worklist = []
         src_function = self.ts_analyzer.get_function_from_localvalue(src_value)
         if src_function is None:
             return
-        initial_context = CallContext(False)
 
+        initial_context = CallContext(False)
         worklist.append((src_value, src_function, initial_context))
-        while len(worklist) > 0:
-            (start_value, start_function, call_context) = worklist.pop(0)
+
+        while worklist:
+            start_value, start_function, call_context = worklist.pop(0)
             if len(call_context.context) > self.call_depth:
                 continue
 
-            # Construct the input for intra-procedural data-flow analysis
             sinks_in_function = self.__obtain_extractor().extract_sinks(start_function)
             sink_values = [
                 (sink.name, sink.line_number - start_function.start_line_number + 1)
@@ -777,70 +784,52 @@ def __process_src_value(self, src_value: Value) -> None:
 
             ret_values = [
                 (ret.name, ret.line_number - start_function.start_line_number + 1)
-                for ret in (
-                    start_function.retvals if start_function.retvals is not None else []
-                )
+                for ret in (start_function.retvals if start_function.retvals else [])
             ]
+
             df_input = IntraDataFlowAnalyzerInput(
                 start_function, start_value, sink_values, call_statements, ret_values
             )
-
-            # Invoke the intra-procedural data-flow analysis
             df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)
-
             if df_output is None:
                 continue
 
             for path_index in range(len(df_output.reachable_values)):
-                reachable_values_in_single_path = set([])
-                for value in df_output.reachable_values[path_index]:
-                    reachable_values_in_single_path.add((value, call_context))
+                reachable_values_in_single_path = {
+                    (value, call_context)
+                    for value in df_output.reachable_values[path_index]
+                }
                 self.state.update_reachable_values_per_path(
                     (start_value, call_context), reachable_values_in_single_path
                 )
-
                 delta_worklist = self.__update_worklist(
                     df_input, df_output, call_context, path_index
                 )
                 worklist.extend(delta_worklist)
 
-        # Collect potential buggy paths
+        # Collect and validate buggy paths
         self.__collect_potential_buggy_paths(src_value, (src_value, CallContext(False)))
-
-        # If no potential buggy paths are found, return early
         if src_value not in self.state.potential_buggy_paths:
             return
 
-        # Validate buggy paths and generate bug reports
         for buggy_path in self.state.potential_buggy_paths[src_value].values():
             values_to_functions = {
                 value: self.ts_analyzer.get_function_from_localvalue(value)
                 for value in buggy_path
             }
-
-            functions: Set[Function] = set()
-            for func in values_to_functions.values():
-                if func is not None:
-                    functions.add(func)
-
+            functions = {func for func in values_to_functions.values() if func}
             if self.state.check_existence(src_value, functions):
                 continue
 
             pv_input = PathValidatorInput(
-                self.bug_type,
-                buggy_path,
-                values_to_functions,
+                self.bug_type, buggy_path, values_to_functions
             )
             pv_output = self.path_validator.invoke(pv_input, PathValidatorOutput)
-
-            if pv_output is None:
-                continue
-
-            if pv_output.is_reachable:
+            if pv_output and pv_output.is_reachable:
                 relevant_functions = {}
                 for value in buggy_path:
                     function = self.ts_analyzer.get_function_from_localvalue(value)
-                    if function is not None:
+                    if function:
                         relevant_functions[function.function_id] = function
 
                 bug_report = BugReport(
@@ -850,15 +839,13 @@ def __process_src_value(self, src_value: Value) -> None:
                     pv_output.explanation_str,
                 )
                 self.state.update_bug_report(bug_report)
+
                 bug_report_dict = {
                     bug_report_id: bug.to_dict()
                     for bug_report_id, bug in self.state.bug_reports.items()
                 }
-
-                bug_info_file_path = self.res_dir_path + "/detect_info.json"
-                with open(bug_info_file_path, "w") as f:
+                with open(self.res_dir_path + "/detect_info.json", "w") as f:
                     json.dump(bug_report_dict, f, indent=4)
-        return
 
     def get_agent_state(self) -> DFBScanState:
         return self.state
diff --git a/src/llmtool/dfbscan/path_validator.py b/src/llmtool/dfbscan/path_validator.py
index e36e161..cee6e88 100644
--- a/src/llmtool/dfbscan/path_validator.py
+++ b/src/llmtool/dfbscan/path_validator.py
@@ -68,7 +68,7 @@ def _get_prompt(self, input: LLMToolInput) -> str:
         prompt = prompt_template_dict["task"]
         prompt += "\n" + "\n".join(prompt_template_dict["analysis_rules"])
         prompt += "\n" + "\n".join(prompt_template_dict["analysis_examples"])
-        prompt += "\n" + "".join(prompt_template_dict["meta_prompts"])
+        prompt += "\n" + "\n".join(prompt_template_dict["meta_prompts"])
         prompt = prompt.replace(
             "<ANSWER>", "\n".join(prompt_template_dict["answer_format"])
         ).replace("<QUESTION>", "\n".join(prompt_template_dict["question_template"]))
diff --git a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
index 074e484..2449419 100644
--- a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
+++ b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
@@ -118,7 +118,7 @@
   ],
   "meta_prompts": [
     "Now I will give you a target function with the source point `<SRC_NAME>` at line <SRC_LINE>: \n```\n<FUNCTION>\n``` \n\n",
-    "You may see the following statements as potential sink points. Identify which of these are related to SRC and its aliases;\n",
+    "You may see the following expressions at these line as sink points. Identify which of these are related to SRC and its aliases;\n",
     "<SINK_VALUES>\n",
     "Here are the Function call sites and return statements within the function, which can be used in Step 1;\n",
     "<CALL_STATEMENTS>\n",
diff --git a/src/prompt/Javascript/dfbscan/path_validator.json b/src/prompt/Javascript/dfbscan/path_validator.json
index ae80658..e3ce18b 100644
--- a/src/prompt/Javascript/dfbscan/path_validator.json
+++ b/src/prompt/Javascript/dfbscan/path_validator.json
@@ -82,9 +82,7 @@
   ],
   "meta_prompts": [
     "Now I will provide you with the program:",
-    "```",
     "<PROGRAM>",
-    "```",
     "Please answer the following question:",
     "<QUESTION>",
     "Your answer should follow this format:",

From 327a44f4b8c27f7688dde4f4ab42428c81201c45 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Mon, 29 Sep 2025 19:40:09 -0400
Subject: [PATCH 11/23] Added delete operator based global variable NPD
 detection

---
 benchmark/Javascript/toy/NPD/case05.js        | 19 +---
 benchmark/Javascript/toy/NPD/case06.js        | 24 +++--
 .../Javascript/dfbscan/path_validator.json    |  2 +
 .../Javascript/Javascript_NPD_extractor.py    | 17 +++-
 .../Javascript/Javascript_UAF_extractor.py    | 89 +++++++++++++++++++
 5 files changed, 122 insertions(+), 29 deletions(-)
 create mode 100644 src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py

diff --git a/benchmark/Javascript/toy/NPD/case05.js b/benchmark/Javascript/toy/NPD/case05.js
index f97f1df..ed7739b 100644
--- a/benchmark/Javascript/toy/NPD/case05.js
+++ b/benchmark/Javascript/toy/NPD/case05.js
@@ -1,17 +1,6 @@
-function func(value) {
-    return func2(value);
-}
+var a = console.error;
+delete a.error;
 
-function func2(value) {
-    console.log(+value.prop);
-    delete value.prop;
-    return value;
-}
-
-const printprop = () => {
-	let d = {
-        prop: "1"
-    };
-    d = func(d);
-    console.log(d.prop.length);
+function exec() {
+    a.error();
 }
\ No newline at end of file
diff --git a/benchmark/Javascript/toy/NPD/case06.js b/benchmark/Javascript/toy/NPD/case06.js
index 420a328..9e6053a 100644
--- a/benchmark/Javascript/toy/NPD/case06.js
+++ b/benchmark/Javascript/toy/NPD/case06.js
@@ -1,16 +1,14 @@
-function process_data(myobj) {
-    const inner_processing = (myobj) => {
-        delete myobj.func;
-        return myobj;
+const obj = {
+    greet() {
+        console.log("hello");
     }
-    myobj.func("Hello");
-    myobj = inner_processing(myobj);
-    myobj.func("Hello");
+};
+
+const a = obj;
+
+function exec() {
+    delete a.greet;
+    a.greet();
 }
 
-function main() {
-    let myobj = {
-        func: console.log
-    };
-    process_data(myobj)
-}
\ No newline at end of file
+exec();
diff --git a/src/prompt/Javascript/dfbscan/path_validator.json b/src/prompt/Javascript/dfbscan/path_validator.json
index e3ce18b..44a6e67 100644
--- a/src/prompt/Javascript/dfbscan/path_validator.json
+++ b/src/prompt/Javascript/dfbscan/path_validator.json
@@ -7,9 +7,11 @@
     "Keep the following guidelines in mind:",
     "- If the source in the first function flows to the sink in the last function without any interference, then the path is reachable and your answer should be Yes.",
     "- For NPD detection, if the source value is modified or its null/undefined state is verified (for example, via an explicit check like 'p !== null') before reaching the sink, then the path is unreachable and you should answer No.",
+    "- For NPD detection, if a program is accessing a variable or property that may be null/undefined and the variable/property isn't redefined before it is accessed, then the path is reachable and you should answer Yes.",
     "- If a function exits or returns before the sink or other propagation sites (such as function calls) are reached, the path is unreachable; answer No in such cases.",
     "- If a sink is a call to an object or a function that is builtin in Javascript or defined in the scope, then the path is unreachable; answer No in such cases.",
     "- Analyze conditions within each function: infer the outcome of branch statements and then verify whether the conditions across different sub-paths conflict. If conflicts exist, the overall path is unreachable.",
+    "- If the data flow propagation path only one element, and the element is a sink, then the path is reachable and you should say Yes.",
     "- Consider the values of relevant variables; if those values contradict the necessary branch conditions for triggering the bug, the path is unreachable and you should answer No.",
     "In summary, assess the conditions in every sub-path, check for conflicts, and decide whether the entire propagation path is reachable."
   ],
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 0753db9..06ecfe5 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -29,6 +29,20 @@ def is_global_source(self, global_declaration_node: Tree) -> bool:
         sibling = global_declaration_node.next_sibling
 
         while sibling is not None:
+            # Look for unary_expression nodes whose operator is 'delete'
+            for descendant in sibling.children:
+                if descendant.type == "unary_expression":
+                    operator = descendant.child(0)
+                    if operator and operator.type == "delete":
+                        # The next child should be the expression being deleted
+                        target = descendant.child(1)
+                        if target and target.type == "member_expression":
+                            # Check that the object part of member_expression matches our global variable
+                            obj_node = target.child_by_field_name("object")
+                            if obj_node:
+                                if obj_node.text == global_name:
+                                    return True
+                                
             if sibling.type != "expression_statement":
                 sibling = sibling.next_sibling
                 continue
@@ -78,7 +92,7 @@ def extract_sources(self, function: Function) -> List[Value]:
 
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
         call_expressions = find_nodes_by_type(root_node, "call_expression")
-
+        
         sources = []
 
         for call_expression in call_expressions:
@@ -96,6 +110,7 @@ def extract_sources(self, function: Function) -> List[Value]:
             ):
                 continue
 
+            # If the call expression calls builtin nullable methods
             if property_identifier.text.decode() in self.BUILTIN_NULLABLE_METHODS:
                 line_number = (
                     source_code[: property_identifier.start_byte].count("\n") + 1
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py
new file mode 100644
index 0000000..5edc293
--- /dev/null
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py
@@ -0,0 +1,89 @@
+from tstool.analyzer.TS_analyzer import *
+from tstool.analyzer.Javascript_TS_analyzer import *
+from ..dfbscan_extractor import *
+
+
+class Javascript_UAF_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declaration_node: Node) -> bool:
+        """
+        Determine if this global variable is a UAF source.
+        A UAF source is a global variable that has *one of its attributes*
+        deleted somewhere later at the top-level scope.
+        """
+        # 1. Get the declared variable's name
+        name_node = global_declaration_node.child(1).child_by_field_name("name")
+        if name_node is None:
+            return False
+        global_name = (
+            name_node.text.decode("utf8")
+            if isinstance(name_node.text, bytes)
+            else name_node.text
+        )
+
+        sibling = global_declaration_node.next_sibling
+        while sibling is not None:
+            # Look for unary_expression nodes whose operator is 'delete'
+            for descendant in sibling.children:
+                if descendant.type == "unary_expression":
+                    operator = descendant.child(0)
+                    if operator and operator.type == "delete":
+                        # The next child should be the expression being deleted
+                        target = descendant.child(1)
+                        if target and target.type == "member_expression":
+                            # Check that the object part of member_expression matches our global variable
+                            obj_node = target.child_by_field_name("object")
+                            if obj_node:
+                                obj_name = (
+                                    obj_node.text.decode("utf8")
+                                    if isinstance(obj_node.text, bytes)
+                                    else obj_node.text
+                                )
+                                if obj_name == global_name:
+                                    return True
+            sibling = sibling.next_sibling
+
+        return False
+
+    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        return False
+
+    def extract_sources(self, function: Function) -> List[Value]:
+        root_node = function.parse_tree_root_node
+        source_code = self.ts_analyzer.code_in_files[function.file_path]
+        file_path = function.file_path
+
+        unary_expressions = find_nodes_by_type(root_node, "unary_expression")
+        sources = []
+
+        for unary_expression in unary_expressions:
+            operator = unary_expression.child(0)
+            if operator is not None and operator.type == "delete":
+                line_number = source_code[: unary_expression.start_byte].count("\n") + 1
+                name = source_code[
+                    unary_expression.start_byte : unary_expression.end_byte
+                ]
+                sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
+
+        return sources
+
+    def extract_sinks(self, function: Function) -> List[Value]:
+        """
+        Extract the sinks that can cause the null pointer dereferences from Javascript programs.
+        :param: function: Function object.
+        :return: List of sink values
+        """
+        root_node = function.parse_tree_root_node
+        source_code = self.ts_analyzer.code_in_files[function.file_path]
+        file_path = function.file_path
+
+        nodes = find_nodes_by_type(root_node, "member_expression")
+        nodes.extend(find_nodes_by_type(root_node, "subscript_expression"))
+        nodes.extend(find_nodes_by_type(root_node, "call_expression"))
+        sinks = []
+
+        for node in nodes:
+            first_child = node.children[0]
+            line_number = source_code[: first_child.start_byte].count("\n") + 1
+            name = source_code[first_child.start_byte : first_child.end_byte]
+            sinks.append(Value(name, line_number, ValueLabel.SINK, file_path, -1))
+        return sinks

From 4daa3ab1a1dc7101fdd228269e43efa15ad99457 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Mon, 29 Sep 2025 19:41:57 -0400
Subject: [PATCH 12/23] Deleted javascript UAF detector

---
 .../Javascript/Javascript_UAF_extractor.py    | 89 -------------------
 1 file changed, 89 deletions(-)
 delete mode 100644 src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py

diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py
deleted file mode 100644
index 5edc293..0000000
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_UAF_extractor.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from tstool.analyzer.TS_analyzer import *
-from tstool.analyzer.Javascript_TS_analyzer import *
-from ..dfbscan_extractor import *
-
-
-class Javascript_UAF_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declaration_node: Node) -> bool:
-        """
-        Determine if this global variable is a UAF source.
-        A UAF source is a global variable that has *one of its attributes*
-        deleted somewhere later at the top-level scope.
-        """
-        # 1. Get the declared variable's name
-        name_node = global_declaration_node.child(1).child_by_field_name("name")
-        if name_node is None:
-            return False
-        global_name = (
-            name_node.text.decode("utf8")
-            if isinstance(name_node.text, bytes)
-            else name_node.text
-        )
-
-        sibling = global_declaration_node.next_sibling
-        while sibling is not None:
-            # Look for unary_expression nodes whose operator is 'delete'
-            for descendant in sibling.children:
-                if descendant.type == "unary_expression":
-                    operator = descendant.child(0)
-                    if operator and operator.type == "delete":
-                        # The next child should be the expression being deleted
-                        target = descendant.child(1)
-                        if target and target.type == "member_expression":
-                            # Check that the object part of member_expression matches our global variable
-                            obj_node = target.child_by_field_name("object")
-                            if obj_node:
-                                obj_name = (
-                                    obj_node.text.decode("utf8")
-                                    if isinstance(obj_node.text, bytes)
-                                    else obj_node.text
-                                )
-                                if obj_name == global_name:
-                                    return True
-            sibling = sibling.next_sibling
-
-        return False
-
-    def is_global_sink(self, global_declarator_node: Tree) -> bool:
-        return False
-
-    def extract_sources(self, function: Function) -> List[Value]:
-        root_node = function.parse_tree_root_node
-        source_code = self.ts_analyzer.code_in_files[function.file_path]
-        file_path = function.file_path
-
-        unary_expressions = find_nodes_by_type(root_node, "unary_expression")
-        sources = []
-
-        for unary_expression in unary_expressions:
-            operator = unary_expression.child(0)
-            if operator is not None and operator.type == "delete":
-                line_number = source_code[: unary_expression.start_byte].count("\n") + 1
-                name = source_code[
-                    unary_expression.start_byte : unary_expression.end_byte
-                ]
-                sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
-
-        return sources
-
-    def extract_sinks(self, function: Function) -> List[Value]:
-        """
-        Extract the sinks that can cause the null pointer dereferences from Javascript programs.
-        :param: function: Function object.
-        :return: List of sink values
-        """
-        root_node = function.parse_tree_root_node
-        source_code = self.ts_analyzer.code_in_files[function.file_path]
-        file_path = function.file_path
-
-        nodes = find_nodes_by_type(root_node, "member_expression")
-        nodes.extend(find_nodes_by_type(root_node, "subscript_expression"))
-        nodes.extend(find_nodes_by_type(root_node, "call_expression"))
-        sinks = []
-
-        for node in nodes:
-            first_child = node.children[0]
-            line_number = source_code[: first_child.start_byte].count("\n") + 1
-            name = source_code[first_child.start_byte : first_child.end_byte]
-            sinks.append(Value(name, line_number, ValueLabel.SINK, file_path, -1))
-        return sinks

From 4585345e3f6c5fe8a88f36be0a5187788570c5a9 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Mon, 29 Sep 2025 22:49:13 -0400
Subject: [PATCH 13/23] Briefly refactored Javascript NPD detector

---
 .../Javascript/Javascript_NPD_extractor.py    | 87 +++++++++----------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 06ecfe5..183764c 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -23,58 +23,55 @@ class Javascript_NPD_Extractor(DFBScanExtractor):
         "getOwnPropertyDescriptor",
         "stringify",
     }
+    
+    def is_expression_delete(self, expr: Tree) -> bool:
+        if expr.type == "unary_expression":
+            operator = expr.child(0)
+            if operator and operator.type == "delete":
+                return True
+                
+        return False
+    
+    def is_expression_nullable(self, expr: Tree) -> bool:
+        if expr.type != "assignment_expression":
+            return False
+
+        value_node = expr.child(2)
+        value_type = value_node.type
+
+        # Nullish constant (e.g. null/undefined)
+        if value_type in self.NULLISH_VALUES:
+            return True
+
+        # Possible call expression check
+        if value_type == "call_expression":
+            member_expr = value_node.child(0)
+            if member_expr is not None and member_expr.type == "member_expression":
+                prop_id = member_expr.child(2)
+                if (
+                    prop_id is not None
+                    and prop_id.type == "property_identifier"
+                    and prop_id.text.decode() in self.BUILTIN_NULLABLE_METHODS
+                ):
+                    return True
+                    
 
     def is_global_source(self, global_declaration_node: Tree) -> bool:
         global_name = global_declaration_node.child(1).child_by_field_name("name").text
         sibling = global_declaration_node.next_sibling
 
         while sibling is not None:
-            # Look for unary_expression nodes whose operator is 'delete'
-            for descendant in sibling.children:
-                if descendant.type == "unary_expression":
-                    operator = descendant.child(0)
-                    if operator and operator.type == "delete":
-                        # The next child should be the expression being deleted
-                        target = descendant.child(1)
-                        if target and target.type == "member_expression":
-                            # Check that the object part of member_expression matches our global variable
-                            obj_node = target.child_by_field_name("object")
-                            if obj_node:
-                                if obj_node.text == global_name:
-                                    return True
-                                
-            if sibling.type != "expression_statement":
-                sibling = sibling.next_sibling
-                continue
-
             expr = sibling.child(0)
-            if expr.type != "assignment_expression":
-                sibling = sibling.next_sibling
-                continue
-
-            if expr.child(0).text != global_name:
-                sibling = sibling.next_sibling
-                continue
-
-            value_node = expr.child(2)
-            value_type = value_node.type
-
-            # Nullish constant (e.g. null/undefined)
-            if value_type in self.NULLISH_VALUES:
+            
+            if self.is_expression_delete(expr):
+                # the target object with property being deleted
+                obj_node = expr.child(1).child_by_field_name("object")
+                if obj_node and obj_node.text == global_name:
+                    return True
+                
+            if self.is_expression_nullable(expr) and expr.child(0).text == global_name:
                 return True
 
-            # Possible call expression check
-            if value_type == "call_expression":
-                member_expr = value_node.child(0)
-                if member_expr is not None and member_expr.type == "member_expression":
-                    prop_id = member_expr.child(2)
-                    if (
-                        prop_id is not None
-                        and prop_id.type == "property_identifier"
-                        and prop_id.text.decode() in self.BUILTIN_NULLABLE_METHODS
-                    ):
-                        return True
-
             return False
         return False
 
@@ -95,6 +92,7 @@ def extract_sources(self, function: Function) -> List[Value]:
         
         sources = []
 
+        # Look for call expressions of builtin nullable methods
         for call_expression in call_expressions:
             member_expression = call_expression.child(0)
             if (
@@ -110,7 +108,6 @@ def extract_sources(self, function: Function) -> List[Value]:
             ):
                 continue
 
-            # If the call expression calls builtin nullable methods
             if property_identifier.text.decode() in self.BUILTIN_NULLABLE_METHODS:
                 line_number = (
                     source_code[: property_identifier.start_byte].count("\n") + 1
@@ -120,6 +117,7 @@ def extract_sources(self, function: Function) -> List[Value]:
                 ]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
+        # Look for delete expressions
         for unary_expression in unary_expressions:
             operator = unary_expression.child(0)
             if operator is not None and operator.type == "delete":
@@ -129,6 +127,7 @@ def extract_sources(self, function: Function) -> List[Value]:
                 ]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
+        # Look for nullish value nodes
         for node in null_value_nodes:
             line_number = source_code[: node.start_byte].count("\n") + 1
             name = source_code[node.start_byte : node.end_byte]

From 06e44ae2625a84ce9906eff97c7d1a5cc296d83f Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Sat, 4 Oct 2025 21:52:49 -0400
Subject: [PATCH 14/23] Fixed segmentation fault caused by accessing
 nonexistent child

---
 .../Javascript/Javascript_NPD_extractor.py                  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 183764c..2fe2109 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -61,6 +61,10 @@ def is_global_source(self, global_declaration_node: Tree) -> bool:
         sibling = global_declaration_node.next_sibling
 
         while sibling is not None:
+            if len(sibling.children) == 0:
+                sibling = sibling.next_sibling
+                continue
+
             expr = sibling.child(0)
             
             if self.is_expression_delete(expr):
@@ -72,7 +76,7 @@ def is_global_source(self, global_declaration_node: Tree) -> bool:
             if self.is_expression_nullable(expr) and expr.child(0).text == global_name:
                 return True
 
-            return False
+            sibling = sibling.next_sibling
         return False
 
     def is_global_sink(self, global_declarator_node: Tree) -> bool:

From 3c96b961536680843de150ab5f1ef7159468e046 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Mon, 20 Oct 2025 13:46:42 -0400
Subject: [PATCH 15/23] Implemented extraction of relevant non-local  variables
 and removed detection of potentially nullable builtin functions

---
 .gitmodules                                   |   9 +
 benchmark/Javascript/microlight               |   1 -
 benchmark/Javascript/toy/NPD/case06.js        |  34 ++-
 requirements.txt                              |  19 --
 .../Javascript/dfbscan/path_validator.json    |   1 -
 src/tstool/analyzer/Javascript_TS_analyzer.py | 206 ++++++++++++++++--
 src/tstool/analyzer/TS_analyzer.py            |  32 ++-
 .../Javascript/Javascript_NPD_extractor.py    |  61 +-----
 8 files changed, 243 insertions(+), 120 deletions(-)
 delete mode 160000 benchmark/Javascript/microlight
 delete mode 100644 requirements.txt

diff --git a/.gitmodules b/.gitmodules
index a8da027..f216884 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,3 +13,12 @@
 [submodule "benchmark/Javascript/microlight"]
 	path = benchmark/Javascript/microlight
 	url = https://github.com/asvd/microlight.git
+[submodule "benchmark/Javascript/mocha"]
+	path = benchmark/Javascript/mocha
+	url = https://github.com/mochajs/mocha.git
+[submodule "squish"]
+	path = squish
+	url = https://github.com/shgysk8zer0/squish.git
+[submodule "benchmark/Javascript/squish"]
+	path = benchmark/Javascript/squish
+	url = https://github.com/shgysk8zer0/squish.git
diff --git a/benchmark/Javascript/microlight b/benchmark/Javascript/microlight
deleted file mode 160000
index 8a627ec..0000000
--- a/benchmark/Javascript/microlight
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 8a627ecc1ed37e82d2f48c08204923d4734127b1
diff --git a/benchmark/Javascript/toy/NPD/case06.js b/benchmark/Javascript/toy/NPD/case06.js
index 9e6053a..51654d4 100644
--- a/benchmark/Javascript/toy/NPD/case06.js
+++ b/benchmark/Javascript/toy/NPD/case06.js
@@ -1,14 +1,24 @@
-const obj = {
-    greet() {
-        console.log("hello");
+function main() {
+    const obj = {
+        greet() {
+            let obj = 1;
+            console.log("hello");
+        }
+    };
+    
+    
+    const a = obj;
+    
+    function exec() {
+        var b = null;
+        let c = 1;
+        if (true) {
+            a = b;
+        }
+        for (let i = 0; i < 5; i++) {
+            a.greet();
+        }
     }
-};
-
-const a = obj;
-
-function exec() {
-    delete a.greet;
-    a.greet();
+    
+    exec();
 }
-
-exec();
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 78dfb30..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-black
-tree-sitter>=0.20.0,<0.22.0
-transformers
-torch
-tiktoken
-replicate
-openai
-google-generativeai
-tqdm
-networkx
-streamlit
-botocore
-boto3
-black
-anthropic
-mypy
-types-networkx
-types-tqdm
-boto3-stubs[essential]
diff --git a/src/prompt/Javascript/dfbscan/path_validator.json b/src/prompt/Javascript/dfbscan/path_validator.json
index 44a6e67..989ae21 100644
--- a/src/prompt/Javascript/dfbscan/path_validator.json
+++ b/src/prompt/Javascript/dfbscan/path_validator.json
@@ -7,7 +7,6 @@
     "Keep the following guidelines in mind:",
     "- If the source in the first function flows to the sink in the last function without any interference, then the path is reachable and your answer should be Yes.",
     "- For NPD detection, if the source value is modified or its null/undefined state is verified (for example, via an explicit check like 'p !== null') before reaching the sink, then the path is unreachable and you should answer No.",
-    "- For NPD detection, if a program is accessing a variable or property that may be null/undefined and the variable/property isn't redefined before it is accessed, then the path is reachable and you should answer Yes.",
     "- If a function exits or returns before the sink or other propagation sites (such as function calls) are reached, the path is unreachable; answer No in such cases.",
     "- If a sink is a call to an object or a function that is builtin in Javascript or defined in the scope, then the path is unreachable; answer No in such cases.",
     "- Analyze conditions within each function: infer the outcome of branch statements and then verify whether the conditions across different sub-paths conflict. If conflicts exist, the overall path is unreachable.",
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 7bd826f..0cd7012 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -16,6 +16,156 @@ class Javascript_TSAnalyzer(TSAnalyzer):
     Implements Javascript-specific parsing and analysis.
     """
 
+    def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
+        """
+        Parse source code to extract scope topography
+        :param tree: Parsed syntax tree
+        """
+        scope_stack: List[int] = []
+        scope_id: int = 0
+
+        def search(root: Node) -> None:
+            nonlocal scope_id
+
+            for child in root.children:
+                if child.type == "statement_block":
+                    if len(scope_stack) > 0:
+                        self.scope_env[scope_stack[-1]][1].append(scope_id)
+
+                    self.scope_env[scope_id] = (child, [])
+                    self.scope_root_to_scope_id[child] = scope_id
+                    scope_stack.append(scope_id)
+                    scope_id += 1
+                    search(child)
+                    scope_stack.pop()
+                else:
+                    search(child)
+
+            return
+
+        self.scope_env[scope_id] = (tree.root_node, [])
+        self.scope_root_to_scope_id[tree.root_node] = scope_id
+        scope_stack.append(scope_id)
+        scope_id += 1
+        search(tree.root_node)
+        return
+
+    def extract_nonlocal_info(self) -> None:
+        identifiers_per_scope = dict()
+        for scope_id, scope_data in self.scope_env.items():
+            scope_root, child_scope_ids = scope_data
+            for scope_child in scope_root.children:
+                # Found variables declared with const or let
+                if scope_child.type == "lexical_declaration":
+                    variable_name = (
+                        scope_child.child(1).child_by_field_name("name").text.decode()
+                    )
+                    
+                    reference_found = False
+                    # Determines whether the variable is used in child functions and should be analyzed separately
+                    for child_scope_id in child_scope_ids:
+                        child_scope = self.scope_env[child_scope_id]
+                        child_scope_root, _ = child_scope
+                        
+                        # Skips if the nested scope does not resemble a nested function
+                        if not child_scope_root.parent or (child_scope_root.parent.type != "arrow_function" and child_scope_root.parent.type != "function_declaration"):
+                            continue
+
+                        # Finds all identifier nodes for each scope with memorization
+                        if child_scope_id not in identifiers_per_scope:
+                            identifiers_per_scope[child_scope_id] = find_nodes_by_type(
+                                child_scope_root, "identifier"
+                            )
+
+                        for candidate_node in identifiers_per_scope[child_scope_id]:
+                            # Skip identifiers with different names
+                            if candidate_node.text.decode() != variable_name:
+                                continue
+
+                            # Skip declarations of variables with the same name in the child scopes
+                            if (
+                                candidate_node.parent.type == "variable_declarator"
+                                and candidate_node.parent.child_by_field_name("name")
+                                == candidate_node
+                            ):
+                                continue
+
+                            reference_found = True
+                            break
+                        
+                        if reference_found: break
+
+                    if reference_found:
+                        label = ValueLabel.LOCAL
+                        if scope_root.type == "program":
+                            label = ValueLabel.GLOBAL
+
+                        non_local_value = Value(
+                            variable_name, scope_child.start_point[0] + 1, label, -1
+                        )
+                        self.non_local_to_scope_id[non_local_value] = scope_id
+
+                # Found variables declared with var
+                elif scope_child.type == "variable_declaration":
+                    variable_name = (
+                        scope_child.child(1).child_by_field_name("name").text.decode()
+                    )
+                    
+                    # Finds the enclosing function as variables declared with var are accessible in the entire function
+                    function_root = scope_root
+                    while function_root:
+                        parent = function_root.parent
+                        if parent and (parent.type == "arrow_function" or parent.type == "function_declaration"):
+                            break
+
+                        function_root = parent
+                    
+                    function_scope_id = self.scope_root_to_scope_id[function_root]
+                    reference_found = False
+                    
+                    # Determines whether the variable is used in child functions and should be analyzed separately
+                    for child_scope_id in self.scope_env[function_scope_id][1]:
+                        child_scope = self.scope_env[child_scope_id]
+                        child_scope_root, _ = child_scope
+                        
+                        # Skips if the nested scope does not resemble a nested function
+                        if not child_scope_root.parent or (child_scope_root.parent.type != "arrow_function" and child_scope_root.parent.type != "function_declaration"):
+                            continue
+
+                        # Finds all identifier nodes for each scope with memorization
+                        if child_scope_id not in identifiers_per_scope:
+                            identifiers_per_scope[child_scope_id] = find_nodes_by_type(
+                                child_scope_root, "identifier"
+                            )
+
+                        for candidate_node in identifiers_per_scope[child_scope_id]:
+                            # Skip identifiers with different names
+                            if candidate_node.text.decode() != variable_name:
+                                continue
+
+                            # Skip declarations of variables with the same name in the child scopes
+                            if (
+                                candidate_node.parent.type == "variable_declarator"
+                                and candidate_node.parent.child_by_field_name("name")
+                                == candidate_node
+                            ):
+                                continue
+
+                            reference_found = True
+                            break
+                        
+                        if reference_found: break
+                    
+                    if reference_found:
+                        label = ValueLabel.LOCAL
+                        if scope_root.type == "program":
+                            label = ValueLabel.GLOBAL
+
+                        non_local_value = Value(
+                            variable_name, scope_child.start_point[0] + 1, label, -1
+                        )
+                        self.non_local_to_scope_id[non_local_value] = function_scope_id
+
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
     ) -> None:
@@ -31,7 +181,7 @@ def extract_function_info(
         all_variable_declarator_nodes = find_nodes_by_type(
             tree.root_node, "variable_declarator"
         )
-        
+
         for node in all_function_header_nodes:
             function_name = ""
             for sub_node in node.children:
@@ -57,7 +207,7 @@ def extract_function_info(
             if function_name not in self.functionNameToId:
                 self.functionNameToId[function_name] = set([])
             self.functionNameToId[function_name].add(function_id)
-            
+
         for node in all_variable_declarator_nodes:
             name_node = node.child_by_field_name("name")
             value_node = node.child_by_field_name("value")
@@ -65,20 +215,26 @@ def extract_function_info(
             if not name_node or not value_node:
                 continue
 
-            if value_node.type != "arrow_function" and value_node.type != "function_expression":
+            if (
+                value_node.type != "arrow_function"
+                and value_node.type != "function_expression"
+            ):
                 continue
-            
+
             function_name = source_code[name_node.start_byte : name_node.end_byte]
-            start_line = source_code[:node.start_byte].count("\n") + 1
-            end_line = source_code[:node.end_byte].count("\n") + 1
+            start_line = source_code[: node.start_byte].count("\n") + 1
+            end_line = source_code[: node.end_byte].count("\n") + 1
             function_id = len(self.functionRawDataDic) + 1
 
             self.functionRawDataDic[function_id] = (
-                function_name, start_line, end_line, node
+                function_name,
+                start_line,
+                end_line,
+                node,
             )
             self.functionToFile[function_id] = file_path
             self.functionNameToId.setdefault(function_name, set()).add(function_id)
-        
+
         return
 
     def extract_global_info(
@@ -89,31 +245,34 @@ def extract_global_info(
         For Javascript, this may include module-level variables.
         Currently not implemented.
         """
-        declaration_types = [
-            "lexical_declaration",
-            "variable_declaration"
-        ]
+        declaration_types = ["lexical_declaration", "variable_declaration"]
         for child in tree.root_node.children:
             if child.type not in declaration_types:
                 continue
-            
+
             declarator_node = child.child(1)
-            if declarator_node is not None and declarator_node.type == "variable_declarator":
+            if (
+                declarator_node is not None
+                and declarator_node.type == "variable_declarator"
+            ):
                 name_node = declarator_node.child_by_field_name("name")
                 value_node = declarator_node.child_by_field_name("value")
 
                 if not name_node or not value_node:
                     continue
 
-                if value_node.type == "arrow_function" or value_node.type == "function_expression":
+                if (
+                    value_node.type == "arrow_function"
+                    or value_node.type == "function_expression"
+                ):
                     continue
-                
+
                 global_name = source_code[name_node.start_byte : name_node.end_byte]
-                line = source_code[:name_node.start_byte].count("\n") + 1
+                line = source_code[: name_node.start_byte].count("\n") + 1
                 global_id = len(self.globalsRawDataDic) + 1
                 self.globalsRawDataDic[global_id] = (global_name, line, child)
                 self.globalsToFile[global_id] = file_path
-                
+
         return
 
     def get_callee_name_at_call_site(
@@ -358,7 +517,7 @@ def get_loop_statements(
                 end_line,
             )
         return loops
-    
+
     def get_global_expressions_by_identifier(
         self, identifier: str, program_root: Node
     ) -> List[Node]:
@@ -367,15 +526,14 @@ def get_global_expressions_by_identifier(
         global_expression_types = [
             "variable_declaration",
             "lexical_declaration",
-            "expression_statement"
+            "expression_statement",
         ]
-        
+
         for child in children:
             if child.type not in global_expression_types:
                 continue
-            
+
             if find_nodes_by_type(child, "identifier")[0].text.decode() == identifier:
                 output_nodes.append(child)
-        
-        return output_nodes
 
+        return output_nodes
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index babb448..b8ae6c3 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -174,8 +174,15 @@ def __init__(
         self.globalsToFile: Dict[int, str] = {}
 
         self.function_env: Dict[int, Function] = {}
-        self.globals_env = {}
+        self.globals_env: Dict[int, Value] = {}
+        self.scope_env: Dict[int, Tuple[Node, Set[Dict]]] = {}
         self.api_env: Dict[int, API] = {}
+        
+        # Dictionary storing mapping from the root node of the scope to its scope id
+        self.scope_root_to_scope_id: Dict[Node, int] = {}
+        
+        # Dictionary storing mapping of a non local value to its declarator scope id
+        self.non_local_to_scope_id: Dict[Value, int] = {}
 
         # Results of call graph analysis
         ## Caller-callee relationship between user-defined functions
@@ -206,6 +213,7 @@ def _parse_single_file(self, file_path: str, source_code: str) -> Tuple[str, str
         # Call user-defined processing.
         self.extract_function_info(file_path, source_code, tree)
         self.extract_global_info(file_path, source_code, tree)
+        self.extract_scope_info(tree)
         return file_path, source_code
 
     def _analyze_single_function(
@@ -252,7 +260,9 @@ def parse_project(self) -> None:
                 self.fileContentDic[file_path] = source
                 pbar.update(1)
             pbar.close()
-
+            
+        self.extract_nonlocal_info()
+        
         # Analyzes extracted functions
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.max_symbolic_workers_num
@@ -272,7 +282,7 @@ def parse_project(self) -> None:
                 self.function_env[func_id] = current_function
                 pbar.update(1)
             pbar.close()
-
+            
         # Analyzes extracted global variables
         pbar = tqdm(
             total=len(self.globalsRawDataDic), desc="Analyzing Global Variables"
@@ -320,6 +330,20 @@ def analyze_call_graph(self) -> None:
     ###########################################
     # Helper function for project AST parsing #
     ###########################################
+    @abstractmethod
+    def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
+        """
+        Parse source code to extract scope topography
+        :param tree: Parsed syntax tree
+        """
+    
+    @abstractmethod
+    def extract_nonlocal_info(self) -> None:
+        """
+        Traverse the scopes to identify declarations of non locals
+        """
+        pass
+    
     @abstractmethod
     def extract_function_info(
         self, file_path: str, source_code: str, tree: Tree
@@ -427,7 +451,7 @@ def extract_call_graph_edges(self, current_function: Function) -> None:
                 tmp_api = API(-1, callee_name, len(arguments))
 
                 # Insert the API into the API environment if it does not exist previously
-                for single_api_id in self.api_env:
+                for single_api_id in list(self.api_env):
                     if self.api_env[single_api_id] == tmp_api:
                         api_id = single_api_id
                 if api_id == None:
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 2fe2109..8dd5d92 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -5,24 +5,6 @@
 
 class Javascript_NPD_Extractor(DFBScanExtractor):
     NULLISH_VALUES = {"null", "undefined"}
-    BUILTIN_NULLABLE_METHODS = {
-        "exec",
-        "match",
-        "matchAll",
-        "getElementById",
-        "querySelector",
-        "querySelectorAll",
-        "getElementsByClassName",
-        "getElementsByTagName",
-        "getAttribute",
-        "find",
-        "findIndex",
-        "pop",
-        "shift",
-        "get",
-        "getOwnPropertyDescriptor",
-        "stringify",
-    }
     
     def is_expression_delete(self, expr: Tree) -> bool:
         if expr.type == "unary_expression":
@@ -32,7 +14,7 @@ def is_expression_delete(self, expr: Tree) -> bool:
                 
         return False
     
-    def is_expression_nullable(self, expr: Tree) -> bool:
+    def is_expression_null(self, expr: Tree) -> bool:
         if expr.type != "assignment_expression":
             return False
 
@@ -43,19 +25,6 @@ def is_expression_nullable(self, expr: Tree) -> bool:
         if value_type in self.NULLISH_VALUES:
             return True
 
-        # Possible call expression check
-        if value_type == "call_expression":
-            member_expr = value_node.child(0)
-            if member_expr is not None and member_expr.type == "member_expression":
-                prop_id = member_expr.child(2)
-                if (
-                    prop_id is not None
-                    and prop_id.type == "property_identifier"
-                    and prop_id.text.decode() in self.BUILTIN_NULLABLE_METHODS
-                ):
-                    return True
-                    
-
     def is_global_source(self, global_declaration_node: Tree) -> bool:
         global_name = global_declaration_node.child(1).child_by_field_name("name").text
         sibling = global_declaration_node.next_sibling
@@ -73,7 +42,7 @@ def is_global_source(self, global_declaration_node: Tree) -> bool:
                 if obj_node and obj_node.text == global_name:
                     return True
                 
-            if self.is_expression_nullable(expr) and expr.child(0).text == global_name:
+            if self.is_expression_null(expr) and expr.child(0).text == global_name:
                 return True
 
             sibling = sibling.next_sibling
@@ -92,35 +61,9 @@ def extract_sources(self, function: Function) -> List[Value]:
             null_value_nodes.extend(find_nodes_by_type(root_node, nullish_value))
 
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
-        call_expressions = find_nodes_by_type(root_node, "call_expression")
         
         sources = []
 
-        # Look for call expressions of builtin nullable methods
-        for call_expression in call_expressions:
-            member_expression = call_expression.child(0)
-            if (
-                member_expression is None
-                or member_expression.type != "member_expression"
-            ):
-                continue
-
-            property_identifier = member_expression.child(2)
-            if (
-                property_identifier is None
-                or property_identifier.type != "property_identifier"
-            ):
-                continue
-
-            if property_identifier.text.decode() in self.BUILTIN_NULLABLE_METHODS:
-                line_number = (
-                    source_code[: property_identifier.start_byte].count("\n") + 1
-                )
-                name = source_code[
-                    property_identifier.start_byte : property_identifier.end_byte
-                ]
-                sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
-
         # Look for delete expressions
         for unary_expression in unary_expressions:
             operator = unary_expression.child(0)

From 029b254f38c60d63f5f5d84ca426955098560427 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Sat, 25 Oct 2025 17:24:48 -0400
Subject: [PATCH 16/23] Extended __process_src_value for non local variable
 analysis - Non local variable assignments are now considered as part of the
 propagation details. - Non local variable assignments are now considered as a
 side effect and all caller functions that invoked the non-local-changing
 function would be added to the worklist for dataflow analysis. - Adjusted
 potential buggy path detection to include non local variable assignments.

These changes enable Repoaudit to detect NPD-style bugs in Javascript that involve non-local variables (see benchmark/Javascript/toy/NPD/case06.js for an example).
---
 benchmark/Javascript/toy/NPD/case06.js        |  45 ++--
 src/agent/dfbscan.py                          | 254 ++++++++----------
 .../dfbscan/intra_dataflow_analyzer.py        |  15 ++
 src/memory/syntactic/value.py                 |   5 +-
 .../dfbscan/intra_dataflow_analyzer.json      |  12 +-
 src/tstool/analyzer/Javascript_TS_analyzer.py |  81 ++++--
 src/tstool/analyzer/TS_analyzer.py            |   4 +-
 7 files changed, 217 insertions(+), 199 deletions(-)

diff --git a/benchmark/Javascript/toy/NPD/case06.js b/benchmark/Javascript/toy/NPD/case06.js
index 51654d4..1bb5f1a 100644
--- a/benchmark/Javascript/toy/NPD/case06.js
+++ b/benchmark/Javascript/toy/NPD/case06.js
@@ -1,24 +1,25 @@
-function main() {
-    const obj = {
-        greet() {
-            let obj = 1;
-            console.log("hello");
-        }
-    };
-    
-    
-    const a = obj;
-    
-    function exec() {
-        var b = null;
-        let c = 1;
-        if (true) {
-            a = b;
-        }
-        for (let i = 0; i < 5; i++) {
-            a.greet();
-        }
+const obj = {
+    greet() {
+        let obj = 1;
+        console.log("hello");
     }
-    
-    exec();
+};
+
+
+const a = obj;
+
+function call(items) {
+    a = items;
 }
+
+const exec = function () {
+    var b = null;
+    let c = 1;
+    call(b);
+
+    for (let i = 0; i < 5; i++) {
+        a.greet();
+    }
+}
+
+exec();
\ No newline at end of file
diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 43caf69..2432897 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -192,6 +192,85 @@ def __update_worklist(
                                     set({(para, new_call_context)}),
                                 )
 
+            if value.label == ValueLabel.NONLOCAL:
+                # Consider side effect.
+                # Example: the non local variable g is used in the function g = null;
+                # We need to consider the side effect of g.
+                caller_functions = self.ts_analyzer.get_all_caller_functions(function)
+
+                if caller_functions:
+                    for caller_function in caller_functions:
+                        new_call_context = copy.deepcopy(call_context)
+
+                        top_unmatched_context_label = (
+                            new_call_context.get_top_unmatched_context_label()
+                        )
+
+                        call_site_nodes = self.ts_analyzer.get_callsites_by_callee_name(
+                            caller_function, function.function_name
+                        )
+                        for call_site_node in call_site_nodes:
+                            caller_function_file_name = self.ts_analyzer.functionToFile[
+                                caller_function.function_id
+                            ]
+                            file_content = self.ts_analyzer.code_in_files[
+                                caller_function_file_name
+                            ]
+                            call_site_lower_line_number = (
+                                file_content[: call_site_node.start_byte].count("\n")
+                                + 1
+                            )
+
+                            if top_unmatched_context_label is not None:
+                                if (
+                                    top_unmatched_context_label.parenthesis
+                                    == Parenthesis.LEFT_PAR
+                                ):
+                                    if (
+                                        call_site_lower_line_number
+                                        != top_unmatched_context_label.line_number
+                                        or caller_function_file_name
+                                        != top_unmatched_context_label.file_name
+                                        or top_unmatched_context_label.function_id
+                                        != function.function_id
+                                    ):
+                                        continue
+
+                            append_context_label = ContextLabel(
+                                caller_function_file_name,
+                                call_site_lower_line_number,
+                                function.function_id,
+                                Parenthesis.RIGHT_PAR,
+                            )
+                            new_value = Value(
+                                value.name,
+                                call_site_node.start_point[0] + 1,
+                                ValueLabel.NONLOCAL,
+                                value.file,
+                            )
+
+                            new_call_context.add_and_check_context(append_context_label)
+
+                            delta_worklist.append(
+                                (
+                                    new_value,
+                                    caller_function,
+                                    new_call_context,
+                                )
+                            )
+                            self.state.update_external_value_match(
+                                (value, call_context),
+                                set({(new_value, new_call_context)}),
+                            )
+                else:
+                    delta_worklist.append(
+                        (
+                            value,
+                            function,
+                            call_context,
+                        )
+                    )
+
             if value.label == ValueLabel.PARA:
                 # Consider side-effect.
                 # Example: the parameter *p is used in the function: p->f = null;
@@ -379,6 +458,7 @@ def __collect_potential_buggy_paths(
                         ValueLabel.RET,
                         ValueLabel.ARG,
                         ValueLabel.OUT,
+                        ValueLabel.NONLOCAL,
                     }:
                         # For other propagation types, check further external matches.
                         if (value, ctx) in external_match_snapshot:
@@ -568,9 +648,6 @@ def start_scan(self) -> None:
         # Total number of source values
         total_src_values = len(self.src_values)
 
-        for global_value in self.ts_analyzer.globals_env.values():
-            self.__process_global_value(global_value)
-
         # Process each source value in parallel with a progress bar
         with tqdm(
             total=total_src_values, desc="Processing Source Values", unit="src"
@@ -602,147 +679,6 @@ def start_scan(self) -> None:
             self.logger.print_console(log_file)
         return
 
-    def __process_global_value(self, global_value):
-        """
-        Perform data-flow analysis starting from a global variable.
-
-        1. Finds all functions referencing the global variable.
-        2. Runs intra-procedural data-flow analysis to discover reachable values.
-        3. If the global is marked as a source (SRC), collects potential buggy paths and
-        reports them if confirmed.
-        """
-        worklist = []
-        reference_in_funcs = self.ts_analyzer.get_function_global_value_reference(
-            global_value
-        )
-        if len(reference_in_funcs) == 0:
-            return
-
-        initial_context = CallContext(False)
-
-        # Seed worklist with all function references to the global.
-        for func, global_references in reference_in_funcs.items():
-            for global_reference in global_references:
-                worklist.append((global_reference, func, initial_context))
-
-        # Worklist-driven intra-procedural analysis
-        while worklist:
-            start_value, start_function, call_context = worklist.pop(0)
-            if len(call_context.context) > self.call_depth:
-                continue
-
-            sinks_in_function = self.__obtain_extractor().extract_sinks(start_function)
-            sink_values = [
-                (sink.name, sink.line_number - start_function.start_line_number + 1)
-                for sink in sinks_in_function
-            ]
-
-            call_statements = []
-            for call_site_node in start_function.function_call_site_nodes:
-                file_content = self.ts_analyzer.code_in_files[start_function.file_path]
-                call_site_line_number = (
-                    file_content[: call_site_node.start_byte].count("\n") + 1
-                )
-                call_site_name = file_content[
-                    call_site_node.start_byte : call_site_node.end_byte
-                ]
-                call_statements.append((call_site_name, call_site_line_number))
-
-            ret_values = [
-                (ret.name, ret.line_number - start_function.start_line_number + 1)
-                for ret in (start_function.retvals if start_function.retvals else [])
-            ]
-
-            df_input = IntraDataFlowAnalyzerInput(
-                start_function, start_value, sink_values, call_statements, ret_values
-            )
-            df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)
-            if df_output is None:
-                continue
-
-            for path_index in range(len(df_output.reachable_values)):
-                reachable_values_in_single_path = {
-                    (value, call_context)
-                    for value in df_output.reachable_values[path_index]
-                }
-                self.state.update_reachable_values_per_path(
-                    (start_value, call_context), reachable_values_in_single_path
-                )
-                delta_worklist = self.__update_worklist(
-                    df_input, df_output, call_context, path_index
-                )
-                worklist.extend(delta_worklist)
-
-        # Only proceed with bug-path checks if this global is a source
-        if global_value.label != ValueLabel.SRC:
-            return
-
-        found_potential_buggy_paths = False
-        for func, global_references in reference_in_funcs.items():
-            for global_reference in global_references:
-                self.__collect_potential_buggy_paths(
-                    global_reference, (global_reference, CallContext(False))
-                )
-                if global_reference in self.state.potential_buggy_paths:
-                    found_potential_buggy_paths = True
-
-        if not found_potential_buggy_paths:
-            return
-
-        # Validate each potential buggy path
-        for start_value, buggy_paths in self.state.potential_buggy_paths.items():
-            for buggy_path in buggy_paths.values():
-                values_to_functions = {
-                    value: self.ts_analyzer.get_function_from_localvalue(value)
-                    for value in buggy_path
-                }
-
-                program_root = None
-                functions = set()
-                for func in values_to_functions.values():
-                    if func:
-                        functions.add(func)
-                    if program_root is None and func:
-                        program_root = func.parse_tree_root_node.parent
-
-                relevant_global_exprs = (
-                    self.ts_analyzer.get_global_expressions_by_identifier(
-                        global_value.name, program_root
-                    )
-                )
-
-                if self.state.check_existence(start_value, functions):
-                    continue
-
-                pv_input = PathValidatorInput(
-                    self.bug_type,
-                    buggy_path,
-                    values_to_functions,
-                    relevant_global_exprs,
-                )
-                pv_output = self.path_validator.invoke(pv_input, PathValidatorOutput)
-                if pv_output and pv_output.is_reachable:
-                    relevant_functions = {}
-                    for value in buggy_path:
-                        function = self.ts_analyzer.get_function_from_localvalue(value)
-                        if function:
-                            relevant_functions[function.function_id] = function
-
-                    bug_report = BugReport(
-                        self.bug_type,
-                        start_value,
-                        relevant_functions,
-                        pv_output.explanation_str,
-                    )
-                    self.state.update_bug_report(bug_report)
-
-                    bug_report_dict = {
-                        bug_report_id: bug.to_dict()
-                        for bug_report_id, bug in self.state.bug_reports.items()
-                    }
-                    with open(self.res_dir_path + "/detect_info.json", "w") as f:
-                        json.dump(bug_report_dict, f, indent=4)
-
     def __process_src_value(self, src_value: Value) -> None:
         """
         Perform data-flow analysis starting from a local source value.
@@ -787,8 +723,36 @@ def __process_src_value(self, src_value: Value) -> None:
                 for ret in (start_function.retvals if start_function.retvals else [])
             ]
 
+            non_local_list = []
+            function_block_node = None
+            if start_function.parse_tree_root_node.type == "function_declaration":
+                function_block_node = start_function.parse_tree_root_node.child(3)
+            elif start_function.parse_tree_root_node.type == "variable_declarator":
+                function_block_node = start_function.parse_tree_root_node.child(
+                    2
+                ).child(2)
+
+            if (
+                function_block_node
+                and function_block_node in self.ts_analyzer.scope_root_to_scope_id
+            ):
+                function_scope_id = self.ts_analyzer.scope_root_to_scope_id[
+                    function_block_node
+                ]
+                non_local_list = [
+                    (value.name, value.line_number)
+                    for value in self.ts_analyzer.child_scope_id_to_non_locals[
+                        function_scope_id
+                    ]
+                ]
+
             df_input = IntraDataFlowAnalyzerInput(
-                start_function, start_value, sink_values, call_statements, ret_values
+                start_function,
+                start_value,
+                sink_values,
+                call_statements,
+                ret_values,
+                non_local_list,
             )
             df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)
             if df_output is None:
diff --git a/src/llmtool/dfbscan/intra_dataflow_analyzer.py b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
index b26b54b..e820281 100644
--- a/src/llmtool/dfbscan/intra_dataflow_analyzer.py
+++ b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
@@ -19,12 +19,14 @@ def __init__(
         sink_values: List[Tuple[str, int]],
         call_statements: List[Tuple[str, int]],
         ret_values: List[Tuple[str, int]],
+        non_locals: List[Value]
     ) -> None:
         self.function = function
         self.summary_start = summary_start
         self.sink_values = sink_values
         self.call_statements = call_statements
         self.ret_values = ret_values
+        self.non_locals = non_locals
         return
 
     def __hash__(self) -> int:
@@ -108,6 +110,15 @@ def _get_prompt(self, input: LLMToolInput) -> str:
         for ret_val in input.ret_values:
             rets_str += f"- {ret_val[0]} at line {ret_val[1]}\n"
         prompt = prompt.replace("<RETURN_VALUES>", rets_str)
+        
+        if input.non_locals:
+            non_local_str = "Non local variables relevant to this function:"
+            for non_local in input.non_locals:
+                non_local_str += f"- {non_local[0]} at line {non_local[1]}\n"
+            prompt = prompt.replace("<NONLOCAL_VALUES>", non_local_str)
+        else:
+            prompt = prompt.replace("<NONLOCAL_VALUES>", "")
+        
         return prompt
 
     def _parse_response(
@@ -223,6 +234,10 @@ def _parse_response(
                     reachable_values_per_path.add(
                         Value(detail["name"], line_number, ValueLabel.SINK, file_path)
                     )
+                elif detail["type"] == "Nonlocal":
+                    reachable_values_per_path.add(
+                        Value(detail["name"], line_number, ValueLabel.NONLOCAL, file_path)
+                    )
             reachable_values.append(reachable_values_per_path)
 
         output = IntraDataFlowAnalyzerOutput(reachable_values)
diff --git a/src/memory/syntactic/value.py b/src/memory/syntactic/value.py
index 61e439f..f46d816 100644
--- a/src/memory/syntactic/value.py
+++ b/src/memory/syntactic/value.py
@@ -15,7 +15,8 @@ class ValueLabel(Enum):
     NON_BUF_ACCESS_EXPR = 8  # non-buffer access
 
     LOCAL = 9
-    GLOBAL = 10
+    NONLOCAL = 10
+    GLOBAL = 11
 
     def __str__(self) -> str:
         mapping = {
@@ -28,6 +29,7 @@ def __str__(self) -> str:
             ValueLabel.BUF_ACCESS_EXPR: "ValueLabel.BUF_ACCESS_EXPR",
             ValueLabel.NON_BUF_ACCESS_EXPR: "ValueLabel.NON_BUF_ACCESS_EXPR",
             ValueLabel.LOCAL: "ValueLabel.LOCAL",
+            ValueLabel.NONLOCAL: "ValueLabel.NONLOCAL",
             ValueLabel.GLOBAL: "ValueLabel.GLOBAL",
         }
         return mapping[self]
@@ -44,6 +46,7 @@ def from_str(s: str):
             "ValueLabel.BUF_ACCESS_EXPR": ValueLabel.BUF_ACCESS_EXPR,
             "ValueLabel.NON_BUF_ACCESS_EXPR": ValueLabel.NON_BUF_ACCESS_EXPR,
             "ValueLabel.LOCAL": ValueLabel.LOCAL,
+            "ValueLabel.NONLOCAL": ValueLabel.NONLOCAL,
             "ValueLabel.GLOBAL": ValueLabel.GLOBAL,
         }
         try:
diff --git a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
index 2449419..ab183d6 100644
--- a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
+++ b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
@@ -4,11 +4,12 @@
   "system_role": "You are a Javascript programmer and very good at analyzing Javascript code. Particularly, you excel at understanding individual Javascript functions and their data flow relationships.",
   "task": "Given a specific source variable/parameter/expression (denoted as SRC) at a specific line (denoted as L1), analyze the execution flows of the given function and determine the variables to which SRC can propagate.",
   "analysis_rules": [
-    "The key principle for answering this question is to extract all execution paths related to SRC and simulate the function's execution along each path to determine where SRC propagates. In Javascript, SRC can propagate to four possible locations:",
+    "The key principle for answering this question is to extract all execution paths related to SRC and simulate the function's execution along each path to determine where SRC propagates. In Javascript, SRC can propagate to five possible locations:",
     "1. Function Calls: SRC propagates to a call site where it is passed as an argument to a callee function within the current function.",
     "2. Return Statements: SRC propagates to a return statement, returning a value to the caller of the current function.",
     "3. Function Parameters: SRC propagates to a parameter of the current function and can be referenced in the caller function, since objects are passed by reference.",
     "4. Sink variables: SRC reaches one of the predefined sink variables provided in the input.",
+    "5. Non local variable assignment: SRC propagates its value to a predefined non local variable.",
     "If SRC is referenced by function parameters, it can propagate beyond the function scope after the function exits, due to object references being shared between caller and callee. For example, if function goo passes an object base to its callee function foo, and foo(obj: Base) { obj = SRC; }, then the caller function goo can access the updated state of SRC through the object base.",
     "To conduct the analysis, follow these three steps:",
     "",
@@ -17,6 +18,7 @@
     "  2. Function Invocations: Call sites where SRC is passed as an argument.",
     "  3. Return Statements: Points where the function returns, possibly propagating SRC.",
     "  4. Parameter Assignments: Assignments where SRC is assigned to a parameter or an object field that is accessible outside the function.",
+    "  5. Non local variable assignment: Assignments where SRC is assigned to a predefined non local variable.",
     "- Step 2: Identify all execution paths relevant to the key points found in Step 1. For each path:",
     "  - Identify every potential execution path;",
     "  - Verify whether the key points are executed along each path;",
@@ -28,7 +30,8 @@
     "- If there are potential race conditions, for example, calling an async function without await, differentiate the control paths to consider the race condition;",
     "- Expand the first iteration of loops to analyze nested execution paths;",
     "- Treat each conditional branch (if, switch) as a separate execution path;",
-    "- Expand nested conditions and loops to ensure all paths are analyzed."
+    "- Expand nested conditions and loops to ensure all paths are analyzed.",
+    "- For non local variables, they can be defined anywhere outside the function and you should not assume any information about its declaration (including line numbers of their declaration or their initial values)."
   ],
   "analysis_examples": [
     "Example 1: Propagation via Sink, Function call, and Return",
@@ -110,11 +113,13 @@
     "    - For a return propagation: 'Type: Return; Name: {return name}; Function: None; Index: {return value index}; Line: {return statement line number}; Dependency: {summary of dependency from SRC to return value}';",
     "    - For parameter propagation: 'Type: Parameter; Name: {parameter name}; Function: None; Index: {parameter index}; Line: {assignment line number}; Dependency: {summary of dependency from SRC to parameter}';",
     "    - For sink propagation: 'Type: Sink; Name: {sink name}; Function: None; Index: None; Line: {sink statement line number}; Dependency: {summary of dependency from SRC to sink}';",
+    "    - For non local variable assignment: 'Type: Nonlocal; Name: {non local name}; Function: None; Index: None; Line: {assignment statement line number}; Dependency: {summary of dependency from SRC to assignment}';",
     "(4) If there is no propagation along a path, provide a brief explanation of why SRC does not propagate in that path as follows:",
     "- Path <Path Number>: <Execution Path>;",
     "    - No propagation; Dependency: {reason for no propagation};",
     "(5) Each Execution Path should start with the word \"Lines\", with each line number separated by \" -> \" and ended with a semicolon.",
-    "(5) Remember: All the indexes start from 0 instead of 1. If there is only one return value, the index is 0."
+    "(6) Remember: All the indexes start from 0 instead of 1. If there is only one return value, the index is 0.",
+    "(7) Remember: For non local variable assignment, only list 'Type: Nonlocal; Name: {non local name}; Function: None; Index: None; Line: {assignment statement line number}; Dependency: {summary of dependency from SRC to assignment}' if assignments are explicitly stated in the function code (Eg. non_local = something)."
   ],
   "meta_prompts": [
     "Now I will give you a target function with the source point `<SRC_NAME>` at line <SRC_LINE>: \n```\n<FUNCTION>\n``` \n\n",
@@ -123,6 +128,7 @@
     "Here are the Function call sites and return statements within the function, which can be used in Step 1;\n",
     "<CALL_STATEMENTS>\n",
     "<RETURN_VALUES>\n",
+    "<NONLOCAL_VALUES>",
     "Now, please answer the following question:\n<QUESTION>\n",
     "Your response should strictly follow the format:\n<ANSWER>\n"
   ]
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 0cd7012..5e7c8fe 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -60,15 +60,27 @@ def extract_nonlocal_info(self) -> None:
                     variable_name = (
                         scope_child.child(1).child_by_field_name("name").text.decode()
                     )
-                    
+
+                    label = ValueLabel.LOCAL
+                    if scope_root.type == "program":
+                        label = ValueLabel.GLOBAL
+
+                    non_local_value = Value(
+                        variable_name, scope_child.start_point[0] + 1, label, -1
+                    )
+
                     reference_found = False
                     # Determines whether the variable is used in child functions and should be analyzed separately
                     for child_scope_id in child_scope_ids:
                         child_scope = self.scope_env[child_scope_id]
                         child_scope_root, _ = child_scope
-                        
+
                         # Skips if the nested scope does not resemble a nested function
-                        if not child_scope_root.parent or (child_scope_root.parent.type != "arrow_function" and child_scope_root.parent.type != "function_declaration"):
+                        if not child_scope_root.parent or (
+                            child_scope_root.parent.type != "arrow_function"
+                            and child_scope_root.parent.type != "function_declaration"
+                            and child_scope_root.parent.type != "function_expression"
+                        ):
                             continue
 
                         # Finds all identifier nodes for each scope with memorization
@@ -91,9 +103,15 @@ def extract_nonlocal_info(self) -> None:
                                 continue
 
                             reference_found = True
-                            break
-                        
-                        if reference_found: break
+
+                            if child_scope_id not in self.child_scope_id_to_non_locals:
+                                self.child_scope_id_to_non_locals[child_scope_id] = {
+                                    non_local_value
+                                }
+                            else:
+                                self.child_scope_id_to_non_locals[child_scope_id].add(
+                                    non_local_value
+                                )
 
                     if reference_found:
                         label = ValueLabel.LOCAL
@@ -103,33 +121,48 @@ def extract_nonlocal_info(self) -> None:
                         non_local_value = Value(
                             variable_name, scope_child.start_point[0] + 1, label, -1
                         )
-                        self.non_local_to_scope_id[non_local_value] = scope_id
-
+                        
                 # Found variables declared with var
                 elif scope_child.type == "variable_declaration":
                     variable_name = (
                         scope_child.child(1).child_by_field_name("name").text.decode()
                     )
-                    
+
+                    label = ValueLabel.LOCAL
+                    if scope_root.type == "program":
+                        label = ValueLabel.GLOBAL
+
+                    non_local_value = Value(
+                        variable_name, scope_child.start_point[0] + 1, label, -1
+                    )
+
                     # Finds the enclosing function as variables declared with var are accessible in the entire function
                     function_root = scope_root
                     while function_root:
                         parent = function_root.parent
-                        if parent and (parent.type == "arrow_function" or parent.type == "function_declaration"):
+                        if parent and (
+                            parent.type == "arrow_function"
+                            or parent.type == "function_declaration"
+                            or parent.type == "function_expression"
+                        ):
                             break
 
                         function_root = parent
-                    
+
                     function_scope_id = self.scope_root_to_scope_id[function_root]
                     reference_found = False
-                    
+
                     # Determines whether the variable is used in child functions and should be analyzed separately
                     for child_scope_id in self.scope_env[function_scope_id][1]:
                         child_scope = self.scope_env[child_scope_id]
                         child_scope_root, _ = child_scope
-                        
+
                         # Skips if the nested scope does not resemble a nested function
-                        if not child_scope_root.parent or (child_scope_root.parent.type != "arrow_function" and child_scope_root.parent.type != "function_declaration"):
+                        if not child_scope_root.parent or (
+                            child_scope_root.parent.type != "arrow_function"
+                            and child_scope_root.parent.type != "function_declaration"
+                            and child_scope_root.parent.type != "function_expression"
+                        ):
                             continue
 
                         # Finds all identifier nodes for each scope with memorization
@@ -152,19 +185,15 @@ def extract_nonlocal_info(self) -> None:
                                 continue
 
                             reference_found = True
-                            break
-                        
-                        if reference_found: break
-                    
-                    if reference_found:
-                        label = ValueLabel.LOCAL
-                        if scope_root.type == "program":
-                            label = ValueLabel.GLOBAL
 
-                        non_local_value = Value(
-                            variable_name, scope_child.start_point[0] + 1, label, -1
-                        )
-                        self.non_local_to_scope_id[non_local_value] = function_scope_id
+                            if child_scope_id not in self.child_scope_id_to_non_locals:
+                                self.child_scope_id_to_non_locals[child_scope_id] = {
+                                    non_local_value
+                                }
+                            else:
+                                self.child_scope_id_to_non_locals[child_scope_id].add(
+                                    non_local_value
+                                )
 
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index b8ae6c3..a6f27d1 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -181,8 +181,8 @@ def __init__(
         # Dictionary storing mapping from the root node of the scope to its scope id
         self.scope_root_to_scope_id: Dict[Node, int] = {}
         
-        # Dictionary storing mapping of a non local value to its declarator scope id
-        self.non_local_to_scope_id: Dict[Value, int] = {}
+        # Dictionary storing mapping from a scope id to all the non locals it is depended on
+        self.child_scope_id_to_non_locals: Dict[int, Set[Value]] = {}
 
         # Results of call graph analysis
         ## Caller-callee relationship between user-defined functions

From 1173bb0172a4c9c3864849c98e6425d7111b25c9 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Sat, 25 Oct 2025 18:02:15 -0400
Subject: [PATCH 17/23] Fixed bugs in non local extraction

---
 src/agent/dfbscan.py                          | 11 ++++++-----
 src/tstool/analyzer/Javascript_TS_analyzer.py |  8 +++++++-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 2432897..122e72a 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -739,12 +739,13 @@ def __process_src_value(self, src_value: Value) -> None:
                 function_scope_id = self.ts_analyzer.scope_root_to_scope_id[
                     function_block_node
                 ]
-                non_local_list = [
-                    (value.name, value.line_number)
-                    for value in self.ts_analyzer.child_scope_id_to_non_locals[
-                        function_scope_id
+                if function_scope_id in self.ts_analyzer.child_scope_id_to_non_locals:
+                    non_local_list = [
+                        (value.name, value.line_number)
+                        for value in self.ts_analyzer.child_scope_id_to_non_locals[
+                            function_scope_id
+                        ]
                     ]
-                ]
 
             df_input = IntraDataFlowAnalyzerInput(
                 start_function,
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 5e7c8fe..35cb58f 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -121,7 +121,7 @@ def extract_nonlocal_info(self) -> None:
                         non_local_value = Value(
                             variable_name, scope_child.start_point[0] + 1, label, -1
                         )
-                        
+
                 # Found variables declared with var
                 elif scope_child.type == "variable_declaration":
                     variable_name = (
@@ -149,6 +149,12 @@ def extract_nonlocal_info(self) -> None:
 
                         function_root = parent
 
+                    if (
+                        not function_root
+                        or function_root not in self.scope_root_to_scope_id
+                    ):
+                        continue
+
                     function_scope_id = self.scope_root_to_scope_id[function_root]
                     reference_found = False
 

From 660f320006226cc1e539d306100dcb1b0f220916 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Sun, 9 Nov 2025 13:35:06 -0500
Subject: [PATCH 18/23] Added back original global variable analysis and
 removed language dependent components in dfbscan agent

---
 benchmark/Javascript/toy/NPD/case05.js        |   6 +-
 src/agent/dfbscan.py                          | 199 +++++++++++++++++-
 src/tstool/analyzer/Javascript_TS_analyzer.py | 192 +++++++----------
 src/tstool/analyzer/TS_analyzer.py            |  24 ++-
 .../dfbscan_extractor/dfbscan_extractor.py    |  19 +-
 5 files changed, 292 insertions(+), 148 deletions(-)

diff --git a/benchmark/Javascript/toy/NPD/case05.js b/benchmark/Javascript/toy/NPD/case05.js
index ed7739b..c06a3ba 100644
--- a/benchmark/Javascript/toy/NPD/case05.js
+++ b/benchmark/Javascript/toy/NPD/case05.js
@@ -1,6 +1,6 @@
 var a = console.error;
 delete a.error;
-
-function exec() {
+const exec = function () {
     a.error();
-}
\ No newline at end of file
+}
+exec()
\ No newline at end of file
diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 122e72a..9f990a0 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -648,6 +648,27 @@ def start_scan(self) -> None:
         # Total number of source values
         total_src_values = len(self.src_values)
 
+        total_global_src_values = len(self.ts_analyzer.globals_env)
+
+        with tqdm(
+            total=total_global_src_values,
+            desc="Processing Global Source Values",
+            unit="src",
+        ) as pbar:
+            with ThreadPoolExecutor(max_workers=self.max_neural_workers) as executor:
+                futures = [
+                    executor.submit(self.__process_global_value, global_value)
+                    for _, global_value in self.ts_analyzer.globals_env.items()
+                ]
+                for future in as_completed(futures):
+                    try:
+                        future.result()
+                    except Exception as e:
+                        self.logger.print_log("Error processing source value:", e)
+                    finally:
+                        # Update the progress bar after each source value is processed
+                        pbar.update(1)
+
         # Process each source value in parallel with a progress bar
         with tqdm(
             total=total_src_values, desc="Processing Source Values", unit="src"
@@ -724,20 +745,13 @@ def __process_src_value(self, src_value: Value) -> None:
             ]
 
             non_local_list = []
-            function_block_node = None
-            if start_function.parse_tree_root_node.type == "function_declaration":
-                function_block_node = start_function.parse_tree_root_node.child(3)
-            elif start_function.parse_tree_root_node.type == "variable_declarator":
-                function_block_node = start_function.parse_tree_root_node.child(
-                    2
-                ).child(2)
 
             if (
-                function_block_node
-                and function_block_node in self.ts_analyzer.scope_root_to_scope_id
+                start_function.parse_tree_root_node
+                in self.ts_analyzer.function_root_to_scope_id
             ):
-                function_scope_id = self.ts_analyzer.scope_root_to_scope_id[
-                    function_block_node
+                function_scope_id = self.ts_analyzer.function_root_to_scope_id[
+                    start_function.parse_tree_root_node
                 ]
                 if function_scope_id in self.ts_analyzer.child_scope_id_to_non_locals:
                     non_local_list = [
@@ -812,6 +826,169 @@ def __process_src_value(self, src_value: Value) -> None:
                 with open(self.res_dir_path + "/detect_info.json", "w") as f:
                     json.dump(bug_report_dict, f, indent=4)
 
+    def __process_global_value(self, global_value):
+        """
+        Perform data-flow analysis starting from the references to a global value.
+
+        1. Finds all functions referencing the global; returns early if none exist.
+        2. Runs worklist-driven intra-procedural analysis to record reachable values.
+        3. Collects potential buggy paths per reference and reports those that the
+        path validator marks reachable, rewriting detect_info.json after each report.
+        """
+        worklist = []
+
+        reference_in_funcs = self.ts_analyzer.get_function_global_value_reference(
+            global_value
+        )
+        if not reference_in_funcs:
+            return
+
+        initial_context = CallContext(False)
+
+        # Seed worklist with all function references to the global.
+        for func, global_references in reference_in_funcs.items():
+            for global_reference in global_references:
+                worklist.append((global_reference, func, initial_context))
+
+        # Worklist-driven intra-procedural analysis
+        while worklist:
+            start_value, start_function, call_context = worklist.pop(0)
+            if len(call_context.context) > self.call_depth:
+                continue
+
+            sinks_in_function = self.__obtain_extractor().extract_sinks(start_function)
+            sink_values = [
+                (sink.name, sink.line_number - start_function.start_line_number + 1)
+                for sink in sinks_in_function
+            ]
+
+            call_statements = []
+            for call_site_node in start_function.function_call_site_nodes:
+                file_content = self.ts_analyzer.code_in_files[start_function.file_path]
+                call_site_line_number = (
+                    file_content[: call_site_node.start_byte].count("\n") + 1
+                )
+                call_site_name = file_content[
+                    call_site_node.start_byte : call_site_node.end_byte
+                ]
+                call_statements.append((call_site_name, call_site_line_number))
+
+            ret_values = [
+                (ret.name, ret.line_number - start_function.start_line_number + 1)
+                for ret in (start_function.retvals if start_function.retvals else [])
+            ]
+
+            non_local_list = []
+
+            if (
+                start_function.parse_tree_root_node
+                in self.ts_analyzer.function_root_to_scope_id
+            ):
+                function_scope_id = self.ts_analyzer.function_root_to_scope_id[
+                    start_function.parse_tree_root_node
+                ]
+                if function_scope_id in self.ts_analyzer.child_scope_id_to_non_locals:
+                    non_local_list = [
+                        (value.name, value.line_number)
+                        for value in self.ts_analyzer.child_scope_id_to_non_locals[
+                            function_scope_id
+                        ]
+                    ]
+
+            df_input = IntraDataFlowAnalyzerInput(
+                start_function,
+                start_value,
+                sink_values,
+                call_statements,
+                ret_values,
+                non_local_list,
+            )
+
+            df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)
+            if df_output is None:
+                continue
+
+            for path_index in range(len(df_output.reachable_values)):
+                reachable_values_in_single_path = {
+                    (value, call_context)
+                    for value in df_output.reachable_values[path_index]
+                }
+                self.state.update_reachable_values_per_path(
+                    (start_value, call_context), reachable_values_in_single_path
+                )
+                delta_worklist = self.__update_worklist(
+                    df_input, df_output, call_context, path_index
+                )
+                worklist.extend(delta_worklist)
+
+        found_potential_buggy_paths = False
+        for func, global_references in reference_in_funcs.items():
+            for global_reference in global_references:
+                self.__collect_potential_buggy_paths(
+                    global_reference, (global_reference, CallContext(False))
+                )
+                if global_reference in self.state.potential_buggy_paths:
+                    found_potential_buggy_paths = True
+
+        if not found_potential_buggy_paths:
+            return
+
+        # Validate each potential buggy path
+        for start_value, buggy_paths in self.state.potential_buggy_paths.items():
+            for buggy_path in buggy_paths.values():
+                values_to_functions = {
+                    value: self.ts_analyzer.get_function_from_localvalue(value)
+                    for value in buggy_path
+                }
+
+                functions = set()
+                relevant_global_exprs = []
+                for func in values_to_functions.values():
+                    if not func:
+                        continue  # no owning function, hence no parse tree to walk
+                    functions.add(func)
+                    current = func.parse_tree_root_node
+                    while current.parent:
+                        current = current.parent
+
+                    relevant_global_exprs.extend(
+                        self.ts_analyzer.get_global_expressions_by_identifier(
+                            global_value.name, current
+                        )
+                    )
+
+                if self.state.check_existence(start_value, functions):
+                    continue
+
+                pv_input = PathValidatorInput(
+                    self.bug_type,
+                    buggy_path,
+                    values_to_functions,
+                    relevant_global_exprs,
+                )
+                pv_output = self.path_validator.invoke(pv_input, PathValidatorOutput)
+                if pv_output and pv_output.is_reachable:
+                    relevant_functions = {}
+                    for value in buggy_path:
+                        function = self.ts_analyzer.get_function_from_localvalue(value)
+                        if function:
+                            relevant_functions[function.function_id] = function
+
+                    bug_report = BugReport(
+                        self.bug_type,
+                        start_value,
+                        relevant_functions,
+                        pv_output.explanation_str,
+                    )
+                    self.state.update_bug_report(bug_report)
+
+                    bug_report_dict = {
+                        bug_report_id: bug.to_dict()
+                        for bug_report_id, bug in self.state.bug_reports.items()
+                    }
+                    with open(self.res_dir_path + "/detect_info.json", "w") as f:
+                        json.dump(bug_report_dict, f, indent=4)
+
     def get_agent_state(self) -> DFBScanState:
         return self.state
 
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 35cb58f..a2eed8a 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -35,6 +35,15 @@ def search(root: Node) -> None:
                     self.scope_env[scope_id] = (child, [])
                     self.scope_root_to_scope_id[child] = scope_id
                     scope_stack.append(scope_id)
+
+                    if child.parent.type == "function_declaration":
+                        self.function_root_to_scope_id[child.parent] = scope_id
+                    elif (
+                        child.parent.type == "arrow_function"
+                        or child.parent.type == "function_expression"
+                    ):
+                        self.function_root_to_scope_id[child.parent.parent] = scope_id
+
                     scope_id += 1
                     search(child)
                     scope_stack.pop()
@@ -52,93 +61,32 @@ def search(root: Node) -> None:
 
     def extract_nonlocal_info(self) -> None:
         identifiers_per_scope = dict()
-        for scope_id, scope_data in self.scope_env.items():
+        for _, scope_data in self.scope_env.items():
             scope_root, child_scope_ids = scope_data
             for scope_child in scope_root.children:
-                # Found variables declared with const or let
-                if scope_child.type == "lexical_declaration":
-                    variable_name = (
-                        scope_child.child(1).child_by_field_name("name").text.decode()
-                    )
-
-                    label = ValueLabel.LOCAL
-                    if scope_root.type == "program":
-                        label = ValueLabel.GLOBAL
-
-                    non_local_value = Value(
-                        variable_name, scope_child.start_point[0] + 1, label, -1
-                    )
-
-                    reference_found = False
-                    # Determines whether the variable is used in child functions and should be analyzed separately
-                    for child_scope_id in child_scope_ids:
-                        child_scope = self.scope_env[child_scope_id]
-                        child_scope_root, _ = child_scope
-
-                        # Skips if the nested scope does not resemble a nested function
-                        if not child_scope_root.parent or (
-                            child_scope_root.parent.type != "arrow_function"
-                            and child_scope_root.parent.type != "function_declaration"
-                            and child_scope_root.parent.type != "function_expression"
-                        ):
-                            continue
-
-                        # Finds all identifier nodes for each scope with memorization
-                        if child_scope_id not in identifiers_per_scope:
-                            identifiers_per_scope[child_scope_id] = find_nodes_by_type(
-                                child_scope_root, "identifier"
-                            )
-
-                        for candidate_node in identifiers_per_scope[child_scope_id]:
-                            # Skip identifiers with different names
-                            if candidate_node.text.decode() != variable_name:
-                                continue
-
-                            # Skip declarations of variables with the same name in the child scopes
-                            if (
-                                candidate_node.parent.type == "variable_declarator"
-                                and candidate_node.parent.child_by_field_name("name")
-                                == candidate_node
-                            ):
-                                continue
-
-                            reference_found = True
-
-                            if child_scope_id not in self.child_scope_id_to_non_locals:
-                                self.child_scope_id_to_non_locals[child_scope_id] = {
-                                    non_local_value
-                                }
-                            else:
-                                self.child_scope_id_to_non_locals[child_scope_id].add(
-                                    non_local_value
-                                )
-
-                    if reference_found:
-                        label = ValueLabel.LOCAL
-                        if scope_root.type == "program":
-                            label = ValueLabel.GLOBAL
-
-                        non_local_value = Value(
-                            variable_name, scope_child.start_point[0] + 1, label, -1
-                        )
+                # Skips statements that do not resemble variable declarations
+                if (
+                    scope_child.type != "lexical_declaration"
+                    and scope_child.type != "variable_declaration"
+                ):
+                    continue
 
-                # Found variables declared with var
-                elif scope_child.type == "variable_declaration":
-                    variable_name = (
-                        scope_child.child(1).child_by_field_name("name").text.decode()
-                    )
+                variable_name = (
+                    scope_child.child(1).child_by_field_name("name").text.decode()
+                )
 
-                    label = ValueLabel.LOCAL
-                    if scope_root.type == "program":
-                        label = ValueLabel.GLOBAL
+                label = ValueLabel.LOCAL
+                if scope_root.type == "program":
+                    label = ValueLabel.GLOBAL
 
-                    non_local_value = Value(
-                        variable_name, scope_child.start_point[0] + 1, label, -1
-                    )
+                non_local_value = Value(
+                    variable_name, scope_child.start_point[0] + 1, label, -1
+                )
 
-                    # Finds the enclosing function as variables declared with var are accessible in the entire function
+                if scope_child.type == "variable_declaration":
+                    # In JavaScript, the variable declared in var propagates to the function's scope
                     function_root = scope_root
-                    while function_root:
+                    while function_root.parent:
                         parent = function_root.parent
                         if parent and (
                             parent.type == "arrow_function"
@@ -156,50 +104,47 @@ def extract_nonlocal_info(self) -> None:
                         continue
 
                     function_scope_id = self.scope_root_to_scope_id[function_root]
-                    reference_found = False
-
-                    # Determines whether the variable is used in child functions and should be analyzed separately
-                    for child_scope_id in self.scope_env[function_scope_id][1]:
-                        child_scope = self.scope_env[child_scope_id]
-                        child_scope_root, _ = child_scope
-
-                        # Skips if the nested scope does not resemble a nested function
-                        if not child_scope_root.parent or (
-                            child_scope_root.parent.type != "arrow_function"
-                            and child_scope_root.parent.type != "function_declaration"
-                            and child_scope_root.parent.type != "function_expression"
+                    child_scope_ids = self.scope_env[function_scope_id][1]
+
+                # Determines whether the variable is used in child functions and should be analyzed separately
+                for child_scope_id in child_scope_ids:
+                    child_scope = self.scope_env[child_scope_id]
+                    child_scope_root, _ = child_scope
+
+                    # Skips if the nested scope does not resemble a nested function
+                    if not child_scope_root.parent or (
+                        child_scope_root.parent.type != "arrow_function"
+                        and child_scope_root.parent.type != "function_declaration"
+                        and child_scope_root.parent.type != "function_expression"
+                    ):
+                        continue
+
+                    # Finds all identifier nodes for each scope with memoization
+                    if child_scope_id not in identifiers_per_scope:
+                        identifiers_per_scope[child_scope_id] = find_nodes_by_type(
+                            child_scope_root, "identifier"
+                        )
+
+                    for candidate_node in identifiers_per_scope[child_scope_id]:
+                        # Skip identifiers with different names
+                        if candidate_node.text.decode() != variable_name:
+                            continue
+
+                        # Skip declarations of variables with the same name in the child scopes
+                        if (
+                            candidate_node.parent.type == "variable_declarator"
+                            and candidate_node.parent.child_by_field_name("name")
+                            == candidate_node
                         ):
                             continue
 
-                        # Finds all identifier nodes for each scope with memorization
-                        if child_scope_id not in identifiers_per_scope:
-                            identifiers_per_scope[child_scope_id] = find_nodes_by_type(
-                                child_scope_root, "identifier"
-                            )
+                        self.child_scope_id_to_non_locals.setdefault(
+                            child_scope_id, set()
+                        ).add(non_local_value)
 
-                        for candidate_node in identifiers_per_scope[child_scope_id]:
-                            # Skip identifiers with different names
-                            if candidate_node.text.decode() != variable_name:
-                                continue
-
-                            # Skip declarations of variables with the same name in the child scopes
-                            if (
-                                candidate_node.parent.type == "variable_declarator"
-                                and candidate_node.parent.child_by_field_name("name")
-                                == candidate_node
-                            ):
-                                continue
-
-                            reference_found = True
-
-                            if child_scope_id not in self.child_scope_id_to_non_locals:
-                                self.child_scope_id_to_non_locals[child_scope_id] = {
-                                    non_local_value
-                                }
-                            else:
-                                self.child_scope_id_to_non_locals[child_scope_id].add(
-                                    non_local_value
-                                )
+                        self.non_local_to_child_scopes.setdefault(
+                            non_local_value, set()
+                        ).add(child_scope_id)
 
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
@@ -556,6 +501,13 @@ def get_loop_statements(
     def get_global_expressions_by_identifier(
         self, identifier: str, program_root: Node
     ) -> List[Node]:
+        """
+        Extracts all expressions related to a specific identifier in the global scope
+        :param identifier: The identifier
+        :param program_root: Program root node
+        :return: A list of extracted nodes
+        """
+
         output_nodes = []
         children = program_root.children
         global_expression_types = [
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index a6f27d1..5b14ece 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -181,8 +181,14 @@ def __init__(
         # Dictionary storing mapping from the root node of the scope to its scope id
         self.scope_root_to_scope_id: Dict[Node, int] = {}
         
+        # Dictionary storing mapping from function root node to its scope id
+        self.function_root_to_scope_id: Dict[Node, int] = {}
+                
         # Dictionary storing mapping from a scope id to all the non locals it is depended on
         self.child_scope_id_to_non_locals: Dict[int, Set[Value]] = {}
+        
+        # Dictionary storing mapping from a non local value to its child scopes
+        self.non_local_to_child_scopes: Dict[Value, Set[int]] = {}
 
         # Results of call graph analysis
         ## Caller-callee relationship between user-defined functions
@@ -723,6 +729,18 @@ def get_loop_statements(
         :return: A dictionary mapping (start_line, end_line) to loop statement info.
         """
         pass
+    
+    @abstractmethod
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        """
+        Extract all global-scope expressions related to an identifier (abstract hook).
+        :param identifier: The identifier to look up
+        :param program_root: Program root node
+        :return: A list of extracted nodes
+        """
+        pass
 
     def check_control_order(
         self, function: Function, src_line_number: int, sink_line_number: int
@@ -852,12 +870,6 @@ def get_function_global_value_reference(
                     references.setdefault(function, []).append(ref_value)
 
         return references
-    
-    @abstractmethod
-    def get_global_expressions_by_identifier(
-        self, identifier: str, program_root: Node
-    ) -> List[Node]:
-        pass
         
     def get_function_from_localvalue(self, value: Value) -> Optional[Function]:
         """
diff --git a/src/tstool/dfbscan_extractor/dfbscan_extractor.py b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
index 39519cc..4d9d635 100644
--- a/src/tstool/dfbscan_extractor/dfbscan_extractor.py
+++ b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
@@ -26,25 +26,28 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
         """
         pbar = tqdm(
             total=len(self.ts_analyzer.function_env)
-            + len(self.ts_analyzer.globals_env),
+            + len(self.ts_analyzer.globalsRawDataDic),
             desc="Parsing files",
         )
+
+        # Extract src/sink values from functions
         for function_id in self.ts_analyzer.function_env:
             pbar.update(1)
             function: Function = self.ts_analyzer.function_env[function_id]
             if "test" in function.file_path or "example" in function.file_path:
                 continue
-            file_content = self.ts_analyzer.code_in_files[function.file_path]
-            function_root_node = function.parse_tree_root_node
+
             self.sources.extend(self.extract_sources(function))
             self.sinks.extend(self.extract_sinks(function))
 
-        for global_id, global_var in self.ts_analyzer.globals_env.items():
+        # Filter out non src global values in global_env
+        for global_id, global_data in self.ts_analyzer.globalsRawDataDic.items():
             pbar.update(1)
-            node = self.ts_analyzer.globalsRawDataDic[global_id][2]
-            if self.is_global_source(node):
-                global_var.label = ValueLabel.SRC
-                self.ts_analyzer.globals_env[global_id] = global_var
+            global_node = global_data[2]
+            if self.is_global_source(global_node):
+                self.ts_analyzer.globals_env[global_id].label = ValueLabel.SRC
+            else:
+                del self.ts_analyzer.globals_env[global_id]
 
         pbar.close()
 

From 52743e71fa64616cc23ac60bce210035517100d3 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Mon, 10 Nov 2025 22:20:14 -0500
Subject: [PATCH 19/23] Specified nullish value extraction rule for Javascript

---
 src/agent/dfbscan.py                          |  8 -----
 .../dfbscan/intra_dataflow_analyzer.py        |  2 +-
 .../dfbscan/intra_dataflow_analyzer.json      |  2 +-
 .../Javascript/Javascript_NPD_extractor.py    | 35 +++++++++++++------
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 9f990a0..42a7bd5 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -262,14 +262,6 @@ def __update_worklist(
                                 (value, call_context),
                                 set({(new_value, new_call_context)}),
                             )
-                else:
-                    delta_worklist.append(
-                        (
-                            value,
-                            function,
-                            call_context,
-                        )
-                    )
 
             if value.label == ValueLabel.PARA:
                 # Consider side-effect.
diff --git a/src/llmtool/dfbscan/intra_dataflow_analyzer.py b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
index e820281..e9a7191 100644
--- a/src/llmtool/dfbscan/intra_dataflow_analyzer.py
+++ b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
@@ -112,7 +112,7 @@ def _get_prompt(self, input: LLMToolInput) -> str:
         prompt = prompt.replace("<RETURN_VALUES>", rets_str)
         
         if input.non_locals:
-            non_local_str = "Non local variables relevant to this function:"
+            non_local_str = "Non local variables relevant to this function:\n"
             for non_local in input.non_locals:
                 non_local_str += f"- {non_local[0]} at line {non_local[1]}\n"
             prompt = prompt.replace("<NONLOCAL_VALUES>", non_local_str)
diff --git a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
index ab183d6..9b20e83 100644
--- a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
+++ b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json
@@ -103,7 +103,7 @@
     "Path 3: Lines 1 -> 6;",
     "- No propagation; Dependency: Default return value -1 is unrelated to SRC."
   ],
-  "question_template": "- Where does the source variable <SRC_NAME> at line <SRC_LINE> in this function propagate?",
+  "question_template": "- Where does the source <SRC_NAME> at line <SRC_LINE> in this function propagate?",
   "answer_format_cot": [
     "(1) First, provide a detailed step-by-step reasoning process, following the explanation format used in the examples;",
     "(2) Once the reasoning is complete, begin the final answer section with 'Answer:';",
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 8dd5d92..c7b5f2c 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -55,15 +55,34 @@ def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
         file_path = function.file_path
-        null_value_nodes = []
+        
+        """
+        Extract the potential null values as sources from the source code.
+        1. variable = null;
+        2. return null;
+        """
+        nodes = find_nodes_by_type(root_node, "variable_declarator")
+        nodes.extend(find_nodes_by_type(root_node, "assignment_expression"))
+        nodes.extend(find_nodes_by_type(root_node, "return_statement"))
+        
+        sources = []
+        
+        # Look for nullish value nodes
+        for node in nodes:
+            is_seed_node = False
+
+            for child in node.children:
+                if child.type in self.NULLISH_VALUES:
+                    is_seed_node = True
+
+            if is_seed_node:
+                line_number = source_code[: node.start_byte].count("\n") + 1
+                name = source_code[node.start_byte : node.end_byte]
+                sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
-        for nullish_value in self.NULLISH_VALUES:
-            null_value_nodes.extend(find_nodes_by_type(root_node, nullish_value))
 
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
         
-        sources = []
-
         # Look for delete expressions
         for unary_expression in unary_expressions:
             operator = unary_expression.child(0)
@@ -74,12 +93,6 @@ def extract_sources(self, function: Function) -> List[Value]:
                 ]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
-        # Look for nullish value nodes
-        for node in null_value_nodes:
-            line_number = source_code[: node.start_byte].count("\n") + 1
-            name = source_code[node.start_byte : node.end_byte]
-            sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
-
         return sources
 
     def extract_sinks(self, function: Function) -> List[Value]:

From 9ecf357ad24b86fc491cb04f140735e559e07fa8 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 14 Nov 2025 20:15:20 -0500
Subject: [PATCH 20/23] Added missing abstract function implementations to TS
 analyzers and NPD extractors

---
 src/tstool/analyzer/Cpp_TS_analyzer.py        | 27 +++++++++++++++++++
 src/tstool/analyzer/Go_TS_analyzer.py         | 27 +++++++++++++++++++
 src/tstool/analyzer/Java_TS_analyzer.py       | 27 +++++++++++++++++++
 src/tstool/analyzer/Python_TS_analyzer.py     | 27 +++++++++++++++++++
 .../Cpp/Cpp_MLK_extractor.py                  |  7 +++++
 .../Cpp/Cpp_NPD_extractor.py                  |  9 ++++---
 .../Cpp/Cpp_UAF_extractor.py                  |  7 +++++
 .../dfbscan_extractor/Go/Go_NPD_extractor.py  |  7 ++---
 .../Java/Java_NPD_extractor.py                |  7 +++++
 .../Javascript/Javascript_NPD_extractor.py    | 25 +++++++++++------
 .../Python/Python_NPD_extractor.py            |  7 ++---
 .../dfbscan_extractor/dfbscan_extractor.py    |  4 ---
 12 files changed, 159 insertions(+), 22 deletions(-)

diff --git a/src/tstool/analyzer/Cpp_TS_analyzer.py b/src/tstool/analyzer/Cpp_TS_analyzer.py
index 244e82f..58a18c4 100644
--- a/src/tstool/analyzer/Cpp_TS_analyzer.py
+++ b/src/tstool/analyzer/Cpp_TS_analyzer.py
@@ -15,6 +15,21 @@ class Cpp_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for C/C++ source files using tree-sitter.
     Implements language-specific parsing and analysis.
     """
+    
+    def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
+        """
+        Parse source code to extract scope topography.
+        Currently not implemented; this override is a no-op.
+        :param tree: Parsed syntax tree
+        """
+        pass
+    
+    def extract_nonlocal_info(self) -> None:
+        """
+        Traverse the scopes to identify declarations of non locals.
+        Currently not implemented; this override is a no-op.
+        """
+        pass
 
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
@@ -416,3 +431,15 @@ def get_loop_statements(
                 loop_body_end_line,
             )
         return loop_statements
+    
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        """
+        Extracts all expressions related to a specific identifier in the global scope.
+        Currently not implemented.
+        :param identifier: The identifier
+        :param program_root: Program root node
+        :return: Always an empty list, so the result is safe to iterate or extend
+        """
+        return []
diff --git a/src/tstool/analyzer/Go_TS_analyzer.py b/src/tstool/analyzer/Go_TS_analyzer.py
index 13ceb2a..42958a1 100644
--- a/src/tstool/analyzer/Go_TS_analyzer.py
+++ b/src/tstool/analyzer/Go_TS_analyzer.py
@@ -15,6 +15,21 @@ class Go_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for Go source files using tree-sitter.
     Implements Go-specific parsing and analysis.
     """
+    
+    def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
+        """
+        Parse source code to extract scope topography.
+        Currently not implemented; this override is a no-op.
+        :param tree: Parsed syntax tree
+        """
+        pass
+    
+    def extract_nonlocal_info(self) -> None:
+        """
+        Traverse the scopes to identify declarations of non locals.
+        Currently not implemented; this override is a no-op.
+        """
+        pass
 
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
@@ -349,3 +364,15 @@ def get_loop_statements(
                 loop_body_end_line,
             )
         return loop_statements
+
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        """
+        Extracts all expressions related to a specific identifier in the global scope.
+        Currently not implemented.
+        :param identifier: The identifier
+        :param program_root: Program root node
+        :return: A list of extracted nodes
+        """
+        pass
\ No newline at end of file
diff --git a/src/tstool/analyzer/Java_TS_analyzer.py b/src/tstool/analyzer/Java_TS_analyzer.py
index 4464bea..ff9266a 100644
--- a/src/tstool/analyzer/Java_TS_analyzer.py
+++ b/src/tstool/analyzer/Java_TS_analyzer.py
@@ -15,6 +15,21 @@ class Java_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for Java source files using tree-sitter.
     Implements Java-specific parsing and analysis.
     """
+    
+    def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
+        """
+        Parse source code to extract scope topography.
+        Currently Not implemented.
+        :param tree: Parsed syntax tree
+        """
+        pass
+    
+    def extract_nonlocal_info(self) -> None:
+        """
+        Traverse the scopes to identify declarations of non locals.
+        Currently Not implemented.
+        """
+        pass
 
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
@@ -361,3 +376,15 @@ def get_loop_statements(
                 loop_body_end_line,
             )
         return loop_statements
+
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        """
+        Extracts all expressions related to a specific identifier in the global scope.
+        Currently not implemented.
+        :param identifier: The identifier
+        :param program_root: Program root node
+        :return: A list of extracted nodes
+        """
+        pass
\ No newline at end of file
diff --git a/src/tstool/analyzer/Python_TS_analyzer.py b/src/tstool/analyzer/Python_TS_analyzer.py
index 24e0f02..f4887d3 100644
--- a/src/tstool/analyzer/Python_TS_analyzer.py
+++ b/src/tstool/analyzer/Python_TS_analyzer.py
@@ -15,6 +15,21 @@ class Python_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for Python source files using tree-sitter.
     Implements Python-specific parsing and analysis.
     """
+    
+    def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
+        """
+        Parse source code to extract scope topography
+        :param tree: Parsed syntax tree
+        """
+        # TODO: Add scope extraction if needed
+        pass
+    
+    def extract_nonlocal_info(self) -> None:
+        """
+        Traverse the scopes to identify declarations of non locals
+        """
+        # TODO: add non local variable extraction if needed
+        pass
 
     def extract_function_info(
         self, file_path: str, source_code: str, tree: tree_sitter.Tree
@@ -279,3 +294,15 @@ def get_loop_statements(
                 end_line,
             )
         return loops
+    
+    def get_global_expressions_by_identifier(
+        self, identifier: str, program_root: Node
+    ) -> List[Node]:
+        """
+        Extracts all expressions related to a specific identifier in the global scope
+        :param identifier: The identifier
+        :param program_root: Program root node
+        :return: A list of extracted nodes
+        """
+        # TODO: implement if needed
+        pass
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
index b3c8367..ed975ef 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
@@ -4,6 +4,13 @@
 
 
 class Cpp_MLK_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        Currently not implemented.
+        """
+        return False
+    
     def extract_sources(self, function: Function) -> List[Value]:
         """
         Extract the sources that can cause the memory leak bugs from C/C++ programs.
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
index fbf5ec5..13ab499 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
@@ -7,11 +7,12 @@
 
 class Cpp_NPD_Extractor(DFBScanExtractor):
     def is_global_source(self, global_declarator_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        Currently not implemented.
+        """
         return False
-        
-    def is_global_sink(self, global_declarator_node: Tree) -> bool:
-        return False
-    
+
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
index a18bf18..b283924 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
@@ -6,6 +6,13 @@
 
 
 class Cpp_UAF_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        Currently not implemented.
+        """
+        return False
+    
     def extract_sources(self, function: Function) -> List[Value]:
         """
         Extract the sources that can cause the use-after-free bugs from C/C++ programs.
diff --git a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
index 4501f96..4f9c3af 100644
--- a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
@@ -7,9 +7,10 @@
 
 class Go_NPD_Extractor(DFBScanExtractor):
     def is_global_source(self, global_declarator_node: Tree) -> bool:
-        return False
-        
-    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        Currently not implemented.
+        """
         return False
     
     def extract_sources(self, function: Function) -> List[Value]:
diff --git a/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py b/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
index 91a5201..b043e3b 100644
--- a/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
@@ -6,6 +6,13 @@
 
 
 class Java_NPD_Extractor(DFBScanExtractor):
+    def is_global_source(self, global_declarator_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        Currently not implemented.
+        """
+        return False
+    
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index c7b5f2c..3a0c63f 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -26,6 +26,11 @@ def is_expression_null(self, expr: Tree) -> bool:
             return True
 
     def is_global_source(self, global_declaration_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        1. globa_var = null;
+        3. delete globa_var.prop;
+        """
         global_name = global_declaration_node.child(1).child_by_field_name("name").text
         sibling = global_declaration_node.next_sibling
 
@@ -48,19 +53,18 @@ def is_global_source(self, global_declaration_node: Tree) -> bool:
             sibling = sibling.next_sibling
         return False
 
-    def is_global_sink(self, global_declarator_node: Tree) -> bool:
-        return False
-
     def extract_sources(self, function: Function) -> List[Value]:
-        root_node = function.parse_tree_root_node
-        source_code = self.ts_analyzer.code_in_files[function.file_path]
-        file_path = function.file_path
-        
         """
-        Extract the potential null values as sources from the source code.
+        Extract the potential null/undefined values as sources from the source code.
         1. variable = null;
         2. return null;
+        3. delete obj.prop;
         """
+        
+        root_node = function.parse_tree_root_node
+        source_code = self.ts_analyzer.code_in_files[function.file_path]
+        file_path = function.file_path
+        
         nodes = find_nodes_by_type(root_node, "variable_declarator")
         nodes.extend(find_nodes_by_type(root_node, "assignment_expression"))
         nodes.extend(find_nodes_by_type(root_node, "return_statement"))
@@ -98,9 +102,14 @@ def extract_sources(self, function: Function) -> List[Value]:
     def extract_sinks(self, function: Function) -> List[Value]:
         """
         Extract the sinks that can cause the null pointer dereferences from Javascript programs.
+        1. null_obj.prop;
+        2. null_obj[1];
+        3. null_obj();
+        
         :param: function: Function object.
         :return: List of sink values
         """
+
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
         file_path = function.file_path
diff --git a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
index a4c5e0e..eabeece 100644
--- a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
@@ -5,9 +5,10 @@
 
 class Python_NPD_Extractor(DFBScanExtractor):
     def is_global_source(self, global_declarator_node: Tree) -> bool:
-        return False
-        
-    def is_global_sink(self, global_declarator_node: Tree) -> bool:
+        """
+        Determines whether the global variable is initially a source.
+        """
+        # TODO: Implement source detection for global variables if needed
         return False
     
     def extract_sources(self, function: Function) -> List[Value]:
diff --git a/src/tstool/dfbscan_extractor/dfbscan_extractor.py b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
index 4d9d635..34e6e44 100644
--- a/src/tstool/dfbscan_extractor/dfbscan_extractor.py
+++ b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
@@ -57,10 +57,6 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
     def is_global_source(self, global_var: Tree) -> bool:
         pass
 
-    @abstractmethod
-    def is_global_sink(self, global_var: Tree) -> bool:
-        pass
-
     @abstractmethod
     def extract_sources(self, function: Function) -> List[Value]:
         """

From aa9071637a9bea5bba1dfbca42f85d8d617844e3 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Mon, 17 Nov 2025 22:43:21 -0500
Subject: [PATCH 21/23] Added nullish arguments as source type

---
 .../dfbscan_extractor/Javascript/Javascript_NPD_extractor.py    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 3a0c63f..06e1233 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -59,6 +59,7 @@ def extract_sources(self, function: Function) -> List[Value]:
         1. variable = null;
         2. return null;
         3. delete obj.prop;
+        4. func(null);
         """
         
         root_node = function.parse_tree_root_node
@@ -68,6 +69,7 @@ def extract_sources(self, function: Function) -> List[Value]:
         nodes = find_nodes_by_type(root_node, "variable_declarator")
         nodes.extend(find_nodes_by_type(root_node, "assignment_expression"))
         nodes.extend(find_nodes_by_type(root_node, "return_statement"))
+        nodes.extend(find_nodes_by_type(root_node, "arguments"))
         
         sources = []
         

From 9ed352f8336727261a2e4300c9445c6052fd67b7 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 21 Nov 2025 15:12:25 -0500
Subject: [PATCH 22/23] Fixed mypy errors

---
 src/agent/dfbscan.py                          |   1 +
 .../dfbscan/intra_dataflow_analyzer.py        |   2 +-
 src/tstool/analyzer/Cpp_TS_analyzer.py        |   2 +-
 src/tstool/analyzer/Go_TS_analyzer.py         |   2 +-
 src/tstool/analyzer/Java_TS_analyzer.py       |   2 +-
 src/tstool/analyzer/Javascript_TS_analyzer.py | 135 +++++++++++-------
 src/tstool/analyzer/Python_TS_analyzer.py     |   2 +-
 src/tstool/analyzer/TS_analyzer.py            |   5 +-
 .../Cpp/Cpp_MLK_extractor.py                  |   2 +-
 .../Cpp/Cpp_NPD_extractor.py                  |   2 +-
 .../Cpp/Cpp_UAF_extractor.py                  |   2 +-
 .../dfbscan_extractor/Go/Go_NPD_extractor.py  |   2 +-
 .../Java/Java_NPD_extractor.py                |   2 +-
 .../Javascript/Javascript_NPD_extractor.py    |  60 +++++---
 .../Python/Python_NPD_extractor.py            |   2 +-
 .../dfbscan_extractor/dfbscan_extractor.py    |   2 +-
 16 files changed, 137 insertions(+), 88 deletions(-)

diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 4bc439a..0bfc3a1 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -544,6 +544,7 @@ def start_scan_sequential(self) -> None:
                         sink_values,
                         call_statements,
                         ret_values,
+                        non_locals=[]
                     )
 
                     # Invoke the intra-procedural data-flow analysis
diff --git a/src/llmtool/dfbscan/intra_dataflow_analyzer.py b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
index e9a7191..26d7f9d 100644
--- a/src/llmtool/dfbscan/intra_dataflow_analyzer.py
+++ b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
@@ -19,7 +19,7 @@ def __init__(
         sink_values: List[Tuple[str, int]],
         call_statements: List[Tuple[str, int]],
         ret_values: List[Tuple[str, int]],
-        non_locals: List[Value]
+        non_locals: List[Tuple[str, int]]
     ) -> None:
         self.function = function
         self.summary_start = summary_start
diff --git a/src/tstool/analyzer/Cpp_TS_analyzer.py b/src/tstool/analyzer/Cpp_TS_analyzer.py
index 58a18c4..5eecdea 100644
--- a/src/tstool/analyzer/Cpp_TS_analyzer.py
+++ b/src/tstool/analyzer/Cpp_TS_analyzer.py
@@ -442,4 +442,4 @@ def get_global_expressions_by_identifier(
         :param program_root: Program root node
         :return: A list of extracted nodes
         """
-        pass
+        return []
diff --git a/src/tstool/analyzer/Go_TS_analyzer.py b/src/tstool/analyzer/Go_TS_analyzer.py
index 42958a1..669829a 100644
--- a/src/tstool/analyzer/Go_TS_analyzer.py
+++ b/src/tstool/analyzer/Go_TS_analyzer.py
@@ -375,4 +375,4 @@ def get_global_expressions_by_identifier(
         :param program_root: Program root node
         :return: A list of extracted nodes
         """
-        pass
\ No newline at end of file
+        return []
\ No newline at end of file
diff --git a/src/tstool/analyzer/Java_TS_analyzer.py b/src/tstool/analyzer/Java_TS_analyzer.py
index ff9266a..ade1795 100644
--- a/src/tstool/analyzer/Java_TS_analyzer.py
+++ b/src/tstool/analyzer/Java_TS_analyzer.py
@@ -387,4 +387,4 @@ def get_global_expressions_by_identifier(
         :param program_root: Program root node
         :return: A list of extracted nodes
         """
-        pass
\ No newline at end of file
+        return []
\ No newline at end of file
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index a2eed8a..3fb892f 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -30,29 +30,33 @@ def search(root: Node) -> None:
             for child in root.children:
                 if child.type == "statement_block":
                     if len(scope_stack) > 0:
-                        self.scope_env[scope_stack[-1]][1].append(scope_id)
+                        self.scope_env[scope_stack[-1]][1].add(scope_id)
 
-                    self.scope_env[scope_id] = (child, [])
+                    self.scope_env[scope_id] = (child, set())
                     self.scope_root_to_scope_id[child] = scope_id
                     scope_stack.append(scope_id)
 
-                    if child.parent.type == "function_declaration":
-                        self.function_root_to_scope_id[child.parent] = scope_id
-                    elif (
-                        child.parent.type == "arrow_function"
-                        or child.parent.type == "function_expression"
-                    ):
-                        self.function_root_to_scope_id[child.parent.parent] = scope_id
-
-                    scope_id += 1
-                    search(child)
-                    scope_stack.pop()
+                    if child.parent:
+                        if child.parent.type == "function_declaration":
+                            self.function_root_to_scope_id[child.parent] = scope_id
+                        elif (
+                            child.parent.type == "arrow_function"
+                            or child.parent.type == "function_expression"
+                        ):
+                            if child.parent.parent:
+                                self.function_root_to_scope_id[child.parent.parent] = (
+                                    scope_id
+                                )
+
+                        scope_id += 1
+                        search(child)
+                        scope_stack.pop()
                 else:
                     search(child)
 
             return
 
-        self.scope_env[scope_id] = (tree.root_node, [])
+        self.scope_env[scope_id] = (tree.root_node, set())
         self.scope_root_to_scope_id[tree.root_node] = scope_id
         scope_stack.append(scope_id)
         scope_id += 1
@@ -60,81 +64,104 @@ def search(root: Node) -> None:
         return
 
     def extract_nonlocal_info(self) -> None:
-        identifiers_per_scope = dict()
+        identifiers_per_scope: Dict[int, List[Node]] = {}
+
         for _, scope_data in self.scope_env.items():
             scope_root, child_scope_ids = scope_data
+
             for scope_child in scope_root.children:
-                # Skips expressions that does not resemble variable declarations
-                if (
-                    scope_child.type != "lexical_declaration"
-                    and scope_child.type != "variable_declaration"
+                # Only process lexical/variable declarations
+                if scope_child.type not in (
+                    "lexical_declaration",
+                    "variable_declaration",
                 ):
                     continue
 
-                variable_name = (
-                    scope_child.child(1).child_by_field_name("name").text.decode()
-                )
+                decl_child = scope_child.child(1)
+                if decl_child is None:
+                    continue
 
-                label = ValueLabel.LOCAL
-                if scope_root.type == "program":
-                    label = ValueLabel.GLOBAL
+                name_node = decl_child.child_by_field_name("name")
+                if name_node is None or name_node.text is None:
+                    continue
+
+                variable_name: str = name_node.text.decode("utf-8")
+
+                label = (
+                    ValueLabel.GLOBAL
+                    if scope_root.type == "program"
+                    else ValueLabel.LOCAL
+                )
 
                 non_local_value = Value(
-                    variable_name, scope_child.start_point[0] + 1, label, -1
+                    variable_name,
+                    scope_child.start_point[0] + 1,
+                    label,
+                    file="",
+                    index=-1,
                 )
 
+                effective_child_scope_ids = child_scope_ids
                 if scope_child.type == "variable_declaration":
-                    # In JavaScript, the variable declared in var propagates to the function's scope
-                    function_root = scope_root
-                    while function_root.parent:
+                    function_root: Optional[Node] = scope_root
+
+                    # Find closest parent function
+                    while (
+                        function_root is not None and function_root.parent is not None
+                    ):
                         parent = function_root.parent
-                        if parent and (
-                            parent.type == "arrow_function"
-                            or parent.type == "function_declaration"
-                            or parent.type == "function_expression"
+                        if parent.type in (
+                            "arrow_function",
+                            "function_declaration",
+                            "function_expression",
                         ):
                             break
-
                         function_root = parent
 
                     if (
-                        not function_root
+                        function_root is None
                         or function_root not in self.scope_root_to_scope_id
                     ):
                         continue
 
                     function_scope_id = self.scope_root_to_scope_id[function_root]
-                    child_scope_ids = self.scope_env[function_scope_id][1]
-
-                # Determines whether the variable is used in child functions and should be analyzed separately
-                for child_scope_id in child_scope_ids:
-                    child_scope = self.scope_env[child_scope_id]
-                    child_scope_root, _ = child_scope
-
-                    # Skips if the nested scope does not resemble a nested function
-                    if not child_scope_root.parent or (
-                        child_scope_root.parent.type != "arrow_function"
-                        and child_scope_root.parent.type != "function_declaration"
-                        and child_scope_root.parent.type != "function_expression"
+                    effective_child_scope_ids = self.scope_env[function_scope_id][1]
+
+                # Process child scopes
+                for child_scope_id in effective_child_scope_ids:
+                    child_scope_root, _ = self.scope_env[child_scope_id]
+
+                    # Must be inside a function-like construct
+                    parent_node: Optional[Node] = child_scope_root.parent
+                    if parent_node is None or parent_node.type not in (
+                        "arrow_function",
+                        "function_declaration",
+                        "function_expression",
                     ):
                         continue
 
-                    # Finds all identifier nodes for each scope with memorization
+                    # Cache identifiers per scope
                     if child_scope_id not in identifiers_per_scope:
                         identifiers_per_scope[child_scope_id] = find_nodes_by_type(
                             child_scope_root, "identifier"
                         )
 
                     for candidate_node in identifiers_per_scope[child_scope_id]:
-                        # Skip identifiers with different names
-                        if candidate_node.text.decode() != variable_name:
+                        if candidate_node is None:  # defensive guard only; real nodes must fall through
+                            continue
+
+                        # Name mismatch
+                        if candidate_node.text is None:
+                            continue
+                        if candidate_node.text.decode("utf-8") != variable_name:
                             continue
 
-                        # Skip declarations of variables with the same name in the child scopes
+                        # Skip if this identifier declares a new variable in this scope with the same name
+                        candidate_parent = candidate_node.parent
                         if (
-                            candidate_node.parent.type == "variable_declarator"
-                            and candidate_node.parent.child_by_field_name("name")
-                            == candidate_node
+                            candidate_parent is not None
+                            and candidate_parent.type == "variable_declarator"
+                            and candidate_parent.child_by_field_name("name") is candidate_node
                         ):
                             continue
 
diff --git a/src/tstool/analyzer/Python_TS_analyzer.py b/src/tstool/analyzer/Python_TS_analyzer.py
index f4887d3..8768a99 100644
--- a/src/tstool/analyzer/Python_TS_analyzer.py
+++ b/src/tstool/analyzer/Python_TS_analyzer.py
@@ -305,4 +305,4 @@ def get_global_expressions_by_identifier(
         :return: A list of extracted nodes
         """
         # TODO: implement if needed
-        pass
+        return []
\ No newline at end of file
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index 5b14ece..475a28b 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -170,12 +170,12 @@ def __init__(
         self.functionToFile: Dict[int, str] = {}
         self.fileContentDic: Dict[str, str] = {}
         self.glb_var_map: Dict[str, str] = {}  # global var info
-        self.globalsRawDataDic: Dict[str, Tuple[str, int, Node]] = {}
+        self.globalsRawDataDic: Dict[int, Tuple[str, int, Node]] = {}
         self.globalsToFile: Dict[int, str] = {}
 
         self.function_env: Dict[int, Function] = {}
         self.globals_env: Dict[int, Value] = {}
-        self.scope_env: Dict[int, Tuple[Node, Set[Dict]]] = {}
+        self.scope_env: Dict[int, Tuple[Node, Set[int]]] = {}
         self.api_env: Dict[int, API] = {}
         
         # Dictionary storing mapping from the root node of the scope to its scope id
@@ -342,6 +342,7 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         Parse source code to extract scope topography
         :param tree: Parsed syntax tree
         """
+        pass
     
     @abstractmethod
     def extract_nonlocal_info(self) -> None:
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
index ed975ef..96b3d63 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
@@ -4,7 +4,7 @@
 
 
 class Cpp_MLK_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
+    def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         Determines whether the global variable is initially a source.
         Currently not implemented.
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
index 13ab499..38f4528 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_NPD_extractor.py
@@ -6,7 +6,7 @@
 
 
 class Cpp_NPD_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
+    def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         Determines whether the global variable is initially a source.
         Currently not implemented.
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
index b283924..f67bf3c 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
@@ -6,7 +6,7 @@
 
 
 class Cpp_UAF_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
+    def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         Determines whether the global variable is initially a source.
         Currently not implemented.
diff --git a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
index 4f9c3af..bcbf15b 100644
--- a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
@@ -6,7 +6,7 @@
 
 
 class Go_NPD_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
+    def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         Determines whether the global variable is initially a source.
         Currently not implemented.
diff --git a/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py b/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
index b043e3b..65698b9 100644
--- a/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
@@ -6,7 +6,7 @@
 
 
 class Java_NPD_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
+    def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         Determines whether the global variable is initially a source.
         Currently not implemented.
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 06e1233..2c4e274 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -6,7 +6,7 @@
 class Javascript_NPD_Extractor(DFBScanExtractor):
     NULLISH_VALUES = {"null", "undefined"}
     
-    def is_expression_delete(self, expr: Tree) -> bool:
+    def is_expression_delete(self, expr: Node) -> bool:
         if expr.type == "unary_expression":
             operator = expr.child(0)
             if operator and operator.type == "delete":
@@ -14,45 +14,65 @@ def is_expression_delete(self, expr: Tree) -> bool:
                 
         return False
     
-    def is_expression_null(self, expr: Tree) -> bool:
+    def is_expression_null(self, expr: Node) -> bool:
         if expr.type != "assignment_expression":
             return False
 
         value_node = expr.child(2)
-        value_type = value_node.type
+        value_type = value_node.type if value_node else ""
 
         # Nullish constant (e.g. null/undefined)
         if value_type in self.NULLISH_VALUES:
             return True
+        
+        return False
 
-    def is_global_source(self, global_declaration_node: Tree) -> bool:
-        """
-        Determines whether the global variable is initially a source.
-        1. globa_var = null;
-        3. delete globa_var.prop;
-        """
-        global_name = global_declaration_node.child(1).child_by_field_name("name").text
-        sibling = global_declaration_node.next_sibling
+    def is_global_source(self, global_declaration_node: Node) -> bool:
+        # global_name is usually bytes, decode for safe string comparison
+        name_node = global_declaration_node.child(1)
+        if name_node is None:
+            return False
+
+        name_field = name_node.child_by_field_name("name")
+        if name_field is None or name_field.text is None:
+            return False
+
+        global_name = name_field.text.decode("utf-8")
+
+        sibling: Optional[Node] = global_declaration_node.next_sibling
 
         while sibling is not None:
-            if len(sibling.children) == 0:
+            # Skip empty siblings
+            if not sibling.children:
                 sibling = sibling.next_sibling
                 continue
 
             expr = sibling.child(0)
-            
+            if expr is None:
+                sibling = sibling.next_sibling
+                continue
+
+            # Handle deletion of property
             if self.is_expression_delete(expr):
-                # the target object with property being deleted
-                obj_node = expr.child(1).child_by_field_name("object")
-                if obj_node and obj_node.text == global_name:
-                    return True
-                
-            if self.is_expression_null(expr) and expr.child(0).text == global_name:
-                return True
+                second_child = expr.child(1)
+                if second_child is not None:
+                    obj_node = second_child.child_by_field_name("object")
+                    if obj_node is not None and obj_node.text is not None:
+                        if obj_node.text.decode("utf-8") == global_name:
+                            return True
+
+            # Handle nullish assignment
+            if self.is_expression_null(expr):
+                lhs = expr.child(0)
+                if lhs is not None and lhs.text is not None:
+                    if lhs.text.decode("utf-8") == global_name:
+                        return True
 
             sibling = sibling.next_sibling
+
         return False
 
+
     def extract_sources(self, function: Function) -> List[Value]:
         """
         Extract the potential null/undefined values as sources from the source code.
diff --git a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
index eabeece..7d5e6a9 100644
--- a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
@@ -4,7 +4,7 @@
 
 
 class Python_NPD_Extractor(DFBScanExtractor):
-    def is_global_source(self, global_declarator_node: Tree) -> bool:
+    def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         Determines whether the global variable is initially a source.
         """
diff --git a/src/tstool/dfbscan_extractor/dfbscan_extractor.py b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
index 34e6e44..9ce30be 100644
--- a/src/tstool/dfbscan_extractor/dfbscan_extractor.py
+++ b/src/tstool/dfbscan_extractor/dfbscan_extractor.py
@@ -54,7 +54,7 @@ def extract_all(self) -> Tuple[List[Value], List[Value]]:
         return self.sources, self.sinks
 
     @abstractmethod
-    def is_global_source(self, global_var: Tree) -> bool:
+    def is_global_source(self, global_var: Node) -> bool:
         pass
 
     @abstractmethod

From 06384d4431bc9a8ff7f5c2669e019b22735603d4 Mon Sep 17 00:00:00 2001
From: acezxn <acezxn@gmail.com>
Date: Fri, 21 Nov 2025 16:40:25 -0500
Subject: [PATCH 23/23] Ran black formatter

---
 src/agent/dfbscan.py                          |  2 +-
 .../dfbscan/intra_dataflow_analyzer.py        | 10 +++++----
 src/tstool/analyzer/Cpp_TS_analyzer.py        |  6 ++---
 src/tstool/analyzer/Go_TS_analyzer.py         |  6 ++---
 src/tstool/analyzer/Java_TS_analyzer.py       |  6 ++---
 src/tstool/analyzer/Javascript_TS_analyzer.py |  3 ++-
 src/tstool/analyzer/Python_TS_analyzer.py     |  8 +++----
 src/tstool/analyzer/TS_analyzer.py            | 22 +++++++++----------
 .../Cpp/Cpp_MLK_extractor.py                  |  2 +-
 .../Cpp/Cpp_UAF_extractor.py                  |  2 +-
 .../dfbscan_extractor/Go/Go_NPD_extractor.py  |  2 +-
 .../Java/Java_NPD_extractor.py                |  2 +-
 .../Javascript/Javascript_NPD_extractor.py    | 22 +++++++++----------
 .../Python/Python_NPD_extractor.py            |  2 +-
 14 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/src/agent/dfbscan.py b/src/agent/dfbscan.py
index 0bfc3a1..22757bd 100644
--- a/src/agent/dfbscan.py
+++ b/src/agent/dfbscan.py
@@ -544,7 +544,7 @@ def start_scan_sequential(self) -> None:
                         sink_values,
                         call_statements,
                         ret_values,
-                        non_locals=[]
+                        non_locals=[],
                     )
 
                     # Invoke the intra-procedural data-flow analysis
diff --git a/src/llmtool/dfbscan/intra_dataflow_analyzer.py b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
index 26d7f9d..ee263ad 100644
--- a/src/llmtool/dfbscan/intra_dataflow_analyzer.py
+++ b/src/llmtool/dfbscan/intra_dataflow_analyzer.py
@@ -19,7 +19,7 @@ def __init__(
         sink_values: List[Tuple[str, int]],
         call_statements: List[Tuple[str, int]],
         ret_values: List[Tuple[str, int]],
-        non_locals: List[Tuple[str, int]]
+        non_locals: List[Tuple[str, int]],
     ) -> None:
         self.function = function
         self.summary_start = summary_start
@@ -110,7 +110,7 @@ def _get_prompt(self, input: LLMToolInput) -> str:
         for ret_val in input.ret_values:
             rets_str += f"- {ret_val[0]} at line {ret_val[1]}\n"
         prompt = prompt.replace("<RETURN_VALUES>", rets_str)
-        
+
         if input.non_locals:
             non_local_str = "Non local variables relevant to this function:\n"
             for non_local in input.non_locals:
@@ -118,7 +118,7 @@ def _get_prompt(self, input: LLMToolInput) -> str:
             prompt = prompt.replace("<NONLOCAL_VALUES>", non_local_str)
         else:
             prompt = prompt.replace("<NONLOCAL_VALUES>", "")
-        
+
         return prompt
 
     def _parse_response(
@@ -236,7 +236,9 @@ def _parse_response(
                     )
                 elif detail["type"] == "Nonlocal":
                     reachable_values_per_path.add(
-                        Value(detail["name"], line_number, ValueLabel.NONLOCAL, file_path)
+                        Value(
+                            detail["name"], line_number, ValueLabel.NONLOCAL, file_path
+                        )
                     )
             reachable_values.append(reachable_values_per_path)
 
diff --git a/src/tstool/analyzer/Cpp_TS_analyzer.py b/src/tstool/analyzer/Cpp_TS_analyzer.py
index 5eecdea..796d87a 100644
--- a/src/tstool/analyzer/Cpp_TS_analyzer.py
+++ b/src/tstool/analyzer/Cpp_TS_analyzer.py
@@ -15,7 +15,7 @@ class Cpp_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for C/C++ source files using tree-sitter.
     Implements language-specific parsing and analysis.
     """
-    
+
     def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         """
         Parse source code to extract scope topography.
@@ -23,7 +23,7 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         :param tree: Parsed syntax tree
         """
         pass
-    
+
     def extract_nonlocal_info(self) -> None:
         """
         Traverse the scopes to identify declarations of non locals.
@@ -431,7 +431,7 @@ def get_loop_statements(
                 loop_body_end_line,
             )
         return loop_statements
-    
+
     def get_global_expressions_by_identifier(
         self, identifier: str, program_root: Node
     ) -> List[Node]:
diff --git a/src/tstool/analyzer/Go_TS_analyzer.py b/src/tstool/analyzer/Go_TS_analyzer.py
index 669829a..05b248c 100644
--- a/src/tstool/analyzer/Go_TS_analyzer.py
+++ b/src/tstool/analyzer/Go_TS_analyzer.py
@@ -15,7 +15,7 @@ class Go_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for Go source files using tree-sitter.
     Implements Go-specific parsing and analysis.
     """
-    
+
     def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         """
         Parse source code to extract scope topography.
@@ -23,7 +23,7 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         :param tree: Parsed syntax tree
         """
         pass
-    
+
     def extract_nonlocal_info(self) -> None:
         """
         Traverse the scopes to identify declarations of non locals.
@@ -375,4 +375,4 @@ def get_global_expressions_by_identifier(
         :param program_root: Program root node
         :return: A list of extracted nodes
         """
-        return []
\ No newline at end of file
+        return []
diff --git a/src/tstool/analyzer/Java_TS_analyzer.py b/src/tstool/analyzer/Java_TS_analyzer.py
index ade1795..3a25281 100644
--- a/src/tstool/analyzer/Java_TS_analyzer.py
+++ b/src/tstool/analyzer/Java_TS_analyzer.py
@@ -15,7 +15,7 @@ class Java_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for Java source files using tree-sitter.
     Implements Java-specific parsing and analysis.
     """
-    
+
     def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         """
         Parse source code to extract scope topography.
@@ -23,7 +23,7 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         :param tree: Parsed syntax tree
         """
         pass
-    
+
     def extract_nonlocal_info(self) -> None:
         """
         Traverse the scopes to identify declarations of non locals.
@@ -387,4 +387,4 @@ def get_global_expressions_by_identifier(
         :param program_root: Program root node
         :return: A list of extracted nodes
         """
-        return []
\ No newline at end of file
+        return []
diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py
index 3fb892f..d27c5f9 100644
--- a/src/tstool/analyzer/Javascript_TS_analyzer.py
+++ b/src/tstool/analyzer/Javascript_TS_analyzer.py
@@ -161,7 +161,8 @@ def extract_nonlocal_info(self) -> None:
                         if (
                             candidate_parent is not None
                             and candidate_parent.type == "variable_declarator"
-                            and candidate_parent.child_by_field_name("name") is candidate_node
+                            and candidate_parent.child_by_field_name("name")
+                            is candidate_node
                         ):
                             continue
 
diff --git a/src/tstool/analyzer/Python_TS_analyzer.py b/src/tstool/analyzer/Python_TS_analyzer.py
index 8768a99..0954385 100644
--- a/src/tstool/analyzer/Python_TS_analyzer.py
+++ b/src/tstool/analyzer/Python_TS_analyzer.py
@@ -15,7 +15,7 @@ class Python_TSAnalyzer(TSAnalyzer):
     TSAnalyzer for Python source files using tree-sitter.
     Implements Python-specific parsing and analysis.
     """
-    
+
     def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         """
         Parse source code to extract scope topography
@@ -23,7 +23,7 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         """
         # TODO: Add scope extraction if needed
         pass
-    
+
     def extract_nonlocal_info(self) -> None:
         """
         Traverse the scopes to identify declarations of non locals
@@ -294,7 +294,7 @@ def get_loop_statements(
                 end_line,
             )
         return loops
-    
+
     def get_global_expressions_by_identifier(
         self, identifier: str, program_root: Node
     ) -> List[Node]:
@@ -305,4 +305,4 @@ def get_global_expressions_by_identifier(
         :return: A list of extracted nodes
         """
         # TODO: implement if needed
-        return []
\ No newline at end of file
+        return []
diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py
index 475a28b..42e4f94 100644
--- a/src/tstool/analyzer/TS_analyzer.py
+++ b/src/tstool/analyzer/TS_analyzer.py
@@ -177,16 +177,16 @@ def __init__(
         self.globals_env: Dict[int, Value] = {}
         self.scope_env: Dict[int, Tuple[Node, Set[int]]] = {}
         self.api_env: Dict[int, API] = {}
-        
+
         # Dictionary storing mapping from the root node of the scope to its scope id
         self.scope_root_to_scope_id: Dict[Node, int] = {}
-        
+
         # Dictionary storing mapping from function root node to its scope id
         self.function_root_to_scope_id: Dict[Node, int] = {}
-                
+
         # Dictionary storing mapping from a scope id to all the non locals it is depended on
         self.child_scope_id_to_non_locals: Dict[int, Set[Value]] = {}
-        
+
         # Dictionary storing mapping from a non local value to its child scopes
         self.non_local_to_child_scopes: Dict[Value, Set[int]] = {}
 
@@ -266,9 +266,9 @@ def parse_project(self) -> None:
                 self.fileContentDic[file_path] = source
                 pbar.update(1)
             pbar.close()
-            
+
         self.extract_nonlocal_info()
-        
+
         # Analyzes extracted functions
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.max_symbolic_workers_num
@@ -288,7 +288,7 @@ def parse_project(self) -> None:
                 self.function_env[func_id] = current_function
                 pbar.update(1)
             pbar.close()
-            
+
         # Analyzes extracted global variables
         pbar = tqdm(
             total=len(self.globalsRawDataDic), desc="Analyzing Global Variables"
@@ -343,14 +343,14 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None:
         :param tree: Parsed syntax tree
         """
         pass
-    
+
     @abstractmethod
     def extract_nonlocal_info(self) -> None:
         """
         Traverse the scopes to identify declarations of non locals
         """
         pass
-    
+
     @abstractmethod
     def extract_function_info(
         self, file_path: str, source_code: str, tree: Tree
@@ -730,7 +730,7 @@ def get_loop_statements(
         :return: A dictionary mapping (start_line, end_line) to loop statement info.
         """
         pass
-    
+
     @abstractmethod
     def get_global_expressions_by_identifier(
         self, identifier: str, program_root: Node
@@ -871,7 +871,7 @@ def get_function_global_value_reference(
                     references.setdefault(function, []).append(ref_value)
 
         return references
-        
+
     def get_function_from_localvalue(self, value: Value) -> Optional[Function]:
         """
         Retrieve the function corresponding to a local value.
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
index 96b3d63..f9294c8 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_MLK_extractor.py
@@ -10,7 +10,7 @@ def is_global_source(self, global_declarator_node: Node) -> bool:
         Currently not implemented.
         """
         return False
-    
+
     def extract_sources(self, function: Function) -> List[Value]:
         """
         Extract the sources that can cause the memory leak bugs from C/C++ programs.
diff --git a/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py b/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
index f67bf3c..7ad8ac5 100644
--- a/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
+++ b/src/tstool/dfbscan_extractor/Cpp/Cpp_UAF_extractor.py
@@ -12,7 +12,7 @@ def is_global_source(self, global_declarator_node: Node) -> bool:
         Currently not implemented.
         """
         return False
-    
+
     def extract_sources(self, function: Function) -> List[Value]:
         """
         Extract the sources that can cause the use-after-free bugs from C/C++ programs.
diff --git a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
index bcbf15b..747dcc0 100644
--- a/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Go/Go_NPD_extractor.py
@@ -12,7 +12,7 @@ def is_global_source(self, global_declarator_node: Node) -> bool:
         Currently not implemented.
         """
         return False
-    
+
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py b/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
index 65698b9..e5fe58f 100644
--- a/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Java/Java_NPD_extractor.py
@@ -12,7 +12,7 @@ def is_global_source(self, global_declarator_node: Node) -> bool:
         Currently not implemented.
         """
         return False
-    
+
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
diff --git a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
index 2c4e274..25340a9 100644
--- a/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Javascript/Javascript_NPD_extractor.py
@@ -5,15 +5,15 @@
 
 class Javascript_NPD_Extractor(DFBScanExtractor):
     NULLISH_VALUES = {"null", "undefined"}
-    
+
     def is_expression_delete(self, expr: Node) -> bool:
         if expr.type == "unary_expression":
             operator = expr.child(0)
             if operator and operator.type == "delete":
                 return True
-                
+
         return False
-    
+
     def is_expression_null(self, expr: Node) -> bool:
         if expr.type != "assignment_expression":
             return False
@@ -24,7 +24,7 @@ def is_expression_null(self, expr: Node) -> bool:
         # Nullish constant (e.g. null/undefined)
         if value_type in self.NULLISH_VALUES:
             return True
-        
+
         return False
 
     def is_global_source(self, global_declaration_node: Node) -> bool:
@@ -72,7 +72,6 @@ def is_global_source(self, global_declaration_node: Node) -> bool:
 
         return False
 
-
     def extract_sources(self, function: Function) -> List[Value]:
         """
         Extract the potential null/undefined values as sources from the source code.
@@ -81,18 +80,18 @@ def extract_sources(self, function: Function) -> List[Value]:
         3. delete obj.prop;
         4. func(null);
         """
-        
+
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]
         file_path = function.file_path
-        
+
         nodes = find_nodes_by_type(root_node, "variable_declarator")
         nodes.extend(find_nodes_by_type(root_node, "assignment_expression"))
         nodes.extend(find_nodes_by_type(root_node, "return_statement"))
         nodes.extend(find_nodes_by_type(root_node, "arguments"))
-        
+
         sources = []
-        
+
         # Look for nullish value nodes
         for node in nodes:
             is_seed_node = False
@@ -106,9 +105,8 @@ def extract_sources(self, function: Function) -> List[Value]:
                 name = source_code[node.start_byte : node.end_byte]
                 sources.append(Value(name, line_number, ValueLabel.SRC, file_path))
 
-
         unary_expressions = find_nodes_by_type(root_node, "unary_expression")
-        
+
         # Look for delete expressions
         for unary_expression in unary_expressions:
             operator = unary_expression.child(0)
@@ -127,7 +125,7 @@ def extract_sinks(self, function: Function) -> List[Value]:
         1. null_obj.prop;
         2. null_obj[1];
         3. null_obj();
-        
+
         :param: function: Function object.
         :return: List of sink values
         """
diff --git a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
index 7d5e6a9..1fe805f 100644
--- a/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
+++ b/src/tstool/dfbscan_extractor/Python/Python_NPD_extractor.py
@@ -10,7 +10,7 @@ def is_global_source(self, global_declarator_node: Node) -> bool:
         """
         # TODO: Implement source detection for global variables if needed
         return False
-    
+
     def extract_sources(self, function: Function) -> List[Value]:
         root_node = function.parse_tree_root_node
         source_code = self.ts_analyzer.code_in_files[function.file_path]