diff --git a/lldb/include/lldb/API/SBInstruction.h b/lldb/include/lldb/API/SBInstruction.h index 755e3b4a47c9b..c2111cd3d46af 100644 --- a/lldb/include/lldb/API/SBInstruction.h +++ b/lldb/include/lldb/API/SBInstruction.h @@ -11,6 +11,7 @@ #include "lldb/API/SBData.h" #include "lldb/API/SBDefines.h" +#include "lldb/API/SBStructuredData.h" #include @@ -73,6 +74,23 @@ class LLDB_API SBInstruction { bool TestEmulation(lldb::SBStream &output_stream, const char *test_file); + /// Get variable annotations for this instruction as structured data. + /// Returns an array of dictionaries, each containing: + /// - "variable_name": string name of the variable + /// - "location_description": string description of where variable is stored + /// ("RDI", "R15", "undef", etc.) + /// - "is_live": boolean indicates if variable is live at this instruction + /// - "start_address": unsigned integer address where this annotation becomes + /// valid + /// - "end_address": unsigned integer address where this annotation becomes + /// invalid + /// - "register_kind": unsigned integer indicating the register numbering + /// scheme + /// - "decl_file": string path to the file where variable is declared + /// - "decl_line": unsigned integer line number where variable is declared + /// - "type_name": string type name of the variable + lldb::SBStructuredData GetVariableAnnotations(); + protected: friend class SBInstructionList; diff --git a/lldb/include/lldb/API/SBStructuredData.h b/lldb/include/lldb/API/SBStructuredData.h index dfd8ec0e180ce..75fb16b795a5a 100644 --- a/lldb/include/lldb/API/SBStructuredData.h +++ b/lldb/include/lldb/API/SBStructuredData.h @@ -153,6 +153,7 @@ class SBStructuredData { friend class SBBreakpointLocation; friend class SBBreakpointName; friend class SBTrace; + friend class SBInstruction; friend class lldb_private::python::SWIGBridge; friend class lldb_private::lua::SWIGBridge; friend class SBCommandInterpreter; diff --git a/lldb/include/lldb/Core/Disassembler.h 
b/lldb/include/lldb/Core/Disassembler.h index db186dd33d774..a37c5edd4e356 100644 --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -566,24 +566,41 @@ class Disassembler : public std::enable_shared_from_this, const Disassembler &operator=(const Disassembler &) = delete; }; +/// Structured data for a single variable annotation. +struct VariableAnnotation { + std::string variable_name; + /// Location description (e.g., "r15", "undef", "const_0"). + std::string location_description; + /// Whether variable is live at this instruction. + bool is_live; + /// Register numbering scheme for location interpretation. + lldb::RegisterKind register_kind; + /// Where this annotation is valid. + std::optional address_range; + /// Source file where variable was declared. + std::optional decl_file; + /// Line number where variable was declared. + std::optional decl_line; + /// Variable's type name. + std::optional type_name; +}; + /// Tracks live variable annotations across instructions and produces /// per-instruction "events" like `name = RDI` or `name = `. class VariableAnnotator { - struct VarState { - /// Display name. - std::string name; - /// Last printed location (empty means ). - std::string last_loc; - }; - // Live state from the previous instruction, keyed by Variable::GetID(). - llvm::DenseMap Live_; + llvm::DenseMap m_live_vars; + + static constexpr const char *kUndefLocation = "undef"; + static const std::string kUndefLocationFormatted; public: /// Compute annotation strings for a single instruction and update `Live_`. /// Returns only the events that should be printed *at this instruction*. - std::vector annotate(Instruction &inst, Target &target, - const lldb::ModuleSP &module_sp); + std::vector Annotate(Instruction &inst); + + /// Returns structured data for all variables relevant at this instruction. 
+ std::vector AnnotateStructured(Instruction &inst); }; } // namespace lldb_private diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index 6755089af39a4..ef0db7a37abb8 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -10,10 +10,11 @@ #include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBAddress.h" -#include "lldb/API/SBFrame.h" #include "lldb/API/SBFile.h" +#include "lldb/API/SBFrame.h" #include "lldb/API/SBStream.h" +#include "lldb/API/SBStructuredData.h" #include "lldb/API/SBTarget.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/EmulateInstruction.h" @@ -26,6 +27,7 @@ #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/StructuredData.h" #include @@ -163,7 +165,8 @@ const char *SBInstruction::GetComment(SBTarget target) { return ConstString(inst_sp->GetComment(&exe_ctx)).GetCString(); } -lldb::InstructionControlFlowKind SBInstruction::GetControlFlowKind(lldb::SBTarget target) { +lldb::InstructionControlFlowKind +SBInstruction::GetControlFlowKind(lldb::SBTarget target) { LLDB_INSTRUMENT_VA(this, target); lldb::InstructionSP inst_sp(GetOpaque()); @@ -347,3 +350,62 @@ bool SBInstruction::TestEmulation(lldb::SBStream &output_stream, return inst_sp->TestEmulation(output_stream.ref(), test_file); return false; } + +lldb::SBStructuredData SBInstruction::GetVariableAnnotations() { + LLDB_INSTRUMENT_VA(this); + + SBStructuredData result; + + if (!m_opaque_sp || !m_opaque_sp->IsValid()) + return result; + + lldb::InstructionSP inst_sp = m_opaque_sp->GetSP(); + if (!inst_sp) + return result; + + const Address &addr = inst_sp->GetAddress(); + ModuleSP module_sp = addr.GetModule(); + + if (!module_sp) + return result; + + VariableAnnotator annotator; + std::vector annotations = + annotator.AnnotateStructured(*inst_sp); + + auto array_sp = std::make_shared(); + + for (const auto &ann : annotations) 
{ + auto dict_sp = std::make_shared(); + + dict_sp->AddStringItem("variable_name", ann.variable_name); + dict_sp->AddStringItem("location_description", ann.location_description); + dict_sp->AddBooleanItem("is_live", ann.is_live); + if (ann.address_range.has_value()) { + const auto &range = *ann.address_range; + dict_sp->AddItem("start_address", + std::make_shared( + range.GetBaseAddress().GetFileAddress())); + dict_sp->AddItem( + "end_address", + std::make_shared( + range.GetBaseAddress().GetFileAddress() + range.GetByteSize())); + } + dict_sp->AddItem( + "register_kind", + std::make_shared(ann.register_kind)); + if (ann.decl_file.has_value()) + dict_sp->AddStringItem("decl_file", *ann.decl_file); + if (ann.decl_line.has_value()) + dict_sp->AddItem( + "decl_line", + std::make_shared(*ann.decl_line)); + if (ann.type_name.has_value()) + dict_sp->AddStringItem("type_name", *ann.type_name); + + array_sp->AddItem(dict_sp); + } + + result.m_impl_up->SetObjectSP(array_sp); + return result; +} diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index f2ed1f7395346..9b3d94dfa0982 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -286,6 +286,9 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine( return false; } +const std::string VariableAnnotator::kUndefLocationFormatted = + llvm::formatv("<{0}>", kUndefLocation).str(); + // For each instruction, this block attempts to resolve in-scope variables // and determine if the current PC falls within their // DWARF location entry. If so, it prints a simplified annotation using the @@ -299,17 +302,45 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine( // The goal is to give users helpful live variable hints alongside the // disassembled instruction stream, similar to how debug information // enhances source-level debugging. 
-std::vector -VariableAnnotator::annotate(Instruction &inst, Target &target, - const lldb::ModuleSP &module_sp) { +std::vector VariableAnnotator::Annotate(Instruction &inst) { + auto structured_annotations = AnnotateStructured(inst); + std::vector events; + events.reserve(structured_annotations.size()); + + for (const auto &annotation : structured_annotations) { + std::string display_string; + llvm::raw_string_ostream os(display_string); + + os << annotation.variable_name; + os << " = "; + os << (annotation.location_description == VariableAnnotator::kUndefLocation + ? VariableAnnotator::kUndefLocationFormatted + : annotation.location_description); + os.flush(); + + events.push_back(std::move(display_string)); + } + + return events; +} + +std::vector +VariableAnnotator::AnnotateStructured(Instruction &inst) { + std::vector annotations; - // If we lost module context, everything becomes . + ModuleSP module_sp = inst.GetAddress().GetModule(); + + // If we lost module context, mark all live variables as undefined. if (!module_sp) { - for (const auto &KV : Live_) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); - Live_.clear(); - return events; + for (const auto &KV : m_live_vars) { + auto annotation_entity = KV.second; + annotation_entity.is_live = false; + annotation_entity.location_description = kUndefLocation; + annotations.push_back(annotation_entity); + } + m_live_vars.clear(); + return annotations; } // Resolve function/block at this *file* address. @@ -319,10 +350,14 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, if (!module_sp->ResolveSymbolContextForAddress(iaddr, mask, sc) || !sc.function) { // No function context: everything dies here. 
- for (const auto &KV : Live_) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); - Live_.clear(); - return events; + for (const auto &KV : m_live_vars) { + auto annotation_entity = KV.second; + annotation_entity.is_live = false; + annotation_entity.location_description = kUndefLocation; + annotations.push_back(annotation_entity); + } + m_live_vars.clear(); + return annotations; } // Collect in-scope variables for this instruction into Current. @@ -341,7 +376,7 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, const lldb::addr_t func_file = sc.function->GetAddress().GetFileAddress(); // ABI from Target (pretty reg names if plugin exists). Safe to be null. - lldb::ABISP abi_sp = ABI::FindPlugin(nullptr, target.GetArchitecture()); + lldb::ABISP abi_sp = ABI::FindPlugin(nullptr, module_sp->GetArchitecture()); ABI *abi = abi_sp.get(); llvm::DIDumpOptions opts; @@ -349,7 +384,7 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, // Prefer "register-only" output when we have an ABI. 
opts.PrintRegisterOnly = static_cast(abi_sp); - llvm::DenseMap Current; + llvm::DenseMap current_vars; for (size_t i = 0, e = var_list.GetSize(); i != e; ++i) { lldb::VariableSP v = var_list.GetVariableAtIndex(i); @@ -376,35 +411,60 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, if (loc.empty()) continue; - Current.try_emplace(v->GetID(), - VarState{std::string(name), std::string(loc)}); + std::optional decl_file; + std::optional decl_line; + std::optional type_name; + + const Declaration &decl = v->GetDeclaration(); + if (decl.GetFile()) { + decl_file = decl.GetFile().GetFilename().AsCString(); + if (decl.GetLine() > 0) + decl_line = decl.GetLine(); + } + + if (Type *type = v->GetType()) + if (const char *type_str = type->GetName().AsCString()) + type_name = type_str; + + current_vars.try_emplace( + v->GetID(), + VariableAnnotation{std::string(name), std::string(loc), true, + entry.expr->GetRegisterKind(), entry.file_range, + decl_file, decl_line, type_name}); } - // Diff Live_ → Current. + // Diff m_live_vars → current_vars. - // 1) Starts/changes: iterate Current and compare with Live_. - for (const auto &KV : Current) { - auto it = Live_.find(KV.first); - if (it == Live_.end()) { + // 1) Starts/changes: iterate current_vars and compare with m_live_vars. + for (const auto &KV : current_vars) { + auto it = m_live_vars.find(KV.first); + if (it == m_live_vars.end()) { // Newly live. - events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); - } else if (it->second.last_loc != KV.second.last_loc) { + auto annotation_entity = KV.second; + annotation_entity.is_live = true; + annotations.push_back(annotation_entity); + } else if (it->second.location_description != + KV.second.location_description) { // Location changed. 
- events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); + auto annotation_entity = KV.second; + annotation_entity.is_live = true; + annotations.push_back(annotation_entity); } } - // 2) Ends: anything that was live but is not in Current becomes . - for (const auto &KV : Live_) { - if (!Current.count(KV.first)) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); - } + // 2) Ends: anything that was live but is not in current_vars becomes + // . + for (const auto &KV : m_live_vars) + if (!current_vars.count(KV.first)) { + auto annotation_entity = KV.second; + annotation_entity.is_live = false; + annotation_entity.location_description = kUndefLocation; + annotations.push_back(annotation_entity); + } // Commit new state. - Live_ = std::move(Current); - return events; + m_live_vars = std::move(current_vars); + return annotations; } void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, @@ -676,7 +736,7 @@ void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, address_text_size); if ((options & eOptionVariableAnnotations) && target_sp) { - auto annotations = annot.annotate(*inst, *target_sp, module_sp); + auto annotations = annot.Annotate(*inst); if (!annotations.empty()) { const size_t annotation_column = 100; inst_line.FillLastLineToColumn(annotation_column, ' '); diff --git a/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py b/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py index f107efbddddeb..ea2f099b0d057 100644 --- a/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py +++ b/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py @@ -116,3 +116,84 @@ def test_seed_reg_const_undef(self): print(out) self.assertRegex(out, r"\b(i|argc)\s*=\s*(DW_OP_reg\d+\b|R[A-Z0-9]+)") self.assertNotIn("", out) + + 
+    @no_debug_info_test
+    @skipIf(archs=no_match(["x86_64"]))
+    def test_structured_annotations_api(self):
+        """Test GetVariableAnnotations() API returns structured data.
+
+        Reads instructions from the start of `main` and checks that every
+        instruction yields a non-empty array of dictionaries, that each
+        dictionary carries the required keys, and that only the expected
+        variable names are reported.
+        """
+        obj = self._build_obj("d_original_example.o")
+        target = self._create_target(obj)
+
+        # Locate `main` so that reading starts at a known function entry.
+        main_symbols = target.FindSymbols("main")
+        self.assertTrue(main_symbols.IsValid(), "Could not find 'main' symbol")
+        self.assertGreater(main_symbols.GetSize(), 0, "Could not find 'main' symbol")
+
+        main_symbol = main_symbols.GetContextAtIndex(0).GetSymbol()
+        start_addr = main_symbol.GetStartAddress()
+        self.assertTrue(start_addr.IsValid(), "Invalid start address for main")
+
+        # Read a fixed-size window of instructions from the start of main.
+        instructions = target.ReadInstructions(start_addr, 16)
+        self.assertGreater(instructions.GetSize(), 0, "No instructions read")
+
+        if self.TraceOn():
+            print(f"\nTesting GetVariableAnnotations() API on {instructions.GetSize()} instructions")
+
+        # The only variable names annotations are allowed to report.
+        expected_vars = ["argc", "argv", "i"]
+        # Names seen across all instructions (printed when tracing).
+        found_variables = set()
+        # Every annotation dictionary must carry these keys.
+        required_keys = (
+            "variable_name",
+            "location_description",
+            "is_live",
+            "start_address",
+            "end_address",
+            "register_kind",
+        )
+
+        # Validate the annotations reported for each instruction.
+        for i in range(instructions.GetSize()):
+            inst = instructions.GetInstructionAtIndex(i)
+            self.assertTrue(inst.IsValid(), f"Invalid instruction at index {i}")
+
+            annotations = inst.GetVariableAnnotations()
+
+            self.assertIsInstance(annotations, lldb.SBStructuredData,
+                                  "GetVariableAnnotations should return SBStructuredData")
+
+            # Non-emptiness is asserted here, so the loop below needs no
+            # additional size guard.
+            self.assertGreater(annotations.GetSize(), 0,
+                               "GetVariableAnnotations should return non empty array")
+
+            for j in range(annotations.GetSize()):
+                ann = annotations.GetItemAtIndex(j)
+                self.assertTrue(ann.IsValid(), f"Invalid annotation at index {j}")
+
+                self.assertEqual(ann.GetType(), lldb.eStructuredDataTypeDictionary,
+                                 "Each annotation should be a dictionary")
+
+                # Presence check only; values other than the name are not inspected.
+                for key in required_keys:
+                    self.assertTrue(ann.GetValueForKey(key).IsValid(),
+                                    f"Missing '{key}' field")
+
+                var_name = ann.GetValueForKey("variable_name").GetStringValue(1024)
+
+                # Only variables expected in this function may be reported.
+                self.assertIn(var_name, expected_vars,
+                              f"Unexpected variable name: {var_name}")
+
+                found_variables.add(var_name)
+
+        if self.TraceOn():
+            print(f"\nTest complete. Found variables: {found_variables}")