29.4 代码理解模块
约 1179 字大约 4 分钟
29.4.1 代码理解概述
代码理解模块是编程 Agent 的另一个核心能力,它能够分析、解释和理解现有代码的功能、结构和设计。代码理解涉及代码解析、语义分析、依赖分析等多个环节。
代码理解流程
输入代码 → 代码解析 → 结构分析 → 语义分析 → 依赖分析 → 功能推断 → 生成解释
29.4.2 代码解析
代码解析器
python
class CodeParser:
    """Dispatch source code to a language-specific parser.

    Parsers are registered in ``self.parsers`` under lower-case language
    names; ``parse`` lower-cases the requested language before the lookup,
    so ``"Python"`` and ``"python"`` select the same parser.
    """

    def __init__(self):
        # Registry of language name -> parser instance (keys are lower-case).
        self.parsers = {
            'python': PythonParser(),
            'javascript': JavaScriptParser(),
            'java': JavaParser(),
            'cpp': CppParser()
        }

    def parse(self, code: str, language: str) -> ParsedCode:
        """Parse ``code`` with the parser registered for ``language``.

        Args:
            code: Source text handed unchanged to the selected parser.
            language: Language name; matched case-insensitively.

        Returns:
            Whatever the selected parser's ``parse(code)`` returns.

        Raises:
            ValueError: If no parser is registered for ``language``.
        """
        parser = self.parsers.get(language.lower())
        # Compare against None explicitly: a registered parser must not be
        # rejected merely because the object happens to evaluate as falsy.
        if parser is None:
            raise ValueError(f"Unsupported language: {language}")
        return parser.parse(code)
class PythonParser:
    """Parse Python source with the ``ast`` module and summarise it.

    ``parse`` builds a ``ParsedCode`` object and fills in its ``classes``,
    ``functions``, ``imports`` and ``global_variables`` attributes from the
    syntax tree.
    """

    # ``def`` and ``async def`` share the same fields, so both are reported.
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    # Literal container nodes mapped to the type name reported for them.
    _CONTAINER_TYPES = (
        (ast.List, "list"),
        (ast.Dict, "dict"),
        (ast.Tuple, "tuple"),
        (ast.Set, "set"),
    )

    def parse(self, code: str) -> ParsedCode:
        """Parse Python source text.

        Args:
            code: Python source code.

        Returns:
            A ``ParsedCode`` holding the original text, the AST and the
            extracted classes, functions, imports and global variables.

        Raises:
            ValueError: If ``code`` is not syntactically valid Python. The
                original ``SyntaxError`` is attached as ``__cause__``.
        """
        # Only ast.parse can raise SyntaxError, so keep the try body minimal.
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            raise ValueError(f"Invalid Python code: {e}") from e

        parsed_code = ParsedCode(
            language='python',
            original_code=code,
            ast=tree
        )
        parsed_code.classes = self._extract_classes(tree)
        parsed_code.functions = self._extract_functions(tree)
        parsed_code.imports = self._extract_imports(tree)
        parsed_code.global_variables = self._extract_global_variables(tree)
        return parsed_code

    def _extract_classes(self, tree: ast.AST) -> List[ClassInfo]:
        """Collect a ``ClassInfo`` for every class definition in ``tree``.

        Nested classes are included because the whole tree is walked.
        Only methods and simple assignments directly in the class body
        are recorded.
        """
        classes = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue

            # _extract_attribute returns None for targets that are not a
            # plain name (e.g. tuple unpacking); leave those out instead of
            # storing None entries.
            attributes = []
            for stmt in node.body:
                if isinstance(stmt, ast.Assign):
                    attribute = self._extract_attribute(stmt)
                    if attribute is not None:
                        attributes.append(attribute)

            classes.append(ClassInfo(
                name=node.name,
                bases=[self._get_name(base) for base in node.bases],
                methods=[self._extract_method(m) for m in node.body
                         if isinstance(m, self._FUNCTION_NODES)],
                attributes=attributes,
                docstring=ast.get_docstring(node)
            ))
        return classes

    def _standalone_functions(self, tree: ast.AST) -> list:
        """Return function nodes in ``tree`` that are not class methods.

        A function counts as a method when it sits directly in a class
        body. Functions nested inside other functions are still returned.
        """
        # One pass to remember every method node, then one pass to filter,
        # instead of rescanning the whole tree for each function.
        method_ids = {
            id(child)
            for owner in ast.walk(tree)
            if isinstance(owner, ast.ClassDef)
            for child in owner.body
            if isinstance(child, self._FUNCTION_NODES)
        }
        return [
            node for node in ast.walk(tree)
            if isinstance(node, self._FUNCTION_NODES)
            and id(node) not in method_ids
        ]

    def _extract_functions(self, tree: ast.AST) -> List[FunctionInfo]:
        """Collect a ``FunctionInfo`` for every function that is not a method.

        Only plain positional parameters (``node.args.args``) are listed in
        ``arguments``.
        """
        return [
            FunctionInfo(
                name=node.name,
                arguments=[arg.arg for arg in node.args.args],
                return_type=self._get_return_type(node),
                docstring=ast.get_docstring(node),
                decorators=[self._get_name(d) for d in node.decorator_list]
            )
            for node in self._standalone_functions(tree)
        ]

    def _import_from_module(self, node: ast.ImportFrom) -> str:
        """Return the module of a ``from ... import`` with its leading dots.

        ``node.module`` is None for ``from . import x`` and never carries
        the relative level, so the dots are rebuilt from ``node.level``.
        """
        return "." * (node.level or 0) + (node.module or "")

    def _extract_imports(self, tree: ast.AST) -> List[ImportInfo]:
        """Collect an ``ImportInfo`` per imported name anywhere in ``tree``."""
        imports = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imports.append(ImportInfo(
                        module=alias.name,
                        alias=alias.asname,
                        type='import'
                    ))
            elif isinstance(node, ast.ImportFrom):
                module = self._import_from_module(node)
                for alias in node.names:
                    imports.append(ImportInfo(
                        module=module,
                        name=alias.name,
                        alias=alias.asname,
                        type='from'
                    ))
        return imports

    def _module_assignments(self, tree: ast.AST) -> list:
        """Return the ``ast.Assign`` statements directly in the module body."""
        if not isinstance(tree, ast.Module):
            return []
        return [stmt for stmt in tree.body if isinstance(stmt, ast.Assign)]

    def _extract_global_variables(self, tree: ast.AST) -> List[VariableInfo]:
        """Collect a ``VariableInfo`` for each module-level name assignment.

        Only assignments that are direct children of the module are
        considered, and only targets that are plain names (``a = b = 1``
        yields both ``a`` and ``b``; tuple unpacking is skipped).
        """
        variables = []
        for node in self._module_assignments(tree):
            for target in node.targets:
                if isinstance(target, ast.Name):
                    variables.append(VariableInfo(
                        name=target.id,
                        type=self._infer_type(node.value),
                        value=self._get_value(node.value)
                    ))
        return variables

    def _extract_method(self, node: ast.FunctionDef) -> MethodInfo:
        """Build a ``MethodInfo`` for a function node found in a class body.

        ``is_static`` / ``is_classmethod`` are set only when the decorator
        is the bare name ``staticmethod`` / ``classmethod``.
        """
        decorator_names = {d.id for d in node.decorator_list
                           if isinstance(d, ast.Name)}
        return MethodInfo(
            name=node.name,
            arguments=[arg.arg for arg in node.args.args],
            return_type=self._get_return_type(node),
            docstring=ast.get_docstring(node),
            is_static='staticmethod' in decorator_names,
            is_classmethod='classmethod' in decorator_names
        )

    def _extract_attribute(self, node: ast.Assign) -> AttributeInfo:
        """Build an ``AttributeInfo`` from the first target of an assignment.

        Returns None when the first target is not a plain name.
        """
        target = node.targets[0]
        if not isinstance(target, ast.Name):
            return None
        return AttributeInfo(
            name=target.id,
            type=self._infer_type(node.value),
            value=self._get_value(node.value)
        )

    def _get_name(self, node: ast.AST) -> str:
        """Return a readable name for an expression node.

        Names and dotted attributes are rendered directly. Any other
        expression (subscripts such as ``List[int]``, calls, constants) is
        rendered as source text via ``ast.unparse`` when available.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return f"{self._get_name(node.value)}.{node.attr}"
        # str(node) only gives "<ast.X object at 0x...>"; prefer real source
        # text. ast.unparse exists from Python 3.9, so look it up defensively.
        unparse = getattr(ast, "unparse", None)
        if unparse is not None:
            return unparse(node)
        return str(node)

    def _get_return_type(self, node: ast.FunctionDef) -> str:
        """Return the return annotation as text, or ``"None"`` if absent."""
        if node.returns is None:
            return "None"
        return self._get_name(node.returns)

    def _infer_type(self, node: ast.AST) -> str:
        """Guess a type name from the shape of a value expression.

        Constants report the type of their value, container literals their
        container type, calls the name of the callee; anything else is
        ``"Any"``.
        """
        if isinstance(node, ast.Constant):
            return type(node.value).__name__
        for node_type, type_name in self._CONTAINER_TYPES:
            if isinstance(node, node_type):
                return type_name
        if isinstance(node, ast.Call):
            return self._get_name(node.func)
        return "Any"

    def _get_value(self, node: ast.AST) -> Any:
        """Return the literal value of a constant node, otherwise None."""
        if isinstance(node, ast.Constant):
            return node.value
        return None
```
## 29.4.3 结构分析
### 结构分析器
class StructureAnalyzer:
    """Derive structural facts from a ``ParsedCode`` object.

    ``analyze`` fills a ``StructureAnalysis`` with the class hierarchy, a
    call graph, a dependency list and complexity metrics.
    """

    def analyze(self, parsed_code: ParsedCode) -> StructureAnalysis:
        """Run every structural analysis and return the combined result.

        Args:
            parsed_code: Parsed source; its ``classes``, ``functions``,
                ``imports``, ``language``, ``ast`` and ``original_code``
                attributes are read by the individual analyses.

        Returns:
            A ``StructureAnalysis`` with ``class_hierarchy``,
            ``call_graph``, ``dependencies`` and ``complexity`` set.
        """
        analysis = StructureAnalysis()
        analysis.class_hierarchy = self._analyze_class_hierarchy(
            parsed_code.classes
        )
        analysis.call_graph = self._analyze_call_graph(parsed_code)
        analysis.dependencies = self._analyze_dependencies(parsed_code)
        analysis.complexity = self._analyze_complexity(parsed_code)
        return analysis

    def _analyze_class_hierarchy(self,
                                 classes: List[ClassInfo]) -> Dict[str, List[str]]:
        """Map each class name to its list of base-class names."""
        return {cls.name: cls.bases for cls in classes}

    def _analyze_call_graph(self,
                            parsed_code: ParsedCode) -> Dict[str, List[str]]:
        """Map every function and method to the calls extracted for it.

        Functions are keyed by name and methods by ``"Class.method"``.
        """
        call_graph = {}
        for func in parsed_code.functions:
            call_graph[func.name] = self._extract_function_calls(
                func, parsed_code
            )
        for cls in parsed_code.classes:
            for method in cls.methods:
                call_graph[f"{cls.name}.{method.name}"] = (
                    self._extract_method_calls(method, cls, parsed_code)
                )
        return call_graph

    def _extract_function_calls(self, func: FunctionInfo,
                                parsed_code: ParsedCode) -> List[str]:
        """Return the calls made by ``func``.

        Placeholder: a real implementation needs a deeper AST analysis of
        the function body. Currently always returns an empty list.
        """
        calls = []
        return calls

    def _extract_method_calls(self, method: MethodInfo,
                              cls: ClassInfo,
                              parsed_code: ParsedCode) -> List[str]:
        """Return the calls made by ``method`` of ``cls``.

        Placeholder: a real implementation needs a deeper AST analysis of
        the method body. Currently always returns an empty list.
        """
        calls = []
        return calls

    def _analyze_dependencies(self,
                              parsed_code: ParsedCode) -> List[Dependency]:
        """List import dependencies followed by inheritance dependencies."""
        dependencies = []

        # One 'external' dependency per import; the source is recorded as
        # the language of the parsed code.
        for imp in parsed_code.imports:
            dependencies.append(Dependency(
                type='import',
                source=parsed_code.language,
                target=imp.module,
                strength='external'
            ))

        # One 'strong' dependency from each class to each of its bases.
        for cls in parsed_code.classes:
            for base in cls.bases:
                dependencies.append(Dependency(
                    type='inheritance',
                    source=cls.name,
                    target=base,
                    strength='strong'
                ))
        return dependencies

    def _analyze_complexity(self,
                            parsed_code: ParsedCode) -> ComplexityMetrics:
        """Compute cyclomatic, cognitive and maintainability metrics."""
        metrics = ComplexityMetrics()
        metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(
            parsed_code
        )
        metrics.cognitive_complexity = self._calculate_cognitive_complexity(
            parsed_code
        )
        metrics.maintainability_index = self._calculate_maintainability_index(
            parsed_code
        )
        return metrics

    def _calculate_cyclomatic_complexity(self,
                                         parsed_code: ParsedCode) -> float:
        """Count decision points over the whole AST of ``parsed_code``.

        Starts at 1 and adds one per ``if``/``while``/``for``/``except``
        node and one per extra operand of a boolean operator.
        """
        complexity = 1  # base complexity of straight-line code
        for node in ast.walk(parsed_code.ast):
            if isinstance(node, (ast.If, ast.While, ast.For,
                                 ast.ExceptHandler)):
                complexity += 1
            elif isinstance(node, ast.BoolOp):
                # "a and b and c" has three values but two decision points.
                complexity += len(node.values) - 1
        return complexity

    def _calculate_cognitive_complexity(self,
                                        parsed_code: ParsedCode) -> float:
        """Approximate cognitive complexity as 1.5x cyclomatic complexity.

        Simplified implementation, not a real cognitive-complexity count.
        """
        return self._calculate_cyclomatic_complexity(parsed_code) * 1.5

    def _calculate_maintainability_index(self,
                                         parsed_code: ParsedCode) -> float:
        """Compute a simplified maintainability index clamped to 0..100.

        Uses ``171 - 5.2 * ln(V) - 0.23 * G - 16.2 * ln(L)`` with
        V = cyclomatic complexity, G = line count and L = line count.
        """
        # NOTE(review): the commonly published formula uses Halstead volume
        # for V and cyclomatic complexity for G; this simplified version
        # substitutes the values above - confirm that is intended.
        loc = len(parsed_code.original_code.split('\n'))
        complexity = self._calculate_cyclomatic_complexity(parsed_code)
        mi = (171 - 5.2 * math.log(complexity) - 0.23 * loc
              - 16.2 * math.log(loc))
        return max(0, min(100, mi))