diff --git a/ReportEngine/agent.py b/ReportEngine/agent.py index 56b2d8c..6a0ece7 100644 --- a/ReportEngine/agent.py +++ b/ReportEngine/agent.py @@ -35,6 +35,7 @@ class FileCountBaseline: """文件数量基准管理器""" def __init__(self): + """在初始化阶段加载或创建文件数量基准快照""" self.baseline_file = 'logs/report_baseline.json' self.baseline_data = self._load_baseline() diff --git a/ReportEngine/core/chapter_storage.py b/ReportEngine/core/chapter_storage.py index ef6f838..99f6e1f 100644 --- a/ReportEngine/core/chapter_storage.py +++ b/ReportEngine/core/chapter_storage.py @@ -29,6 +29,7 @@ class ChapterRecord: updated_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z") def to_dict(self) -> Dict[str, object]: + """将记录转换为便于写入manifest.json的序列化字典""" return { "chapterId": self.chapter_id, "slug": self.slug, @@ -54,6 +55,12 @@ class ChapterStorage: """ def __init__(self, base_dir: str): + """ + 创建章节存储器。 + + Args: + base_dir: 所有输出run目录的根路径 + """ self.base_dir = Path(base_dir) self.base_dir.mkdir(parents=True, exist_ok=True) self._manifests: Dict[str, Dict[str, object]] = {} @@ -133,6 +140,7 @@ class ChapterStorage: return final_path def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]: + """从指定run目录读取全部chapter.json并按order排序返回""" payloads: List[Dict[str, object]] = [] for child in sorted(run_dir.iterdir()): if not child.is_dir(): @@ -161,6 +169,7 @@ class ChapterStorage: # ======== 内部工具 ======== def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path: + """根据slug/order生成稳定的章节目录,确保各章分隔存盘""" safe_slug = self._safe_slug(slug) folder = f"{order:03d}-{safe_slug}" path = run_dir / folder @@ -168,25 +177,31 @@ class ChapterStorage: return path def _safe_slug(self, slug: str) -> str: + """移除危险字符,避免生成非法文件夹名""" slug = slug.replace(" ", "-").replace("/", "-") return slug or "section" def _raw_stream_path(self, chapter_dir: Path) -> Path: + """返回某章节流式输出对应的raw文件路径""" return chapter_dir / "stream.raw" def _key(self, run_dir: Path) -> str: + """将run目录解析为字典缓存的键,避免重复读取磁盘""" return str(run_dir.resolve()) def _manifest_path(self, run_dir: Path) -> Path: + """获取manifest.json的实际文件路径""" return run_dir / "manifest.json" def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]): + """将内存中的manifest快照全量写回磁盘""" self._manifest_path(run_dir).write_text( json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8", ) def _read_manifest(self, run_dir: Path) -> Dict[str, object]: + """从磁盘读取已有manifest,用于进程重启或多实例协作""" manifest_path = self._manifest_path(run_dir) if manifest_path.exists(): return json.loads(manifest_path.read_text(encoding="utf-8")) diff --git a/ReportEngine/core/stitcher.py b/ReportEngine/core/stitcher.py index 8da1cca..ffa3b6b 100644 --- a/ReportEngine/core/stitcher.py +++ b/ReportEngine/core/stitcher.py @@ -16,6 +16,7 @@ class DocumentComposer: """ def __init__(self): + """初始化装订器并记录已使用的锚点,避免重复""" self._seen_anchors: Set[str] = set() def build_document( diff --git a/ReportEngine/core/template_parser.py b/ReportEngine/core/template_parser.py index 18b6a9c..9525f00 100644 --- a/ReportEngine/core/template_parser.py +++ b/ReportEngine/core/template_parser.py @@ -30,6 +30,7 @@ class TemplateSection: outline: List[str] = field(default_factory=list) def to_dict(self) -> dict: + """将章节实体序列化为字典,方便传给LLM或落盘""" return { "title": self.title, "slug": self.slug, @@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str: def _slugify_text(text: str) -> str: + """对任意文本做降噪与转写,得到URL友好的slug片段""" text = unicodedata.normalize("NFKD", text) text = text.replace("·", "-").replace(" ", "-") text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text) @@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str: def _ensure_unique_slug(slug: str, used: set) -> str: + """若slug重复则自动追加序号,直到在used集合中唯一""" if slug not in used: used.add(slug) return slug diff --git a/ReportEngine/flask_interface.py b/ReportEngine/flask_interface.py index e47b937..a95816f 100644 --- a/ReportEngine/flask_interface.py +++ b/ReportEngine/flask_interface.py @@ -40,6 +40,14 @@ class ReportTask: """报告生成任务""" def __init__(self, query: str, task_id: str, custom_template: str = ""): + """ + 初始化任务对象,记录查询词、自定义模板与运行期元数据。 + + Args: + query: 最终需要生成的报告主题 + task_id: 任务唯一ID,通常由时间戳构造 + custom_template: 可选的自定义Markdown模板 + """ self.task_id = task_id self.query = query self.custom_template = custom_template @@ -470,6 +478,7 @@ def get_templates(): # 错误处理 @report_bp.errorhandler(404) def not_found(error): + """404兜底处理:保证接口统一返回JSON结构""" logger.exception(f"API端点不存在: {str(error)}") return jsonify({ 'success': False, @@ -479,6 +488,7 @@ def not_found(error): @report_bp.errorhandler(500) def internal_error(error): + """500兜底处理:捕获未被主动捕获的异常""" logger.exception(f"服务器内部错误: {str(error)}") return jsonify({ 'success': False, diff --git a/ReportEngine/ir/validator.py b/ReportEngine/ir/validator.py index 60f3d15..4db7bde 100644 --- a/ReportEngine/ir/validator.py +++ b/ReportEngine/ir/validator.py @@ -23,6 +23,7 @@ class IRValidator: """ def __init__(self, schema_version: str = IR_VERSION): + """记录当前Schema版本,便于未来多版本并存""" self.schema_version = schema_version # ======== 对外接口 ======== diff --git a/ReportEngine/llms/base.py b/ReportEngine/llms/base.py index 29723c5..bfabc2e 100644 --- a/ReportEngine/llms/base.py +++ b/ReportEngine/llms/base.py @@ -1,5 +1,5 @@ """ -Unified OpenAI-compatible LLM client for the Report Engine, with retry support. +Report Engine 默认的OpenAI兼容LLM客户端封装,内置重试/流式能力。 """ import os @@ -19,7 +19,9 @@ try: from retry_helper import with_retry, LLM_RETRY_CONFIG except ImportError: def with_retry(config=None): + """简化版with_retry占位,实现与真实装饰器一致的调用签名""" def decorator(func): + """直接返回原函数,确保无retry依赖时代码仍可运行""" return func return decorator @@ -27,9 +29,17 @@ except ImportError: class LLMClient: - """Minimal wrapper around the OpenAI-compatible chat completion API.""" + """针对OpenAI Chat Completion API的轻量封装,统一Report Engine调用入口。""" def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None): + """ + 初始化LLM客户端并保存基础连接信息。 + + Args: + api_key: 用于鉴权的API Token + model_name: 具体模型ID,用于定位供应商能力 + base_url: 自定义兼容接口地址,默认为OpenAI官方 + """ if not api_key: raise ValueError("Report Engine LLM API key is required.") if not model_name: @@ -55,6 +65,17 @@ class LLMClient: @with_retry(LLM_RETRY_CONFIG) def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str: + """ + 以非流式方式调用LLM,并返回一次性完成的完整响应。 + + Args: + system_prompt: 系统角色提示 + user_prompt: 用户高优先级指令 + **kwargs: 允许透传temperature/top_p等采样参数 + + Returns: + 去除首尾空白后的LLM响应文本 + """ messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, @@ -142,11 +163,13 @@ class LLMClient: @staticmethod def validate_response(response: Optional[str]) -> str: + """兜底处理None/空白字符串,防止上层逻辑崩溃""" if response is None: return "" return response.strip() def get_model_info(self) -> Dict[str, Any]: + """以字典形式返回当前客户端的模型/提供方/基础URL信息""" return { "provider": self.provider, "model": self.model_name, diff --git a/ReportEngine/nodes/chapter_generation_node.py b/ReportEngine/nodes/chapter_generation_node.py index 0150d4f..0e12ee7 100644 --- a/ReportEngine/nodes/chapter_generation_node.py +++ b/ReportEngine/nodes/chapter_generation_node.py @@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode): _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=') def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): + """ + 记录LLM客户端/校验器/章节存储器,便于run方法调度。 + + Args: + llm_client: 实际调用大模型的客户端 + validator: IR结构校验器 + storage: 负责章节流式落盘的存储器 + """ super().__init__(llm_client, "ChapterGenerationNode") self.validator = validator self.storage = storage @@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode): """修正常见的结构性错误(例如list.items嵌套过深)""" def walk(blocks: List[Dict[str, Any]] | None): + """递归检查并修复嵌套结构,保证每个block合法""" if not isinstance(blocks, list): return for block in blocks: @@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode): @staticmethod def _as_paragraph_block(text: str) -> Dict[str, Any]: + """将字符串快速包装成paragraph block,方便统一处理""" return { "type": "paragraph", "inlines": [{"text": text or ""}], diff --git a/ReportEngine/nodes/document_layout_node.py b/ReportEngine/nodes/document_layout_node.py index 370d459..a23409a 100644 --- a/ReportEngine/nodes/document_layout_node.py +++ b/ReportEngine/nodes/document_layout_node.py @@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode): """负责生成全局标题、目录与Hero设计""" def __init__(self, llm_client): + """记录LLM客户端并设置节点名字,供BaseNode日志使用""" super().__init__(llm_client, "DocumentLayoutNode") def run( diff --git a/ReportEngine/nodes/word_budget_node.py b/ReportEngine/nodes/word_budget_node.py index ea75677..7876160 100644 --- a/ReportEngine/nodes/word_budget_node.py +++ b/ReportEngine/nodes/word_budget_node.py @@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode): """规划各章节字数与重点""" def __init__(self, llm_client): + """仅记录LLM客户端引用,方便run阶段发起请求""" super().__init__(llm_client, "WordBudgetNode") def run( diff --git a/ReportEngine/renderers/html_renderer.py b/ReportEngine/renderers/html_renderer.py index 0c07690..b7a73ae 100644 --- a/ReportEngine/renderers/html_renderer.py +++ b/ReportEngine/renderers/html_renderer.py @@ -13,6 +13,7 @@ class HTMLRenderer: """Document IR → HTML 渲染器""" def __init__(self, config: Dict[str, Any] | None = None): + """初始化渲染器缓存并允许注入额外配置(如主题覆盖)""" self.config = config or {} self.document: Dict[str, Any] = {} self.widget_scripts: List[str] = [] diff --git a/ReportEngine/utils/config.py b/ReportEngine/utils/config.py index 0c277b6..06e38b2 100644 --- a/ReportEngine/utils/config.py +++ b/ReportEngine/utils/config.py @@ -1,5 +1,5 @@ """ -Configuration management module for the Report Engine. +Report Engine 配置模块,统一读取环境变量并提供类型安全的访问方式。 """ import os @@ -34,6 +34,7 @@ class Settings(BaseSettings): CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/") class Config: + """Pydantic配置:允许从.env读取并兼容大小写""" env_file = ".env" env_prefix = "" case_sensitive = False @@ -43,6 +44,7 @@ settings = Settings() def print_config(config: Settings): + """将当前配置项按人类可读格式输出到日志,方便排障""" message = "" message += "\n=== Report Engine 配置 ===\n" message += f"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}\n"