"""初始化默认用户和预置知识库。 解析 rag/jrxml_source/ 下的全部 JRXML 模板 + 接口文档, 创建默认用户 "默认用户" 和知识库 "F6-汽车维修打印默认知识库", 执行完整的 parse -> chunk -> embed 管线。 用法: python scripts/init_default_kb.py python scripts/init_default_kb.py --force # 强制重建 """ import os import sys import argparse from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(PROJECT_ROOT)) from dotenv import load_dotenv load_dotenv() from backend.logger import get_logger _log = get_logger("init_kb") DEFAULT_USER_NAME = "默认用户" DEFAULT_KB_NAME = "F6-汽车维修打印默认知识库" DEFAULT_KB_DESC = "预置的汽车维修打印单模板与接口文档知识库" def find_source_files(source_dir: Path) -> list[str]: """收集源目录下的所有 .jrxml 和 .md 文件。""" files = [] if not source_dir.exists(): _log.warning("源目录不存在: %s", source_dir) return files for pattern in ("*.jrxml", "*.md", "**/*.jrxml", "**/*.md"): for fp in source_dir.glob(pattern): if fp.is_file(): files.append(str(fp)) return list(dict.fromkeys(files)) def init_default_kb(force: bool = False) -> dict: """初始化默认 KB,返回结果摘要。""" from backend.kb_manager import create_user, list_users, create_kb, list_kbs from backend.kb_parser import build_kb_from_files # 1. 查找或创建默认用户 users = list_users() default_user = None for u in users: if u.get("name") == DEFAULT_USER_NAME: default_user = u break if default_user is None: default_user = create_user(DEFAULT_USER_NAME) _log.info("创建默认用户: %s", default_user["user_id"]) else: _log.info("默认用户已存在: %s", default_user["user_id"]) user_id = default_user["user_id"] # 2. 查找或创建默认 KB user_kbs = list_kbs(user_id) default_kb = None for kb in user_kbs: if kb.get("name") == DEFAULT_KB_NAME: default_kb = kb break if default_kb is not None and force: from backend.kb_manager import delete_kb delete_kb(default_kb["kb_id"]) default_kb = None _log.info("强制重建: 已删除旧 KB") if default_kb is None: default_kb = create_kb(user_id, DEFAULT_KB_NAME, DEFAULT_KB_DESC) _log.info("创建默认知识库: %s", default_kb["kb_id"]) elif default_kb.get("parse_status") == "ready": _log.info("默认知识库已就绪: chunks=%s", default_kb.get("chunk_count", 0)) return {"status": "already_ready", "kb_id": default_kb["kb_id"], "user_id": user_id, "chunk_count": default_kb.get("chunk_count", 0)} kb_id = default_kb["kb_id"] # 3. 收集源文件 source_dir = PROJECT_ROOT / "rag" / "jrxml_source" files = find_source_files(source_dir) if not files: _log.warning("未找到源文件,跳过构建") return {"status": "no_files", "kb_id": kb_id, "user_id": user_id} _log.info("找到 %d 个源文件", len(files)) # 4. 构建 KB result = build_kb_from_files(kb_id, files) _log.info("KB 构建完成: %s", result) return {**result, "kb_id": kb_id, "user_id": user_id, "file_count": len(files)} def main(): parser = argparse.ArgumentParser(description="初始化默认知识库") parser.add_argument("--force", action="store_true", help="强制重建(删除已有 KB)") args = parser.parse_args() print("=" * 60) print("JRXML Agent - 默认知识库初始化") print("=" * 60) result = init_default_kb(force=args.force) print(f"\n用户: {DEFAULT_USER_NAME}") print(f"知识库: {DEFAULT_KB_NAME}") print(f"状态: {result.get('status', 'unknown')}") print(f"字段数: {result.get('field_count', 0)}") print(f"模板数: {result.get('template_count', 0)}") print(f"Chunk数: {result.get('chunk_count', 0)}") if result.get("errors"): print(f"错误: {len(result['errors'])} 条") for e in result["errors"]: print(f" - {e.get('file', '')}: {e.get('error', '')}") print("\n初始化完成!") if __name__ == "__main__": main()