b280c2b453
Add rag submodule for semantic JRXML chunk retrieval, refactor retrieve node to use RAGSearcher, and fix missing api_key in Anthropic SDK client initialization. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
56 lines
1.5 KiB
Python
56 lines
1.5 KiB
Python
"""初始化 JRXML 向量知识库。
|
|
|
|
rag_jrxml 子项目独立运行管线(分块→向量化→导入),本脚本仅用于预下载嵌入模型。
|
|
|
|
用法:
|
|
python scripts/init_kb.py --download-model # 预下载嵌入模型
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
load_dotenv()
|
|
|
|
|
|
def download_model():
    """Pre-download the embedding model so it is available locally.

    The model identifier is read from the ``RAG_EMBED_MODEL`` environment
    variable and defaults to ``Qwen/Qwen3-Embedding-0.6B`` when unset.
    Progress messages are written to stdout.
    """
    embed_model_id = os.environ.get("RAG_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")

    # Announce what is about to happen, plus a mirror hint for slow networks,
    # before any heavy work starts.
    for message in (
        f"正在下载嵌入模型: {embed_model_id}",
        "如遇网络超时,可设置环境变量 HF_ENDPOINT=https://hf-mirror.com 使用镜像",
        "",
    ):
        print(message)

    # Deferred import: the dependency is only needed on this code path, and
    # the messages above are shown even if the import itself fails.
    from sentence_transformers import SentenceTransformer

    # Constructing the model is what triggers the fetch (presumably into the
    # library's local cache — confirm against sentence-transformers docs).
    embedder = SentenceTransformer(embed_model_id)

    # Encode a throwaway string; presumably a smoke test that the model
    # actually loads and runs end to end.
    embedder.encode("测试下载")

    print(f"嵌入模型下载完成: {embed_model_id}")
|
|
|
|
|
|
def main():
    """Command-line entry point for the JRXML vector knowledge-base helper.

    With ``--download-model`` the embedding model is pre-downloaded via
    ``download_model()``. Without that flag, a usage line is printed followed
    by the commands for building the knowledge base inside the ``rag/``
    sub-project. Arguments are taken from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(description="JRXML 向量知识库工具")
    cli.add_argument(
        "--download-model",
        action="store_true",
        help="预下载嵌入模型到本地",
    )
    options = cli.parse_args()

    if options.download_model:
        download_model()
        return

    # No action requested: show how to invoke this script and where the
    # actual knowledge-base build pipeline lives.
    guidance = (
        "用法: python scripts/init_kb.py --download-model",
        "",
        "知识库构建请在 rag/ 子项目中独立运行:",
        " cd rag",
        " python batch_chunker.py jrxml_source",
        " python embed_chunks.py",
        " python import_to_chroma.py",
    )
    for text in guidance:
        print(text)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|