chore: remove 13 stale files and clean up project structure
Removed:
- app.py (deprecated Streamlit UI, replaced by api_server.py + frontend/)
- start_agent_jrxml.py (old launcher, replaced by start.py)
- test_reorder.py, e2e_test.py (ad-hoc/outdated test scripts)
- ocr_raw_positions.json (debug output)
- ARCHITECTURE.md, CODE_GUIDE.md, RAG_INTEGRATION.md, ROADMAP.md (superseded by CLAUDE.md)
- EVALUATION_REPORT.md (auto-generated)
- scripts/init_kb.py (replaced by init_default_kb.py)
- validation_service/validate.bat (redundant, start.py covers it)
- sessions/*.json (34 test session files, already gitignored)

Updated:
- CLAUDE.md: removed stale file entries from key mapping table
- README.md: updated init script reference and removed validate.bat
- .gitignore: removed EVALUATION_REPORT.md entry
This commit is contained in:
@@ -1,55 +0,0 @@
|
||||
"""初始化 JRXML 向量知识库。
|
||||
|
||||
rag_jrxml 子项目独立运行管线(分块→向量化→导入),本脚本仅用于预下载嵌入模型。
|
||||
|
||||
用法:
|
||||
python scripts/init_kb.py --download-model # 预下载嵌入模型
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Put the directory two levels above this file at the front of the import
# path (presumably the repository root, given the script lives in scripts/).
sys.path.insert(0, os.fspath(Path(__file__).parent.parent))

# Load variables from a .env file into the process environment so that
# settings such as RAG_EMBED_MODEL can be read through os.getenv later.
load_dotenv()
|
||||
|
||||
|
||||
def download_model():
    """Pre-download the embedding model to the local machine.

    The model identifier is read from the ``RAG_EMBED_MODEL`` environment
    variable and falls back to ``Qwen/Qwen3-Embedding-0.6B`` when unset.
    Progress messages are printed to stdout before and after the download.
    """
    embed_model_id = os.environ.get("RAG_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")

    # Announce the download and the mirror hint, followed by a blank line.
    print(
        f"正在下载嵌入模型: {embed_model_id}",
        "如遇网络超时，可设置环境变量 HF_ENDPOINT=https://hf-mirror.com 使用镜像",
        "",
        sep="\n",
    )

    # Imported lazily, after the messages above, so the heavy dependency is
    # only loaded when a download is actually requested.
    from sentence_transformers import SentenceTransformer

    # Instantiate the model and run one throwaway encode call on it.
    SentenceTransformer(embed_model_id).encode("测试下载")

    print(f"嵌入模型下载完成: {embed_model_id}")
|
||||
|
||||
|
||||
def main(argv=None):
    """Command-line entry point for the JRXML vector knowledge-base helper.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behavior.

    Behavior:
        * ``--download-model``: calls ``download_model()``.
        * no flag: prints a usage hint and the commands for building the
          knowledge base inside the ``rag/`` subproject.

    Raises:
        SystemExit: raised by argparse on unknown arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(description="JRXML 向量知识库工具")
    parser.add_argument(
        "--download-model",
        action="store_true",
        help="预下载嵌入模型到本地",
    )
    # Passing argv through lets callers and tests drive the CLI without
    # mutating sys.argv.
    args = parser.parse_args(argv)

    if args.download_model:
        download_model()
        return

    # No action requested: show how to use this script and where the real
    # knowledge-base build pipeline lives.
    print("用法: python scripts/init_kb.py --download-model")
    print()
    print("知识库构建请在 rag/ 子项目中独立运行:")
    print(" cd rag")
    print(" python batch_chunker.py jrxml_source")
    print(" python embed_chunks.py")
    print(" python import_to_chroma.py")
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user