refactor: 重构项目配置管理,统一使用.env配置
- 新增 config.py 统一读取 .env 配置,移除硬编码路径和参数
- 重构 collect_jrxml.py,支持通过命令行参数和环境变量配置源目录
- 新增 .env.example 示例配置文件,整理所有可配置项
- 重构 down_embedding_model.py、import_to_chroma.py 等所有脚本,使用统一配置
- 新增 Windows 一键部署脚本 setup.bat
- 修正 jrxml_banch_chunker.py 的文件名拼写错误
This commit is contained in:
+19
-14
@@ -2,6 +2,7 @@
|
||||
query_chroma.py
|
||||
查询 Chroma 数据库,从自然语言查找相关 JRXML chunk
|
||||
支持命令行单次查询和交互式连续查询
|
||||
模型通过 .env / config.py 配置
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -12,19 +13,27 @@ import numpy as np
|
||||
import torch
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import chromadb
|
||||
from config import (
|
||||
CHROMA_DB_PATH, CHROMA_COLLECTION_NAME, USE_FP16,
|
||||
DEFAULT_N_RESULTS, SIMILARITY_THRESHOLD, resolve_model_path
|
||||
)
|
||||
|
||||
|
||||
class JRXMLSearcher:
|
||||
def __init__(self, chroma_path: str = None,
|
||||
collection_name: str = "jrxml_chunks",
|
||||
collection_name: str = None,
|
||||
model_path: str = None,
|
||||
use_fp16: bool = True):
|
||||
use_fp16: bool = None):
|
||||
project_root = Path(__file__).resolve().parent
|
||||
|
||||
if chroma_path is None:
|
||||
chroma_path = str(project_root / "chroma_db")
|
||||
chroma_path = str(CHROMA_DB_PATH)
|
||||
if collection_name is None:
|
||||
collection_name = CHROMA_COLLECTION_NAME
|
||||
if model_path is None:
|
||||
model_path = str(project_root / "models" / "Qwen3-Embedding-4B")
|
||||
model_path = resolve_model_path()
|
||||
if use_fp16 is None:
|
||||
use_fp16 = USE_FP16
|
||||
|
||||
# 处理 Hub 模型名称
|
||||
model_path_str = str(model_path)
|
||||
@@ -110,13 +119,13 @@ def main():
|
||||
parser.add_argument("query", nargs="?", default="",
|
||||
help="搜索关键词(不提供则进入交互模式)")
|
||||
parser.add_argument("--chroma_path", "-c", default=None,
|
||||
help=f"Chroma 数据库路径 (默认: chroma_db)")
|
||||
parser.add_argument("--collection", "-n", default="jrxml_chunks",
|
||||
help=f"Chroma 数据库路径 (默认: {CHROMA_DB_PATH})")
|
||||
parser.add_argument("--collection", "-n", default=CHROMA_COLLECTION_NAME,
|
||||
help="集合名称")
|
||||
parser.add_argument("--model_path", "-m", default=None,
|
||||
help="嵌入模型路径")
|
||||
parser.add_argument("--n_results", "-k", type=int, default=5,
|
||||
help="返回结果数 (默认: 5)")
|
||||
parser.add_argument("--n_results", "-k", type=int, default=DEFAULT_N_RESULTS,
|
||||
help=f"返回结果数 (默认: {DEFAULT_N_RESULTS})")
|
||||
parser.add_argument("--filter_field", "-f",
|
||||
help="按 chunk_type 过滤,例如: field, query, chart")
|
||||
parser.add_argument("--threshold", "-t", type=float,
|
||||
@@ -127,14 +136,10 @@ def main():
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.chroma_path is None:
|
||||
args.chroma_path = str(project_root / "chroma_db")
|
||||
args.chroma_path = str(CHROMA_DB_PATH)
|
||||
|
||||
if args.model_path is None:
|
||||
default_model = project_root / "models" / "Qwen3-Embedding-4B"
|
||||
if not default_model.exists():
|
||||
args.model_path = "sentence-transformers/all-MiniLM-L6-v2"
|
||||
else:
|
||||
args.model_path = str(default_model)
|
||||
args.model_path = resolve_model_path()
|
||||
|
||||
# 检查数据库
|
||||
if not os.path.exists(args.chroma_path):
|
||||
|
||||
Reference in New Issue
Block a user