refactor: 重构项目配置管理,统一使用.env配置
- 新增config.py统一读取.env配置,移除硬编码路径和参数
- 重构collect_jrxml.py支持命令行参数和环境变量配置源目录
- 新增.env.example示例配置文件,整理所有可配置项
- 重构down_embedding_model.py、import_to_chroma.py等所有脚本使用统一配置
- 新增Windows一键部署脚本setup.bat
- 修正jrxml_banch_chunker.py的文件名拼写错误
This commit is contained in:
+11
-11
@@ -1,26 +1,26 @@
|
||||
"""
|
||||
down_embedding_model.py
|
||||
下载 Qwen3-Embedding-4B 嵌入模型
|
||||
下载嵌入模型(模型名称通过 .env / config.py 配置)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from config import EMBEDDING_MODEL_NAME, EMBEDDING_MODEL_PATH, HF_ENDPOINT
|
||||
|
||||
def download_model():
|
||||
"""下载 Qwen3-Embedding-4B 模型"""
|
||||
project_root = Path(__file__).resolve().parent
|
||||
model_dir = project_root / "models" / "Qwen3-Embedding-4B"
|
||||
"""下载嵌入模型"""
|
||||
model_dir = EMBEDDING_MODEL_PATH
|
||||
|
||||
print("=" * 60)
|
||||
print("Qwen3-Embedding-4B 模型下载")
|
||||
print(f"{EMBEDDING_MODEL_NAME} 模型下载")
|
||||
print("=" * 60)
|
||||
print(f"模型名称: {EMBEDDING_MODEL_NAME}")
|
||||
print(f"模型目录: {model_dir}")
|
||||
print()
|
||||
|
||||
# 使用国内镜像加速
|
||||
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
||||
print("使用 HuggingFace 镜像: https://hf-mirror.com")
|
||||
os.environ['HF_ENDPOINT'] = HF_ENDPOINT
|
||||
print(f"使用 HuggingFace 镜像: {HF_ENDPOINT}")
|
||||
print()
|
||||
|
||||
try:
|
||||
@@ -34,13 +34,13 @@ def download_model():
|
||||
# 创建模型目录
|
||||
os.makedirs(model_dir, exist_ok=True)
|
||||
|
||||
print(f"开始下载 Qwen3-Embedding-4B 模型...")
|
||||
print(f"模型大小约 4GB,请耐心等待...")
|
||||
print(f"开始下载 {EMBEDDING_MODEL_NAME} 模型...")
|
||||
print(f"请耐心等待...")
|
||||
print()
|
||||
|
||||
try:
|
||||
snapshot_download(
|
||||
repo_id="Qwen/Qwen3-Embedding-4B",
|
||||
repo_id=EMBEDDING_MODEL_NAME,
|
||||
local_dir=str(model_dir),
|
||||
local_dir_use_symlinks=False,
|
||||
resume_download=True
|
||||
|
||||
Reference in New Issue
Block a user