9d78a49625
- 新增config.py统一读取.env配置,移除硬编码路径和参数 - 重构collect_jrxml.py支持命令行参数和环境变量配置源目录 - 新增.env.example示例配置文件,整理所有可配置项 - 重构down_embedding_model.py、import_to_chroma.py等所有脚本使用统一配置 - 新增Windows一键部署脚本setup.bat - 修正jrxml_banch_chunker.py的文件名拼写错误
62 lines
1.6 KiB
Python
62 lines
1.6 KiB
Python
"""
down_embedding_model.py
Download the embedding model (the model name is configured via .env / config.py).
"""
|
|
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from config import EMBEDDING_MODEL_NAME, EMBEDDING_MODEL_PATH, HF_ENDPOINT
|
|
|
|
def download_model():
    """Download the configured embedding model into the local model directory.

    Uses ``EMBEDDING_MODEL_NAME``, ``EMBEDDING_MODEL_PATH`` and ``HF_ENDPOINT``
    imported from ``config``. ``HF_ENDPOINT`` is exported to the process
    environment, then the repository is fetched with
    ``huggingface_hub.snapshot_download``. If ``huggingface_hub`` cannot be
    imported, it is installed with pip and the import is retried.

    Returns:
        bool: ``True`` when the snapshot download completed, ``False`` when it
        raised an exception (the error is printed, not re-raised).

    Raises:
        subprocess.CalledProcessError: If the fallback ``pip install`` fails.
        OSError: If the model directory cannot be created.
    """
    model_dir = EMBEDDING_MODEL_PATH
    banner = "=" * 60

    print(banner)
    print(f"{EMBEDDING_MODEL_NAME} 模型下载")
    print(banner)
    print(f"模型名称: {EMBEDDING_MODEL_NAME}")
    print(f"模型目录: {model_dir}")
    print()

    # Export the endpoint BEFORE huggingface_hub is imported below, so the
    # library sees the mirror URL when it initialises its settings.
    os.environ['HF_ENDPOINT'] = HF_ENDPOINT
    print(f"使用 HuggingFace 镜像: {HF_ENDPOINT}")
    print()

    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        print("❌ 未安装 huggingface_hub,正在安装...")
        import importlib
        import subprocess

        # Install into the interpreter that is running this script.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
        # The package was installed after the interpreter started; drop the
        # import system's cached directory listings so the retry can find it.
        importlib.invalidate_caches()
        from huggingface_hub import snapshot_download

    # Create the model directory (no error if it already exists).
    os.makedirs(model_dir, exist_ok=True)

    print(f"开始下载 {EMBEDDING_MODEL_NAME} 模型...")
    print("请耐心等待...")
    print()

    try:
        # NOTE(review): recent huggingface_hub releases appear to deprecate
        # `local_dir_use_symlinks` and `resume_download`; they are kept here
        # to preserve behavior on older versions -- confirm against the
        # version pinned for this project.
        snapshot_download(
            repo_id=EMBEDDING_MODEL_NAME,
            local_dir=str(model_dir),
            local_dir_use_symlinks=False,
            resume_download=True,
        )
        print()
        print(banner)
        print("✅ 模型下载完成!")
        print(banner)
        print(f"模型路径: {model_dir}")
        return True
    except Exception as e:
        # Top-level boundary of the script: report the failure to the user
        # and signal it through the return value instead of a traceback.
        print()
        print(banner)
        print(f"❌ 下载失败: {e}")
        print(banner)
        return False
|
|
|
|
if __name__ == "__main__":
    # download_model() returns True on success and False on failure; turn
    # that into the process exit status (0 / 1) so calling scripts can
    # detect a failed download instead of always seeing success.
    sys.exit(0 if download_model() else 1)