优化任务调度说明
This commit is contained in:
@@ -0,0 +1,383 @@
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
import threading
|
||||
from typing import List, Dict, Optional, BinaryIO, Tuple, Any
|
||||
from datetime import datetime, timedelta
|
||||
import hashlib
|
||||
from io import BytesIO
|
||||
from minio import Minio
|
||||
from minio.error import S3Error, MinioException
|
||||
from utils.logger import log
|
||||
|
||||
|
||||
class MinIOAgent:
    """
    Cross-platform helper around a MinIO object-storage client.

    Provides bucket creation plus upload, download, listing, deletion and
    presigned-URL generation for objects. Methods return plain dictionaries
    of metadata so callers can persist them elsewhere (the original notes
    mention associating them with MySQL records).

    The class is a process-wide singleton: every construction returns the
    same instance, and only the first successful ``__init__`` call
    configures it. A config passed to later constructions is ignored.
    """

    _instance = None  # the single shared instance, created lazily in __new__
    _lock = threading.Lock()  # guards creation of _instance across threads

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have created
                # the instance while this one was waiting.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, config: dict):
        """
        Configure the client and verify that the server answers.

        Args:
            config: MinIO settings.
                Required keys:
                    - endpoint: service endpoint, e.g. 'localhost:9000'
                    - access_key: access key
                    - secret_key: secret key
                Optional keys:
                    - secure: whether to use SSL (default False)
                    - region: region name
                    - timeout: timeout in seconds (default 30); stored in
                      ``self.config`` but not passed to the client by this
                      class.

        Raises:
            ValueError: if any required key is missing.
            Exception: whatever client creation or the connectivity check
                raises; in that case the instance stays uninitialised so a
                later construction can retry.
        """
        # __init__ runs on every MinIOAgent(...) call even though __new__
        # hands back the same object, so skip once a client is in place.
        if getattr(self, '_client', None):
            return

        # Validate the mandatory settings
        required_keys = ['endpoint', 'access_key', 'secret_key']
        if not all(key in config for key in required_keys):
            raise ValueError(f"MinIO配置缺少必要参数,需要: {required_keys}")

        # Merge the supplied settings with defaults
        self.config = {
            'endpoint': config['endpoint'],
            'access_key': config['access_key'],
            'secret_key': config['secret_key'],
            'secure': config.get('secure', False),
            'region': config.get('region'),
            'timeout': config.get('timeout', 30),
        }

        # Logger bound to a module tag that includes the current platform
        current_platform = platform.system()
        self.log = log.bind(module=f"MinIOAgent({current_platform})")

        self._client = self._create_client()

        try:
            self._verify_connection()
        except Exception:
            # Do not leave a half-initialised singleton behind: without this
            # reset the guard at the top would treat the unverified client
            # as a finished initialisation on the next construction.
            self._client = None
            raise

    def _create_client(self) -> "Minio":
        """
        Build the MinIO client from ``self.config``.

        Returns:
            The newly created client.

        Raises:
            Exception: re-raised after logging if construction fails.
        """
        try:
            # NOTE: config['timeout'] is not forwarded here.
            client = Minio(
                endpoint=self.config['endpoint'],
                access_key=self.config['access_key'],
                secret_key=self.config['secret_key'],
                secure=self.config['secure'],
                region=self.config['region'],
            )
            self.log.info("MinIO客户端创建成功")
            return client
        except Exception as e:
            self.log.critical("创建MinIO客户端失败", 错误=str(e), exc_info=True)
            raise

    def _verify_connection(self) -> None:
        """
        Check that the server is reachable by listing buckets.

        Raises:
            Exception: re-raised after logging if the call fails.
        """
        try:
            self._client.list_buckets()
            self.log.info(f"成功连接到MinIO服务:{self.config['endpoint']}")
        except Exception as e:
            self.log.critical("连接验证失败", 错误=str(e), exc_info=True)
            raise

    def create_bucket(self, bucket_name: str) -> bool:
        """
        Create a bucket if it does not exist yet.

        Args:
            bucket_name: name of the bucket.

        Returns:
            True if the bucket was created or already existed, False if a
            ``MinioException`` occurred (the error is logged, not raised).
        """
        try:
            if not self._client.bucket_exists(bucket_name):
                self._client.make_bucket(bucket_name)
                self.log.info(f"存储桶创建成功:{bucket_name}")
                return True
            self.log.debug(f"存储桶已存在:{bucket_name}")
            return True
        except MinioException as e:
            self.log.error(f"创建存储桶 {bucket_name} 失败", 错误=str(e), exc_info=True)
            return False

    def upload_bytes(self, bucket: str, object_name: str, data: bytes) -> Dict[str, Any]:
        """
        Upload in-memory binary data as an object.

        Args:
            bucket: bucket name; created first if missing.
            object_name: object name (path inside the bucket).
            data: payload; must be non-empty.

        Returns:
            A metadata dictionary with:
                - bucket: bucket name
                - object_name: object path
                - size: payload size in bytes
                - etag: ETag reported by the server
                - content_type: content type sent with the upload
                - upload_time: naive ``datetime`` in UTC
                - local_hash: MD5 hex digest computed locally

        Raises:
            ValueError: if ``data`` is empty.
            MinioException: re-raised after logging if the upload fails.
        """
        if not data:
            raise ValueError("上传数据不能为空")

        # Make sure the target bucket exists. A failure here is only logged;
        # the upload below will surface the real error if there is one.
        self.create_bucket(bucket)

        try:
            from datetime import timezone

            # Local digest, returned so callers can check integrity
            local_hash = hashlib.md5(data).hexdigest()
            content_type = self._guess_content_type(object_name)

            result = self._client.put_object(
                bucket_name=bucket,
                object_name=object_name,
                data=BytesIO(data),
                length=len(data),
                content_type=content_type,
            )

            # The write result does not carry a content type and may not
            # carry a modification time, so neither is read unconditionally.
            last_modified = getattr(result, 'last_modified', None)
            if last_modified is not None:
                upload_time = last_modified.astimezone(timezone.utc).replace(tzinfo=None)
            else:
                upload_time = datetime.now(timezone.utc).replace(tzinfo=None)

            metadata = {
                'bucket': bucket,
                'object_name': object_name,
                'size': len(data),
                'etag': result.etag,
                'content_type': content_type,
                'upload_time': upload_time,
                'local_hash': local_hash,
            }

            self.log.info(
                "文件上传成功",
                存储桶=bucket,
                对象名称=object_name,
                大小=len(data)
            )
            return metadata

        except MinioException as e:
            self.log.error(
                "文件上传失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise

    def download_file(self, bucket: str, object_name: str, local_path: str) -> Dict[str, Any]:
        """
        Download an object to a local file.

        Args:
            bucket: bucket name.
            object_name: object name (path inside the bucket).
            local_path: destination path; missing parent directories are
                created. A bare filename is written to the current
                working directory.

        Returns:
            A dictionary with:
                - local_path: the destination path
                - size: size of the written file in bytes
                - download_time: elapsed transfer time in seconds
                - downloaded_at: local ``datetime`` when the download finished

        Raises:
            MinioException: re-raised after logging if the transfer fails.
            IOError: re-raised after logging if a local file operation fails.
        """
        try:
            # os.makedirs('') raises, so only create a parent directory when
            # the path actually has one.
            parent_dir = os.path.dirname(local_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            start_time = datetime.now()
            self._client.fget_object(bucket, object_name, local_path)
            download_time = datetime.now() - start_time

            stat = os.stat(local_path)

            result = {
                'local_path': local_path,
                'size': stat.st_size,
                'download_time': download_time.total_seconds(),
                'downloaded_at': datetime.now(),
            }

            self.log.info(
                "文件下载成功",
                存储桶=bucket,
                对象名称=object_name,
                本地路径=local_path,
                大小=stat.st_size
            )
            return result

        except MinioException as e:
            self.log.error(
                "文件下载失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise
        except IOError as e:
            self.log.error(
                "本地文件操作失败",
                本地路径=local_path,
                错误=str(e),
                exc_info=True
            )
            raise

    def get_presigned_url(self, bucket: str, object_name: str, expires: int = 3600) -> Dict[str, Any]:
        """
        Generate a temporary download URL for an object.

        Args:
            bucket: bucket name.
            object_name: object name (path inside the bucket).
            expires: lifetime of the URL in seconds (default 3600).

        Returns:
            A dictionary with:
                - presigned_url: the generated URL
                - expires_in: lifetime in seconds, as passed in
                - expires_at: local ``datetime`` at which the URL expires
                - bucket: bucket name
                - object_name: object name

        Raises:
            MinioException: re-raised after logging if generation fails.
        """
        try:
            # The client expects the lifetime as a timedelta, not as a raw
            # number of seconds.
            url = self._client.presigned_get_object(
                bucket_name=bucket,
                object_name=object_name,
                expires=timedelta(seconds=expires)
            )

            result = {
                'presigned_url': url,
                'expires_in': expires,
                'expires_at': datetime.now() + timedelta(seconds=expires),
                'bucket': bucket,
                'object_name': object_name,
            }

            self.log.debug(
                "预签名URL生成成功",
                存储桶=bucket,
                对象名称=object_name,
                过期时间=expires
            )
            return result

        except MinioException as e:
            self.log.error(
                "生成预签名URL失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise

    def list_objects(self, bucket: str, prefix: str = "") -> List[Dict[str, Any]]:
        """
        List objects under a prefix together with their metadata.

        The listing is recursive, and one additional ``stat_object`` request
        is issued per object to obtain its ETag and content type.

        Args:
            bucket: bucket name.
            prefix: object-name prefix to filter by (default: all objects).

        Returns:
            A list of dictionaries, one per object, with:
                - bucket: bucket name
                - object_name: object name
                - size: size from the listing
                - last_modified: modification time from the listing
                - etag: ETag from the stat call
                - content_type: content type from the stat call

        Raises:
            MinioException: re-raised after logging if listing or stat fails.
        """
        try:
            objects = self._client.list_objects(
                bucket_name=bucket,
                prefix=prefix,
                recursive=True
            )

            result = []
            for obj in objects:
                # Per-object stat call for the detailed metadata
                stat = self._client.stat_object(bucket, obj.object_name)

                result.append({
                    'bucket': bucket,
                    'object_name': obj.object_name,
                    'size': obj.size,
                    'last_modified': obj.last_modified,
                    'etag': stat.etag,
                    'content_type': stat.content_type,
                })

            self.log.info(
                "对象列表查询成功",
                存储桶=bucket,
                前缀=prefix,
                数量=len(result)
            )
            return result

        except MinioException as e:
            self.log.error(
                "查询对象列表失败",
                存储桶=bucket,
                前缀=prefix,
                错误=str(e),
                exc_info=True
            )
            raise

    def delete_object(self, bucket: str, object_name: str) -> bool:
        """
        Delete a single object.

        Args:
            bucket: bucket name.
            object_name: object name (path inside the bucket).

        Returns:
            True if the removal call succeeded, False if a
            ``MinioException`` occurred (the error is logged, not raised).
        """
        try:
            self._client.remove_object(bucket, object_name)
            self.log.info(
                "对象删除成功",
                存储桶=bucket,
                对象名称=object_name
            )
            return True
        except MinioException as e:
            self.log.error(
                "删除对象失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            return False

    @staticmethod
    def _guess_content_type(object_name: str) -> str:
        """
        Guess a MIME type from the object name.

        Args:
            object_name: object name whose extension is inspected.

        Returns:
            The guessed MIME type, or 'application/octet-stream' when the
            name gives no hint.
        """
        from mimetypes import guess_type

        mime_type, _ = guess_type(object_name)
        return mime_type or 'application/octet-stream'  # generic binary fallback
|
||||
Reference in New Issue
Block a user