import os
import sys
import platform
import threading
from typing import List, Dict, Optional, BinaryIO, Tuple, Any
from datetime import datetime, timedelta, timezone
import hashlib
from io import BytesIO

from minio import Minio
from minio.error import S3Error, MinioException

from utils.logger import log


class MinIOAgent:
    """
    Cross-platform helper around the MinIO object-storage client.

    Provides bucket creation, upload, download, listing, deletion and
    presigned-URL generation. It works on raw binary payloads and returns
    plain metadata dictionaries so callers can persist them elsewhere
    (the original author mentions associating them with MySQL rows).

    The class is a process-wide singleton: every construction returns the
    same instance, and only the first successful ``__init__`` configures it.
    """

    _instance = None  # the single shared instance
    _lock = threading.Lock()  # guards creation of the shared instance

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have created
                # the instance while this one was waiting.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, config: dict):
        """
        Initialise the MinIO connection.

        Args:
            config: MinIO settings with the following keys:
                - endpoint: service endpoint, e.g. ``'localhost:9000'``
                - access_key: access key
                - secret_key: secret key
                - secure (optional): use TLS, default ``False``
                - region (optional): region name
                - timeout (optional): timeout in seconds, default 30

        Raises:
            ValueError: if a required key is missing from ``config``.
            Exception: whatever client creation or the connection check
                raises is logged and propagated.
        """
        # The singleton is constructed repeatedly; skip re-initialisation
        # once a working client exists.
        if getattr(self, '_client', None):
            return

        # Validate the mandatory settings.
        required_keys = ['endpoint', 'access_key', 'secret_key']
        if not all(key in config for key in required_keys):
            raise ValueError(f"MinIO配置缺少必要参数,需要: {required_keys}")

        # Merge the supplied settings with defaults.
        self.config = {
            'endpoint': config['endpoint'],
            'access_key': config['access_key'],
            'secret_key': config['secret_key'],
            'secure': config.get('secure', False),
            'region': config.get('region'),
            'timeout': config.get('timeout', 30)
        }

        # Bind the current platform name into the logger context.
        current_platform = platform.system()
        self.log = log.bind(module=f"MinIOAgent({current_platform})")

        self._client = self._create_client()

        try:
            self._verify_connection()
        except Exception:
            # Drop the unverified client so the next construction attempt
            # re-initialises instead of silently reusing a broken singleton.
            self._client = None
            raise

    def _create_client(self) -> Minio:
        """
        Build the underlying ``Minio`` client from ``self.config``.

        Returns:
            The newly created client.

        Raises:
            Exception: any error raised by the ``Minio`` constructor is
                logged and re-raised.
        """
        # NOTE(review): self.config['timeout'] is stored but not passed to
        # the client here, so it currently has no effect.
        try:
            client = Minio(
                endpoint=self.config['endpoint'],
                access_key=self.config['access_key'],
                secret_key=self.config['secret_key'],
                secure=self.config['secure'],
                region=self.config['region']
            )
            self.log.info("MinIO客户端创建成功")
            return client
        except Exception as e:
            self.log.critical("创建MinIO客户端失败", 错误=str(e), exc_info=True)
            raise

    def _verify_connection(self) -> None:
        """
        Check that the MinIO service is reachable.

        Raises:
            Exception: any error raised while listing buckets is logged
                and re-raised.
        """
        try:
            # Listing buckets is used as a connectivity probe.
            self._client.list_buckets()
            self.log.info(f"成功连接到MinIO服务:{self.config['endpoint']}")
        except Exception as e:
            self.log.critical("连接验证失败", 错误=str(e), exc_info=True)
            raise

    def create_bucket(self, bucket_name: str) -> bool:
        """
        Create a bucket if it does not already exist.

        Args:
            bucket_name: name of the bucket.

        Returns:
            ``True`` if the bucket exists after the call (created now or
            already present), ``False`` if a ``MinioException`` occurred.
        """
        try:
            if not self._client.bucket_exists(bucket_name):
                self._client.make_bucket(bucket_name)
                self.log.info(f"存储桶创建成功:{bucket_name}")
                return True
            self.log.debug(f"存储桶已存在:{bucket_name}")
            return True
        except MinioException as e:
            self.log.error(f"创建存储桶 {bucket_name} 失败", 错误=str(e), exc_info=True)
            return False

    def upload_bytes(self, bucket: str, object_name: str, data: bytes) -> Dict[str, Any]:
        """
        Upload binary data to MinIO.

        Args:
            bucket: bucket name (created first if missing).
            object_name: object name (path) inside the bucket.
            data: payload to upload; must be non-empty.

        Returns:
            A metadata dictionary with:
                - bucket: bucket name
                - object_name: object path
                - size: payload size in bytes
                - etag: ETag reported by the server
                - content_type: content type sent with the upload
                - upload_time: upload time as a naive UTC ``datetime``
                - local_hash: MD5 hex digest computed locally

        Raises:
            ValueError: if ``data`` is empty.
            MinioException: if the upload fails (logged and re-raised).
        """
        if not data:
            raise ValueError("上传数据不能为空")

        # Make sure the target bucket exists.
        self.create_bucket(bucket)

        try:
            # Local hash, for integrity checks by the caller.
            local_hash = hashlib.md5(data).hexdigest()
            content_type = self._guess_content_type(object_name)

            result = self._client.put_object(
                bucket_name=bucket,
                object_name=object_name,
                data=BytesIO(data),
                length=len(data),
                content_type=content_type
            )

            # The write result is not relied on for the content type, and
            # its modification time may be absent; fall back to "now" so a
            # successful upload never fails while building metadata.
            last_modified = getattr(result, 'last_modified', None)
            if last_modified is not None:
                upload_time = datetime.fromtimestamp(
                    last_modified.timestamp(), timezone.utc
                ).replace(tzinfo=None)
            else:
                upload_time = datetime.now(timezone.utc).replace(tzinfo=None)

            metadata = {
                'bucket': bucket,
                'object_name': object_name,
                'size': len(data),
                'etag': result.etag,
                'content_type': content_type,
                'upload_time': upload_time,
                'local_hash': local_hash
            }

            self.log.info(
                "文件上传成功",
                存储桶=bucket,
                对象名称=object_name,
                大小=len(data)
            )
            return metadata
        except MinioException as e:
            self.log.error(
                "文件上传失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise

    def download_file(self, bucket: str, object_name: str, local_path: str) -> Dict[str, Any]:
        """
        Download an object from MinIO to a local file.

        Args:
            bucket: bucket name.
            object_name: object name (path) inside the bucket.
            local_path: destination path on the local filesystem.

        Returns:
            A dictionary with:
                - local_path: destination path
                - size: size of the written file in bytes
                - download_time: elapsed download time in seconds
                - downloaded_at: local timestamp taken after the download

        Raises:
            MinioException: if the download fails (logged and re-raised).
            IOError: if a local filesystem operation fails (logged and
                re-raised).
        """
        try:
            # Create the parent directory when the path has one. A bare
            # file name has an empty dirname, which os.makedirs rejects.
            parent_dir = os.path.dirname(local_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            start_time = datetime.now()
            self._client.fget_object(bucket, object_name, local_path)
            download_time = datetime.now() - start_time

            stat = os.stat(local_path)

            result = {
                'local_path': local_path,
                'size': stat.st_size,
                'download_time': download_time.total_seconds(),
                'downloaded_at': datetime.now()
            }

            self.log.info(
                "文件下载成功",
                存储桶=bucket,
                对象名称=object_name,
                本地路径=local_path,
                大小=stat.st_size
            )
            return result
        except MinioException as e:
            self.log.error(
                "文件下载失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise
        except IOError as e:
            self.log.error(
                "本地文件操作失败",
                本地路径=local_path,
                错误=str(e),
                exc_info=True
            )
            raise

    def get_presigned_url(self, bucket: str, object_name: str, expires: int = 3600) -> Dict[str, Any]:
        """
        Generate a temporary download URL for an object.

        Args:
            bucket: bucket name.
            object_name: object name (path) inside the bucket.
            expires: validity period in seconds, default 3600.

        Returns:
            A dictionary with ``presigned_url``, ``expires_in`` (seconds),
            ``expires_at`` (local ``datetime``), ``bucket`` and
            ``object_name``.

        Raises:
            MinioException: if URL generation fails (logged and re-raised).
        """
        try:
            # The client expects the validity period as a timedelta, while
            # this method's public contract is plain seconds.
            url = self._client.presigned_get_object(
                bucket_name=bucket,
                object_name=object_name,
                expires=timedelta(seconds=expires)
            )

            result = {
                'presigned_url': url,
                'expires_in': expires,
                'expires_at': datetime.now() + timedelta(seconds=expires),
                'bucket': bucket,
                'object_name': object_name
            }

            self.log.debug(
                "预签名URL生成成功",
                存储桶=bucket,
                对象名称=object_name,
                过期时间=expires
            )
            return result
        except MinioException as e:
            self.log.error(
                "生成预签名URL失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise

    def list_objects(self, bucket: str, prefix: str = "") -> List[Dict[str, Any]]:
        """
        List objects under a prefix together with their metadata.

        The listing is recursive, and each object is additionally queried
        with ``stat_object`` to obtain its ETag and content type, so the
        call issues one extra request per object.

        Args:
            bucket: bucket name.
            prefix: object path prefix; empty string lists everything.

        Returns:
            A list of dictionaries, each with ``bucket``, ``object_name``,
            ``size``, ``last_modified``, ``etag`` and ``content_type``.

        Raises:
            MinioException: if listing or a stat call fails (logged and
                re-raised).
        """
        try:
            objects = self._client.list_objects(
                bucket_name=bucket,
                prefix=prefix,
                recursive=True
            )

            result = []
            for obj in objects:
                # Per-object stat supplies the ETag and content type.
                stat = self._client.stat_object(bucket, obj.object_name)
                result.append({
                    'bucket': bucket,
                    'object_name': obj.object_name,
                    'size': obj.size,
                    'last_modified': obj.last_modified,
                    'etag': stat.etag,
                    'content_type': stat.content_type
                })

            self.log.info(
                "对象列表查询成功",
                存储桶=bucket,
                前缀=prefix,
                数量=len(result)
            )
            return result
        except MinioException as e:
            self.log.error(
                "查询对象列表失败",
                存储桶=bucket,
                前缀=prefix,
                错误=str(e),
                exc_info=True
            )
            raise

    def delete_object(self, bucket: str, object_name: str) -> bool:
        """
        Delete a single object.

        Args:
            bucket: bucket name.
            object_name: object name (path) inside the bucket.

        Returns:
            ``True`` if the removal call succeeded, ``False`` if a
            ``MinioException`` occurred (the error is logged, not raised).
        """
        try:
            self._client.remove_object(bucket, object_name)
            self.log.info(
                "对象删除成功",
                存储桶=bucket,
                对象名称=object_name
            )
            return True
        except MinioException as e:
            self.log.error(
                "删除对象失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            return False

    @staticmethod
    def _guess_content_type(object_name: str) -> str:
        """
        Guess a MIME type from the object name.

        Returns:
            The guessed MIME type, or ``'application/octet-stream'`` when
            the name gives no hint.
        """
        from mimetypes import guess_type
        mime_type, _ = guess_type(object_name)
        return mime_type or 'application/octet-stream'  # generic binary fallback