优化任务调度说明
This commit is contained in:
@@ -0,0 +1,292 @@
|
||||
# 对象存储数据库操作
|
||||
|
||||
## 1. 类概述
|
||||
|
||||
`MinIOAgent` 是一个全平台兼容的对象存储操作类,支持 Windows/macOS/Linux 系统,提供对象存储的桶管理、对象操作、权限控制等功能。
|
||||
|
||||
### 核心特性:
|
||||
|
||||
- ✅ 连接池管理与自动重连
- ✅ 全平台兼容的对象操作接口
- ✅ 支持大文件分块上传/下载
- ✅ 预签名 URL 生成(临时访问)
- ✅ 完善的日志记录与错误处理
- ✅ 批量操作与前缀筛选
|
||||
|
||||
---
|
||||
|
||||
## 2. 初始化配置
|
||||
|
||||
### 基本配置参数
|
||||
```python
Config = {
    'endpoint': '127.0.0.1:9005',  # 对象存储服务地址
    'access_key': 'minioadmin',  # 访问密钥
    'secret_key': 'minioadmin',  # 密钥
    'secure': False,  # 是否启用SSL(社区版默认False)
    'region': 'us-east-1',  # 区域(默认值)
    'timeout': 300,  # 超时时间(秒)
    'max_pool_connections': 10  # 连接池最大连接数
}
```
|
||||
|
||||
### 各平台特殊配置
|
||||
| 平台    | 超时设置(秒) | 分块大小建议 | 并发数建议 |
|---------|----------------|--------------|------------|
| Windows | 300            | 5MB-10MB     | 2-4        |
| macOS   | 300            | 10MB-20MB    | 4-8        |
| Linux   | 300            | 20MB-50MB    | 8-16       |
|
||||
|
||||
### 初始化示例
|
||||
```python
from utils.minio_agent import MinIOAgent

# 基础初始化
config = {
    'endpoint': '127.0.0.1:9005',
    'access_key': 'minioadmin',
    'secret_key': 'minioadmin',
    'secure': False
}

# 创建客户端实例
minio_client = MinIOAgent(config)
```
|
||||
|
||||
---
|
||||
|
||||
## 3. 桶(Bucket)管理
|
||||
|
||||
### 桶操作
|
||||
```python
# 创建桶
if minio_client.create_bucket('my-bucket'):
    print("桶创建成功")

# 检查桶是否存在
if minio_client.bucket_exists('my-bucket'):
    print("桶已存在")

# 列出所有桶
buckets = minio_client.list_buckets()
for bucket in buckets:
    print(f"桶名称: {bucket['name']}, 创建时间: {bucket['creation_date']}")

# 删除桶(需先清空桶内对象)
if minio_client.delete_bucket('my-bucket'):
    print("桶删除成功")
```
|
||||
|
||||
### 桶策略管理
|
||||
```python
# 获取桶策略
policy = minio_client.get_bucket_policy('my-bucket')
print(policy)

# 设置公共读策略
public_read_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": "*",
        "Action": ["s3:GetObject"],
        "Resource": ["arn:aws:s3:::my-bucket/*"]
    }]
}
minio_client.set_bucket_policy('my-bucket', public_read_policy)
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 对象(Object)操作
|
||||
|
||||
### 上传对象
|
||||
```python
# 从文件上传
upload_meta = minio_client.upload_file(
    bucket_name='my-bucket',
    object_name='documents/report.pdf',
    file_path='/local/path/to/report.pdf',
    content_type='application/pdf'  # MIME类型
)
print(f"上传成功,大小: {upload_meta['size']} bytes")

# 从字节流上传
data = b"test data"
upload_meta = minio_client.upload_bytes(
    bucket_name='my-bucket',
    object_name='test/data.bin',
    data=data
)

# 大文件分块上传
upload_meta = minio_client.upload_large_file(
    bucket_name='my-bucket',
    object_name='videos/large_file.mp4',
    file_path='/local/path/to/large.mp4',
    part_size=5*1024*1024  # 5MB分块
)
```
|
||||
|
||||
### 下载对象
|
||||
```python
# 下载到文件
download_meta = minio_client.download_file(
    bucket_name='my-bucket',
    object_name='documents/report.pdf',
    file_path='/local/save/path/report.pdf'
)

# 下载为字节流
data = minio_client.download_bytes(
    bucket_name='my-bucket',
    object_name='test/data.bin'
)
print(f"下载数据: {data}")
```
|
||||
|
||||
### 查询与列举对象
|
||||
```python
# 列举桶内所有对象
objects = minio_client.list_objects('my-bucket')
for obj in objects:
    print(f"对象: {obj['object_name']}, 大小: {obj['size']}")

# 按前缀筛选(类似文件夹)
pdf_files = minio_client.list_objects(
    bucket_name='my-bucket',
    prefix='documents/',  # 前缀(类似文件夹路径)
    recursive=False  # 是否递归查询子目录
)

# 获取对象元信息
meta = minio_client.get_object_metadata(
    bucket_name='my-bucket',
    object_name='documents/report.pdf'
)
print(f"内容类型: {meta['content_type']}, 最后修改: {meta['last_modified']}")
```
|
||||
|
||||
### 删除对象
|
||||
```python
# 删除单个对象
if minio_client.delete_object('my-bucket', 'test/data.bin'):
    print("对象删除成功")

# 批量删除对象
delete_count = minio_client.delete_objects(
    bucket_name='my-bucket',
    object_names=['file1.txt', 'file2.txt', 'docs/report.pdf']
)
print(f"成功删除 {delete_count} 个对象")
```
|
||||
|
||||
---
|
||||
|
||||
## 5. 高级功能
|
||||
|
||||
### 预签名 URL(临时访问)
|
||||
```python
# 生成下载预签名URL(有效期30分钟)
download_url = minio_client.get_presigned_url(
    bucket_name='my-bucket',
    object_name='documents/report.pdf',
    expires=1800,  # 有效期(秒)
    method='GET'  # 访问方法(GET下载,PUT上传)
)
print(f"临时下载链接: {download_url}")

# 生成上传预签名URL(允许客户端直接上传)
upload_url = minio_client.get_presigned_url(
    bucket_name='my-bucket',
    object_name='user_uploads/image.jpg',
    expires=3600,
    method='PUT'
)
```
|
||||
|
||||
### 批量操作
|
||||
```python
# 批量复制对象(同桶内)
copy_results = minio_client.copy_objects(
    source_bucket='my-bucket',
    dest_bucket='my-bucket',
    object_mapping={
        'documents/report.pdf': 'archive/report_2024.pdf',
        'data/raw.csv': 'data/backup/raw_2024.csv'
    }
)

# 批量移动对象(跨桶)
move_results = minio_client.move_objects(
    source_bucket='my-bucket',
    dest_bucket='archive-bucket',
    object_prefix='2023/'  # 移动所有以2023/为前缀的对象
)
```
|
||||
|
||||
### 生命周期管理
|
||||
```python
# 设置对象生命周期规则(自动迁移/删除)
rule = {
    "Rules": [{
        "ID": "archive-old-files",
        "Status": "Enabled",
        "Prefix": "logs/",
        "Expiration": {
            "Days": 90  # 90天后自动删除
        },
        "Transition": {
            "Days": 30,  # 30天后迁移到低频存储
            "StorageClass": "STANDARD_IA"
        }
    }]
}
minio_client.set_bucket_lifecycle('my-bucket', rule)
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 异常处理
|
||||
|
||||
```python
from minio.error import S3Error

try:
    # 尝试上传对象
    minio_client.upload_file(
        bucket_name='my-bucket',
        object_name='critical/data.csv',
        file_path='/local/data.csv'
    )
except S3Error as e:
    if e.code == 'NoSuchBucket':
        print("桶不存在,创建后重试")
        minio_client.create_bucket('my-bucket')
    elif e.code == 'AccessDenied':
        print("权限不足,请检查密钥")
    else:
        print(f"上传失败: {e}")
except Exception as e:
    print(f"发生错误: {str(e)}")
```
|
||||
|
||||
---
|
||||
|
||||
## 7. 性能优化建议
|
||||
|
||||
1. **大文件处理**:
   - 超过100MB的文件建议使用分块上传(`upload_large_file`)
   - 根据网络状况调整分块大小(5-50MB)

2. **批量操作**:
   - 列举对象时使用前缀筛选减少返回数据量
   - 批量删除/复制时单次操作不超过1000个对象

3. **缓存策略**:
   - 对频繁访问的对象使用预签名URL并设置合理过期时间
   - 客户端缓存对象元数据减少请求次数

4. **并发控制**:
   - 多线程操作时控制并发数(参考平台建议值)
   - 避免同时对同一对象进行写操作
|
||||
Reference in New Issue
Block a user