优化任务调度说明

This commit is contained in:
z66
2025-10-17 17:59:28 +08:00
commit fd67231866
49 changed files with 300973 additions and 0 deletions
+171
View File
@@ -0,0 +1,171 @@
import unittest
import os
import tempfile
import hashlib
from datetime import datetime
from utils.minio_agent import MinIOAgent # 导入之前的MinIO操作类
class TestMinIOAgent(unittest.TestCase):
    """Integration tests for ``MinIOAgent`` against a locally running MinIO server.

    The suite needs a reachable MinIO instance matching ``MINIO_CONFIG``. One
    uniquely named bucket is created for the whole class in ``setUpClass`` and
    removed again in ``tearDownClass``.
    """

    # Test configuration - local MinIO community edition.
    MINIO_CONFIG = {
        'endpoint': '127.0.0.1:9005',
        'access_key': 'admin',  # default account
        'secret_key': 'abc88888888',  # default password
        'secure': False  # the community edition does not enable SSL by default
    }

    @classmethod
    def setUpClass(cls):
        """Create the shared client and the per-run test bucket."""
        # A timestamped bucket name avoids collisions with earlier runs.
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        cls.test_bucket = f"test-bucket-{timestamp}"
        cls.test_object = "test-data/sample.txt"
        cls.test_content = b"this is MinIO test data: 1234567890"
        cls.minio_agent = MinIOAgent(cls.MINIO_CONFIG)
        # Make sure the test bucket exists before any test runs.
        cls.minio_agent.create_bucket(cls.test_bucket)

    @classmethod
    def tearDownClass(cls):
        """Best-effort cleanup: empty the test bucket, then remove it."""
        try:
            # MinIO only removes empty buckets, so delete every object first.
            objects = cls.minio_agent.list_objects(cls.test_bucket)
            for obj in objects:
                cls.minio_agent.delete_object(cls.test_bucket, obj['object_name'])
            cls.minio_agent._client.remove_bucket(cls.test_bucket)
            print(f"\n测试清理完成,已删除桶: {cls.test_bucket}")
        except Exception as e:
            # Cleanup is deliberately non-fatal; report and carry on.
            print(f"清理测试环境失败: {str(e)}")

    def test_01_create_bucket(self):
        """Creating a bucket reports success and the bucket then exists."""
        new_bucket = f"temp-bucket-{datetime.now().microsecond}"
        result = self.minio_agent.create_bucket(new_bucket)
        self.assertTrue(result, "存储桶创建失败")
        # Verify through the underlying client that the bucket is really there.
        exists = self.minio_agent._client.bucket_exists(new_bucket)
        self.assertTrue(exists, "存储桶创建后未检测到存在")
        # Remove the temporary bucket again.
        self.minio_agent._client.remove_bucket(new_bucket)

    def test_02_upload_download(self):
        """Uploaded bytes can be downloaded again unchanged."""
        upload_meta = self.minio_agent.upload_bytes(
            bucket=self.test_bucket,
            object_name=self.test_object,
            data=self.test_content
        )
        self.assertEqual(upload_meta['size'], len(self.test_content), "上传数据大小不匹配")
        self.assertEqual(
            upload_meta['local_hash'],
            hashlib.md5(self.test_content).hexdigest(),
            "本地哈希校验失败"
        )
        # Reserve a temp file name; delete=False so it survives the `with`.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name
        try:
            download_meta = self.minio_agent.download_file(
                bucket=self.test_bucket,
                object_name=self.test_object,
                local_path=temp_path
            )
            with open(temp_path, 'rb') as f:
                downloaded_content = f.read()
            self.assertEqual(downloaded_content, self.test_content, "下载数据与原始数据不匹配")
            self.assertEqual(download_meta['size'], len(self.test_content), "下载文件大小不匹配")
        finally:
            # Always remove the temp file, even when the download or an
            # assertion above fails; otherwise failed runs leak files.
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    def test_03_presigned_url(self):
        """A presigned URL points at the configured endpoint with the requested expiry."""
        # Upload the object the URL will refer to.
        self.minio_agent.upload_bytes(
            self.test_bucket,
            self.test_object,
            self.test_content
        )
        # Generate a URL valid for 30 seconds.
        url_info = self.minio_agent.get_presigned_url(
            bucket=self.test_bucket,
            object_name=self.test_object,
            expires=30
        )
        # Derive the expected base URL from MINIO_CONFIG so the assertion
        # cannot drift from the endpoint the client actually uses.
        scheme = "https" if self.MINIO_CONFIG['secure'] else "http"
        expected_base = f"{scheme}://{self.MINIO_CONFIG['endpoint']}"
        self.assertIn(expected_base, url_info['presigned_url'], "预签名URL格式不正确")
        self.assertEqual(url_info['expires_in'], 30, "过期时间设置不正确")

    def test_04_list_objects(self):
        """Listing returns every object, and a prefix filter narrows the result."""
        test_objects = [
            "test-folder/file1.txt",
            "test-folder/file2.csv",
            "another-folder/image.jpg"
        ]
        # Record what is already in the bucket, so the count check does not
        # depend on which other tests ran before this one.
        existing_names = {
            obj['object_name']
            for obj in self.minio_agent.list_objects(self.test_bucket)
        }
        for obj in test_objects:
            self.minio_agent.upload_bytes(
                self.test_bucket,
                obj,
                b"tese_list_obj"
            )
        all_objects = self.minio_agent.list_objects(self.test_bucket)
        expected_count = len(existing_names | set(test_objects))
        self.assertEqual(len(all_objects), expected_count, "列出对象数量不匹配")
        # Filter by prefix.
        filtered_objects = self.minio_agent.list_objects(
            self.test_bucket,
            prefix="test-folder/"
        )
        self.assertEqual(len(filtered_objects), 2, "按前缀筛选结果不正确")

    def test_05_delete_object(self):
        """A deleted object no longer shows up in a listing."""
        delete_obj = "to-delete/temp.txt"
        self.minio_agent.upload_bytes(
            self.test_bucket,
            delete_obj,
            b"will be delete"
        )
        result = self.minio_agent.delete_object(self.test_bucket, delete_obj)
        self.assertTrue(result, "删除对象失败")
        # Verify the deletion through a prefix listing.
        objects = self.minio_agent.list_objects(self.test_bucket, prefix="to-delete/")
        self.assertEqual(len(objects), 0, "对象删除后仍存在")

    def test_06_upload_empty_data(self):
        """Uploading empty data is expected to raise ``ValueError``."""
        with self.assertRaises(ValueError, msg="未捕获空数据上传异常"):
            self.minio_agent.upload_bytes(
                self.test_bucket,
                "empty.txt",
                b""
            )
if __name__ == "__main__":
    # Run the suite with per-test (verbose) reporting.
    unittest.main(verbosity=2)
+280
View File
@@ -0,0 +1,280 @@
import unittest
import pandas as pd
from datetime import datetime
import time
import pymysql
import platform
from concurrent.futures import ThreadPoolExecutor
from utils.mysql_agent import MySQLAgent
class TestMySQLAgent(unittest.TestCase):
    """Integration tests for ``MySQLAgent`` against a live MySQL server.

    ``setUpClass`` creates a uniquely named database and a seeded table with
    ids 1-3; ``tearDownClass`` drops both. unittest runs the test methods in
    alphabetical order, so tests that add rows run before some of the read
    tests. Each assertion is therefore scoped to the rows it owns.
    """

    @classmethod
    def setUpClass(cls):
        """Create the test database, connect to it and seed the test table."""
        # Timestamped names avoid clashes with earlier runs.
        cls.test_db_name = f"test_db_{datetime.now().strftime('%Y%m%d%H%M%S')}"
        cls.test_table = f"test_table_{datetime.now().strftime('%Y%m%d%H%M%S')}"
        # Base settings (adjust to the local environment).
        cls.base_config = {
            'host': 'localhost',
            'port': 3306,
            'user': 'root',
            'password': '123123',
            'max_connections': 10
        }
        cls._create_test_database()
        cls.db = MySQLAgent({
            **cls.base_config,
            'database': cls.test_db_name
        })
        # Create the test table and insert the seed rows (ids 1..3).
        test_data = pd.DataFrame({
            'id': [1, 2, 3],
            'name': ['Test1', 'Test2', 'Test3'],
            'value': [10.5, 20.3, 30.8],
            'created_at': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03'])
        })
        cls.db.create_table_from_df(cls.test_table, test_data, primary_key='id')
        cls.db.insert_from_df(cls.test_table, test_data)

    @classmethod
    def _admin_connection(cls):
        """Open a raw PyMySQL connection to the server with no database selected.

        Only connection keys are forwarded. ``base_config`` also carries
        ``max_connections`` (presumably a ``MySQLAgent`` pool setting), which
        ``pymysql.connect`` does not accept and would reject with ``TypeError``.
        """
        return pymysql.connect(
            host=cls.base_config['host'],
            port=cls.base_config['port'],
            user=cls.base_config['user'],
            password=cls.base_config['password'],
            charset='utf8mb4'
        )

    @classmethod
    def _create_test_database(cls):
        """Create the test database through a temporary admin connection."""
        temp_conn = cls._admin_connection()
        try:
            with temp_conn.cursor() as cursor:
                cursor.execute(f"CREATE DATABASE IF NOT EXISTS {cls.test_db_name}")
                cursor.execute(f"USE {cls.test_db_name}")
                # NOTE(review): this changes a server-wide setting and is never
                # restored; it also needs a sufficiently privileged account.
                cursor.execute("SET GLOBAL max_connections = 100")
            temp_conn.commit()
        finally:
            temp_conn.close()

    @classmethod
    def tearDownClass(cls):
        """Drop the test table and the test database."""
        if hasattr(cls, 'db') and cls.db:
            if cls.db.table_exists(cls.test_table):
                cls.db.drop_table(cls.test_table)
        # Use the filtered admin connection: passing **base_config straight to
        # pymysql.connect fails on the 'max_connections' key and would leave
        # the test database behind.
        temp_conn = cls._admin_connection()
        try:
            with temp_conn.cursor() as cursor:
                cursor.execute(f"DROP DATABASE IF EXISTS {cls.test_db_name}")
            temp_conn.commit()
        finally:
            temp_conn.close()

    def test_connection(self):
        """A trivial query succeeds and returns exactly one row."""
        version_df = self.db.query_to_df("SELECT VERSION() as version")
        self.assertIsNotNone(version_df)
        self.assertEqual(len(version_df), 1)
        print(f"数据库版本: {version_df['version'].iloc[0]}")

    def test_query_to_df(self):
        """A parameterized query returns a DataFrame with the expected rows."""
        # Bound the id range to the seed rows (1..3). Tests that sort earlier
        # by name have already inserted ids 4, 5 and 1000..1999, so an
        # open-ended "id > 1" would no longer match exactly two rows.
        df = self.db.query_to_df(
            f"SELECT * FROM {self.test_table} WHERE id > %s AND id <= %s",
            params=(1, 3)
        )
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 2)  # seed rows with id 2 and 3
        self.assertIn('name', df.columns)

    def test_insert_from_df(self):
        """Rows inserted from a DataFrame can be read back."""
        new_data = pd.DataFrame({
            'id': [4, 5],
            'name': ['Test4', 'Test5'],
            'value': [40.1, 50.2],
            'created_at': pd.to_datetime(['2023-01-04', '2023-01-05'])
        })
        inserted_rows = self.db.insert_from_df(self.test_table, new_data)
        self.assertEqual(inserted_rows, 2)
        # ORDER BY makes the list comparison independent of storage order.
        result_df = self.db.query_to_df(
            f"SELECT name FROM {self.test_table} WHERE id IN (4,5) ORDER BY id"
        )
        self.assertEqual(result_df['name'].tolist(), ['Test4', 'Test5'])

    def test_update_from_df(self):
        """Rows are updated from a DataFrame keyed on ``id``."""
        update_data = pd.DataFrame({
            'id': [1, 2],
            'name': ['Updated1', 'Updated2']
        })
        updated_rows = self.db.update_from_df(self.test_table, update_data, 'id')
        self.assertGreaterEqual(updated_rows, 2)
        result_df = self.db.query_to_df(
            f"SELECT name FROM {self.test_table} WHERE id IN (1,2)"
        )
        self.assertIn('Updated1', result_df['name'].values)
        self.assertIn('Updated2', result_df['name'].values)

    def test_transaction(self):
        """Statements executed inside a committed transaction are persisted."""
        conn = self.db.begin_transaction()
        try:
            cursor = conn.cursor()
            cursor.execute(f"UPDATE {self.test_table} SET value = 99.9 WHERE id = 1")
            cursor.execute(f"UPDATE {self.test_table} SET value = 88.8 WHERE id = 2")
            self.db.commit_transaction(conn)
        except Exception:
            # Undo partial work, then let the test fail with the real error.
            self.db.rollback_transaction(conn)
            raise
        result_df = self.db.query_to_df(
            f"SELECT value FROM {self.test_table} WHERE id IN (1,2)"
        )
        self.assertIn(99.9, result_df['value'].values)
        self.assertIn(88.8, result_df['value'].values)

    def test_large_data_insert(self):
        """1000 rows are inserted in chunks and the insert is timed."""
        large_data = pd.DataFrame({
            'id': range(1000, 2000),
            'name': [f"Item_{i}" for i in range(1000, 2000)],
            'value': [i * 0.1 for i in range(1000, 2000)],
            'created_at': pd.date_range('2023-01-01', periods=1000)
        })
        # Use a smaller batch size on Windows.
        chunk_size = 100 if platform.system() == 'Windows' else 500
        start_time = time.time()
        inserted_rows = self.db.insert_from_df(
            self.test_table,
            large_data,
            chunk_size=chunk_size
        )
        elapsed = time.time() - start_time
        self.assertEqual(inserted_rows, 1000)
        print(f"插入1000行数据耗时: {elapsed:.2f}秒 (批次大小: {chunk_size})")

    def test_concurrent_access(self):
        """100 single-row queries issued from 20 threads all succeed."""

        def query_worker(i):
            """Fetch one seed row; ids cycle through 1, 2, 3."""
            df = self.db.query_to_df(
                f"SELECT * FROM {self.test_table} WHERE id = %s",
                params=(i % 3 + 1,)
            )
            return len(df)

        start_time = time.time()
        with ThreadPoolExecutor(max_workers=20) as executor:
            results = list(executor.map(query_worker, range(100)))
        elapsed = time.time() - start_time
        self.assertEqual(sum(results), 100)  # each query should return one row
        print(f"100次并发查询耗时: {elapsed:.2f}")
class TestPlatformSpecific(unittest.TestCase):
    """Tests that only apply on a specific operating system."""

    @classmethod
    def setUpClass(cls):
        """Create a timestamped database for the platform tests."""
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        cls.test_db_name = f"test_platform_db_{stamp}"
        cls.base_config = {
            'host': 'localhost',
            'port': 3306,
            'user': 'root',
            'password': '123123'
        }
        connection = pymysql.connect(**cls.base_config, charset='utf8mb4')
        try:
            with connection.cursor() as cur:
                cur.execute(f"CREATE DATABASE IF NOT EXISTS {cls.test_db_name}")
            connection.commit()
        finally:
            connection.close()

    @classmethod
    def tearDownClass(cls):
        """Drop the database created in ``setUpClass``."""
        connection = pymysql.connect(**cls.base_config, charset='utf8mb4')
        try:
            with connection.cursor() as cur:
                cur.execute(f"DROP DATABASE IF EXISTS {cls.test_db_name}")
            connection.commit()
        finally:
            connection.close()

    def test_windows_timeout(self):
        """On Windows, a query that outlasts the 1s timeouts raises a timeout error."""
        if platform.system() != 'Windows':
            self.skipTest("仅在Windows平台运行")
        config = dict(
            self.base_config,
            database=self.test_db_name,
            connect_timeout=1,
            read_timeout=1,
            write_timeout=1,
        )
        db = MySQLAgent(config)
        # SLEEP(2) runs longer than the one-second timeouts configured above.
        with self.assertRaises((pymysql.OperationalError, TimeoutError)) as ctx:
            try:
                db.query_to_df("SELECT SLEEP(2)")
            except Exception as wrapped:
                # The error may be wrapped: follow args[0] while it is itself
                # an exception, then re-raise the innermost one.
                innermost = wrapped
                while (
                    hasattr(innermost, 'args')
                    and innermost.args
                    and isinstance(innermost.args[0], Exception)
                ):
                    innermost = innermost.args[0]
                raise innermost
        error_msg = str(ctx.exception)
        # "HY000" is checked as the MySQL timeout error code.
        timeout_markers = ("timed out", "timeout", "HY000")
        self.assertTrue(
            any(marker in error_msg for marker in timeout_markers),
            f"未检测到超时异常,实际异常: {error_msg}"
        )

    def test_macos_ssl_connection(self):
        """On macOS, a connection configured with an SSL CA file can run a query."""
        if platform.system() != 'Darwin':
            self.skipTest("仅在macOS平台运行")
        config = dict(
            self.base_config,
            database=self.test_db_name,
            ssl={'ca': '/usr/local/etc/openssl/cert.pem'},
        )
        db = MySQLAgent(config)
        version_df = db.query_to_df("SELECT VERSION() as version")
        self.assertIsNotNone(version_df)
if __name__ == '__main__':
    # Run both test classes in this module with verbose output.
    unittest.main(verbosity=2)
+56
View File
@@ -0,0 +1,56 @@
# test_logger.py
# from utils.logger import log
# import platform
#
# def test_logging():
# log.info(f"当前系统: {platform.system()}")
# try:
# 1/0
# except:
# log.error("除零错误", exc_info=True)
#
# if __name__ == "__main__":
# test_logging()
# test_log_rotation.py
# from utils.logger import log
# import time
#
# def generate_large_log():
# """快速生成超过20MB的测试日志"""
# for i in range(10000):
# log.info(f"测试日志填充数据... {i}" * 10)
# time.sleep(0.001) # 避免内存暴涨
#
# if __name__ == "__main__":
# generate_large_log()
# 使用方法
# my_module/main_class.py
from utils.logger import log
class MainProcessor:
    """Usage example: a processor that logs through a logger bound to its class name."""

    def __init__(self):
        # Bind the concrete class name so subclasses are labelled correctly.
        owner = self.__class__.__name__
        self.log = log.bind(module=owner)

    def main(self):
        """Run the main flow; log any failure and re-raise it."""
        self.log.info("开始执行主流程")
        try:
            self._step1()
            # self._step2()
        except Exception as e:
            self.log.error("主流程执行失败", exc_info=e)
            raise

    def _step1(self):
        """Example sub-step: log the start, do the work, log the result."""
        self.log.debug("执行步骤1: 初始化资源")
        # ...business logic goes here...
        created = 10
        self.log.info("步骤1完成 | created={}", created)
if __name__ == "__main__":
    # Build the example processor and run its main flow.
    MainProcessor().main()
+187
View File
@@ -0,0 +1,187 @@
import pytest
import pandas as pd
import os
from pathlib import Path
from utils.file_handler import FileHandler
from datetime import datetime
@pytest.fixture
def temp_dir(tmp_path):
    """Provide a fresh ``test_files`` directory under pytest's ``tmp_path``."""
    scratch = tmp_path / "test_files"
    scratch.mkdir()
    return scratch
@pytest.fixture
def file_handler(temp_dir):
    """Provide a ``FileHandler`` constructed with the temporary test directory."""
    handler = FileHandler(temp_dir)
    return handler
@pytest.fixture
def sample_dataframe():
    """Provide a small three-row DataFrame with id, name and value columns."""
    columns = {
        'id': [1, 2, 3],
        'name': ['Alice', 'Bob', 'Charlie'],
        'value': [10.5, 20.3, 30.1],
    }
    return pd.DataFrame(columns)
@pytest.fixture
def sample_text_file(temp_dir):
    """Provide the path of a three-line text file inside the test directory."""
    text_path = temp_dir / "test.txt"
    text_path.write_text("line1\nline2\nline3")
    return text_path
# 开始测试
def test_read_write_csv(file_handler, temp_dir, sample_dataframe):
    """Write a DataFrame to CSV, then read it back and check shape and columns."""
    csv_path = temp_dir / "test.csv"

    # Write: the first result row carries the success flag.
    outcome = file_handler.write_file(csv_path, sample_dataframe)
    success_flag = outcome.iloc[0]['success']
    assert bool(success_flag) == True  # after bool() conversion
    assert success_flag == True  # plain value comparison
    assert os.path.exists(csv_path)

    # Read back.
    loaded = file_handler.read_file(csv_path)
    assert loaded.shape == (3, 3)
    assert list(loaded.columns) == ['id', 'name', 'value']
def test_read_write_json(file_handler, temp_dir, sample_dataframe):
    """Write a DataFrame to JSON, then read it back and check its shape."""
    json_path = temp_dir / "test.json"

    # Write.
    outcome = file_handler.write_file(json_path, sample_dataframe)
    assert outcome.iloc[0]['success'] == True

    # Read back.
    loaded = file_handler.read_file(json_path)
    assert loaded.shape == (3, 3)
def test_read_write_excel(file_handler, temp_dir, sample_dataframe):
    """Write a DataFrame to an .xlsx file, then read it back and check its shape."""
    xlsx_path = temp_dir / "test.xlsx"

    # Write.
    outcome = file_handler.write_file(xlsx_path, sample_dataframe)
    assert outcome.iloc[0]['success'] == True

    # Read back.
    loaded = file_handler.read_file(xlsx_path)
    assert loaded.shape == (3, 3)
# NOTE: a second, byte-for-byte identical definition of ``test_read_write_csv``
# used to sit here. It only rebound the name already defined earlier in this
# module, so the earlier copy was never collected and no coverage was gained.
# The CSV round-trip test is now defined exactly once.
# 文件操作测试
def test_file_operations(file_handler, sample_text_file):
    """Exercise the existence check, size, modified time and deletion of a file."""
    target = sample_text_file

    # Existence check.
    exists_row = file_handler.file_exists(target).iloc[0]
    assert exists_row['exists'] == True

    # File size.
    size_row = file_handler.get_file_size(target).iloc[0]
    assert size_row['size_bytes'] > 0

    # Modification time.
    mtime_row = file_handler.get_file_modified_time(target).iloc[0]
    assert isinstance(mtime_row['modified_time'], datetime)

    # Deletion.
    delete_row = file_handler.delete_file(target).iloc[0]
    assert delete_row['deleted'] == True
    assert not os.path.exists(target)
def test_directory_operations(file_handler, temp_dir):
    """Create a sub-directory, find it in a listing, then delete it."""
    subdir = temp_dir / "subdir"

    # Create.
    created = file_handler.create_dir(subdir)
    assert created.iloc[0]['created'] == True
    assert os.path.isdir(subdir)

    # List the parent directory.
    listing = file_handler.list_dirs(temp_dir)
    assert any("subdir" in entry for entry in listing['dir_name'].values)

    # Delete.
    removed = file_handler.delete_dir(subdir)
    assert removed.iloc[0]['deleted'] == True
    assert not os.path.exists(subdir)
# 文件压缩
def test_zip_operations(file_handler, temp_dir, sample_dataframe):
    """Zip two files, unzip the archive and check both files are extracted."""
    # Create the input files.
    test_file1 = temp_dir / "file1.txt"
    test_file2 = temp_dir / "file2.csv"
    file_handler.write_file(test_file1, "test content")
    file_handler.write_file(test_file2, sample_dataframe)

    # Compress both files into one archive.
    zip_path = temp_dir / "test.zip"
    zip_result = file_handler.zip_files([test_file1, test_file2], zip_path)
    assert zip_result.iloc[0]['zipped'] == True
    assert zip_result.iloc[0]['file_count'] == 2

    # Extract the archive.
    extract_dir = temp_dir / "extracted"
    unzip_result = file_handler.unzip(zip_path, extract_dir)
    # Compare by value, like every other flag check in this module: a flag
    # read out of a DataFrame can be a numpy.bool_, for which the identity
    # check `is True` is False even when the flag is set.
    assert unzip_result.iloc[0]['unzipped'] == True
    assert os.path.exists(extract_dir / "file1.txt")
    assert os.path.exists(extract_dir / "file2.csv")
def test_zip_directory(file_handler, temp_dir):
    """Zip a directory tree holding two files and check the reported file count."""
    # Build the tree: test_dir/file1.txt and test_dir/sub/file2.txt.
    source_root = temp_dir / "test_dir"
    nested = source_root / "sub"
    nested.mkdir(parents=True)
    (source_root / "file1.txt").write_text("content1")
    (nested / "file2.txt").write_text("content2")

    # Compress the whole directory.
    archive = temp_dir / "dir.zip"
    outcome = file_handler.zip_dir(source_root, archive)
    first_row = outcome.iloc[0]
    assert first_row['zipped'] == True
    assert first_row['file_count'] == 2