Files
saas/back_ground_module/data_monitor.py
T
2025-08-14 11:55:03 +08:00

387 lines
15 KiB
Python

"""Field monitoring (字段监控): snapshot Jiandaoyun form widgets and report field changes."""
import sys
from datetime import datetime, timedelta, timezone
import pandas as pd
import zipfile
import logging
from pathlib import Path
import json
import requests
from api import API
import time
import os
from log_config import configure_task_logger, configure_error_task_logger
from back_ground_module import CommonModule
# Regular task logger, returned already configured by log_config.
logger = configure_task_logger()
# Error-task logger, returned already configured by log_config.
error_task_logger = configure_error_task_logger()
# Shared helper object; DataMonitor.main uses it to report task status and errors.
common_tools = CommonModule()
# ---------------------------- 配置项 ----------------------------
class Config:
    """Static settings for the field-monitoring job.

    Directory names are relative paths; they are combined with
    ``Utils.get_path`` by the code that uses them.
    """

    # Root directory for everything the job writes.
    OUTPUT_DIR = "output"
    # Sub-directory of OUTPUT_DIR that holds the CSV snapshots.
    DATA_DIR = "数据快照存储"
    # Sub-directory of OUTPUT_DIR that holds the zip archives of old snapshots.
    ARCHIVE_DIR = "压缩包存储"
    # Number of most recent days (today included) whose snapshots are not archived.
    RETAIN_DAYS = 7
    # File extension used when naming snapshot archives.
    COMPRESS_FORMAT = "zip"
    # Fixed: the original called os.path.join with ONE concatenated argument
    # (OUTPUT_DIR + "data_monitor.log"), which produced "outputdata_monitor.log"
    # next to the output directory instead of a file inside it.
    LOG_FILE = os.path.join(OUTPUT_DIR, "data_monitor.log")
    # NOTE(review): this value has the same missing-separator slip and evaluates
    # to "outputchanges_summary.csv". It is deliberately left unchanged here:
    # DataMonitor.save_changes joins it under the data directory, so the change
    # history already lives at <data_dir>/outputchanges_summary.csv, and a value
    # containing a directory component would point at a folder nobody creates.
    # Rename it together with save_changes (and migrate the existing file).
    CHANGES_FILE = OUTPUT_DIR + "changes_summary.csv"
    # Number of retries after the first failed HTTP attempt.
    MAX_RETRIES = 3
    # Seconds to sleep between HTTP attempts.
    RETRY_DELAY = 0.5
# ---------------------- 工具函数 -----------------------
class Utils:
    """Stateless helpers for building paths, creating directories and timestamps."""

    @staticmethod
    def get_path(*path_parts):
        """Join the given segments with ``pathlib`` and return the result as ``str``."""
        joined = Path(*path_parts)
        return str(joined)

    @staticmethod
    def ensure_dir(path):
        """Create ``path`` (and any missing parents) if absent, then log it at debug level."""
        target = Path(path)
        target.mkdir(parents=True, exist_ok=True)
        logger.debug(f"确保目录存在: {path}")

    @staticmethod
    def get_iso_time():
        """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SS.mmmZ``."""
        now = datetime.now(timezone.utc)
        # Truncate (not round) microseconds to milliseconds, zero-padded to 3 digits.
        millis = now.microsecond // 1000
        return now.strftime("%Y-%m-%dT%H:%M:%S") + f".{millis:03d}Z"

    @staticmethod
    def is_first_run_today():
        """Return True unless both of today's (UTC date) snapshot files already exist."""
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        required_files = (
            Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"snapshot_{today}.csv"),
            Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"all_widgets_{today}.csv"),
        )
        # Any missing file means the daily snapshot has not completed yet.
        return not all(os.path.exists(candidate) for candidate in required_files)
# ---------------------- API 客户端 -----------------------
class APIClient:
    """Thin HTTP helper with fixed-delay retries, plus a handle to the project ``API`` object."""

    def __init__(self):
        """Prepare the default request headers and create the ``API`` instance."""
        # NOTE(review): the bearer token is hard-coded in source; consider loading
        # it from configuration or the environment and rotating this value.
        default_headers = {}
        default_headers['Authorization'] = 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN'
        default_headers['Content-Type'] = 'application/json'
        self.headers = default_headers
        self.api = API()

    def request(self, url, payload, method='POST'):
        """Send ``payload`` to ``url`` and return the response object.

        A failed attempt (any ``requests`` exception, including the HTTP error
        raised by ``raise_for_status``) is retried after ``Config.RETRY_DELAY``
        seconds, up to ``Config.MAX_RETRIES`` retries; the exception from the
        final attempt is re-raised to the caller.
        """
        final_attempt = Config.MAX_RETRIES
        for attempt in range(final_attempt + 1):
            try:
                reply = requests.request(
                    method,
                    url,
                    headers=self.headers,
                    data=payload,
                    timeout=30,
                )
                reply.raise_for_status()
            except requests.exceptions.RequestException as exc:
                if attempt == final_attempt:
                    # Out of retries: let the caller see the original error.
                    raise
                time.sleep(Config.RETRY_DELAY)
                logger.warning(f"请求失败 (尝试 {attempt + 1}/{Config.MAX_RETRIES}): {str(exc)}")
            else:
                return reply
# ---------------------- 数据处理类 -----------------------
class DataHandler:
    """Owns the output directories and the CSV persistence helpers of the job."""

    def __init__(self):
        """Record the run timestamp and UTC date, create directories, build the API client."""
        self.execution_time = Utils.get_iso_time()
        self.today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        self.setup_dirs()
        self.api = APIClient()

    def setup_dirs(self):
        """Create the data/archive directories and set the paths of the "last run" files."""
        self.data_dir = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR)
        self.archive_dir = Utils.get_path(Config.OUTPUT_DIR, Config.ARCHIVE_DIR)
        Utils.ensure_dir(self.data_dir)
        Utils.ensure_dir(self.archive_dir)
        self.last_data_file = Utils.get_path(self.data_dir, "last_data.csv")
        self.last_widget_file = Utils.get_path(self.data_dir, "last_widget_data.csv")

    @staticmethod
    def _write_csv_atomically(data, filename):
        """Write ``data`` to ``filename`` through a sibling ``.tmp`` file.

        ``os.replace`` swaps the finished temp file into place in one step, so
        a reader never sees a missing or half-written target. Any exception is
        propagated to the caller after the temp file has been cleaned up.
        """
        temp_file = filename + '.tmp'
        try:
            data.to_csv(temp_file, index=False)
            os.replace(temp_file, filename)
        finally:
            # After a successful replace the temp file no longer exists; it is
            # only still present when one of the two steps above failed.
            if os.path.exists(temp_file):
                try:
                    os.remove(temp_file)
                except OSError:
                    # Best-effort cleanup; the original error (if any) matters more.
                    pass

    def load_last_data(self):
        """Load the previous run's data and widget CSVs.

        Returns:
            ``(last_data, last_widget)``; each item is a DataFrame, or None
            when its file does not exist. If reading raises, the error is
            logged and ``(None, None)`` is returned.
        """
        try:
            last_data = pd.read_csv(self.last_data_file) if os.path.exists(self.last_data_file) else None
            last_widget = pd.read_csv(self.last_widget_file) if os.path.exists(self.last_widget_file) else None
            return last_data, last_widget
        except Exception as e:
            error_task_logger.error(f"加载上次数据失败: {str(e)}")
            return None, None

    def save_last_data(self, data, widget_data):
        """Persist the current data and widget frames as the baseline for the next run.

        Each file is written atomically so an interrupted run cannot leave a
        truncated baseline behind (the original wrote the files in place).

        Returns:
            True on success; False (after logging) if either write fails.
        """
        try:
            self._write_csv_atomically(data, self.last_data_file)
            self._write_csv_atomically(widget_data, self.last_widget_file)
            return True
        except Exception as e:
            error_task_logger.error(f"保存当前数据失败: {str(e)}")
            return False

    def save_to_csv(self, data, filename):
        """Write ``data`` to ``filename`` without the index column.

        The original removed the existing file and then renamed the temp file,
        which left a window with no file on disk and a stray ``.tmp`` file on
        failure; the write now goes through ``os.replace`` instead.

        Returns:
            True on success; False (after logging) on any failure.
        """
        try:
            self._write_csv_atomically(data, filename)
            return True
        except Exception as e:
            error_task_logger.error(f"保存文件失败: {filename}, 错误: {str(e)}")
            return False
# ---------------------- 数据监控主类 -----------------------
class DataMonitor(DataHandler):
    """Snapshot Jiandaoyun form widgets and report field-level changes between runs.

    The first run of a (UTC) day stores dated snapshot files; later runs
    compare the monitored widgets with the previous baseline and report any
    added, deleted or modified fields to a CSV file and to a Jiandaoyun form.
    """

    def __init__(self):
        """Initialise the handler state and load the previous run's data, if any."""
        super().__init__()
        self.last_data, self.last_widget = self.load_last_data()

    def fetch_apps(self):
        """Return the application list as a DataFrame.

        NOTE(review): a single page (skip=0, limit=100) is requested, so
        applications beyond the first 100 would not be seen - confirm whether
        pagination is needed.
        """
        url = "https://api.jiandaoyun.com/api/v5/app/list"
        payload = json.dumps({"skip": 0, "limit": 100})
        response = self.api.request(url, payload)
        return pd.DataFrame(response.json().get("apps", []))

    def fetch_entries(self, app_df):
        """Return the forms of every application in ``app_df``.

        Each row is tagged with the ``app_id`` it was fetched for.

        Returns:
            One concatenated DataFrame, or None when no application has forms.
        """
        url = "https://api.jiandaoyun.com/api/v5/app/entry/list"
        all_entries = []
        for _, app in app_df.iterrows():
            payload = json.dumps({"app_id": app['app_id']})
            response = self.api.request(url, payload)
            entries = response.json().get("forms", [])
            if entries:
                entry_df = pd.DataFrame(entries)
                entry_df['app_id'] = app['app_id']
                all_entries.append(entry_df)
        return pd.concat(all_entries, ignore_index=True) if all_entries else None

    def fetch_widgets(self, entry_df):
        """Return the widgets of every form in ``entry_df``.

        Each row is tagged with the ``app_id`` and ``entry_id`` it belongs to.

        Returns:
            One concatenated DataFrame, or None when there are no forms or no
            form has widgets.
        """
        # fetch_entries returns None when nothing was found; the original then
        # failed here with an AttributeError on ``None.iterrows``.
        if entry_df is None:
            return None
        url = "https://api.jiandaoyun.com/api/v5/app/entry/widget/list"
        all_widgets = []
        for _, entry in entry_df.iterrows():
            payload = json.dumps({
                "app_id": entry['app_id'],
                "entry_id": entry['entry_id']
            })
            response = self.api.request(url, payload)
            widgets = response.json().get('widgets', [])
            if widgets:
                widget_df = pd.DataFrame(widgets)
                widget_df['app_id'] = entry['app_id']
                widget_df['entry_id'] = entry['entry_id']
                all_widgets.append(widget_df)
        return pd.concat(all_widgets, ignore_index=True) if all_widgets else None

    def fetch_monitor_data(self):
        """Return the rows of the monitor-list form as a de-duplicated DataFrame.

        Columns containing dict or list values are converted to strings first,
        because such values are unhashable and ``drop_duplicates`` needs
        hashable cells.
        """
        payload = {"api_key": "6694d3c4fcb69ca9a111a6c4", "entry_id": "6850c044f17c934b3ec01fea"}
        data = self.api.api.entry_data_list(payload).get("data")
        data_list = pd.DataFrame(data)
        for col in data_list.columns:
            if data_list[col].apply(lambda x: isinstance(x, (dict, list))).any():
                data_list[col] = data_list[col].astype(str)
        return data_list.drop_duplicates()

    def match_widgets(self, data_list, widget_list):
        """Keep the widgets whose ``entry_id`` appears in the monitor list.

        Raises:
            ValueError: if ``widget_list`` is None, or ``data_list`` lacks the
                ``_widget_1750122565203`` column the ids are read from.
        """
        if widget_list is None:
            # Previously surfaced as "'NoneType' object is not subscriptable".
            raise ValueError("未获取到任何字段数据, 无法匹配监控表单")
        if '_widget_1750122565203' not in data_list.columns:
            raise ValueError("数据列表中缺少 '_widget_1750122565203'")
        return widget_list[widget_list['entry_id'].isin(data_list['_widget_1750122565203'])]

    def archive_old_data(self):
        """Move dated snapshot CSVs older than the retention window into monthly zips.

        Files named ``snapshot_<date>.csv`` or ``all_widgets_<date>.csv`` whose
        date is not one of the last ``Config.RETAIN_DAYS`` UTC days are
        appended to ``snapshots_<YYYY-MM>.<ext>`` in the archive directory and
        then deleted from the data directory.
        """
        now = datetime.now(timezone.utc)
        keep_dates = {
            (now - timedelta(days=i)).strftime("%Y-%m-%d")
            for i in range(Config.RETAIN_DAYS)
        }
        for filename in os.listdir(self.data_dir):
            if not filename.endswith(".csv"):
                continue
            if filename.startswith("snapshot_"):
                prefix = "snapshot_"
            elif filename.startswith("all_widgets_"):
                prefix = "all_widgets_"
            else:
                continue
            # Strip the prefix and the ".csv" suffix to get the date part.
            date_str = filename[len(prefix):-4]
            if date_str in keep_dates:
                continue
            year_month = date_str[:7]
            archive_name = Utils.get_path(self.archive_dir, f"snapshots_{year_month}.{Config.COMPRESS_FORMAT}")
            file_path = Utils.get_path(self.data_dir, filename)
            with zipfile.ZipFile(archive_name, 'a', zipfile.ZIP_DEFLATED) as zipf:
                zipf.write(file_path, arcname=filename)
            # Delete the source only after the archive has been closed.
            os.remove(file_path)
            # NOTE(review): the two values are logged back-to-back with no separator.
            logger.debug(f"已归档 {filename}{archive_name}")

    def compare_data(self, current_data):
        """Diff ``current_data`` against the stored baseline.

        Rows are matched on ``name + app_id``. Rows only in the current data
        are "added", rows only in the baseline are "deleted", and matched rows
        whose ``label`` or ``type`` differ (both non-null) are "modified", with
        one output row per changed field.

        Returns:
            A dict with ``added``, ``deleted`` and ``modified`` DataFrames, or
            None when no baseline file exists yet.
        """
        if not os.path.exists(self.last_data_file):
            return None
        last_data = pd.read_csv(self.last_data_file)
        # Work on a copy: the original added the helper column to the caller's
        # frame (a filtered slice), which triggered pandas' chained-assignment
        # warning and leaked ``unique_id`` into the saved baseline CSV.
        current_data = current_data.copy()
        last_data['unique_id'] = last_data['name'].astype(str) + last_data['app_id'].astype(str)
        current_data['unique_id'] = current_data['name'].astype(str) + current_data['app_id'].astype(str)
        merged = pd.merge(
            last_data, current_data,
            on=['unique_id'],
            how='outer',
            suffixes=('_last', '_current'),
            indicator=True
        )
        changes = {
            'added': merged[merged['_merge'] == 'right_only'],
            'deleted': merged[merged['_merge'] == 'left_only'],
            'modified': pd.DataFrame()
        }
        for col in ['label', 'type']:
            last_col = f"{col}_last"
            current_col = f"{col}_current"
            if last_col in merged.columns and current_col in merged.columns:
                mask = (merged['_merge'] == 'both') & (merged[last_col] != merged[current_col])
                # NaN never equals NaN, so null cells are excluded explicitly.
                mask = mask & ~merged[last_col].isna() & ~merged[current_col].isna()
                if mask.any():
                    modified = merged.loc[mask].copy()
                    modified['changed_field'] = col
                    modified['old_value'] = modified[last_col]
                    modified['new_value'] = modified[current_col]
                    modified['change_status'] = 'update'
                    changes['modified'] = pd.concat([changes['modified'], modified])
        return changes

    def save_changes(self, changes, apps, entries):
        """Turn a ``compare_data`` result into report rows and publish them.

        The rows are appended to the changes CSV in the data directory (the
        header is written only when the file is new) and sent to Jiandaoyun.

        Returns:
            True when at least one change row was produced, otherwise False.
        """
        change_labels = {'added': '新增', 'deleted': '删除', 'modified': '修改'}
        result_rows = []
        for change_type in ['added', 'deleted', 'modified']:
            # Deleted rows only have baseline ("_last") values.
            suffix = 'current' if change_type in ['added', 'modified'] else 'last'
            for _, row in changes[change_type].iterrows():
                app_id = row[f'app_id_{suffix}']
                entry_id = row[f'entry_id_{suffix}']
                # Each lookup mask is evaluated once (the original built it twice).
                app_names = apps.loc[apps['app_id'] == app_id, 'name']
                app_name = app_names.values[0] if not app_names.empty else '未知应用'
                entry_names = entries.loc[
                    (entries['app_id'] == app_id) & (entries['entry_id'] == entry_id), 'name'
                ]
                entry_name = entry_names.values[0] if not entry_names.empty else '未知表单'
                if change_type == 'added':
                    content = f"新增字段: {row['label_current']}"
                elif change_type == 'deleted':
                    content = f"删除字段: {row['label_last']}"
                else:
                    content = f"\"{row['old_value']}\"修改为\"{row['new_value']}\""
                result_rows.append({
                    '程序执行时间': self.execution_time,
                    'unique_id': row['unique_id'],
                    'app_id': app_id,
                    'app_name': app_name,
                    'entry_id': entry_id,
                    'entry_name': entry_name,
                    'change_type': change_labels[change_type],
                    '具体内容': content
                })
        if result_rows:
            result_df = pd.DataFrame(result_rows)
            changes_file = Utils.get_path(self.data_dir, Config.CHANGES_FILE)
            result_df.to_csv(changes_file, mode='a', header=not os.path.exists(changes_file), index=False)
            self.add_to_jiandaoyun(result_df)
            return True
        return False

    def add_to_jiandaoyun(self, result_df):
        """Batch-create one Jiandaoyun record per row of ``result_df``.

        Returns:
            True when the API answers with a list (treated as the created
            records); otherwise the failure is logged and False is returned.
        """
        all_data = [{
            "_widget_1751446961315": {"value": row["app_name"]},
            "_widget_1751446961316": {"value": row["entry_name"]},
            "_widget_1751446961317": {"value": row["change_type"]},
            "_widget_1751446961318": {"value": row["具体内容"]},
            "_widget_1751446961319": {"value": row["程序执行时间"]},
        } for _, row in result_df.iterrows()]
        payload = {
            "api_key": "6694d3c4fcb69ca9a111a6c4",
            "entry_id": "6863a402a77925690a470cc5",
            "data_list": all_data
        }
        response = self.api.api.entry_data_batch_create(payload)
        if isinstance(response, list):
            logger.info(f"成功写入 {len(response)} 条变更数据到简道云")
            return True
        # A non-dict answer (e.g. None) has no ``.get``; the original raised
        # AttributeError here instead of logging the failure.
        message = response.get('message', '未知错误') if isinstance(response, dict) else '未知错误'
        error_task_logger.error(f"写入简道云失败: {message}")
        return False

    def run_daily_snapshot(self):
        """Fetch everything, store today's dated snapshots, archive old ones, reset the baseline.

        NOTE(review): the baseline is overwritten without calling
        ``compare_data``, so changes made between the previous check and this
        run are never reported - confirm this is intended. The results of the
        save calls are also not checked; the method always returns True.
        """
        logger.info("=== 开始每日数据快照任务 ===")
        apps = self.fetch_apps()
        entries = self.fetch_entries(apps)
        widgets = self.fetch_widgets(entries)
        monitor_data = self.fetch_monitor_data()
        matched_data = self.match_widgets(monitor_data, widgets)
        self.save_to_csv(widgets, Utils.get_path(self.data_dir, f"all_widgets_{self.today}.csv"))
        self.save_to_csv(matched_data, Utils.get_path(self.data_dir, f"snapshot_{self.today}.csv"))
        self.archive_old_data()
        self.save_last_data(matched_data, widgets)
        logger.info("=== 每日数据快照任务成功完成 ===")
        return True

    def run_hourly_check(self):
        """Fetch everything, report differences from the baseline, then update the baseline."""
        logger.info("=== 开始每小时数据检查任务 ===")
        apps = self.fetch_apps()
        entries = self.fetch_entries(apps)
        widgets = self.fetch_widgets(entries)
        monitor_data = self.fetch_monitor_data()
        current_data = self.match_widgets(monitor_data, widgets)
        changes = self.compare_data(current_data)
        if changes and any(len(v) > 0 for v in changes.values()):
            self.save_changes(changes, apps, entries)
        self.save_last_data(current_data, widgets)
        logger.info("=== 每小时数据检查任务成功完成 ===")
        return True

    def main(self):
        """Run the daily snapshot or the hourly check and report the task status.

        Returns:
            The result of the executed task, or False when any exception was
            raised (the error is logged and reported through ``common_tools``).
        """
        # Naive local time, as before; the module-level ``datetime`` class is
        # used instead of shadowing it with a function-local ``import datetime``.
        task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            logger.info(f"=== 开始数据监控任务 ({self.execution_time}) ===")
            if Utils.is_first_run_today():
                success = self.run_daily_snapshot()
            else:
                success = self.run_hourly_check()
            common_tools.send_task_status(task_start_time, "字段监控")
            logger.info("=== 数据监控任务完成 ===")
            return success
        except Exception as e:
            error_task_logger.error(f"数据监控任务发生异常: {e}")
            common_tools.send_task_error(task_start_time, "字段监控", str(e))
            return False
if __name__ == "__main__":
    # Script entry point: make sure the output root exists, run the monitor
    # once, and exit with status 1 when the run reports failure.
    Utils.ensure_dir(Config.OUTPUT_DIR)
    succeeded = DataMonitor().main()
    if not succeeded:
        sys.exit(1)