386 lines
14 KiB
Python
386 lines
14 KiB
Python
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
import pandas as pd
|
|
import zipfile
|
|
import logging
|
|
from pathlib import Path
|
|
import json
|
|
import requests
|
|
from api import API
|
|
import time
|
|
import os
|
|
|
|
|
|
# ---------------------------- 配置项 ----------------------------
|
|
class Config:
    """Static settings shared by every part of the monitoring script."""

    # --- Directory layout (DATA_DIR and ARCHIVE_DIR are joined under OUTPUT_DIR) ---
    OUTPUT_DIR: str = "output"
    DATA_DIR: str = "数据快照存储"
    ARCHIVE_DIR: str = "压缩包存储"

    # --- Archiving ---
    # Number of most recent UTC dates whose snapshot files stay unarchived.
    RETAIN_DAYS: int = 7
    # Used as the file extension of the monthly archive files.
    COMPRESS_FORMAT: str = "zip"

    # --- Output files ---
    LOG_FILE: str = "data_monitor.log"
    CHANGES_FILE: str = "changes_summary.csv"

    # --- HTTP retry policy ---
    # Retries performed after the first failed attempt.
    MAX_RETRIES: int = 3
    # Seconds slept between two attempts.
    RETRY_DELAY: float = 0.5
|
|
|
|
|
|
# ---------------------- 日志配置 -----------------------
|
|
class Logger:
    """Namespace for the script's logging bootstrap."""

    @staticmethod
    def setup():
        """Configure root logging and return this module's logger.

        Records at INFO level and above are sent both to the console and to
        ``Config.LOG_FILE``.

        Returns:
            logging.Logger: the logger named after this module.
        """
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(),
                # The log messages are Chinese; pin the encoding so the file
                # does not depend on the platform's default code page.
                logging.FileHandler(Config.LOG_FILE, encoding='utf-8'),
            ],
        )
        return logging.getLogger(__name__)
|
|
|
|
|
|
# Module-level logger shared by every class below; configured once at import time.
logger = Logger.setup()
|
|
|
|
|
|
# ---------------------- 工具函数 -----------------------
|
|
class Utils:
    """Stateless helpers for paths, timestamps and first-run detection."""

    @staticmethod
    def get_path(*path_parts):
        """Join ``path_parts`` into a single path and return it as a string."""
        joined = Path(*path_parts)
        return str(joined)

    @staticmethod
    def ensure_dir(path):
        """Create ``path`` (including missing parents) if it does not exist yet."""
        target = Path(path)
        target.mkdir(parents=True, exist_ok=True)
        logger.debug(f"确保目录存在: {path}")

    @staticmethod
    def get_iso_time():
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.mmmZ``."""
        now = datetime.now(timezone.utc)
        seconds_part = now.strftime("%Y-%m-%dT%H:%M:%S")
        # Truncate (not round) microseconds to milliseconds.
        millis = now.microsecond // 1000
        return f"{seconds_part}.{millis:03d}Z"

    @staticmethod
    def is_first_run_today():
        """Return True unless both of today's (UTC) snapshot files already exist."""
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        required_files = (
            Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"snapshot_{today}.csv"),
            Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"all_widgets_{today}.csv"),
        )
        for required in required_files:
            if not os.path.exists(required):
                return True
        return False
|
|
|
|
|
|
# ---------------------- API 客户端 -----------------------
|
|
class APIClient:
    """HTTP client with a fixed bearer header and a simple retry loop.

    Also exposes ``self.api``, an instance of the project's ``API`` helper.
    """

    # NOTE(review): this credential is committed to source control. It is kept
    # only as a backward-compatible fallback; rotate it and supply the real
    # value through the JDY_API_TOKEN environment variable instead.
    _DEFAULT_TOKEN = 'qygHulymo1fekJk4CIZyNKjyQAzG8CFN'

    def __init__(self):
        token = os.environ.get('JDY_API_TOKEN') or self._DEFAULT_TOKEN
        self.headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        }
        self.api = API()

    def request(self, url, payload, method='POST'):
        """Send ``payload`` to ``url`` and return the successful response.

        Args:
            url: Target URL.
            payload: Request body, passed to ``requests`` as ``data``.
            method: HTTP method name, ``'POST'`` by default.

        Returns:
            requests.Response: the first response whose status passes
            ``raise_for_status()``.

        Raises:
            requests.exceptions.RequestException: re-raised once the initial
            attempt and all ``Config.MAX_RETRIES`` retries have failed.
        """
        # One initial attempt plus MAX_RETRIES retries; always try at least once.
        attempts = max(1, Config.MAX_RETRIES + 1)
        for attempt in range(1, attempts + 1):
            try:
                response = requests.request(
                    method, url, headers=self.headers,
                    data=payload, timeout=30
                )
                response.raise_for_status()
                return response
            except requests.exceptions.RequestException as e:
                if attempt == attempts:
                    raise
                # Log before sleeping so the failure shows up immediately, and
                # report against the real number of attempts.
                logger.warning(f"请求失败 (尝试 {attempt}/{attempts}): {str(e)}")
                time.sleep(Config.RETRY_DELAY)
|
|
|
|
|
|
# ---------------------- 数据处理类 -----------------------
|
|
class DataHandler:
    """Base class owning the run timestamp, data directories and CSV persistence."""

    def __init__(self):
        # Both values are captured once so every record of a run shares them.
        self.execution_time = Utils.get_iso_time()
        self.today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        self.setup_dirs()
        self.api = APIClient()

    def setup_dirs(self):
        """Create the data and archive directories and derive the baseline file paths."""
        self.data_dir = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR)
        self.archive_dir = Utils.get_path(Config.OUTPUT_DIR, Config.ARCHIVE_DIR)
        Utils.ensure_dir(self.data_dir)
        Utils.ensure_dir(self.archive_dir)
        self.last_data_file = Utils.get_path(self.data_dir, "last_data.csv")
        self.last_widget_file = Utils.get_path(self.data_dir, "last_widget_data.csv")

    def load_last_data(self):
        """Load the baseline frames written by the previous run.

        Returns:
            tuple: ``(last_data, last_widget)``. Each element is a DataFrame,
            or ``None`` when its file does not exist. ``(None, None)`` is
            returned when reading fails.
        """
        try:
            last_data = pd.read_csv(self.last_data_file) if os.path.exists(self.last_data_file) else None
            last_widget = pd.read_csv(self.last_widget_file) if os.path.exists(self.last_widget_file) else None
            return last_data, last_widget
        except Exception as e:
            logger.error(f"加载上次数据失败: {str(e)}")
            return None, None

    def save_last_data(self, data, widget_data):
        """Overwrite the baseline files with ``data`` and ``widget_data``.

        Returns:
            bool: True when both files were written, False if anything failed
            (the error is logged, not raised).
        """
        try:
            data.to_csv(self.last_data_file, index=False)
            widget_data.to_csv(self.last_widget_file, index=False)
            return True
        except Exception as e:
            logger.error(f"保存当前数据失败: {str(e)}")
            return False

    def save_to_csv(self, data, filename):
        """Write ``data`` to ``filename`` through a temporary file.

        The frame is written to ``<filename>.tmp`` first and then moved over
        the target with ``os.replace``, so an existing target is never removed
        before its replacement is complete.

        Args:
            data: DataFrame to write (without the index).
            filename: Destination path.

        Returns:
            bool: True on success, False if anything failed (the error is
            logged, not raised).
        """
        temp_file = f"{filename}.tmp"
        try:
            data.to_csv(temp_file, index=False)
            # os.replace overwrites an existing target in one step on every
            # platform, unlike the previous remove-then-rename sequence.
            os.replace(temp_file, filename)
            return True
        except Exception as e:
            logger.error(f"保存文件失败: {filename}, 错误: {str(e)}")
            # Do not leave a half-written temp file behind.
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
            except OSError:
                pass
            return False
|
|
|
|
|
|
# ---------------------- 数据监控主类 -----------------------
|
|
class DataMonitor(DataHandler):
    """Snapshot Jiandaoyun form widgets and report changes between runs.

    The first run of a UTC day writes dated snapshot files and archives old
    ones; later runs on the same day compare the current widgets with the
    stored baseline and record additions, deletions and modifications.
    """

    # Identifiers passed to the project's ``API`` helper.
    _API_KEY = "6694d3c4fcb69ca9a111a6c4"
    _MONITOR_LIST_ENTRY_ID = "6850c044f17c934b3ec01fea"
    _CHANGE_LOG_ENTRY_ID = "6863a402a77925690a470cc5"
    # Column of the monitoring list matched against widget ``entry_id`` values
    # (presumably the ids of the forms to monitor; verify against the form).
    _MONITORED_ENTRY_COLUMN = '_widget_1750122565203'
    _CHANGE_TYPE_LABELS = {'added': '新增', 'deleted': '删除', 'modified': '修改'}

    def __init__(self):
        super().__init__()
        self.last_data, self.last_widget = self.load_last_data()

    def fetch_apps(self):
        """Return a DataFrame built from the ``apps`` list of the app-list endpoint."""
        url = "https://api.jiandaoyun.com/api/v5/app/list"
        # NOTE(review): only a single page of at most 100 apps is requested;
        # accounts with more apps would need pagination via ``skip``.
        payload = json.dumps({"skip": 0, "limit": 100})
        response = self.api.request(url, payload)
        return pd.DataFrame(response.json().get("apps", []))

    def fetch_entries(self, app_df):
        """Return the forms of every app in ``app_df``, tagged with ``app_id``.

        Returns:
            DataFrame of all forms, or ``None`` when ``app_df`` is missing or
            empty, or when no app returned any form.
        """
        if app_df is None or app_df.empty:
            return None

        url = "https://api.jiandaoyun.com/api/v5/app/entry/list"
        all_entries = []

        for app_id in app_df['app_id']:
            payload = json.dumps({"app_id": app_id})
            response = self.api.request(url, payload)
            entries = response.json().get("forms", [])

            if entries:
                entry_df = pd.DataFrame(entries)
                entry_df['app_id'] = app_id
                all_entries.append(entry_df)

        return pd.concat(all_entries, ignore_index=True) if all_entries else None

    def fetch_widgets(self, entry_df):
        """Return the widgets of every form in ``entry_df``, tagged with ``app_id`` and ``entry_id``.

        Returns:
            DataFrame of all widgets, or ``None`` when ``entry_df`` is missing
            or empty, or when no form returned any widget.
        """
        if entry_df is None or entry_df.empty:
            return None

        url = "https://api.jiandaoyun.com/api/v5/app/entry/widget/list"
        all_widgets = []

        for app_id, entry_id in zip(entry_df['app_id'], entry_df['entry_id']):
            payload = json.dumps({"app_id": app_id, "entry_id": entry_id})
            response = self.api.request(url, payload)
            widgets = response.json().get('widgets', [])

            if widgets:
                widget_df = pd.DataFrame(widgets)
                widget_df['app_id'] = app_id
                widget_df['entry_id'] = entry_id
                all_widgets.append(widget_df)

        return pd.concat(all_widgets, ignore_index=True) if all_widgets else None

    def fetch_monitor_data(self):
        """Return the de-duplicated rows of the monitoring list.

        The rows come from the ``data`` key of ``API.entry_data_list``. Columns
        holding dicts or lists are converted to strings, because such values
        are unhashable and ``drop_duplicates`` would fail on them.
        """
        payload = {"api_key": self._API_KEY, "entry_id": self._MONITOR_LIST_ENTRY_ID}
        data = self.api.api.entry_data_list(payload).get("data")
        data_list = pd.DataFrame(data)

        for col in data_list.columns:
            if data_list[col].apply(lambda x: isinstance(x, (dict, list))).any():
                data_list[col] = data_list[col].astype(str)

        return data_list.drop_duplicates()

    def match_widgets(self, data_list, widget_list):
        """Keep the widgets whose ``entry_id`` appears in the monitoring list.

        Raises:
            ValueError: if ``data_list`` lacks the monitored-entry column.
        """
        column = self._MONITORED_ENTRY_COLUMN
        if column not in data_list.columns:
            raise ValueError(f"数据列表中缺少 '{column}' 列")
        return widget_list[widget_list['entry_id'].isin(data_list[column])]

    def archive_old_data(self):
        """Move dated snapshot CSVs outside the retention window into monthly zips.

        Files named ``snapshot_<date>.csv`` or ``all_widgets_<date>.csv`` whose
        date is not one of the last ``Config.RETAIN_DAYS`` UTC dates are
        appended to ``snapshots_<YYYY-MM>.<format>`` and then deleted.
        """
        now = datetime.now(timezone.utc)
        keep_dates = {
            (now - timedelta(days=i)).strftime("%Y-%m-%d")
            for i in range(Config.RETAIN_DAYS)
        }

        for filename in os.listdir(self.data_dir):
            if not filename.endswith(".csv"):
                continue
            if filename.startswith("snapshot_"):
                date_str = filename[len("snapshot_"):-len(".csv")]
            elif filename.startswith("all_widgets_"):
                date_str = filename[len("all_widgets_"):-len(".csv")]
            else:
                continue
            if date_str in keep_dates:
                continue

            year_month = date_str[:7]
            archive_name = Utils.get_path(self.archive_dir, f"snapshots_{year_month}.{Config.COMPRESS_FORMAT}")
            file_path = Utils.get_path(self.data_dir, filename)

            with zipfile.ZipFile(archive_name, 'a', zipfile.ZIP_DEFLATED) as zipf:
                zipf.write(file_path, arcname=filename)

            # Only reached when the zip was written and closed without error.
            os.remove(file_path)
            logger.debug(f"已归档 {filename} 到 {archive_name}")

    def compare_data(self, current_data):
        """Diff ``current_data`` against the baseline file.

        Rows are keyed by ``name`` + ``app_id``. ``current_data`` is not
        modified.

        Returns:
            ``None`` when no baseline file exists, otherwise a dict with the
            DataFrames ``added``, ``deleted`` and ``modified``. ``modified``
            holds one row per changed field (``label`` or ``type``) with the
            extra columns ``changed_field``, ``old_value``, ``new_value`` and
            ``change_status``.
        """
        if not os.path.exists(self.last_data_file):
            return None

        # Read everything as text: letting pandas infer dtypes can turn values
        # into numbers, which would then never equal the live string values.
        last_data = pd.read_csv(self.last_data_file, dtype=str)
        # Work on a copy so the helper column does not leak into the frame the
        # caller later persists as the new baseline.
        current = current_data.copy()
        last_data['unique_id'] = last_data['name'].astype(str) + last_data['app_id'].astype(str)
        current['unique_id'] = current['name'].astype(str) + current['app_id'].astype(str)

        merged = pd.merge(
            last_data, current,
            on=['unique_id'],
            how='outer',
            suffixes=('_last', '_current'),
            indicator=True
        )

        in_both = merged['_merge'] == 'both'
        modified_parts = []

        for col in ['label', 'type']:
            last_col = f"{col}_last"
            current_col = f"{col}_current"
            if last_col not in merged.columns or current_col not in merged.columns:
                continue

            # Compare string forms so a CSV round trip cannot fake a change;
            # rows where either side is missing are not treated as modified.
            differs = merged[last_col].astype(str) != merged[current_col].astype(str)
            mask = in_both & differs & merged[last_col].notna() & merged[current_col].notna()

            if mask.any():
                modified = merged.loc[mask].copy()
                modified['changed_field'] = col
                modified['old_value'] = modified[last_col]
                modified['new_value'] = modified[current_col]
                modified['change_status'] = 'update'
                modified_parts.append(modified)

        return {
            'added': merged[merged['_merge'] == 'right_only'],
            'deleted': merged[merged['_merge'] == 'left_only'],
            # Concatenate once; appending to an empty frame is deprecated.
            'modified': pd.concat(modified_parts) if modified_parts else pd.DataFrame(),
        }

    @staticmethod
    def _first_name(frame, mask, default):
        """Return the first ``name`` among the rows of ``frame`` selected by ``mask``, else ``default``."""
        names = frame.loc[mask, 'name']
        return names.iloc[0] if not names.empty else default

    def save_changes(self, changes, apps, entries):
        """Append the detected changes to the changes CSV and push them to Jiandaoyun.

        Args:
            changes: dict produced by ``compare_data``.
            apps: DataFrame of apps, used to resolve app names.
            entries: DataFrame of forms, used to resolve form names.

        Returns:
            bool: True when at least one change row was written, else False.
        """
        result_rows = []

        for change_type in ['added', 'deleted', 'modified']:
            # Deleted rows only carry values on the baseline side of the merge.
            suffix = 'last' if change_type == 'deleted' else 'current'

            for _, row in changes[change_type].iterrows():
                app_id = row[f'app_id_{suffix}']
                entry_id = row[f'entry_id_{suffix}']

                app_name = self._first_name(apps, apps['app_id'] == app_id, '未知应用')
                entry_name = self._first_name(
                    entries,
                    (entries['app_id'] == app_id) & (entries['entry_id'] == entry_id),
                    '未知表单',
                )

                if change_type == 'added':
                    content = f"新增字段: {row['label_current']}"
                elif change_type == 'deleted':
                    content = f"删除字段: {row['label_last']}"
                else:
                    content = f"由\"{row['old_value']}\"修改为\"{row['new_value']}\""

                result_rows.append({
                    '程序执行时间': self.execution_time,
                    'unique_id': row['unique_id'],
                    'app_id': app_id,
                    'app_name': app_name,
                    'entry_id': entry_id,
                    'entry_name': entry_name,
                    'change_type': self._CHANGE_TYPE_LABELS[change_type],
                    '具体内容': content
                })

        if not result_rows:
            return False

        result_df = pd.DataFrame(result_rows)
        changes_file = Utils.get_path(self.data_dir, Config.CHANGES_FILE)
        # Write the header only when the file is being created.
        result_df.to_csv(changes_file, mode='a', header=not os.path.exists(changes_file), index=False)
        self.add_to_jiandaoyun(result_df)
        return True

    def add_to_jiandaoyun(self, result_df):
        """Batch-create one Jiandaoyun record per row of ``result_df``.

        Returns:
            bool: True when the API helper returned a list (treated as the
            created records), False otherwise.
        """
        all_data = [{
            "_widget_1751446961315": {"value": row["app_name"]},
            "_widget_1751446961316": {"value": row["entry_name"]},
            "_widget_1751446961317": {"value": row["change_type"]},
            "_widget_1751446961318": {"value": row["具体内容"]},
            "_widget_1751446961319": {"value": row["程序执行时间"]},
        } for _, row in result_df.iterrows()]

        payload = {
            "api_key": self._API_KEY,
            "entry_id": self._CHANGE_LOG_ENTRY_ID,
            "data_list": all_data
        }

        response = self.api.api.entry_data_batch_create(payload)

        if isinstance(response, list):
            logger.info(f"成功写入 {len(response)} 条变更数据到简道云")
            return True

        # The helper may return something other than a dict (e.g. None).
        message = response.get('message', '未知错误') if isinstance(response, dict) else '未知错误'
        logger.error(f"写入简道云失败: {message}")
        return False

    def _fetch_current_state(self):
        """Fetch apps, forms, widgets and the monitored subset.

        Returns:
            tuple ``(apps, entries, widgets, matched)``, or ``None`` (after
            logging an error) when no widget data could be fetched.
        """
        apps = self.fetch_apps()
        entries = self.fetch_entries(apps)
        widgets = self.fetch_widgets(entries)
        if widgets is None:
            logger.error("未获取到任何表单字段数据, 任务终止")
            return None
        monitor_data = self.fetch_monitor_data()
        matched = self.match_widgets(monitor_data, widgets)
        return apps, entries, widgets, matched

    def run_daily_snapshot(self):
        """Write today's snapshot files, archive old ones and reset the baseline.

        Returns:
            bool: True only when every file was saved.
        """
        # NOTE(review): this path overwrites the baseline without comparing,
        # so changes made since the previous run are not reported. Confirm
        # that this is intended.
        logger.info("=== 开始每日数据快照任务 ===")

        state = self._fetch_current_state()
        if state is None:
            return False
        _, _, widgets, matched_data = state

        widgets_saved = self.save_to_csv(widgets, Utils.get_path(self.data_dir, f"all_widgets_{self.today}.csv"))
        snapshot_saved = self.save_to_csv(matched_data, Utils.get_path(self.data_dir, f"snapshot_{self.today}.csv"))
        self.archive_old_data()
        baseline_saved = self.save_last_data(matched_data, widgets)

        if not (widgets_saved and snapshot_saved and baseline_saved):
            logger.error("=== 每日数据快照任务失败: 部分文件未能保存 ===")
            return False

        logger.info("=== 每日数据快照任务成功完成 ===")
        return True

    def run_hourly_check(self):
        """Compare the current widgets with the baseline, report changes and update the baseline.

        Returns:
            bool: True when the data was fetched and the new baseline saved.
        """
        logger.info("=== 开始每小时数据检查任务 ===")

        state = self._fetch_current_state()
        if state is None:
            return False
        apps, entries, widgets, current_data = state

        changes = self.compare_data(current_data)
        if changes and any(len(v) > 0 for v in changes.values()):
            self.save_changes(changes, apps, entries)

        if not self.save_last_data(current_data, widgets):
            logger.error("=== 每小时数据检查任务失败: 基线数据未能保存 ===")
            return False

        logger.info("=== 每小时数据检查任务成功完成 ===")
        return True

    def run(self):
        """Run the daily snapshot on the first run of a UTC day, otherwise the hourly check."""
        logger.info(f"=== 开始数据监控任务 ({self.execution_time}) ===")

        if Utils.is_first_run_today():
            success = self.run_daily_snapshot()
        else:
            success = self.run_hourly_check()

        logger.info("=== 数据监控任务完成 ===")
        return success
|
|
|
|
|
|
if __name__ == "__main__":
    # The output root must exist before DataMonitor creates its sub-directories.
    Utils.ensure_dir(Config.OUTPUT_DIR)
    succeeded = DataMonitor().run()
    # A failed run exits with a non-zero status.
    if not succeeded:
        sys.exit(1)