"""字段监控""" import sys from datetime import datetime, timedelta, timezone import pandas as pd import zipfile import logging from pathlib import Path import json import requests from api import API import time import os from log_config import configure_task_logger, configure_error_task_logger from back_ground_module import CommonModule # 获取已经配置好的常规日志记录器 logger = configure_task_logger() # 获取已经配置好的错误任务日志记录器 error_task_logger = configure_error_task_logger() common_tools = CommonModule() # ---------------------------- 配置项 ---------------------------- class Config: OUTPUT_DIR = "output" DATA_DIR = "数据快照存储" ARCHIVE_DIR = "压缩包存储" RETAIN_DAYS = 7 COMPRESS_FORMAT = "zip" LOG_FILE = os.path.join(OUTPUT_DIR+"data_monitor.log") CHANGES_FILE = os.path.join(OUTPUT_DIR+"changes_summary.csv") MAX_RETRIES = 3 RETRY_DELAY = 0.5 # ---------------------- 日志配置 ----------------------- # class Logger: # @staticmethod # def setup(): # logging.basicConfig( # level=logging.INFO, # format='%(asctime)s - %(levelname)s - %(message)s', # handlers=[ # logging.StreamHandler(), # logging.FileHandler(Config.LOG_FILE) # ] # ) # return logging.getLogger(__name__) # # # logger = Logger.setup() # ---------------------- 工具函数 ----------------------- class Utils: @staticmethod def get_path(*path_parts): return str(Path(*path_parts)) @staticmethod def ensure_dir(path): Path(path).mkdir(parents=True, exist_ok=True) logger.debug(f"确保目录存在: {path}") @staticmethod def get_iso_time(): return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" @staticmethod def is_first_run_today(): today = datetime.now(timezone.utc).strftime("%Y-%m-%d") snapshot_file = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"snapshot_{today}.csv") widget_file = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"all_widgets_{today}.csv") return not (os.path.exists(snapshot_file) and os.path.exists(widget_file)) # ---------------------- API 客户端 ----------------------- class APIClient: def __init__(self): self.headers = { 'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN', 'Content-Type': 'application/json' } self.api = API() def request(self, url, payload, method='POST'): for retry in range(Config.MAX_RETRIES + 1): try: response = requests.request( method, url, headers=self.headers, data=payload, timeout=30 ) response.raise_for_status() return response except requests.exceptions.RequestException as e: if retry == Config.MAX_RETRIES: raise time.sleep(Config.RETRY_DELAY) logger.warning(f"请求失败 (尝试 {retry + 1}/{Config.MAX_RETRIES}): {str(e)}") # ---------------------- 数据处理类 ----------------------- class DataHandler: def __init__(self): self.execution_time = Utils.get_iso_time() self.today = datetime.now(timezone.utc).strftime("%Y-%m-%d") self.setup_dirs() self.api = APIClient() def setup_dirs(self): self.data_dir = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR) self.archive_dir = Utils.get_path(Config.OUTPUT_DIR, Config.ARCHIVE_DIR) Utils.ensure_dir(self.data_dir) Utils.ensure_dir(self.archive_dir) self.last_data_file = Utils.get_path(self.data_dir, "last_data.csv") self.last_widget_file = Utils.get_path(self.data_dir, "last_widget_data.csv") def load_last_data(self): try: last_data = pd.read_csv(self.last_data_file) if os.path.exists(self.last_data_file) else None last_widget = pd.read_csv(self.last_widget_file) if os.path.exists(self.last_widget_file) else None return last_data, last_widget except Exception as e: error_task_logger.error(f"加载上次数据失败: {str(e)}") return None, None def save_last_data(self, data, widget_data): try: data.to_csv(self.last_data_file, index=False) widget_data.to_csv(self.last_widget_file, index=False) return True except Exception as e: error_task_logger.error(f"保存当前数据失败: {str(e)}") return False def save_to_csv(self, data, filename): try: temp_file = filename + '.tmp' data.to_csv(temp_file, index=False) if os.path.exists(filename): os.remove(filename) os.rename(temp_file, filename) return True except Exception as e: error_task_logger.error(f"保存文件失败: {filename}, 错误: {str(e)}") return False # ---------------------- 数据监控主类 ----------------------- class DataMonitor(DataHandler): def __init__(self): super().__init__() self.last_data, self.last_widget = self.load_last_data() def fetch_apps(self): url = "https://api.jiandaoyun.com/api/v5/app/list" payload = json.dumps({"skip": 0, "limit": 100}) response = self.api.request(url, payload) return pd.DataFrame(response.json().get("apps", [])) def fetch_entries(self, app_df): url = "https://api.jiandaoyun.com/api/v5/app/entry/list" all_entries = [] for _, app in app_df.iterrows(): payload = json.dumps({"app_id": app['app_id']}) response = self.api.request(url, payload) entries = response.json().get("forms", []) if entries: entry_df = pd.DataFrame(entries) entry_df['app_id'] = app['app_id'] all_entries.append(entry_df) return pd.concat(all_entries, ignore_index=True) if all_entries else None def fetch_widgets(self, entry_df): url = "https://api.jiandaoyun.com/api/v5/app/entry/widget/list" all_widgets = [] for _, entry in entry_df.iterrows(): payload = json.dumps({ "app_id": entry['app_id'], "entry_id": entry['entry_id'] }) response = self.api.request(url, payload) widgets = response.json().get('widgets', []) if widgets: widget_df = pd.DataFrame(widgets) widget_df['app_id'] = entry['app_id'] widget_df['entry_id'] = entry['entry_id'] all_widgets.append(widget_df) return pd.concat(all_widgets, ignore_index=True) if all_widgets else None def fetch_monitor_data(self): payload = {"api_key": "6694d3c4fcb69ca9a111a6c4", "entry_id": "6850c044f17c934b3ec01fea"} data = self.api.api.entry_data_list(payload).get("data") data_list = pd.DataFrame(data) for col in data_list.columns: if data_list[col].apply(lambda x: isinstance(x, (dict, list))).any(): data_list[col] = data_list[col].astype(str) return data_list.drop_duplicates() def match_widgets(self, data_list, widget_list): if '_widget_1750122565203' not in data_list.columns: raise ValueError("数据列表中缺少 '_widget_1750122565203' 列") return widget_list[widget_list['entry_id'].isin(data_list['_widget_1750122565203'])] def archive_old_data(self): keep_dates = [ (datetime.now(timezone.utc) - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(Config.RETAIN_DAYS) ] files_to_archive = [ f for f in os.listdir(self.data_dir) if (f.startswith("snapshot_") or f.startswith("all_widgets_")) and f.endswith(".csv") ] for filename in files_to_archive: date_str = filename[9:-4] if filename.startswith("snapshot_") else filename[12:-4] if date_str not in keep_dates: year_month = date_str[:7] archive_name = Utils.get_path(self.archive_dir, f"snapshots_{year_month}.{Config.COMPRESS_FORMAT}") file_path = Utils.get_path(self.data_dir, filename) with zipfile.ZipFile(archive_name, 'a', zipfile.ZIP_DEFLATED) as zipf: zipf.write(file_path, arcname=filename) os.remove(file_path) logger.debug(f"已归档 {filename} 到 {archive_name}") def compare_data(self, current_data): if not os.path.exists(self.last_data_file): return None last_data = pd.read_csv(self.last_data_file) last_data['unique_id'] = last_data['name'].astype(str) + last_data['app_id'].astype(str) current_data['unique_id'] = current_data['name'].astype(str) + current_data['app_id'].astype(str) merged = pd.merge( last_data, current_data, on=['unique_id'], how='outer', suffixes=('_last', '_current'), indicator=True ) changes = { 'added': merged[merged['_merge'] == 'right_only'], 'deleted': merged[merged['_merge'] == 'left_only'], 'modified': pd.DataFrame() } for col in ['label', 'type']: last_col = f"{col}_last" current_col = f"{col}_current" if last_col in merged.columns and current_col in merged.columns: mask = (merged['_merge'] == 'both') & (merged[last_col] != merged[current_col]) mask = mask & ~merged[last_col].isna() & ~merged[current_col].isna() if mask.any(): modified = merged.loc[mask].copy() modified['changed_field'] = col modified['old_value'] = modified[last_col] modified['new_value'] = modified[current_col] modified['change_status'] = 'update' changes['modified'] = pd.concat([changes['modified'], modified]) return changes def save_changes(self, changes, apps, entries): result_rows = [] for change_type in ['added', 'deleted', 'modified']: suffix = 'current' if change_type in ['added', 'modified'] else 'last' for _, row in changes[change_type].iterrows(): app_id = row[f'app_id_{suffix}'] entry_id = row[f'entry_id_{suffix}'] app_name = apps.loc[apps['app_id'] == app_id, 'name'].values[0] if not apps[ apps['app_id'] == app_id].empty else '未知应用' entry_name = \ entries.loc[(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id), 'name'].values[0] if not \ entries[(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id)].empty else '未知表单' if change_type == 'added': content = f"新增字段: {row['label_current']}" elif change_type == 'deleted': content = f"删除字段: {row['label_last']}" else: content = f"由\"{row['old_value']}\"修改为\"{row['new_value']}\"" result_rows.append({ '程序执行时间': self.execution_time, 'unique_id': row['unique_id'], 'app_id': app_id, 'app_name': app_name, 'entry_id': entry_id, 'entry_name': entry_name, 'change_type': {'added': '新增', 'deleted': '删除', 'modified': '修改'}[change_type], '具体内容': content }) if result_rows: result_df = pd.DataFrame(result_rows) changes_file = Utils.get_path(self.data_dir, Config.CHANGES_FILE) result_df.to_csv(changes_file, mode='a', header=not os.path.exists(changes_file), index=False) self.add_to_jiandaoyun(result_df) return True return False def add_to_jiandaoyun(self, result_df): all_data = [{ "_widget_1751446961315": {"value": row["app_name"]}, "_widget_1751446961316": {"value": row["entry_name"]}, "_widget_1751446961317": {"value": row["change_type"]}, "_widget_1751446961318": {"value": row["具体内容"]}, "_widget_1751446961319": {"value": row["程序执行时间"]}, } for _, row in result_df.iterrows()] payload = { "api_key": "6694d3c4fcb69ca9a111a6c4", "entry_id": "6863a402a77925690a470cc5", "data_list": all_data } response = self.api.api.entry_data_batch_create(payload) if isinstance(response, list): logger.info(f"成功写入 {len(response)} 条变更数据到简道云") return True else: error_task_logger.error(f"写入简道云失败: {response.get('message', '未知错误')}") return False def run_daily_snapshot(self): logger.info("=== 开始每日数据快照任务 ===") apps = self.fetch_apps() entries = self.fetch_entries(apps) widgets = self.fetch_widgets(entries) monitor_data = self.fetch_monitor_data() matched_data = self.match_widgets(monitor_data, widgets) self.save_to_csv(widgets, Utils.get_path(self.data_dir, f"all_widgets_{self.today}.csv")) self.save_to_csv(matched_data, Utils.get_path(self.data_dir, f"snapshot_{self.today}.csv")) self.archive_old_data() self.save_last_data(matched_data, widgets) logger.info("=== 每日数据快照任务成功完成 ===") return True def run_hourly_check(self): logger.info("=== 开始每小时数据检查任务 ===") apps = self.fetch_apps() entries = self.fetch_entries(apps) widgets = self.fetch_widgets(entries) monitor_data = self.fetch_monitor_data() current_data = self.match_widgets(monitor_data, widgets) changes = self.compare_data(current_data) if changes and any(len(v) > 0 for v in changes.values()): self.save_changes(changes, apps, entries) self.save_last_data(current_data, widgets) logger.info("=== 每小时数据检查任务成功完成 ===") return True def main(self): import datetime task_start_time =datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") logger.info(f"=== 开始数据监控任务 ({self.execution_time}) ===") if Utils.is_first_run_today(): success = self.run_daily_snapshot() else: success = self.run_hourly_check() common_tools.send_task_status(task_start_time, "字段监控") logger.info("=== 数据监控任务完成 ===") return success if __name__ == "__main__": Utils.ensure_dir(Config.OUTPUT_DIR) monitor = DataMonitor() if not monitor.main(): sys.exit(1)