"""Pull process-instance records through ``YDAPI`` month by month and save CSVs.

Running the module as a script fetches every record modified since
2025-01-01 (one request window per calendar month), maps select/radio option
values to their display labels, and writes two CSV files under ``output/``.
"""

import json
import os
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
# NOTE(review): this import looks like an accidental IDE auto-import. The
# alias ``record`` used to be dereferenced in ``GetYDData.main`` (where it
# always raised AttributeError); nothing in this file uses it any more. It is
# kept only because other code may rely on the module-level name.
from holidays.countries import saint_martin as record
from tqdm import tqdm

from yd_api import YDAPI
from api import API

output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

api_instance = API()
yd_api_instance = YDAPI()

# Timestamp layout used for every range boundary handed to the API.
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# Number of records requested per page.
_PAGE_SIZE = 100


def _parse_utc(value: str) -> datetime:
    """Parse an ISO-8601 string (``Z`` suffix allowed) into an aware UTC datetime.

    Naive inputs are taken to already be in UTC, because the formatted output
    is always stamped with ``Z``.
    """
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def generate_monthly_ranges(start: str, end: str):
    """Split ``[start, end)`` into consecutive calendar-month segments.

    Each segment is half-open (left-closed, right-open): it starts where the
    previous one ended and stops at the first day of the next month or at
    ``end``, whichever comes first.

    Args:
        start: ISO-8601 start timestamp, e.g. ``"2025-11-01T00:00:00Z"``.
        end: ISO-8601 end timestamp.

    Returns:
        A list of ``(segment_start, segment_end)`` string pairs formatted as
        ``YYYY-MM-DDTHH:MM:SSZ``, e.g.
        ``[("2025-11-01T00:00:00Z", "2025-12-01T00:00:00Z"), ...]``.
        The list is empty when ``start`` is not earlier than ``end``.
    """
    start_dt = _parse_utc(start)
    end_dt = _parse_utc(end)

    ranges = []
    current = start_dt
    while current < end_dt:
        # First instant of the following month. The time of day is reset so a
        # mid-day start does not leak its clock time into later boundaries.
        if current.month == 12:
            next_month = current.replace(
                year=current.year + 1, month=1, day=1,
                hour=0, minute=0, second=0, microsecond=0,
            )
        else:
            next_month = current.replace(
                month=current.month + 1, day=1,
                hour=0, minute=0, second=0, microsecond=0,
            )

        # Never run past the requested end.
        segment_end = min(next_month, end_dt)

        # Keep the real time of day: formatting with a fixed "T00:00:00Z"
        # would cut the final segment back to midnight and drop everything
        # between midnight and ``end``.
        ranges.append((
            current.strftime(_TIMESTAMP_FORMAT),
            segment_end.strftime(_TIMESTAMP_FORMAT),
        ))
        current = next_month
    return ranges


class GetYDData:
    """Fetch process instances of one form and export them to CSV."""

    def __init__(self):
        """Store the form identifiers and compute the time windows to fetch."""
        self.FORMID = "FORM-PE866MD1MJMU0WGLYRFLYEN5YN9L1I55Z7ZUK22"
        self.appType = "APP_UYZ0KG6L0CCNV80GZ66O"
        # NOTE(review): credential committed in source; consider loading it
        # from configuration or the environment instead.
        self.systemToken = "XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2"

        # First window: 2025-01-01 up to 2025-02-01.
        first_segment = ("2025-01-01T00:00:00Z", "2025-02-01T00:00:00Z")

        # Remaining windows: 2025-02-01 up to the current UTC time, one per
        # calendar month.
        now_utc_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        monthly_segments = generate_monthly_ranges("2025-02-01T00:00:00Z", now_utc_str)

        # Full fetch plan, in chronological order.
        self.time_ranges = [first_segment] + monthly_segments

        print("📅 计划拉取以下时间段:")
        for i, (s, e) in enumerate(self.time_ranges, 1):
            print(f" {i}. {s} → {e}")

    @staticmethod
    def _option_label(val, text_obj):
        """Return the display label for one option of a select/radio field.

        Preference order for a dict ``text_obj``: its ``zh_CN`` entry, then a
        double-quoted string under ``value`` (quotes stripped), then
        ``str(val)``. A non-dict ``text_obj`` is stringified as is.
        """
        if not isinstance(text_obj, dict):
            return str(text_obj)

        zh_text = text_obj.get("zh_CN")
        if zh_text is not None:
            return zh_text

        if "value" in text_obj:
            raw = text_obj["value"]
            if isinstance(raw, str) and raw.startswith('"') and raw.endswith('"'):
                return raw[1:-1]
        return str(val)

    def build_value_to_label_map(self, form_structure):
        """Build ``{fieldId: {option value: label}}`` for select/radio fields.

        Args:
            form_structure: Dict whose ``result`` entry lists field
                definitions (``fieldId``, ``componentName``, ``props``).

        Returns:
            A dict keyed by field id. Only ``SelectField`` and ``RadioField``
            components with a non-empty ``props.dataSource`` are included, and
            options whose ``value`` is ``None`` are skipped.
        """
        value_to_label_map = {}
        # ``or`` guards tolerate keys that are present but set to None.
        for field in form_structure.get("result") or []:
            props = field.get("props") or {}
            data_source = props.get("dataSource") or []
            if field.get("componentName") not in ("SelectField", "RadioField"):
                continue
            if not data_source:
                continue

            option_map = {}
            for opt in data_source:
                val = opt.get("value")
                if val is None:
                    continue
                option_map[str(val)] = self._option_label(val, opt.get("text", {}))

            if option_map:
                value_to_label_map[field.get("fieldId")] = option_map
        return value_to_label_map

    def convert_record_values(self, record, value_map):
        """Replace option values in one record with their display labels.

        Args:
            record: Dict of field id to stored value.
            value_map: Mapping produced by ``build_value_to_label_map``.

        Returns:
            A new dict with the same keys. A value is replaced only when its
            field has a mapping and ``str(value)`` is a known option;
            everything else is copied unchanged.
        """
        converted = {}
        for key, val in record.items():
            if key in value_map and val is not None:
                converted[key] = value_map[key].get(str(val), val)
            else:
                converted[key] = val
        return converted

    def _read_page(self, token, page, start_time, end_time):
        """Request one page of instances modified within the given window."""
        return yd_api_instance.read_processes_instances(
            token=token,
            formUuid=self.FORMID,
            page=page,
            n=_PAGE_SIZE,
            appType=self.appType,
            systemToken=self.systemToken,
            instanceStatus="",
            modifiedFromTimeGMT=start_time,
            modifiedToTimeGMT=end_time,
        )

    def fetch_records_in_range(self, token, start_time, end_time):
        """Fetch every record modified within ``start_time`` .. ``end_time``.

        The first page supplies ``totalCount``, from which the number of
        remaining pages is derived. The fetch is best-effort: a failure on
        the first page yields an empty list, and a failure on a later page is
        reported and skipped.

        Args:
            token: Access token passed through to the API client.
            start_time: Lower bound of the modification-time window.
            end_time: Upper bound of the modification-time window.

        Returns:
            A list with the ``data`` entries of every page that was fetched.
        """
        try:
            first_page = self._read_page(token, 1, start_time, end_time)
        except Exception as e:
            print(f"❌ 首页请求失败 ({start_time} ~ {end_time}): {e}")
            return []

        total_count = first_page.get("totalCount", 0) or 0
        # Ceiling division: a partially filled last page still counts.
        total_pages = (total_count + _PAGE_SIZE - 1) // _PAGE_SIZE
        print(f"📊 [{start_time[:10]} ~ {end_time[:10]}] 总记录数: {total_count}, 共 {total_pages} 页")

        all_records = []
        if total_count <= 0:
            return all_records

        all_records.extend(first_page.get("data") or [])
        for page in tqdm(range(2, total_pages + 1), desc=f"{start_time[:7]}"):
            try:
                resp = self._read_page(token, page, start_time, end_time)
                all_records.extend(resp.get("data") or [])
                time.sleep(0.15)  # short pause between requests
            except Exception as e:
                print(f"⚠️ 第 {page} 页失败 ({start_time[:10]}): {e}")
                continue
        return all_records

    def main(self):
        """Run the export: fetch, convert option values, and write the CSVs.

        Output files (inside ``output_dir``):
            * ``converted_yd_data.csv``: the instances as returned by the API.
            * ``converted_yd_data_detail.csv``: one row per instance holding
              its ``formData`` with option values replaced by labels.

        NOTE(review): the detail file used to be filled from
        ``record.get("data", [])``, where ``record`` was an unrelated imported
        module, so the call always failed silently and the file was always
        empty. It now receives the converted form data, which was computed
        but never written. Confirm this matches the intended content.
        """
        # Step 1: load the form structure and build the option-label map.
        token = yd_api_instance.generateToken()
        form_struct = yd_api_instance.get_form_structures(
            token=token,
            formUuid=self.FORMID,
        )
        value_map = self.build_value_to_label_map(form_struct)
        print("\n✅ 表单选项映射构建完成")

        # Step 2: fetch the records window by window.
        all_records = []
        for start_time, end_time in self.time_ranges:
            print(f"\n⏳ 拉取: {start_time} → {end_time}")
            all_records.extend(
                self.fetch_records_in_range(token, start_time, end_time)
            )
        print(f"\n📥 总共获取 {len(all_records)} 条流程实例")

        # Step 3: convert the form data of every instance.
        all_records_detils = [
            self.convert_record_values(inst.get("formData") or {}, value_map)
            for inst in all_records
        ]

        # Step 4: save.
        if not all_records:
            print("\n❌ 无有效数据")
            return

        output_path = os.path.join(output_dir, "converted_yd_data.csv")
        pd.DataFrame(all_records).to_csv(output_path, index=False)

        output_path1 = os.path.join(output_dir, "converted_yd_data_detail.csv")
        pd.DataFrame(all_records_detils).to_csv(output_path1, index=False)

        print(f"\n✅ 成功保存 {len(all_records)} 条记录至: {output_path}")


if __name__ == "__main__":
    GetYDData().main()