优化连接不上时创建表

2025-11-05 09:50:55 +08:00
parent 4154eb452f
commit b0bf0fa9bc
4 changed files with 191 additions and 23 deletions
@@ -153,6 +153,12 @@ class MySQLAgent:
        """
        兼容旧接口的通用插入方法：保留replace参数，同时支持新的ignore_duplicates
        自动处理重复数据，对所有数据源通用，插入失败的数据会通过日志记录
+        
+        安全性说明:
+            - 使用 INSERT INTO（不是 REPLACE INTO 或 INSERT ... ON DUPLICATE KEY UPDATE）
+            - 当 ignore_duplicates=True 时，重复记录会被跳过，不会覆盖或删除现有数据
+            - 如果数据库连接失败，操作会抛出异常，不会部分成功
+            - 所有操作都是安全的，不会导致数据丢失或覆盖
        """
        # 【兼容性处理】如果未指定ignore_duplicates，用replace参数推导
        if ignore_duplicates is None:
@@ -592,6 +598,114 @@ class MySQLAgent:
                           exc_info=True)
            return False

+    def create_table_if_not_exists(self, table_name: str, create_sql: str) -> bool:
+        """
+        Create a table by executing the caller-supplied CREATE TABLE statement.
+
+        The statement is handed to ``self.execute_sql`` unchanged; this method
+        never rewrites it. The "does not touch an existing table" guarantee
+        therefore only holds when ``create_sql`` itself contains
+        ``IF NOT EXISTS``. When the clause is missing, a warning is logged and
+        the statement is still executed.
+
+        Args:
+            table_name: Table name; used only for log output.
+            create_sql: Complete CREATE TABLE statement (should contain
+                ``IF NOT EXISTS``).
+
+        Returns:
+            bool: True when the statement ran without raising.
+
+        Raises:
+            Exception: Any error raised by ``self.execute_sql`` is logged and
+                re-raised unchanged.
+        """
+        # Collapse every whitespace run to a single space before checking, so
+        # a clause split across lines or padded with extra spaces
+        # ("IF  NOT\nEXISTS") does not trigger a false warning.
+        normalized_sql = " ".join(create_sql.upper().split())
+        if "IF NOT EXISTS" not in normalized_sql:
+            self.log.warning("CREATE TABLE 语句建议使用 IF NOT EXISTS 以保证安全性",
+                             table=table_name)
+
+        try:
+            self.execute_sql(create_sql)
+            self.log.info(f"成功创建/检查表（表已存在时不会删除数据）: {table_name}")
+            return True
+        except Exception as e:
+            # Log with the table name for context, then let the caller decide
+            # how to react to the failure.
+            self.log.error(f"创建/检查表失败（可能是数据库连接问题）: {str(e)}", 
+                         table=table_name, exc_info=True)
+            raise
+
+    def add_unique_index_if_not_exists(self, table_name: str, index_name: str, 
+                                      column_name: str, column_length: int = 500,
+                                      check_duplicates: bool = True) -> bool:
+        """
+        Add a UNIQUE index on one column unless an index of that name exists.
+
+        Steps:
+            1. Look the index name up in INFORMATION_SCHEMA.STATISTICS for the
+               schema named by ``self.config['database']``.
+            2. Optionally check whether existing rows would violate the index.
+            3. Run ``ALTER TABLE ... ADD UNIQUE KEY``.
+
+        No statement issued here deletes or updates rows.
+
+        Args:
+            table_name: Table to alter.
+            index_name: Name of the unique index.
+            column_name: Column to index. Surrounding backticks are accepted.
+            column_length: Index prefix length, for string/blob columns. A
+                non-positive value (or None) indexes the whole column, which
+                is required for non-string column types.
+            check_duplicates: When True, look for conflicting rows first and
+                skip the ALTER if any are found.
+
+        Returns:
+            bool: True if the index already existed or was added; False if
+            the duplicate check found conflicting rows and the index was
+            not added.
+
+        Raises:
+            Exception: Any failure from the queries or the ALTER statement is
+                re-raised, except an "index already exists" error, which is
+                treated as success.
+        """
+        def quote_identifier(name: str) -> str:
+            # Backtick-quote an identifier: tolerate names that already carry
+            # backticks and double any embedded backtick so the value cannot
+            # terminate the quoted identifier.
+            return "`" + str(name).strip().strip("`").replace("`", "``") + "`"
+
+        quoted_table = quote_identifier(table_name)
+        quoted_index = quote_identifier(index_name)
+        quoted_column = quote_identifier(column_name)
+
+        # A missing or non-positive length means "index the whole column".
+        prefix_length = int(column_length) if column_length else 0
+        use_prefix = prefix_length > 0
+
+        try:
+            # 1. Check whether the index already exists
+            check_index_sql = """
+            SELECT COUNT(*) as cnt
+            FROM INFORMATION_SCHEMA.STATISTICS
+            WHERE TABLE_SCHEMA = %s
+              AND TABLE_NAME = %s
+              AND INDEX_NAME = %s
+            """
+            result = self.query_to_df(
+                check_index_sql, 
+                params=(self.config['database'], table_name, index_name),
+                is_print=False
+            )
+
+            if not result.empty and result['cnt'].iloc[0] > 0:
+                self.log.debug(f"唯一索引 {index_name} 已存在，跳过添加")
+                return True
+
+            # 2. Optionally look for rows that would violate the index.
+            #    The check mirrors what the index enforces: only NULLs are
+            #    exempt (empty strings do collide), and with a prefix index
+            #    only the first `prefix_length` characters are compared.
+            if check_duplicates:
+                if use_prefix:
+                    indexed_value = f"LEFT({quoted_column}, {prefix_length})"
+                else:
+                    indexed_value = quoted_column
+                check_duplicates_sql = f"""
+                SELECT {indexed_value} AS dup_value, COUNT(*) as cnt
+                FROM {quoted_table}
+                WHERE {quoted_column} IS NOT NULL
+                GROUP BY {indexed_value}
+                HAVING cnt > 1
+                LIMIT 1
+                """
+                duplicates = self.query_to_df(check_duplicates_sql, is_print=False)
+
+                if not duplicates.empty:
+                    # The query is LIMIT 1, so duplicate_count only signals
+                    # that at least one conflicting value exists.
+                    self.log.warning(
+                        f"表 {table_name} 中存在重复的 {column_name} 数据，无法添加唯一索引。"
+                        "现有数据不会被删除。",
+                        duplicate_count=len(duplicates)
+                    )
+                    return False
+
+            # 3. Add the unique index
+            if use_prefix:
+                column_spec = f"{quoted_column}({prefix_length})"
+            else:
+                column_spec = quoted_column
+            add_index_sql = f"""
+            ALTER TABLE {quoted_table}
+            ADD UNIQUE KEY {quoted_index} ({column_spec})
+            """
+            self.execute_sql(add_index_sql)
+            self.log.info(f"成功添加唯一索引 {index_name}（现有数据不受影响）")
+            return True
+
+        except Exception as e:
+            error_msg = str(e)
+            # An "index already exists" error (e.g. it was created between
+            # the lookup and the ALTER) counts as success.
+            if "Duplicate key name" in error_msg or "already exists" in error_msg.lower():
+                self.log.debug(f"唯一索引 {index_name} 已存在，跳过添加")
+                return True
+            else:
+                self.log.warning(f"添加唯一索引时出现问题（不影响现有数据）: {error_msg}")
+                raise
+
    def execute_sql(self, sql: str, params: Union[tuple, dict, None] = None,
                    fetch: bool = False) -> Union[int, List[Dict[str, Any]]]:
        """执行SQL语句（原有逻辑完全保留）"""