优化连接不上时创建表

2025-11-05 09:50:55 +08:00
parent 4154eb452f
commit b0bf0fa9bc
4 changed files with 191 additions and 23 deletions
@@ -195,7 +195,7 @@ class RSSDataAIProcessor:
            raise
    
    def create_ai_result_table(self):
-        """创建AI处理结果表"""
+        """创建AI处理结果表（使用安全方法，确保不会删除现有数据）"""
        create_sql = f"""
        CREATE TABLE IF NOT EXISTS {self.ai_table} (
            id INT AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID',
@@ -221,10 +221,13 @@ class RSSDataAIProcessor:
        """
        
        try:
-            self.db_agent.execute_sql(create_sql)
-            self.log.info(f"成功创建AI结果表: {self.ai_table}")
+            # 使用安全方法创建表（如果不存在），确保不会删除现有数据
+            self.db_agent.create_table_if_not_exists(
+                table_name=self.ai_table,
+                create_sql=create_sql
+            )
        except Exception as e:
-            self.log.error(f"创建AI结果表失败: {str(e)}", exc_info=True)
+            self.log.error(f"创建AI结果表失败（可能是数据库连接问题）: {str(e)}", exc_info=True)
            raise
    
    def load_unprocessed_data(self, limit: int = 100) -> pd.DataFrame:
@@ -270,14 +270,34 @@ class RSSDataProcessor:
            save_df = save_df.drop('segmented_words', axis=1)
            
            # 检查目标表是否存在，不存在则创建
-            if not self.db_agent.table_exists(self.processed_table_name):
-                self.create_processed_table()
+            # 注意：如果连接失败，table_exists可能返回False，需要捕获异常
+            try:
+                table_exists = self.db_agent.table_exists(self.processed_table_name)
+                if not table_exists:
+                    self.log.warning(f"表 {self.processed_table_name} 不存在，正在创建...")
+                    self.create_processed_table()
+                else:
+                    # 表存在时，也确保有唯一索引（安全操作，不会删除数据）
+                    self.create_processed_table()  # 这个方法会检查并添加索引，不会删除数据
+            except Exception as table_check_error:
+                # 如果检查表存在性时连接失败，记录错误但不中断
+                # 因为后续的插入操作会再次尝试连接
+                self.log.warning(f"检查表存在性时出错（可能是连接问题）: {str(table_check_error)}")
+                # 尝试创建表（如果表已存在，CREATE TABLE IF NOT EXISTS不会报错）
+                try:
+                    self.create_processed_table()
+                except Exception as create_error:
+                    # 如果创建表也失败（可能是连接问题），记录错误
+                    self.log.error(f"创建表时出错（可能是连接问题）: {str(create_error)}")
+                    # 继续尝试插入，如果表存在，插入会成功；如果表不存在，插入会失败并抛出异常
            
-            # 插入数据
+            # 插入数据（ignore_duplicates=True 会跳过重复的文章链接）
+            # 注意：INSERT INTO + ignore_duplicates 只会跳过重复记录，不会覆盖或删除现有数据
+            # 如果数据库连接失败，此操作会抛出异常，不会部分成功
            inserted_rows = self.db_agent.insert_from_df(
                table_name=self.processed_table_name,
                df=save_df,
-                ignore_duplicates=True
+                ignore_duplicates=True  # 跳过重复的文章链接，不会删除或覆盖现有数据
            )
            
            self.log.info(f"成功保存 {inserted_rows} 条处理结果到数据库")
@@ -288,7 +308,10 @@ class RSSDataProcessor:
            return False
    
    def create_processed_table(self):
-        """创建处理结果表"""
+        """
+        创建处理结果表（带唯一索引保护，防止重复插入）
+        使用 MySQLAgent 的安全方法，确保不会删除现有数据
+        """
        create_sql = f"""
        CREATE TABLE IF NOT EXISTS {self.processed_table_name} (
            id INT AUTO_INCREMENT PRIMARY KEY,
@@ -306,10 +329,27 @@ class RSSDataProcessor:
        """
        
        try:
-            self.db_agent.execute_sql(create_sql)
-            self.log.info(f"成功创建处理结果表: {self.processed_table_name}")
+            # 使用安全方法创建表（如果不存在）
+            self.db_agent.create_table_if_not_exists(
+                table_name=self.processed_table_name,
+                create_sql=create_sql
+            )
+            
+            # 使用安全方法添加唯一索引（如果不存在）
+            # 注意：CREATE TABLE IF NOT EXISTS 在表已存在时会直接跳过（不会报错，也不会修改已有表结构），
+            # 因此写在建表语句里的唯一索引无法补加到已存在的旧表上；所以先创建表，再单独添加索引
+            self.db_agent.add_unique_index_if_not_exists(
+                table_name=self.processed_table_name,
+                index_name='uk_article_link',
+                column_name='文章链接',
+                column_length=500,
+                check_duplicates=True
+            )
+            
        except Exception as e:
-            self.log.error(f"创建表失败: {str(e)}", exc_info=True)
+            # 如果创建表或添加索引失败（可能是连接问题），抛出异常
+            # 这样上层调用可以知道操作失败，不会误以为成功
+            self.log.error(f"创建/检查表失败（可能是数据库连接问题）: {str(e)}", exc_info=True)
            raise
    
    def get_processing_statistics(self, df: pd.DataFrame) -> Dict[str, Any]: