From 3d253b11fb314149f06f71ad47a9c6ee3b4f1a0a Mon Sep 17 00:00:00 2001 From: Asy0y0 <127722736+tianjy12@users.noreply.github.com> Date: Sun, 12 Jan 2025 04:06:18 -0600 Subject: [PATCH] Optimize word cloud generation script by adding logging and exception handling. --- app.log | 2 - database_operations.log | 0 wordCloudPicture.py | 233 ++++++++++++++++++++++++++++------------ 3 files changed, 162 insertions(+), 73 deletions(-) delete mode 100644 app.log delete mode 100644 database_operations.log diff --git a/app.log b/app.log deleted file mode 100644 index c7f343d..0000000 --- a/app.log +++ /dev/null @@ -1,2 +0,0 @@ -2025-01-09 23:29:06,246 [INFO] 尝试连接到数据库: root@localhost:3306/Weibo_PublicOpinion_AnalysisSystem -2025-01-09 23:29:06,346 [ERROR] 数据库连接失败: (1045, "Access denied for user 'root'@'localhost' (using password: YES)") diff --git a/database_operations.log b/database_operations.log deleted file mode 100644 index e69de29..0000000 diff --git a/wordCloudPicture.py b/wordCloudPicture.py index 40b6b73..9645323 100644 --- a/wordCloudPicture.py +++ b/wordCloudPicture.py @@ -5,95 +5,180 @@ import matplotlib.pyplot as plt from PIL import Image import numpy as np import pymysql +import logging -def stopWordList(): +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s [%(levelname)s] %(message)s', + handlers=[ + logging.FileHandler("wordcloud_generator.log"), + logging.StreamHandler() + ] +) + +# Global cache for stop words +STOP_WORDS = set() + +def load_stop_words(): """ - 濡傛灉 stopWords.txt 鏂囦欢鍐呭杈冨ぇ锛屾垨琚绻佽鍙栵紝 - 鍙互鑰冭檻灏嗗叾缂撳瓨璧锋潵锛岄伩鍏嶉噸澶嶈鏂囦欢銆 + Load and cache stop words. + If the stop words file does not exist or fails to read, log an error and return an empty set. """ - with open('./model/stopWords.txt', encoding='utf8') as f: - return [line.strip() for line in f.readlines()] + global STOP_WORDS + if STOP_WORDS: + return STOP_WORDS + stop_words_path = './model/stopWords.txt' + if not os.path.exists(stop_words_path): + logging.error(f"Stop words file does not exist: {stop_words_path}") + return set() + try: + with open(stop_words_path, encoding='utf8') as f: + STOP_WORDS = set(line.strip() for line in f if line.strip()) + logging.info(f"Loaded {len(STOP_WORDS)} stop words") + except Exception as e: + logging.error(f"Failed to load stop words file: {e}") + return STOP_WORDS def generate_word_cloud(text, mask_path, font_path, output_path): - """鐢熸垚璇嶄簯骞朵繚瀛樺埌 output_path""" - img = Image.open(mask_path) - img_arr = np.array(img) + """ + Generate a word cloud and save it to output_path. + + :param text: Processed text + :param mask_path: Path to the mask image + :param font_path: Path to the font file + :param output_path: Path to save the generated word cloud image + """ + if not os.path.exists(mask_path): + logging.error(f"Mask image file does not exist: {mask_path}") + return + try: + img = Image.open(mask_path) + img_arr = np.array(img) + logging.info(f"Successfully loaded mask image: {mask_path}") + except Exception as e: + logging.error(f"Failed to load mask image: {e}") + return - wc = WordCloud( - background_color="#fff", - mask=img_arr, - font_path=font_path - ) - wc.generate_from_text(text) + try: + wc = WordCloud( + background_color="#fff", + mask=img_arr, + font_path=font_path, + max_words=2000, + max_font_size=100, + random_state=42, + width=800, + height=600 + ) + wc.generate_from_text(text) + logging.info("Word cloud generated successfully") + except Exception as e: + logging.error(f"Failed to generate word cloud: {e}") + return - plt.figure(figsize=(8, 6)) - plt.imshow(wc, interpolation='bilinear') - plt.axis('off') - plt.savefig(output_path, dpi=300, bbox_inches='tight') - plt.close() # 淇濆瓨鍚庡叧闂 + try: + plt.figure(figsize=(8, 6)) + plt.imshow(wc, interpolation='bilinear') + plt.axis('off') + plt.savefig(output_path, dpi=300, bbox_inches='tight') + plt.close() + logging.info(f"Word cloud saved to: {output_path}") + except Exception as e: + logging.error(f"Failed to save word cloud image: {e}") def get_db_connection_interactive(): """ - 閫氳繃缁堢浜や簰鑾峰彇鏁版嵁搴撹繛鎺ュ弬鏁帮紝鑻ユ寜鍥炶溅鍒欎娇鐢ㄩ粯璁ゅ笺 - """ - print("璇蜂緷娆¤緭鍏ユ暟鎹簱杩炴帴淇℃伅锛堢洿鎺ユ寜鍥炶溅浣跨敤榛樿鍊硷級锛") - - host = input(" 1. 涓绘満 (榛樿: localhost): ") or "localhost" - port_str = input(" 2. 绔彛 (榛樿: 3306): ") or "3306" - port = int(port_str) - - user = input(" 3. 鐢ㄦ埛鍚 (榛樿: root): ") or "root" - password = input(" 4. 瀵嗙爜 (榛樿: 312517): ") or "12345678" - db_name = input(" 5. 鏁版嵁搴撳悕 (榛樿: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" - - print(f"\n鍗冲皢杩炴帴鍒版暟鎹簱: {user}@{host}:{port}/{db_name}\n") + Interactively obtain database connection parameters from the terminal. + Press Enter to use default values. - return pymysql.connect( - host=host, - user=user, - password=password, - database=db_name, - port=port, - charset='utf8mb4' - ) + :return: pymysql.connections.Connection object + """ + print("Please enter database connection information (press Enter to use default values):") + + host = input(" 1. Host (default: localhost): ") or "localhost" + port_str = input(" 2. Port (default: 3306): ") or "3306" + try: + port = int(port_str) + except ValueError: + logging.error(f"Invalid port number: {port_str}") + port = 3306 + + user = input(" 3. Username (default: root): ") or "root" + password = input(" 4. Password (default: 12345678): ") or "12345678" + db_name = input(" 5. Database name (default: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" + + logging.info(f"Attempting to connect to database: {user}@{host}:{port}/{db_name}") + + try: + connection = pymysql.connect( + host=host, + user=user, + password=password, + database=db_name, + port=port, + charset='utf8mb4' + ) + logging.info("Database connection successful") + return connection + except pymysql.MySQLError as e: + logging.error(f"Database connection failed: {e}") + raise def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'): - """ - 浠庢暟鎹簱鎷夊彇鎸囧畾瀛楁鐨勬枃鏈暟鎹紝鍒嗚瘝澶勭悊鍚庣敓鎴愯瘝浜戙 - :param field: 鏁版嵁搴撳瓧娈靛悕 - :param table_name: 鏁版嵁琛ㄥ悕 - :param target_img_src: 璇嶄簯褰㈢姸鍥 - :param res_img_src: 杈撳嚭璇嶄簯鏂囦欢璺緞 - :param connection: 宸插缓绔嬬殑鏁版嵁搴撹繛鎺 - :param font_path: 瀛椾綋鏂囦欢璺緞 """ - cursor = connection.cursor() - sql = f'SELECT {field} FROM {table_name}' - cursor.execute(sql) - data = cursor.fetchall() + Retrieve text data from a specified field and table in the database, + perform word segmentation and stop word removal, then generate a word cloud. + + :param field: Database field name + :param table_name: Database table name + :param target_img_src: Path to the mask image + :param res_img_src: Path to save the generated word cloud image + :param connection: Established database connection + :param font_path: Path to the font file + """ + try: + with connection.cursor() as cursor: + sql = f'SELECT {field} FROM {table_name}' + cursor.execute(sql) + data = cursor.fetchall() + logging.info(f"Fetched {len(data)} records from '{table_name}' table, field '{field}'") + except pymysql.MySQLError as e: + logging.error(f"Database query failed: {e}") + return - text = '' - for item in data: - text += item[0] # item 鏄厓缁 (鍐呭,)锛屽彇绗竴涓厓绱犲嵆鍙 + text = ''.join(item[0] for item in data if item[0]) - cursor.close() + # Tokenization & Stop word removal + try: + stop_words = load_stop_words() + if not stop_words: + logging.warning("Stop words set is empty, proceeding without stop word removal") + cut_words = jieba.cut(text) + filtered_words = [word for word in cut_words if word not in stop_words] + final_text = ' '.join(filtered_words) + logging.info(f"Completed tokenization and stop word removal, generated {len(filtered_words)} words") + except Exception as e: + logging.error(f"Text processing failed: {e}") + return - # 鍒嗚瘝 & 鍘诲仠鐢ㄨ瘝 - cut_words = jieba.cut(text) - stop_words = set(stopWordList()) - filtered_words = [word for word in cut_words if word not in stop_words] - final_text = ' '.join(filtered_words) - - # 鐢熸垚璇嶄簯 + # Generate word cloud generate_word_cloud(final_text, target_img_src, font_path, res_img_src) def main(): - # 1. 鑾峰彇鏁版嵁搴撹繛鎺ワ紙浜や簰寮忚緭鍏ワ級 - connection = get_db_connection_interactive() - - # 2. 鏍规嵁闇姹傜敓鎴愯瘝浜 - # 渚嬪锛氫粠 article 琛ㄧ殑 content 瀛楁鐢熸垚璇嶄簯 + """ + Main function to execute the word cloud generation process. + """ try: + # Obtain database connection interactively + connection = get_db_connection_interactive() + except Exception: + logging.error("Failed to establish database connection, terminating program") + return + + try: + # Generate word cloud as per requirements + # Example: Generate word cloud from 'content' field in 'article' table get_img( field='content', table_name='article', @@ -101,10 +186,16 @@ def main(): res_img_src='./static/contentCloud.jpg', connection=connection ) - print("璇嶄簯鐢熸垚瀹屾瘯锛") + print("Word cloud generation completed!") + except Exception as e: + logging.error(f"An error occurred during word cloud generation: {e}") finally: - # 鍏抽棴鏁版嵁搴撹繛鎺 - connection.close() + # Close the database connection + try: + connection.close() + logging.info("Database connection closed") + except Exception as e: + logging.error(f"Error closing database connection: {e}") if __name__ == '__main__': main()