import os import jieba from wordcloud import WordCloud import matplotlib.pyplot as plt from PIL import Image import numpy as np import pymysql def stopWordList(): """ 如果 stopWords.txt 文件内容较大,或被频繁读取, 可以考虑将其缓存起来,避免重复读文件。 """ with open('./model/stopWords.txt', encoding='utf8') as f: return [line.strip() for line in f.readlines()] def generate_word_cloud(text, mask_path, font_path, output_path): """生成词云并保存到 output_path""" img = Image.open(mask_path) img_arr = np.array(img) wc = WordCloud( background_color="#fff", mask=img_arr, font_path=font_path ) wc.generate_from_text(text) plt.figure(figsize=(8, 6)) plt.imshow(wc, interpolation='bilinear') plt.axis('off') plt.savefig(output_path, dpi=300, bbox_inches='tight') plt.close() # 保存后关闭 def get_db_connection_interactive(): """ 通过终端交互获取数据库连接参数,若按回车则使用默认值。 """ print("请依次输入数据库连接信息(直接按回车使用默认值):") host = input(" 1. 主机 (默认: localhost): ") or "localhost" port_str = input(" 2. 端口 (默认: 3306): ") or "3306" port = int(port_str) user = input(" 3. 用户名 (默认: root): ") or "root" password = input(" 4. 密码 (默认: 312517): ") or "12345678" db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" print(f"\n即将连接到数据库: {user}@{host}:{port}/{db_name}\n") return pymysql.connect( host=host, user=user, password=password, database=db_name, port=port, charset='utf8mb4' ) def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'): """ 从数据库拉取指定字段的文本数据,分词处理后生成词云。 :param field: 数据库字段名 :param table_name: 数据表名 :param target_img_src: 词云形状图 :param res_img_src: 输出词云文件路径 :param connection: 已建立的数据库连接 :param font_path: 字体文件路径 """ cursor = connection.cursor() sql = f'SELECT {field} FROM {table_name}' cursor.execute(sql) data = cursor.fetchall() text = '' for item in data: text += item[0] # item 是元组 (内容,),取第一个元素即可 cursor.close() # 分词 & 去停用词 cut_words = jieba.cut(text) stop_words = set(stopWordList()) filtered_words = [word for word in cut_words if word not in stop_words] final_text = ' '.join(filtered_words) # 生成词云 generate_word_cloud(final_text, target_img_src, font_path, res_img_src) def main(): # 1. 获取数据库连接(交互式输入) connection = get_db_connection_interactive() # 2. 根据需求生成词云 # 例如:从 article 表的 content 字段生成词云 try: get_img( field='content', table_name='article', target_img_src='./static/content.jpg', res_img_src='./static/contentCloud.jpg', connection=connection ) print("词云生成完毕!") finally: # 关闭数据库连接 connection.close() if __name__ == '__main__': main()