Modify the database hardcoding to switch to command-line interactive database connection.

2025-01-09 23:08:55 +08:00
parent ba56f3b0d4
commit a30773715e
4 changed files with 304 additions and 66 deletions
@@ -1,47 +1,110 @@
+import os
 import jieba
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
-from PIL import Image,ImageDraw
-from pymysql import *
-import json
+from PIL import Image
 import numpy as np
+import pymysql
+
 def stopWordList():
-    return [line.strip() for line in open('./model/stopWords.txt',encoding='utf8').readlines()]
+    """
+    如果 stopWords.txt 文件内容较大，或被频繁读取，
+    可以考虑将其缓存起来，避免重复读文件。
+    """
+    with open('./model/stopWords.txt', encoding='utf8') as f:
+        return [line.strip() for line in f.readlines()]

-def get_img(field,tableName,targetImgSrc,resImgSrc):
-    con = connect(host='47.92.235.6',user='XiaoXueQi',password='XiaoXueQi',database='Weibo_PublicOpinion_AnalysisSystem',port=3306,charset='utf8mb4')
-    cuser = con.cursor()
-    sql = f'select {field} from {tableName}'
-    cuser.execute(sql)
-    data = cuser.fetchall()
-    text = ''
-    for item in data:
-        text += item[0]
-    cuser.close()
-    con.close()
-
-    cut = jieba.cut(text)
-    newCut = []
-    for word in cut:
-        if word not in stopWordList():newCut.append(word)
-    string = ' '.join(newCut)
-
-    img = Image.open(targetImgSrc)
+def generate_word_cloud(text, mask_path, font_path, output_path):
+    """生成词云并保存到 output_path"""
+    img = Image.open(mask_path)
    img_arr = np.array(img)
+
    wc = WordCloud(
        background_color="#fff",
        mask=img_arr,
-        font_path='STHUPO.TTF'
+        font_path=font_path
    )
-    wc.generate_from_text(string)
-
-    fig = plt.figure(1)
-    plt.imshow(wc)
+    wc.generate_from_text(text)

+    plt.figure(figsize=(8, 6))
+    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    plt.close()  # 保存后关闭

-    plt.savefig(resImgSrc,dpi=500)
+def get_db_connection_interactive():
+    """
+    通过终端交互获取数据库连接参数，若按回车则使用默认值。
+    """
+    print("请依次输入数据库连接信息（直接按回车使用默认值）：")

+    host = input(" 1. 主机 (默认: localhost): ") or "localhost"
+    port_str = input(" 2. 端口 (默认: 3306): ") or "3306"
+    port = int(port_str)

-# get_img('content','comments','./static/comment.jpg','./static/commentCloud.jpg')
-get_img('content','article','./static/content.jpg','./static/contentCloud.jpg')
+    user = input(" 3. 用户名 (默认: root): ") or "root"
+    password = input(" 4. 密码 (默认: 312517): ") or "12345678"
+    db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem"
+
+    print(f"\n即将连接到数据库: {user}@{host}:{port}/{db_name}\n")
+    
+    return pymysql.connect(
+        host=host,
+        user=user,
+        password=password,
+        database=db_name,
+        port=port,
+        charset='utf8mb4'
+    )
+
+def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'):
+    """ 
+    从数据库拉取指定字段的文本数据，分词处理后生成词云。
+    :param field: 数据库字段名
+    :param table_name: 数据表名
+    :param target_img_src: 词云形状图
+    :param res_img_src: 输出词云文件路径
+    :param connection: 已建立的数据库连接
+    :param font_path: 字体文件路径
+    """
+    cursor = connection.cursor()
+    sql = f'SELECT {field} FROM {table_name}'
+    cursor.execute(sql)
+    data = cursor.fetchall()
+
+    text = ''
+    for item in data:
+        text += item[0]  # item 是元组 (内容,)，取第一个元素即可
+
+    cursor.close()
+
+    # 分词 & 去停用词
+    cut_words = jieba.cut(text)
+    stop_words = set(stopWordList())
+    filtered_words = [word for word in cut_words if word not in stop_words]
+    final_text = ' '.join(filtered_words)
+
+    # 生成词云
+    generate_word_cloud(final_text, target_img_src, font_path, res_img_src)
+
+def main():
+    # 1. 获取数据库连接（交互式输入）
+    connection = get_db_connection_interactive()
+
+    # 2. 根据需求生成词云
+    # 例如：从 article 表的 content 字段生成词云
+    try:
+        get_img(
+            field='content', 
+            table_name='article', 
+            target_img_src='./static/content.jpg', 
+            res_img_src='./static/contentCloud.jpg', 
+            connection=connection
+        )
+        print("词云生成完毕！")
+    finally:
+        # 关闭数据库连接
+        connection.close()
+
+if __name__ == '__main__':
+    main()