diff --git a/README-CN.md b/README-CN.md index 03c45f2..0d029fc 100644 --- a/README-CN.md +++ b/README-CN.md @@ -18,6 +18,8 @@ **微博舆情分析预测系统** 是一个用于监控、分析和预测社交媒体平台(如微博)上的公众舆情趋势的**社交网络舆情分析系统**。该系统利用深度学习、自然语言处理(NLP)和机器学习技术,从大量社交媒体数据中提取有价值的舆情信息,帮助政府、企业及其他组织及时了解公众态度、应对突发事件并优化决策。📈 +Weibo Public Opinion Analysis System + 通过强大的数据采集与处理能力,微博舆情分析预测系统实现了实时数据收集、情感分析、话题分类和舆情预测等功能,确保用户能够在复杂多变的社交网络环境中获得准确、全面的舆情洞察。系统采用模块化设计,易于维护和扩展,旨在为用户提供一个高效、可靠的舆情分析工具,助力各类组织在信息化时代做出明智决策。 ## ✨ 功能 diff --git a/README.md b/README.md index 6d69cea..1a771dd 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,8 @@ **Weibo Public Opinion Analysis and Prediction System** is a **social network public opinion analysis system** designed to monitor, analyze, and predict public opinion trends on social media platforms such as Weibo. This system leverages deep learning, natural language processing (NLP), and machine learning technologies to extract valuable public opinion information from vast amounts of social media data, helping governments, enterprises, and other organizations promptly understand public attitudes, respond to emergencies, and optimize decision-making. 📈 +Weibo Public Opinion Analysis System + Through powerful data collection and processing capabilities, the Weibo Public Opinion Analysis and Prediction System achieves real-time data collection, sentiment analysis, topic classification, and public opinion prediction, ensuring that users can obtain accurate and comprehensive insights into public opinion in the complex and changing social network environment. The system adopts a modular design, making it easy to maintain and expand, aiming to provide users with an efficient and reliable public opinion analysis tool, assisting various organizations in making informed decisions in the information age. 
## ✨ Features diff --git a/utils/getEchartsData.py b/utils/getEchartsData.py index aa04825..e7a0f9c 100644 --- a/utils/getEchartsData.py +++ b/utils/getEchartsData.py @@ -1,209 +1,155 @@
"""Build the series/label payloads consumed by the ECharts views.

Every public function returns plain lists/dicts.  Rows are positional
sequences; the column meanings noted below are read off how this module
uses them (variable names, comparisons), not from a schema.
NOTE(review): confirm the column layout against utils.getPublicData.
"""
from bisect import bisect_right
from collections import Counter

# The wildcard import is where getAllArticleData, getAllCommentsData and
# getAllHotWords come from; nothing else in this module defines them.
from utils.getPublicData import *  # noqa: F401,F403
from utils.mynlp import SnowNLP

# Loaded once at import time and shared by every function below.
articleList = getAllArticleData()
commentList = getAllCommentsData()

# Inner bucket edges.  Labels and counts are both derived from these, so
# the two can no longer drift apart.
_LIKE_EDGES = (100, 1000, 5000, 15000, 30000, 50000)
_COMMENT_EDGES = (100, 500, 1000, 1500, 3000, 5000, 10000, 15000)
_REPOST_EDGES = (100, 300, 500, 1000, 2000, 3000, 4000, 5000,
                 10000, 15000, 30000, 70000)

# Sentiment classes in display order, and the score cut-offs between them.
_SENTIMENT_LABELS = ('正面', '中性', '负面')
_POSITIVE_ABOVE = 0.4
_NEGATIVE_BELOW = 0.2


def _bucket_labels(edges):
    """Return 'lo-hi' labels for *edges*, ending with an open 'last-~' label."""
    lowers = (0,) + tuple(edges[:-1])
    labels = [f"{lo}-{hi}" for lo, hi in zip(lowers, edges)]
    labels.append(f"{edges[-1]}-~")
    return labels


def _bucket_counts(values, edges, open_ended=True):
    """Count how many *values* fall into each half-open bucket [lo, hi).

    *edges* are the ascending upper bounds.  With ``open_ended`` a final
    bucket collects everything >= edges[-1]; without it such values are
    ignored.  Values below the first edge (including negatives) land in
    the first bucket, as the original if/elif chains did.
    """
    size = len(edges) + 1 if open_ended else len(edges)
    counts = [0] * size
    for value in values:
        slot = bisect_right(edges, value)
        if slot < size:
            counts[slot] += 1
    return counts


def _as_pie(counter):
    """Convert a mapping of name -> count into ECharts pie items."""
    return [{'name': name, 'value': count} for name, count in counter.items()]


def _sentiment_label(text):
    """Classify *text* as positive/neutral/negative from its SnowNLP score."""
    score = SnowNLP(text).sentiments
    if score > _POSITIVE_ABOVE:
        return _SENTIMENT_LABELS[0]
    if score < _NEGATIVE_BELOW:
        return _SENTIMENT_LABELS[2]
    return _SENTIMENT_LABELS[1]


def _sentiment_tally(texts):
    """Return per-class counts for *texts*, ordered like _SENTIMENT_LABELS."""
    tally = Counter(_sentiment_label(text) for text in texts)
    return [tally[label] for label in _SENTIMENT_LABELS]


def getTypeList():
    """Return the distinct article types (column 8) in first-seen order."""
    # dict.fromkeys de-duplicates like set() but keeps a stable order.
    return list(dict.fromkeys(article[8] for article in articleList))


def getArticleByType(type):
    """Return the articles whose type column (8) equals *type*."""
    # The parameter shadows the builtin; the name is kept because callers
    # may pass it by keyword.
    return [article for article in articleList if article[8] == type]


def getArticleLikeCount(type):
    """Histogram of like counts (column 1) for articles of *type*.

    Returns ``(labels, counts)``.
    """
    likes = [int(article[1]) for article in getArticleByType(type)]
    return _bucket_labels(_LIKE_EDGES), _bucket_counts(likes, _LIKE_EDGES)


def getArticleCommentsLen(type):
    """Histogram of the comment figure (column 2) for articles of *type*.

    Returns ``(labels, counts)``.
    """
    sizes = [int(article[2]) for article in getArticleByType(type)]
    return (_bucket_labels(_COMMENT_EDGES),
            _bucket_counts(sizes, _COMMENT_EDGES))


def getArticleRepotsLen(type):
    """Histogram of repost counts (column 3) for articles of *type*.

    Returns ``(labels, counts)``.
    """
    reposts = [int(article[3]) for article in getArticleByType(type)]
    return (_bucket_labels(_REPOST_EDGES),
            _bucket_counts(reposts, _REPOST_EDGES))


def getIPByArticleRegion():
    """Count articles per region (column 4), skipping the '无' placeholder."""
    regions = Counter(
        article[4] for article in articleList if article[4] != '无'
    )
    return _as_pie(regions)


def getIPByCommentsRegion():
    """Count comments per region (column 3), skipping the '无' placeholder."""
    regions = Counter(
        comment[3] for comment in commentList if comment[3] != '无'
    )
    return _as_pie(regions)


def getCommentDataOne():
    """Histogram of comment column 2 in 100 buckets of width 20 (0-2000).

    Returns ``(labels, counts)``.  Values of 2000 or more are not counted.
    NOTE(review): column 2 is presumably the comment's like count — confirm.
    """
    rangeNum = 20
    edges = [rangeNum * (step + 1) for step in range(100)]
    labels = [f"{upper - rangeNum}-{upper}" for upper in edges]
    values = [int(comment[2]) for comment in commentList]
    return labels, _bucket_counts(values, edges, open_ended=False)


def getCommentDataTwo():
    """Count comments per gender value (column 6) as pie items."""
    return _as_pie(Counter(comment[6] for comment in commentList))


def getYuQingCharDataOne():
    """Sentiment breakdown of the hot words.

    Returns ``(labels, counts, pie_items)``.
    """
    X = list(_SENTIMENT_LABELS)
    Y = _sentiment_tally(word[0] for word in getAllHotWords())
    biedata = [{'name': label, 'value': count} for label, count in zip(X, Y)]
    return X, Y, biedata


def getYuQingCharDataTwo():
    """Sentiment breakdown of comment texts (column 4) and article texts (column 5).

    Returns ``(comment_pie_items, article_pie_items)``.
    """
    comment_counts = _sentiment_tally(comment[4] for comment in commentList)
    article_counts = _sentiment_tally(article[5] for article in articleList)
    biedata1 = [{'name': label, 'value': count}
                for label, count in zip(_SENTIMENT_LABELS, comment_counts)]
    biedata2 = [{'name': label, 'value': count}
                for label, count in zip(_SENTIMENT_LABELS, article_counts)]
    return biedata1, biedata2


def getYuQingCharDataThree():
    """Return ``(words, counts)`` for the first ten rows of the hot-word list."""
    leading = getAllHotWords()[:10]
    x1Data = [word[0] for word in leading]
    y1Data = [int(word[1]) for word in leading]
    return x1Data, y1Data