def datetime_to_number(date: str):
    """Return the number of whole days between 2024-01-01 and *date*.

    Args:
        date: A date string parseable with the ``%Y-%m-%d`` format.

    Returns:
        The signed day offset as an ``int`` (negative for dates before
        2024-01-01).

    Raises:
        ValueError: If *date* does not match ``%Y-%m-%d``.
    """
    date_number = datetime.datetime.strptime(date, "%Y-%m-%d")
    base_number = datetime.datetime.strptime("2024-1-1", "%Y-%m-%d")
    return (date_number - base_number).days


def predict_future_values(data, days=5):
    """Extrapolate a per-day series with a straight-line least-squares fit.

    Args:
        data: Mapping of ``%Y-%m-%d`` date strings to numeric values.
        days: How many consecutive days after the latest date in *data* to
            predict. Defaults to 5, the horizon the function always used.

    Returns:
        A dict mapping each future date string (zero-padded ``YYYY-MM-DD``)
        to its predicted value, truncated to ``int`` and clamped at 0.
        Returns an empty dict when *data* is empty.

    Raises:
        ValueError: If a key of *data* does not match ``%Y-%m-%d``.
    """
    # Nothing to fit and no "latest date" to extend from.
    if not data:
        return {}

    # Order the observations chronologically.
    sorted_dates = sorted(
        data, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d")
    )

    # Convert dates to integer day offsets and pick the matching values.
    xs = np.array([datetime_to_number(date) for date in sorted_dates])
    ys = np.array([data[date] for date in sorted_dates])

    if len(sorted_dates) < 2:
        # A single observation cannot determine a slope; a degree-1 fit
        # would be under-determined, so carry the value forward instead.
        fn = np.poly1d([ys[0]])
    else:
        fn = np.poly1d(np.polyfit(xs, ys, 1))

    # Predict the `days` days that follow the most recent observation.
    latest_date_obj = datetime.datetime.strptime(sorted_dates[-1], "%Y-%m-%d")
    predictions = {}
    for offset in range(1, days + 1):
        future = latest_date_obj + datetime.timedelta(days=offset)
        date = future.strftime("%Y-%m-%d")
        # Evaluate once; int() truncates toward zero and negatives become 0.
        predictions[date] = max(0, int(fn(datetime_to_number(date))))

    return predictions


if __name__ == '__main__':
    data = {'2024-06-15': 1, '2024-06-18': 1, '2024-06-22': 1, '2024-06-23': 1, '2024-07-01': 3, '2024-07-02': 4, '2024-07-03': 4, '2024-07-04': 14}
    predictions = predict_future_values(data)
    print(predictions)
def mergeTopics(article_topics, comment_topics):
    """Merge two topic-count lists, summing the values of equal names.

    Args:
        article_topics: Iterable of ``{'name': ..., 'value': ...}`` dicts.
        comment_topics: Iterable of dicts with the same two keys.

    Returns:
        A list of ``{'name': ..., 'value': ...}`` dicts, one per distinct
        name, in order of first appearance (articles first, then comments).
    """
    merged_dict = {}
    # Walk the inputs one after the other instead of concatenating them, so
    # any iterable works, not only two lists.
    for source in (article_topics, comment_topics):
        for topic in source:
            name = topic['name']
            if name in merged_dict:
                merged_dict[name] += topic['value']
            else:
                merged_dict[name] = topic['value']
    return [{'name': key, 'value': value} for key, value in merged_dict.items()]


def getTopicData():
    """Return the names and values of the first ten rows of the merged CSV.

    Reads ``./merged_topics.csv`` relative to the current working directory.
    The file is expected to have ``name`` and ``value`` header columns, as
    written by ``writeTopicsToCSV`` (which stores rows in descending value
    order, so the first ten rows are the ten largest topics).

    Returns:
        A ``(xData, yData)`` tuple: the list of names and the list of values.
    """
    # Imported here so the function does not depend on `pd` leaking in
    # through a star import elsewhere in the module.
    import pandas as pd

    top_10_topics = pd.read_csv('./merged_topics.csv').head(10)
    xData = top_10_topics['name'].tolist()
    yData = top_10_topics['value'].tolist()
    return xData, yData


def getTopicPageCreatedAtCharData(topic):
    """Count articles and comments labelled *topic*, grouped by date.

    Article rows are matched on index 14 and dated by index 7; comment rows
    are matched on index 9 and dated by index 1. Both come from the
    module-level ``articleList`` and ``commentList``.

    Args:
        topic: The topic label to match.

    Returns:
        A ``(topic, counts)`` tuple where ``counts`` maps each date string to
        the number of matching rows, ordered chronologically.

    Raises:
        ValueError: If a matching row's date does not match ``%Y-%m-%d``.
    """
    createdAt = {}
    for article in articleList:
        if article[14] == topic:
            createdAt[article[7]] = createdAt.get(article[7], 0) + 1
    for comment in commentList:
        if comment[9] == topic:
            createdAt[comment[1]] = createdAt.get(comment[1], 0) + 1

    # Sort by the parsed date rather than by the raw string.
    ordered_dates = sorted(
        createdAt, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d")
    )
    sorted_data = {date: createdAt[date] for date in ordered_dates}
    return topic, sorted_data


def writeTopicsToCSV(topics, file_name):
    """Write *topics* to *file_name* as CSV, largest value first.

    The file is always overwritten and always starts with a ``name,value``
    header row, so it can be read back by column name.

    Args:
        topics: Iterable of ``{'name': ..., 'value': ...}`` dicts.
        file_name: Path of the CSV file to create or replace.
    """
    sorted_topics = sorted(topics, key=lambda x: x['value'], reverse=True)
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['name', 'value'])
        # Mode 'w' truncates the file, so the header must be written on
        # every call; skipping it for an existing file left a header-less
        # CSV behind.
        writer.writeheader()
        writer.writerows(sorted_topics)


if __name__ == '__main__':
    # To regenerate the merged topic file, run:
    # merged_topics = mergeTopics(getTopicByArticle(), getTopicByComments())
    # writeTopicsToCSV(merged_topics, 'merged_topics.csv')
    print(getTopicPageCreatedAtCharData("生活"))