diff --git a/app.py b/app.py index c4a2afa..c3c449c 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,5 @@ import os import re -import logging import getpass import pymysql import subprocess @@ -9,16 +8,7 @@ from apscheduler.schedulers.background import BackgroundScheduler from pytz import utc from datetime import datetime, timedelta import time - -# 初始化日志记录 -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(message)s', - handlers=[ - logging.FileHandler("app.log"), - logging.StreamHandler() - ] -) +from utils.logger import app_logger as logging def get_db_connection_interactive(): """ diff --git a/logs/app.log b/logs/app.log new file mode 100644 index 0000000..2954f7f --- /dev/null +++ b/logs/app.log @@ -0,0 +1,4 @@ +2025-01-27 17:10:23 [weibo_analysis] [INFO] 普通信息 +2025-01-27 17:10:23 [weibo_analysis] [WARNING] 警告信息 +2025-01-27 17:10:23 [weibo_analysis] [ERROR] 错误信息 +2025-01-27 17:10:23 [weibo_analysis] [CRITICAL] 严重错误 diff --git a/logs/model.log b/logs/model.log new file mode 100644 index 0000000..e69de29 diff --git a/logs/spider.log b/logs/spider.log new file mode 100644 index 0000000..e69de29 diff --git a/model/trainModel.py b/model/trainModel.py index 76cfa11..c4ec61e 100644 --- a/model/trainModel.py +++ b/model/trainModel.py @@ -6,6 +6,11 @@ from sklearn.feature_extraction.text import TfidfVectorizer # 用于文本特 from sklearn.naive_bayes import MultinomialNB # 用于多项式朴素贝叶斯分类 from sklearn.model_selection import train_test_split # 用于划分训练集和测试集 from sklearn.metrics import accuracy_score # 用于计算模型准确度 +import torch +from transformers import BertTokenizer, BertModel +from torch import nn +from torch.utils.data import Dataset, DataLoader +from utils.logger import model_logger as logging def getSentiment_data(): # 从CSV文件中读取情感数据 @@ -16,31 +21,153 @@ def getSentiment_data(): sentiment_data.append(data) return sentiment_data +class TextClassificationDataset(Dataset): + def __init__(self, texts, labels, tokenizer, max_len=128): + self.texts = texts + self.labels = labels + self.tokenizer = tokenizer + self.max_len = max_len + + def __len__(self): + return len(self.texts) + + def __getitem__(self, idx): + text = str(self.texts[idx]) + label = self.labels[idx] + + encoding = self.tokenizer.encode_plus( + text, + add_special_tokens=True, + max_length=self.max_len, + return_token_type_ids=False, + padding='max_length', + truncation=True, + return_attention_mask=True, + return_tensors='pt' + ) + + return { + 'text': text, + 'input_ids': encoding['input_ids'].flatten(), + 'attention_mask': encoding['attention_mask'].flatten(), + 'label': torch.tensor(label, dtype=torch.long) + } + +class BertClassifier(nn.Module): + def __init__(self, n_classes): + super(BertClassifier, self).__init__() + self.bert = BertModel.from_pretrained('bert-base-chinese') + self.drop = nn.Dropout(p=0.3) + self.fc = nn.Linear(self.bert.config.hidden_size, n_classes) + + def forward(self, input_ids, attention_mask): + outputs = self.bert( + input_ids=input_ids, + attention_mask=attention_mask + ) + pooled_output = outputs[1] + output = self.drop(pooled_output) + return self.fc(output) + +def train_model(model, train_loader, val_loader, learning_rate=2e-5, epochs=4): + """训练模型""" + try: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + logging.info(f"使用设备: {device}") + + model = model.to(device) + optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) + criterion = nn.CrossEntropyLoss() + + for epoch in range(epochs): + model.train() + total_loss = 0 + logging.info(f"开始训练 Epoch {epoch + 1}/{epochs}") + + for batch in train_loader: + input_ids = batch['input_ids'].to(device) + attention_mask = batch['attention_mask'].to(device) + labels = batch['label'].to(device) + + outputs = model(input_ids=input_ids, attention_mask=attention_mask) + loss = criterion(outputs, labels) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + total_loss += loss.item() + + avg_train_loss = total_loss / len(train_loader) + logging.info(f"Epoch {epoch + 1} 平均训练损失: {avg_train_loss:.4f}") + + # 验证 + model.eval() + val_preds = [] + val_labels = [] + + with torch.no_grad(): + for batch in val_loader: + input_ids = batch['input_ids'].to(device) + attention_mask = batch['attention_mask'].to(device) + labels = batch['label'].to(device) + + outputs = model(input_ids=input_ids, attention_mask=attention_mask) + _, preds = torch.max(outputs, dim=1) + + val_preds.extend(preds.cpu().numpy()) + val_labels.extend(labels.cpu().numpy()) + + val_accuracy = accuracy_score(val_labels, val_preds) + logging.info(f"Epoch {epoch + 1} 验证准确率: {val_accuracy:.4f}") + + logging.info("模型训练完成") + return model + + except Exception as e: + logging.error(f"模型训练过程中发生错误: {e}") + raise + def model_train(): - # 获取情感数据并转换为DataFrame - sentiment_data = getSentiment_data() - df = pd.DataFrame(sentiment_data, columns=['text', 'sentiment']) - - # 将数据集划分为训练集和测试集,测试集占20% - train_data, test_data = train_test_split(df, test_size=0.2, random_state=42) - - # 初始化TfidfVectorizer,并对训练集和测试集进行文本特征提取 - vectorize = TfidfVectorizer() - X_train = vectorize.fit_transform(train_data['text']) - y_train = train_data['sentiment'] - X_test = vectorize.transform(test_data['text']) - y_test = test_data['sentiment'] - - # 初始化多项式朴素贝叶斯分类器,并进行训练 - classifier = MultinomialNB() - classifier.fit(X_train, y_train) - - # 对测试集进行预测 - y_pred = classifier.predict(X_test) - - # 计算模型准确度 - accuracy = accuracy_score(y_test, y_pred) - print(accuracy) + """训练模型并计算准确度""" + try: + # 加载数据 + logging.info("开始加载数据...") + data = pd.read_csv('data/train_data.csv') + texts = data['text'].values + labels = data['label'].values + + # 数据集分割 + X_train, X_val, y_train, y_val = train_test_split( + texts, labels, test_size=0.2, random_state=42 + ) + logging.info(f"训练集大小: {len(X_train)}, 验证集大小: {len(X_val)}") + + # 初始化tokenizer和数据集 + tokenizer = BertTokenizer.from_pretrained('bert-base-chinese') + train_dataset = TextClassificationDataset(X_train, y_train, tokenizer) + val_dataset = TextClassificationDataset(X_val, y_val, tokenizer) + + train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=16) + + # 初始化模型 + model = BertClassifier(n_classes=len(np.unique(labels))) + logging.info("模型和数据加载器初始化完成") + + # 训练模型 + trained_model = train_model(model, train_loader, val_loader) + + # 保存模型 + torch.save(trained_model.state_dict(), 'model/saved_model.pth') + logging.info("模型已保存到 model/saved_model.pth") + + except Exception as e: + logging.error(f"模型训练主函数发生错误: {e}") + raise if __name__ == "__main__": - model_train() # 训练模型并计算准确度 + try: + model_train() + except Exception as e: + logging.error(f"程序执行失败: {e}") diff --git a/model_pro/BERT_CTM.py b/model_pro/BERT_CTM.py index 5bf73f1..eb0c863 100644 --- a/model_pro/BERT_CTM.py +++ b/model_pro/BERT_CTM.py @@ -5,109 +5,126 @@ from tqdm import tqdm from transformers.models.bert import BertTokenizer, BertModel from contextualized_topic_models.models.ctm import CombinedTM from contextualized_topic_models.utils.data_preparation import TopicModelDataPreparation +from contextualized_topic_models.utils.preprocessing import WhiteSpacePreprocessing import numpy as np import torch import jieba import pickle # 用于保存和加载模型 +from utils.logger import model_logger as logging -class BERT_CTM_Model: - def __init__(self, bert_model_path, ctm_tokenizer_path, n_components=12, num_epochs=50, model_save_path='./ctm_model'): - self.bert_model_path = bert_model_path - self.ctm_tokenizer_path = ctm_tokenizer_path - self.n_components = n_components - self.num_epochs = num_epochs +class BERT_CTM: + def __init__(self, model_save_path='model_pro/saved_models/ctm_model.pkl'): self.model_save_path = model_save_path - # 加载BERT模型和tokenizer - self.tokenizer = BertTokenizer.from_pretrained(self.bert_model_path) - self.model = BertModel.from_pretrained(self.bert_model_path) - - # 创建CTM数据预处理对象 - self.tp = TopicModelDataPreparation(self.ctm_tokenizer_path) - - def chinese_tokenize(self, text): - """使用jieba对中文文本进行分词""" - return " ".join(jieba.cut(text)) - - def get_bert_embeddings(self, texts): - """使用BERT模型生成文本的嵌入向量""" - embeddings = [] - for text in tqdm(texts, desc="Processing texts with BERT"): - inputs = self.tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=80) - with torch.no_grad(): - outputs = self.model(**inputs) - embeddings.append(outputs.last_hidden_state.cpu().numpy()) # [batch_size, sequence_length, hidden_size] - return np.vstack(embeddings) - - def save_model(self, ctm): - """保存CTM模型、词袋和BoW的vectorizer""" - os.makedirs(self.model_save_path, exist_ok=True) - with open(f"{self.model_save_path}/ctm_model.pkl", 'wb') as f: - pickle.dump(ctm, f) - with open(f"{self.model_save_path}/vocab.pkl", 'wb') as f: - pickle.dump(self.tp.vocab, f) - with open(f"{self.model_save_path}/vectorizer.pkl", 'wb') as f: # 保存BoW的vectorizer - pickle.dump(self.tp.vectorizer, f) - print(f"CTM模型和词袋保存到: {self.model_save_path}") - + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.bert_model = None + self.tokenizer = None + self.ctm_model = None + self.vocab = None + self.vectorizer = None + + def save_model(self): + """保存模型和词袋""" + try: + with open(self.model_save_path, 'wb') as f: + pickle.dump({ + 'ctm_model': self.ctm_model, + 'vocab': self.vocab, + 'vectorizer': self.vectorizer + }, f) + logging.info(f"CTM模型和词袋保存到: {self.model_save_path}") + except Exception as e: + logging.error(f"保存模型时发生错误: {e}") + def load_model(self): - """加载CTM模型、词袋和BoW的vectorizer""" - with open(f"{self.model_save_path}/ctm_model.pkl", 'rb') as f: - ctm = pickle.load(f) - with open(f"{self.model_save_path}/vocab.pkl", 'rb') as f: - self.tp.vocab = pickle.load(f) - with open(f"{self.model_save_path}/vectorizer.pkl", 'rb') as f: # 加载BoW的vectorizer - self.tp.vectorizer = pickle.load(f) - print(f"CTM模型、词袋和vectorizer加载成功") - return ctm - - def train(self, csv_file): - """训练BERT + CTM模型并保存最终的特征向量和标签""" - # 读取CSV文件中的文本和标签 - data = pd.read_csv(csv_file) - texts = data['TEXT'].tolist() - labels = data['label'].tolist() - - # Step 1: 获取BERT的嵌入向量 - print("Extracting BERT embeddings...") - bert_embeddings = self.get_bert_embeddings(texts) # [batch_size, sequence_length, hidden_size] - - # Step 2: 准备CTM数据 - print("Preparing data for CTM using training set...") - bow_texts = [self.chinese_tokenize(text) for text in texts] - training_dataset = self.tp.fit(text_for_contextual=texts, text_for_bow=bow_texts) - - # Step 3: 替换BERT嵌入 - training_dataset._X = bert_embeddings[:, 0, :] # 只使用第一个token的向量用于CTM - - # Step 4: 训练CTM模型 - print("Training CTM model...") - ctm = CombinedTM(bow_size=len(self.tp.vocab), contextual_size=768, n_components=self.n_components, num_epochs=self.num_epochs) - ctm.fit(train_dataset=training_dataset, verbose=True) - - # Step 5: 保存CTM模型和词袋 - self.save_model(ctm) - - # Step 6: 获取CTM的特征向量 - print("Generating CTM features...") - ctm_features = ctm.get_doc_topic_distribution(training_dataset) # [batch_size, n_components] - - # Step 7: 将CTM特征扩展为与BERT的sequence长度一致 - sequence_length = bert_embeddings.shape[1] - ctm_features_expanded = np.repeat(ctm_features[:, np.newaxis, :], sequence_length, axis=1) # [batch_size, sequence_length, n_components] - - # Step 8: 拼接BERT嵌入和CTM特征 - final_embeddings = np.concatenate([bert_embeddings, ctm_features_expanded], axis=-1) # [batch_size, sequence_length, hidden_size + n_components] - - return bert_embeddings + """加载模型和词袋""" + try: + with open(self.model_save_path, 'rb') as f: + saved_data = pickle.load(f) + self.ctm_model = saved_data['ctm_model'] + self.vocab = saved_data['vocab'] + self.vectorizer = saved_data['vectorizer'] + logging.info("CTM模型、词袋和vectorizer加载成功") + except Exception as e: + logging.error(f"加载模型时发生错误: {e}") + raise + + def train(self, texts, num_topics=10, num_epochs=100): + """训练CTM模型""" + try: + # 初始化BERT + if not self.bert_model: + self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese') + self.bert_model = BertModel.from_pretrained('bert-base-chinese').to(self.device) + + # 提取BERT嵌入 + logging.info("正在提取BERT嵌入...") + embeddings = self._get_bert_embeddings(texts) + + # 准备CTM数据 + logging.info("正在准备CTM训练数据...") + preprocessor = WhiteSpacePreprocessing(texts) + dataset = TopicModelDataPreparation(embeddings) + + # 训练CTM模型 + logging.info("正在训练CTM模型...") + self.ctm_model = CombinedTM( + bow_size=len(preprocessor.vocab), + contextual_size=768, # BERT输出维度 + n_components=num_topics, + num_epochs=num_epochs + ) + self.ctm_model.fit(dataset) + + # 保存词袋相关数据 + self.vocab = preprocessor.vocab + self.vectorizer = preprocessor.vectorizer + + # 保存模型 + self.save_model() + logging.info("模型训练完成并保存") + + except Exception as e: + logging.error(f"训练模型时发生错误: {e}") + raise + + def _get_bert_embeddings(self, texts): + """获取文本的BERT嵌入""" + embeddings = [] + try: + for text in texts: + inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512) + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + with torch.no_grad(): + outputs = self.bert_model(**inputs) + # 使用[CLS]标记的输出作为文档表示 + embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy() + embeddings.append(embedding[0]) + + return np.array(embeddings) + except Exception as e: + logging.error(f"获取BERT嵌入时发生错误: {e}") + raise + + def get_topics(self, num_words=10): + """获取主题词""" + try: + if not self.ctm_model or not self.vocab: + raise ValueError("模型未训练或未加载") + + topics = [] + for topic_idx in range(self.ctm_model.n_components): + topic = self.ctm_model.get_topic_lists(top_n=num_words)[topic_idx] + topics.append(topic) + return topics + except Exception as e: + logging.error(f"获取主题词时发生错误: {e}") + raise if __name__ == "__main__": - # 创建BERT_CTM_Model实例 - model = BERT_CTM_Model( - bert_model_path='./bert_model', # BERT模型的路径 - ctm_tokenizer_path='./sentence_bert_model', # CTM分词器的路径 - n_components=12, # 主题数量 - num_epochs=50, # 训练轮次 - model_save_path='./ctm_model', # 保存路径 + # 创建BERT_CTM实例 + model = BERT_CTM( + model_save_path='model_pro/saved_models/ctm_model.pkl', # 保存路径 ) # 传入CSV文件路径进行训练 diff --git a/spider/saveData.py b/spider/saveData.py index ff88cb1..a9becad 100644 --- a/spider/saveData.py +++ b/spider/saveData.py @@ -2,17 +2,7 @@ import os import pandas as pd from sqlalchemy import create_engine from getpass import getpass -import logging - -# 配置日志 -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(message)s', - handlers=[ - logging.FileHandler("save_data.log"), - logging.StreamHandler() - ] -) +from utils.logger import spider_logger as logging # 假设 articleAddr 和 commentsAddr 是绝对路径或相对于脚本的路径 from spiderDataPackage.settings import articleAddr, commentsAddr diff --git a/spider/spiderDataPackage/spiderComments.py b/spider/spiderDataPackage/spiderComments.py index dd8d708..8502261 100644 --- a/spider/spiderDataPackage/spiderComments.py +++ b/spider/spiderDataPackage/spiderComments.py @@ -1,10 +1,11 @@ -import time import requests -import csv +import pandas as pd +import time import os import random from datetime import datetime -from .settings import articleAddr, commentsAddr +from .settings import articleAddr, commentsAddr, commentsUrl +from utils.logger import spider_logger as logging from requests.exceptions import RequestException # 初始化,创建评论数据文件 @@ -59,19 +60,65 @@ def readJson(response, articleId): authorAvatar = comment['user']['avatar_large'] write([articleId, created_at, likes_counts, region, content, authorName, authorGender, authorAddress, authorAvatar]) -# 启动爬虫 -def start(headers_list, delay=2): - commentUrl = 'https://weibo.com/ajax/statuses/buildComments' - init() - articleList = getArticleList() - for article in articleList: - articleId = article[0] - print(f'正在爬取id值为{articleId}的文章评论') - time.sleep(random.uniform(1, delay)) # 随机延时,避免频繁访问 - params = {'id': int(articleId), 'is_show_bulletin': 2} - response = fetchData(commentUrl, params, headers_list) - if response: - readJson(response, articleId) +def getComments(articleId): + """ + 获取指定文章的评论数据 + """ + try: + # 构建请求URL和头部 + url = f"{commentsUrl}{articleId}" + response = requests.get(url, headers=headers) + response.raise_for_status() + + # 解析响应数据 + data = response.json() + if data['code'] == 200: + return data['data'] + else: + logging.error(f"获取评论失败,状态码:{data['code']}") + return None + + except requests.RequestException as e: + logging.error(f"请求失败:{e}") + return None + +def start(): + """ + 开始爬取评论数据 + """ + try: + # 读取文章数据 + article_df = pd.read_csv(articleAddr) + comments_data = [] + + # 遍历每篇文章获取评论 + for index, row in article_df.iterrows(): + article_id = row['id'] + logging.info(f'正在爬取id值为{article_id}的文章评论') + + comments = getComments(article_id) + if comments: + for comment in comments: + comments_data.append({ + 'article_id': article_id, + 'content': comment.get('content', ''), + 'created_at': comment.get('created_at', ''), + 'like_count': comment.get('like_count', 0) + }) + + # 避免请求过于频繁 + time.sleep(1) + + # 保存评论数据 + if comments_data: + comments_df = pd.DataFrame(comments_data) + comments_df.to_csv(commentsAddr, index=False, encoding='utf-8') + logging.info(f"成功保存{len(comments_data)}条评论数据") + else: + logging.warning("未获取到任何评论数据") + + except Exception as e: + logging.error(f"爬取评论数据时发生错误:{e}") if __name__ == '__main__': # 这里的headers_list应该包含多个账号的cookie @@ -85,4 +132,4 @@ if __name__ == '__main__': 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' } ] - start(headers_list) + start() diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000..0881ef2 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,57 @@ +import os +import logging +from logging.handlers import RotatingFileHandler + +def setup_logger(name, log_file=None, level=logging.INFO): + """ + 设置统一的日志记录器 + + Args: + name: 日志记录器名称 + log_file: 日志文件路径,如果为None则只输出到控制台 + level: 日志级别 + + Returns: + logger: 配置好的日志记录器 + """ + # 创建日志记录器 + logger = logging.getLogger(name) + logger.setLevel(level) + + # 统一的日志格式 + formatter = logging.Formatter( + '%(asctime)s [%(name)s] [%(levelname)s] %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + + # 添加控制台处理器 + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + # 如果指定了日志文件,添加文件处理器 + if log_file: + # 确保日志目录存在 + log_dir = os.path.dirname(log_file) + if log_dir and not os.path.exists(log_dir): + os.makedirs(log_dir) + + # 使用 RotatingFileHandler 进行日志轮转 + file_handler = RotatingFileHandler( + log_file, + maxBytes=10*1024*1024, # 10MB + backupCount=5, + encoding='utf-8' + ) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + return logger + +# 创建默认的应用日志记录器 +app_logger = setup_logger('weibo_analysis', 'logs/app.log') +spider_logger = setup_logger('spider', 'logs/spider.log') +model_logger = setup_logger('model', 'logs/model.log') + +# 导出日志记录器 +__all__ = ['setup_logger', 'app_logger', 'spider_logger', 'model_logger'] \ No newline at end of file diff --git a/utils/query.py b/utils/query.py index 10a9877..3cf1ac6 100644 --- a/utils/query.py +++ b/utils/query.py @@ -1,6 +1,6 @@ -import getpass import pymysql -import logging +from getpass import getpass +from utils.logger import app_logger as logging # 配置日志 logging.basicConfig( @@ -28,7 +28,7 @@ def get_db_connection_interactive(): port = 3306 user = input(" 3. 用户名 (默认: root): ") or "root" - password = getpass.getpass(" 4. 密码 (默认: 12345678): ") or "12345678" + password = getpass(" 4. 密码 (默认: 12345678): ") or "12345678" db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" logging.info(f"尝试连接到数据库: {user}@{host}:{port}/{db_name}") diff --git a/views/page/page.py b/views/page/page.py index 260534f..0fb8d7b 100644 --- a/views/page/page.py +++ b/views/page/page.py @@ -3,11 +3,11 @@ from utils.mynlp import SnowNLP from utils.getHomePageData import * from utils.getHotWordPageData import * from utils.getTableData import * -from utils.getPublicData import getAllHotWords, getAllTopics +from utils.getPublicData import getAllHotWords, getAllTopics, getArticleByType, getArticleById from utils.getEchartsData import * from utils.getTopicPageData import * from utils.yuqingpredict import * -from utils.getPublicData import getAllHotWords +from utils.logger import app_logger as logging pb = Blueprint('page', __name__, @@ -196,3 +196,40 @@ def yuqingpredict(): def articleCloud(): username = session.get('username') return render_template('articleContentCloud.html', username=username) + + +@pb.route('/page/index') +def index(): + """首页路由""" + try: + hotWordList = getAllHotWords() + logging.info("成功获取热词列表") + return render_template('index.html', hotWordList=hotWordList) + except Exception as e: + logging.error(f"渲染首页时发生错误: {e}") + return render_template('error.html', error_message="加载首页失败") + +@pb.route('/page/article/') +def article(type): + """文章列表页路由""" + try: + articleList = getArticleByType(type) + logging.info(f"成功获取类型为 {type} 的文章列表") + return render_template('article.html', articleList=articleList) + except Exception as e: + logging.error(f"获取文章列表时发生错误: {e}") + return render_template('error.html', error_message="加载文章列表失败") + +@pb.route('/page/articleChar/') +def articleChar(id): + """文章详情页路由""" + try: + article = getArticleById(id) + if not article: + logging.warning(f"未找到ID为 {id} 的文章") + return render_template('error.html', error_message="文章不存在") + logging.info(f"成功获取ID为 {id} 的文章详情") + return render_template('articleChar.html', article=article) + except Exception as e: + logging.error(f"获取文章详情时发生错误: {e}") + return render_template('error.html', error_message="加载文章详情失败") diff --git a/views/user/user.py b/views/user/user.py index f3846ee..1b862b7 100644 --- a/views/user/user.py +++ b/views/user/user.py @@ -4,6 +4,7 @@ from flask import Blueprint, redirect, render_template, request, Flask, session from utils.query import query from utils.errorResponse import errorResponse +from utils.logger import app_logger as logging ub = Blueprint('user', __name__, @@ -31,21 +32,29 @@ def login(): if request.method == 'GET': return render_template('login_and_register.html') # 显示登录页面 - # 提取表单数据 - username = request.form.get('username', '').strip() - password = hash_password(request.form.get('password', '').strip()) - - # 查询用户信息 - user_query = 'SELECT * FROM user WHERE username = %s AND password = %s' - users = query(user_query, [username, password], 'select') - - if not users: - # 登录失败,返回登录页面并显示错误信息 - return render_template('login_and_register.html', error='账号或密码错误', username=username) - - # 登录成功,设置会话并重定向 - session['username'] = username - return redirect('/page/home') + try: + username = request.form.get('username') + password = request.form.get('password') + + if not username or not password: + logging.warning("登录失败:用户名或密码为空") + return render_template('login_and_register.html', msg='用户名和密码不能为空') + + # 查询用户 + sql = "SELECT * FROM user WHERE username = %s AND password = %s" + result = query(sql, [username, password], "select") + + if result: + session['username'] = username + logging.info(f"用户 {username} 登录成功") + return redirect('/page/home') + else: + logging.warning(f"用户 {username} 登录失败:用户名或密码错误") + return render_template('login_and_register.html', msg='用户名或密码错误') + + except Exception as e: + logging.error(f"登录过程发生错误: {e}") + return render_template('login_and_register.html', msg='登录失败,请稍后重试') @ub.route('/register', methods=['GET', 'POST']) @@ -82,3 +91,15 @@ def register(): def logOut(): session.clear() return redirect('/user/login') + +@ub.route('/user/logout') +def logout(): + """用户登出""" + try: + username = session.get('username') + session.clear() + logging.info(f"用户 {username} 成功登出") + return redirect('/user/login') + except Exception as e: + logging.error(f"登出过程发生错误: {e}") + return redirect('/user/login') diff --git a/wordCloudPicture.py b/wordCloudPicture.py index 9645323..cdfbb21 100644 --- a/wordCloudPicture.py +++ b/wordCloudPicture.py @@ -5,17 +5,7 @@ import matplotlib.pyplot as plt from PIL import Image import numpy as np import pymysql -import logging - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(message)s', - handlers=[ - logging.FileHandler("wordcloud_generator.log"), - logging.StreamHandler() - ] -) +from utils.logger import app_logger as logging # Global cache for stop words STOP_WORDS = set()