Replace the old sentiment (emotion recognition) model with the new `model_pro` model, and integrate its predictions into the project: the batch prediction script, the chart statistics, and the topic prediction page.
This commit is contained in:
+55
-34
@@ -6,12 +6,12 @@ from tqdm import tqdm
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import chardet # 导入 chardet
|
||||
import chardet
|
||||
|
||||
# 导入您定义的模型和模块
|
||||
from MHA import MultiHeadAttentionLayer
|
||||
from classifier import FinalClassifier
|
||||
from BERT_CTM import BERT_CTM_Model
|
||||
# 导入改进版模型的组件
|
||||
from model_pro.MHA import MultiHeadAttentionLayer
|
||||
from model_pro.classifier import FinalClassifier
|
||||
from model_pro.BERT_CTM import BERT_CTM_Model
|
||||
|
||||
# 设置设备
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
@@ -30,7 +30,7 @@ def detect_file_encoding(file_path, num_bytes=10000):
|
||||
result = chardet.detect(rawdata)
|
||||
encoding = result['encoding']
|
||||
confidence = result['confidence']
|
||||
print(f"Detected encoding: {encoding} with confidence {confidence}")
|
||||
print(f"检测到的编码: {encoding}, 置信度: {confidence}")
|
||||
return encoding
|
||||
|
||||
|
||||
@@ -42,8 +42,6 @@ def get_bert_ctm_embeddings(texts, bert_model_path, ctm_tokenizer_path, n_compon
|
||||
n_components=n_components,
|
||||
num_epochs=num_epochs
|
||||
)
|
||||
# 加载已保存的CTM模型
|
||||
bert_ctm_model.load_model()
|
||||
# 获取嵌入
|
||||
embeddings = bert_ctm_model.get_bert_embeddings(texts)
|
||||
return embeddings
|
||||
@@ -60,15 +58,11 @@ def predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_
|
||||
num_classes=2):
|
||||
try:
|
||||
# 加载模型
|
||||
# 修改这里,设置 weights_only=True 以消除 FutureWarning
|
||||
checkpoint = torch.load(model_save_path, map_location=device, weights_only=False)
|
||||
classifier_model = FinalClassifier(input_dim=768, num_classes=num_classes)
|
||||
classifier_model.load_state_dict(checkpoint['classifier_model_state_dict'])
|
||||
classifier_model.to(device)
|
||||
print("加载模型...")
|
||||
classifier_model = torch.load(model_save_path, map_location=device)
|
||||
classifier_model.eval()
|
||||
|
||||
attention_model = MultiHeadAttentionLayer(embed_size=768, num_heads=8)
|
||||
attention_model.load_state_dict(checkpoint['attention_model_state_dict'])
|
||||
attention_model.to(device)
|
||||
attention_model.eval()
|
||||
|
||||
@@ -76,11 +70,12 @@ def predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_
|
||||
encoding = detect_file_encoding(input_data_path)
|
||||
|
||||
# 读取输入数据
|
||||
print("读取输入数据...")
|
||||
data = pd.read_csv(input_data_path, encoding=encoding)
|
||||
texts = data['TEXT'].tolist()
|
||||
|
||||
# 生成嵌入
|
||||
print("Generating embeddings...")
|
||||
print("生成文本嵌入...")
|
||||
embeddings = get_bert_ctm_embeddings(texts, bert_model_path, ctm_tokenizer_path)
|
||||
|
||||
# 准备DataLoader
|
||||
@@ -88,63 +83,89 @@ def predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_
|
||||
|
||||
# 存储预测结果
|
||||
all_predictions = []
|
||||
all_probabilities = []
|
||||
|
||||
print("开始预测...")
|
||||
with torch.no_grad():
|
||||
for batch in tqdm(data_loader, desc="Predicting"):
|
||||
for batch in tqdm(data_loader, desc="预测进度"):
|
||||
batch_x = batch[0].to(device)
|
||||
batch_x = torch.mean(batch_x, dim=1)
|
||||
|
||||
# 使用注意力机制
|
||||
attention_output = attention_model(batch_x, batch_x, batch_x)
|
||||
|
||||
# 获取分类结果
|
||||
outputs = classifier_model(attention_output)
|
||||
outputs = torch.mean(outputs, dim=1)
|
||||
|
||||
# 获取预测概率
|
||||
probabilities = torch.softmax(outputs, dim=1)
|
||||
|
||||
# 获取预测标签
|
||||
_, predicted = torch.max(outputs, 1)
|
||||
|
||||
all_predictions.extend(predicted.cpu().numpy())
|
||||
all_probabilities.extend(probabilities.cpu().numpy())
|
||||
|
||||
# 添加预测结果和概率到数据框
|
||||
data['Predicted_Label'] = all_predictions
|
||||
data['Confidence'] = [prob[pred] for prob, pred in zip(all_probabilities, all_predictions)]
|
||||
|
||||
# 保存预测结果
|
||||
data['Predicted_Label'] = all_predictions
|
||||
data.to_csv(output_path, index=False, encoding='utf-8')
|
||||
print(f"Predictions saved to {output_path}")
|
||||
print(f"预测结果已保存到 {output_path}")
|
||||
|
||||
# 统计标签的个数和占比
|
||||
label_counts = data['Predicted_Label'].value_counts()
|
||||
total_count = len(data)
|
||||
stats = {}
|
||||
stats = {
|
||||
'统计信息': {
|
||||
'总样本数': total_count,
|
||||
'各类别统计': {}
|
||||
}
|
||||
}
|
||||
|
||||
for label, count in label_counts.items():
|
||||
label_name = "良好" if label == 0 else "不良"
|
||||
percentage = (count / total_count) * 100
|
||||
stats[label_name] = {
|
||||
'count': count,
|
||||
'percentage': f"{percentage:.2f}%"
|
||||
confidence_mean = data[data['Predicted_Label'] == label]['Confidence'].mean()
|
||||
|
||||
stats['统计信息']['各类别统计'][label_name] = {
|
||||
'数量': int(count),
|
||||
'占比': f"{percentage:.2f}%",
|
||||
'平均置信度': f"{confidence_mean:.2f}"
|
||||
}
|
||||
print(f"Label: {label_name}, Count: {count}, Percentage: {percentage:.2f}%")
|
||||
print(f"标签: {label_name}, 数量: {count}, 占比: {percentage:.2f}%, 平均置信度: {confidence_mean:.2f}")
|
||||
|
||||
# 将统计信息保存到 JSON 文件
|
||||
with open(stats_output_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(stats, f, ensure_ascii=False)
|
||||
json.dump(stats, f, ensure_ascii=False, indent=4)
|
||||
|
||||
return True # 成功执行
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Error during prediction: {e}")
|
||||
return False # 执行失败
|
||||
print(f"预测过程中出现错误: {e}")
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: python using_example.py <input_data_path> <stats_output_path>")
|
||||
print("使用方法: python predict.py <input_data_path> <stats_output_path>")
|
||||
sys.exit(1)
|
||||
|
||||
input_data_path = sys.argv[1]
|
||||
stats_output_path = sys.argv[2]
|
||||
|
||||
# 定义路径
|
||||
model_save_path = 'BCAT/final_model.pt'
|
||||
output_path = 'BCAT/predictions.csv' # 保存预测结果的文件
|
||||
bert_model_path = 'BCAT/bert_model'
|
||||
ctm_tokenizer_path = 'BCAT/sentence_bert_model'
|
||||
model_save_path = 'model_pro/final_model.pt'
|
||||
output_path = 'model_pro/predictions.csv'
|
||||
bert_model_path = 'model_pro/bert_model'
|
||||
ctm_tokenizer_path = 'model_pro/sentence_bert_model'
|
||||
|
||||
# 执行预测
|
||||
success = predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_tokenizer_path,
|
||||
stats_output_path)
|
||||
|
||||
if success:
|
||||
sys.exit(0) # 成功
|
||||
sys.exit(0)
|
||||
else:
|
||||
sys.exit(1) # 失败
|
||||
sys.exit(1)
|
||||
|
||||
+69
-20
@@ -1,10 +1,59 @@
|
||||
from utils.getPublicData import * # Import utility functions for data retrieval
|
||||
from utils.mynlp import SnowNLP # Import SnowNLP for sentiment analysis
|
||||
from collections import Counter # Import Counter for counting occurrences
|
||||
import torch
|
||||
from model_pro.MHA import MultiHeadAttentionLayer
|
||||
from model_pro.classifier import FinalClassifier
|
||||
from model_pro.BERT_CTM import BERT_CTM_Model
|
||||
|
||||
articleList = getAllArticleData() # Retrieve all article data
|
||||
commentList = getAllCommentsData() # Retrieve all comment data
|
||||
|
||||
# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the models once at import time (module-level globals) so they are not
# reloaded on every prediction call.
model_save_path = 'model_pro/final_model.pt'
bert_model_path = 'model_pro/bert_model'
ctm_tokenizer_path = 'model_pro/sentence_bert_model'

# Pre-bind the model names so that a failed load leaves them defined (as None)
# instead of missing from the module namespace.
classifier_model = None
attention_model = None
bert_ctm_model = None

try:
    # The loaded object is used directly as a model (``.eval()`` is called on
    # it), so it must be unpickled in full. PyTorch >= 2.6 defaults to
    # ``weights_only=True``, which rejects such checkpoints; request the full
    # unpickle explicitly.
    # SECURITY: a full unpickle can execute arbitrary code embedded in the
    # file -- only load checkpoints from a trusted source.
    classifier_model = torch.load(
        model_save_path, map_location=device, weights_only=False
    )
    classifier_model.eval()

    # NOTE(review): the attention layer is constructed here but no saved
    # weights are loaded into it in this block -- confirm whether trained
    # attention weights should be restored before inference.
    attention_model = MultiHeadAttentionLayer(embed_size=768, num_heads=8)
    attention_model.to(device)
    attention_model.eval()

    bert_ctm_model = BERT_CTM_Model(
        bert_model_path=bert_model_path,
        ctm_tokenizer_path=ctm_tokenizer_path
    )
except Exception as e:
    # Best effort: report the failure and keep the module importable.
    print(f"模型加载失败: {e}")
|
||||
|
||||
def predict_sentiment(texts):
    """Predict a sentiment class index for each text with the improved model.

    Args:
        texts: Sequence of input strings to classify.

    Returns:
        A NumPy array of predicted class indices as produced by
        ``torch.max`` over the classifier output, or ``None`` if any step
        raises. An empty ``texts`` yields an empty integer array without
        invoking the models.
    """
    try:
        # Nothing to classify: return an empty result rather than pushing an
        # empty batch through the embedding model. ``len`` is used (not
        # truthiness) so array-like inputs are handled as well.
        if len(texts) == 0:
            return torch.empty(0, dtype=torch.long).numpy()

        # Obtain the text embeddings.
        embeddings = bert_ctm_model.get_bert_embeddings(texts)

        # Convert to a float tensor on the target device.
        batch_x = torch.tensor(embeddings, dtype=torch.float32).to(device)
        # assumes embeddings are (batch, seq, dim), so this averages over the
        # sequence axis -- TODO confirm against BERT_CTM_Model
        batch_x = torch.mean(batch_x, dim=1)

        with torch.no_grad():
            # Self-attention: the same tensor is used as query, key and value.
            attention_output = attention_model(batch_x, batch_x, batch_x)
            # Classify, then average the classifier output over dim 1.
            outputs = classifier_model(attention_output)
            outputs = torch.mean(outputs, dim=1)
            # The index of the largest score is the predicted label.
            _, predicted = torch.max(outputs, 1)

        return predicted.cpu().numpy()
    except Exception as e:
        # Best effort: report the failure and signal it to the caller as None.
        print(f"预测过程中出现错误: {e}")
        return None
|
||||
|
||||
def getTypeList():
|
||||
# Return a list of unique article types
|
||||
return list(set([x[8] for x in articleList]))
|
||||
@@ -119,32 +168,32 @@ def getYuQingCharDataOne():
|
||||
return X, Y, biedata
|
||||
|
||||
def getYuQingCharDataTwo():
|
||||
# Analyze sentiment of comments and articles
|
||||
comment_sentiments = []
|
||||
for comment in commentList:
|
||||
emotionValue = SnowNLP(comment[4]).sentiments
|
||||
if emotionValue > 0.4:
|
||||
comment_sentiments.append('正面')
|
||||
elif emotionValue < 0.2:
|
||||
comment_sentiments.append('负面')
|
||||
else:
|
||||
comment_sentiments.append('中性')
|
||||
comment_counts = Counter(comment_sentiments)
|
||||
# 分析评论和文章的情感
|
||||
comment_texts = [comment[4] for comment in commentList]
|
||||
article_texts = [article[5] for article in articleList]
|
||||
|
||||
article_sentiments = []
|
||||
for article in articleList:
|
||||
emotionValue = SnowNLP(article[5]).sentiments
|
||||
if emotionValue > 0.4:
|
||||
article_sentiments.append('正面')
|
||||
elif emotionValue < 0.2:
|
||||
article_sentiments.append('负面')
|
||||
# 预测评论情感
|
||||
comment_predictions = predict_sentiment(comment_texts)
|
||||
if comment_predictions is not None:
|
||||
comment_sentiments = ['良好' if pred == 0 else '不良' for pred in comment_predictions]
|
||||
else:
|
||||
article_sentiments.append('中性')
|
||||
comment_sentiments = []
|
||||
|
||||
# 预测文章情感
|
||||
article_predictions = predict_sentiment(article_texts)
|
||||
if article_predictions is not None:
|
||||
article_sentiments = ['良好' if pred == 0 else '不良' for pred in article_predictions]
|
||||
else:
|
||||
article_sentiments = []
|
||||
|
||||
# 统计结果
|
||||
comment_counts = Counter(comment_sentiments)
|
||||
article_counts = Counter(article_sentiments)
|
||||
|
||||
X = ['正面', '中性', '负面']
|
||||
X = ['良好', '不良']
|
||||
biedata1 = [{'name': x, 'value': comment_counts.get(x, 0)} for x in X]
|
||||
biedata2 = [{'name': x, 'value': article_counts.get(x, 0)} for x in X]
|
||||
|
||||
return biedata1, biedata2
|
||||
|
||||
def getYuQingCharDataThree():
|
||||
|
||||
+58
-8
@@ -8,12 +8,61 @@ from utils.getEchartsData import *
|
||||
from utils.getTopicPageData import *
|
||||
from utils.yuqingpredict import *
|
||||
from utils.logger import app_logger as logging
|
||||
import torch
|
||||
from model_pro.MHA import MultiHeadAttentionLayer
|
||||
from model_pro.classifier import FinalClassifier
|
||||
from model_pro.BERT_CTM import BERT_CTM_Model
|
||||
|
||||
pb = Blueprint('page',
|
||||
__name__,
|
||||
url_prefix='/page',
|
||||
template_folder='templates')
|
||||
|
||||
# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the models once at import time (module-level globals) so they are not
# reloaded on every request.
model_save_path = 'model_pro/final_model.pt'
bert_model_path = 'model_pro/bert_model'
ctm_tokenizer_path = 'model_pro/sentence_bert_model'

# Pre-bind the model names so that a failed load leaves them defined (as None)
# instead of missing from the module namespace.
classifier_model = None
attention_model = None
bert_ctm_model = None

try:
    # The loaded object is used directly as a model (``.eval()`` is called on
    # it), so it must be unpickled in full. PyTorch >= 2.6 defaults to
    # ``weights_only=True``, which rejects such checkpoints; request the full
    # unpickle explicitly.
    # SECURITY: a full unpickle can execute arbitrary code embedded in the
    # file -- only load checkpoints from a trusted source.
    classifier_model = torch.load(
        model_save_path, map_location=device, weights_only=False
    )
    classifier_model.eval()

    # NOTE(review): the attention layer is constructed here but no saved
    # weights are loaded into it in this block -- confirm whether trained
    # attention weights should be restored before inference.
    attention_model = MultiHeadAttentionLayer(embed_size=768, num_heads=8)
    attention_model.to(device)
    attention_model.eval()

    bert_ctm_model = BERT_CTM_Model(
        bert_model_path=bert_model_path,
        ctm_tokenizer_path=ctm_tokenizer_path
    )
except Exception as e:
    # Best effort: report the failure and keep the module importable.
    print(f"模型加载失败: {e}")
|
||||
|
||||
def predict_sentiment(text):
    """Predict the sentiment of a single text with the improved model.

    Args:
        text: The string to classify.

    Returns:
        A ``(label, confidence)`` tuple, where ``label`` is the predicted
        class index and ``confidence`` is the softmax probability of that
        class. Returns ``(None, None)`` when ``text`` is missing or blank, or
        when any prediction step raises.
    """
    # A missing or blank text (e.g. an absent query parameter) has nothing to
    # classify; report failure without touching the models.
    if not isinstance(text, str) or not text.strip():
        return None, None

    try:
        # Obtain the embedding for a one-element batch.
        embeddings = bert_ctm_model.get_bert_embeddings([text])

        # Convert to a float tensor on the target device.
        batch_x = torch.tensor(embeddings, dtype=torch.float32).to(device)
        # assumes embeddings are (batch, seq, dim), so this averages over the
        # sequence axis -- TODO confirm against BERT_CTM_Model
        batch_x = torch.mean(batch_x, dim=1)

        with torch.no_grad():
            # Self-attention: the same tensor is used as query, key and value.
            attention_output = attention_model(batch_x, batch_x, batch_x)
            # Classify, then average the classifier output over dim 1.
            outputs = classifier_model(attention_output)
            outputs = torch.mean(outputs, dim=1)
            # Class probabilities and the index of the largest score.
            probabilities = torch.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)

        # Extract the scalar label once and reuse it to index its probability.
        label = predicted.item()
        return label, probabilities[0][label].item()
    except Exception as e:
        # Best effort: report the failure and signal it as (None, None).
        print(f"预测过程中出现错误: {e}")
        return None, None
|
||||
|
||||
@pb.route('/home')
|
||||
def home():
|
||||
@@ -172,14 +221,15 @@ def yuqingpredict():
|
||||
defaultTopic = request.args.get('Topic')
|
||||
TopicLen = getTopicLen(defaultTopic)
|
||||
X, Y = getTopicCreatedAtandpredictData(defaultTopic)
|
||||
sentences = ''
|
||||
value = SnowNLP(defaultTopic).sentiments
|
||||
if value == 0.5:
|
||||
sentences = '中性'
|
||||
elif value > 0.5:
|
||||
sentences = '正面'
|
||||
elif value < 0.5:
|
||||
sentences = '负面'
|
||||
|
||||
# 使用改进版模型进行情感预测
|
||||
predicted_label, confidence = predict_sentiment(defaultTopic)
|
||||
if predicted_label is not None:
|
||||
sentences = '良好' if predicted_label == 0 else '不良'
|
||||
sentences = f"{sentences} (置信度: {confidence:.2f})"
|
||||
else:
|
||||
sentences = '预测失败'
|
||||
|
||||
comments = getCommentFilterDataTopic(defaultTopic)
|
||||
return render_template('yuqingpredict.html',
|
||||
username=username,
|
||||
|
||||
Reference in New Issue
Block a user