A new microblog sentiment recognition model has been added, based on the fine-tuning of GPT2, but it has not yet been adapted to the system.

This commit is contained in:
戒酒的李白
2025-05-18 22:07:18 +08:00
parent d24b47acfc
commit ed23ab1908
11 changed files with 120928 additions and 0 deletions
+20
View File
@@ -0,0 +1,20 @@
import torch
import torch.nn as nn
class AdapterLayer(nn.Module):
    """Bottleneck adapter with a residual connection.

    The input is projected down to ``adapter_size`` features, passed through
    a ReLU, projected back up to ``input_size`` features and added to the
    original input, so the output has the same shape as the input.

    Args:
        input_size: Size of the last dimension of the input (and output).
        adapter_size: Size of the bottleneck between the two linear layers.
    """

    def __init__(self, input_size, adapter_size):
        super().__init__()
        # Down-projection into the bottleneck.
        self.down_project = nn.Linear(input_size, adapter_size)
        # Non-linearity applied inside the bottleneck.
        self.relu = nn.ReLU()
        # Up-projection back to the input width.
        self.up_project = nn.Linear(adapter_size, input_size)

    def forward(self, x):
        """Return ``x`` plus the adapter's transformation of ``x``."""
        bottleneck = self.relu(self.down_project(x))
        # The up-projection must consume the activated bottleneck, not the
        # raw input: its in_features is adapter_size, not input_size.
        return x + self.up_project(bottleneck)
+11
View File
@@ -0,0 +1,11 @@
一种Adapter-tuning的实现方式,只提供思路,具体可以视情况稍微修改。
这里补充一些模型层数:
GPT-2 Small:12个GPT2Block,约有1.17亿个参数。
GPT-2 Medium:24个GPT2Block,约有3.48亿个参数。
GPT-2 Large:36个GPT2Block,约有7.55亿个参数。
GPT-2 XL(也称为Extra Large):48个GPT2Block,约有15.54亿个参数。
RoBERTa Base:12个RobertaLayer,总共约有1.25亿个参数。
RoBERTa Large:24个RobertaLayer,总共约有3.55亿个参数。
+22
View File
@@ -0,0 +1,22 @@
from transformers.models.roberta.modeling_roberta import RobertaLayer
class RobertaLayerWithAdapter(RobertaLayer):
    """``RobertaLayer`` whose first output is post-processed by an adapter.

    NOTE(review): ``AdapterLayer`` is referenced but not imported in this
    module -- confirm it is brought into scope before use.

    Args:
        config: Model configuration; ``config.hidden_size`` sets the adapter
            input/output width.
        *args: Extra positional arguments forwarded to ``RobertaLayer``.
        adapter_size: Bottleneck width of the adapter (defaults to 64, the
            value that was previously hard-coded).
        **kwargs: Extra keyword arguments forwarded to ``RobertaLayer``.
    """

    def __init__(self, config, *args, adapter_size=64, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.adapter = AdapterLayer(config.hidden_size, adapter_size)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_value=None,
        output_attentions=False,
    ):
        """Run the parent layer, then pass its first output through the adapter.

        Returns:
            A tuple whose first element is the adapted output; any remaining
            elements returned by the parent layer are passed through as-is.
        """
        # Run the unmodified transformer layer first.
        layer_outputs = super().forward(
            hidden_states,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            past_key_value,
            output_attentions,
        )
        # Only the first element is transformed; the rest is untouched.
        adapted = self.adapter(layer_outputs[0])
        return (adapted,) + layer_outputs[1:]
"""
RoBERTa的每个RobertaLayer包含一个自注意力(self-attention)机制和一个前馈网络,这些层共同构成了RoBERTa的基础架构。
"""
+40
View File
@@ -0,0 +1,40 @@
from transformers.models.gpt2.modeling_gpt2 import GPT2Block
class GPT2BlockWithAdapter(GPT2Block):
    """``GPT2Block`` whose first output is post-processed by an adapter.

    NOTE(review): ``AdapterLayer`` is referenced but not imported in this
    module -- confirm it is brought into scope before use.

    Args:
        config: Model configuration; ``config.n_embd`` sets the adapter
            input/output width.
        *args: Extra positional arguments forwarded to ``GPT2Block``.
        adapter_size: Bottleneck width of the adapter (defaults to 64, the
            value that was previously hard-coded).
        **kwargs: Extra keyword arguments forwarded to ``GPT2Block``.
    """

    def __init__(self, config, *args, adapter_size=64, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.adapter = AdapterLayer(config.n_embd, adapter_size)

    def forward(
        self,
        hidden_states,
        layer_past=None,
        attention_mask=None,
        head_mask=None,
        use_cache=False,
        output_attentions=False,
        **kwargs,
    ):
        """Run the parent block, then pass its first output through the adapter.

        Any further keyword arguments (for example the cross-attention inputs
        a GPT-2 model may pass to its blocks) are forwarded unchanged to the
        parent ``forward`` instead of raising ``TypeError`` here.

        Returns:
            A tuple whose first element is the adapted output; any remaining
            elements returned by the parent block are passed through as-is.
        """
        # Run the unmodified transformer block first.
        block_outputs = super().forward(
            hidden_states,
            layer_past=layer_past,
            attention_mask=attention_mask,
            head_mask=head_mask,
            use_cache=use_cache,
            output_attentions=output_attentions,
            **kwargs,
        )
        # The first element is the block's output; only it is adapted.
        adapted = self.adapter(block_outputs[0])
        return (adapted,) + block_outputs[1:]
"""
每个GPT2Block包含了一系列的自注意力(Self-Attention)和前馈网络(Feed-Forward)层,这些层共同构成了模型的基础架构。
"""