A new microblog sentiment recognition model has been added, based on the fine-tuning of GPT2, but it has not yet been adapted to the system.
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
class AdapterLayer(nn.Module):
    """Bottleneck adapter: down-projection, ReLU, up-projection, residual add.

    Args:
        input_size: Size of the last dimension of the tensors given to
            ``forward``; also the output size of the adapter.
        adapter_size: Width of the bottleneck between the two linear layers.
    """

    def __init__(self, input_size, adapter_size):
        super().__init__()
        # First fully connected layer: reduce the dimensionality.
        self.down_project = nn.Linear(input_size, adapter_size)
        # ReLU activation applied to the bottleneck representation.
        self.relu = nn.ReLU()
        # Second fully connected layer: restore the original dimensionality.
        self.up_project = nn.Linear(adapter_size, input_size)

    def forward(self, x):
        """Apply the adapter to ``x`` and add the result back onto ``x``.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            A tensor of the same shape as ``x``:
            ``x + up_project(relu(down_project(x)))``.
        """
        bottleneck = self.relu(self.down_project(x))
        # The up-projection must consume the activated bottleneck, not the raw
        # input: ``up_project`` expects ``adapter_size`` features, so feeding
        # it ``x`` skips the bottleneck entirely and fails on a shape mismatch
        # whenever ``input_size != adapter_size``.
        up_projected = self.up_project(bottleneck)
        # Residual connection: add the adapter output to its input.
        return x + up_projected
|
||||
@@ -0,0 +1,11 @@
|
||||
一种Adapter-tuning的实现方式,这里仅提供思路,具体实现可视情况适当修改。
|
||||
|
||||
|
||||
这里补充一些模型层数:
|
||||
GPT-2 Small:12个GPT2Block,约有1.17亿个参数。
|
||||
GPT-2 Medium:24个GPT2Block,约有3.48亿个参数。
|
||||
GPT-2 Large:36个GPT2Block,约有7.55亿个参数。
|
||||
GPT-2 XL (也称为Extra Large):48个GPT2Block,约有15.54亿个参数。
|
||||
|
||||
RoBERTa Base:12个RobertaLayer,总共约有1.25亿个参数。
|
||||
RoBERTa Large:24个RobertaLayer,总共约有3.55亿个参数。
|
||||
@@ -0,0 +1,22 @@
|
||||
from transformers.models.roberta.modeling_roberta import RobertaLayer
|
||||
|
||||
class RobertaLayerWithAdapter(RobertaLayer):
    """A ``RobertaLayer`` whose first output is passed through an ``AdapterLayer``."""

    def __init__(self, config):
        super().__init__(config)
        # The adapter bottleneck width is assumed to be 64.
        self.adapter = AdapterLayer(config.hidden_size, 64)

    def forward(self, hidden_states, attention_mask=None, head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, past_key_value=None, output_attentions=False):
        """Run the parent layer, then apply the adapter to its first output.

        All arguments are handed positionally, in the same order, to
        ``RobertaLayer.forward``.

        Returns:
            A tuple whose first element is the adapted first output of the
            parent layer; the remaining elements are the parent's other
            outputs, unchanged.
        """
        # Delegate to the original forward pass.
        layer_outputs = super().forward(
            hidden_states,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            past_key_value,
            output_attentions,
        )
        # Only the first output goes through the adapter.
        adapted = self.adapter(layer_outputs[0])
        # Re-assemble the tuple with the adapted tensor in front.
        return (adapted,) + layer_outputs[1:]
|
||||
|
||||
"""
|
||||
RoBERTa的每个RobertaLayer包含一个自注意力(self-attention)机制和一个前馈网络,这些层共同构成了RoBERTa的基础架构。
|
||||
"""
|
||||
@@ -0,0 +1,40 @@
|
||||
from transformers.models.gpt2.modeling_gpt2 import GPT2Block
|
||||
|
||||
class GPT2BlockWithAdapter(GPT2Block):
    """A ``GPT2Block`` whose first output is passed through an ``AdapterLayer``."""

    def __init__(self, config):
        super().__init__(config)
        # The adapter bottleneck width is assumed to be 64.
        adapter_size = 64
        self.adapter = AdapterLayer(config.n_embd, adapter_size)

    def forward(
        self,
        hidden_states,
        layer_past=None,
        attention_mask=None,
        head_mask=None,
        use_cache=False,
        output_attentions=False,
        **kwargs,
    ):
        """Run the parent block, then apply the adapter to its first output.

        Args:
            hidden_states: Input to the block, forwarded to ``GPT2Block.forward``.
            layer_past: Forwarded unchanged to the parent block.
            attention_mask: Forwarded unchanged to the parent block.
            head_mask: Forwarded unchanged to the parent block.
            use_cache: Forwarded unchanged to the parent block.
            output_attentions: Forwarded unchanged to the parent block.
            **kwargs: Any further keyword arguments the caller supplies, passed
                through untouched to the parent block. NOTE(review): the host
                GPT-2 model in transformers 4.x is expected to pass
                ``encoder_hidden_states`` / ``encoder_attention_mask`` here;
                confirm against the installed version.

        Returns:
            A tuple whose first element is the adapted first output of the
            parent block; the remaining elements are the parent's other
            outputs, unchanged.
        """
        # Delegate to the original forward pass. Extra keyword arguments are
        # passed through so this override does not raise ``TypeError`` when the
        # caller supplies arguments that are not named in this signature.
        block_outputs = super().forward(
            hidden_states,
            layer_past=layer_past,
            attention_mask=attention_mask,
            head_mask=head_mask,
            use_cache=use_cache,
            output_attentions=output_attentions,
            **kwargs,
        )
        # The first element is taken as the block's output hidden states
        # (presumably after both attention and feed-forward sub-layers, not
        # just the attention result; verify against the installed GPT2Block).
        adapted = self.adapter(block_outputs[0])
        # Return the adapted tensor, keeping the other outputs as they are.
        return (adapted,) + block_outputs[1:]
|
||||
"""
|
||||
每个GPT2Block包含了一系列的自注意力(Self-Attention)和前馈网络(Feed-Forward)层,这些层共同构成了模型的基础架构。
|
||||
|
||||
"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user