Implemented Roberta Model #65
base: main
Changes from all commits
cb11ef2
7c9b117
429176b
60c4efb
7c93334
35bbd73
@@ -23,4 +23,8 @@

```python
from . import bert
from .bert import *

from . import roberta
from .roberta import *

from .vit import *
```
@@ -0,0 +1 @@

```python
from .roberta import RobertaModel, roberta_base
```
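Taken together, these two hunks re-export the new model from both the subpackage and the package root, so either import path below should work (a small illustrative check, not part of the diff):

```python
# Both import paths are expected to work after this change;
# the test added in this PR uses the first form.
from ivy_models import roberta_base
from ivy_models.roberta import RobertaModel
```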
@@ -0,0 +1,55 @@

```python
import ivy
from ivy_models.bert.layers import BertEmbedding


class RobertaEmbeddings(BertEmbedding):
    """Same as the BERT embedding, with a small change in the positional indexing."""

    def __init__(
        self,
        vocab_size,
        hidden_size,
        max_position_embeddings,
        type_vocab_size=1,
        pad_token_id=None,
        embd_drop_rate=0.1,
        layer_norm_eps=1e-5,
        position_embedding_type="absolute",
        v=None,
    ):
        super(RobertaEmbeddings, self).__init__(
            vocab_size,
            hidden_size,
            max_position_embeddings,
            type_vocab_size,
            pad_token_id,
            embd_drop_rate,
            layer_norm_eps,
            position_embedding_type,
            v,
        )
        # RoBERTa reserves index 1 for padding, so real positions start at 2.
        self.padding_idx = 1

    def _forward(
        self,
        input_ids,
        token_type_ids=None,
        position_ids=None,
        past_key_values_length: int = 0,
    ):
        input_shape = input_ids.shape
        seq_length = input_shape[1]

        if position_ids is None:
            # Positions are offset past the padding index: [2, ..., seq_length + 1].
            position_ids = ivy.expand_dims(
                ivy.arange(self.padding_idx + 1, seq_length + self.padding_idx + 1),
                axis=0,
            )
            position_ids = position_ids[
                :, past_key_values_length : seq_length + past_key_values_length
            ]
        return super(RobertaEmbeddings, self)._forward(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            past_key_values_length=past_key_values_length,
        )
```
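The "small change" mentioned in the docstring is that RoBERTa indexes its learned position embeddings starting after the padding index (1), so the first real position id is 2 rather than 0; this is also why `roberta_base` below configures 514 position embeddings for 512-token inputs. A minimal sketch of the offset, assuming any installed ivy backend (NumPy here):

```python
# Illustrative only: contrasts BERT-style and RoBERTa-style position ids.
import ivy

ivy.set_backend("numpy")

padding_idx = 1  # RoBERTa's <pad> token id; <s> is 0
seq_length = 5

# BERT-style absolute positions: 0 .. seq_length - 1
bert_positions = ivy.arange(0, seq_length)

# RoBERTa-style positions start past the padding index: 2 .. seq_length + 1
roberta_positions = ivy.arange(padding_idx + 1, seq_length + padding_idx + 1)

print(ivy.to_numpy(bert_positions))     # [0 1 2 3 4]
print(ivy.to_numpy(roberta_positions))  # [2 3 4 5 6]
```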
@@ -0,0 +1,91 @@

```python
from ivy_models.helpers import load_transformers_weights
from ivy_models.bert import BertConfig, BertModel
from .layers import RobertaEmbeddings


class RobertaModel(BertModel):
    def __init__(self, config: BertConfig, pooler_out=False):
        super(RobertaModel, self).__init__(config, pooler_out=pooler_out)

    @classmethod
    def get_spec_class(cls):
        return BertConfig

    def _build(self, *args, **kwargs):
        # Swap in the RoBERTa-style embeddings before the rest of the BERT stack is built.
        self.embeddings = RobertaEmbeddings(**self.config.get_embd_attrs())
        super(RobertaModel, self)._build(*args, **kwargs)

    def _forward(
        self,
        input_ids,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_values=None,
        use_cache=None,
        output_attentions=None,
    ):
        # RoBERTa sequences must start with the <s> token, whose id is 0.
        if input_ids[:, 0].sum().item() != 0:
            print("NOT ALLOWED")
        return super(RobertaModel, self)._forward(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
        )


def _roberta_weights_mapping(name):
    # Map the flattened ivy parameter names back to the Hugging Face checkpoint keys.
    key_map = [(f"__v{i}__", f"__{i}__") for i in range(12)]
    key_map = key_map + [
        ("attention__dense", "attention.output.dense"),
        ("attention__LayerNorm", "attention.output.LayerNorm"),
    ]
    key_map = key_map + [
        ("ffd__dense1", "intermediate.dense"),
        ("ffd__dense2", "output.dense"),
        ("ffd__LayerNorm", "output.LayerNorm"),
    ]
    name = name.replace("__w", ".weight").replace("__b", ".bias")
    # Undo accidental replacements inside longer names such as "__word_embeddings".
    name = (
        name.replace("biasias", "bias")
        .replace("weighteight", "weight")
        .replace(".weightord", ".word")
    )
    for ref, new in key_map:
        name = name.replace(ref, new)
    name = name.replace("__", ".")
    return name


def roberta_base(pretrained=True):
    # Instantiate the hyperparameters the same as BERT base, with RoBERTa's
    # vocabulary and position sizes.
    # Set the dropout rates to 0.0 to avoid stochasticity in the output.
    config = BertConfig(
        vocab_size=50265,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout=0.0,
        attn_drop_rate=0.0,
        max_position_embeddings=514,
        type_vocab_size=1,
    )

    model = RobertaModel(config, pooler_out=True)
    if pretrained:
        w_clean = load_transformers_weights(
            "roberta-base", model, _roberta_weights_mapping
        )
        model.v = w_clean
    return model
```
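To make the key renaming concrete, here is a short, hypothetical trace through `_roberta_weights_mapping`. The flattened parameter names used as inputs are illustrative assumptions about the ivy variable layout, not values taken from this diff, and the import path assumes the module is importable as `ivy_models.roberta.roberta`, as implied by the subpackage `__init__` above:

```python
# Hypothetical examples of flattened ivy parameter names being mapped back
# to Hugging Face checkpoint keys.
from ivy_models.roberta.roberta import _roberta_weights_mapping

print(_roberta_weights_mapping("embeddings__word_embeddings__w"))
# embeddings.word_embeddings.weight

print(_roberta_weights_mapping("encoder__layer__v0__attention__dense__b"))
# encoder.layer.0.attention.output.dense.bias
```

Calling `roberta_base(pretrained=False)` builds the same architecture without downloading the Hugging Face checkpoint, which is handy when only the model structure or the name mapping is being exercised.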
@@ -0,0 +1,26 @@

```python
import os
import ivy
import pytest
import numpy as np
from ivy_models import roberta_base


@pytest.mark.parametrize("batch_shape", [[1]])
@pytest.mark.parametrize("load_weights", [False, True])
def test_roberta(device, fw, batch_shape, load_weights):
    """Test RoBERTa Base Sequence Classification."""

    num_dims = 768
    this_dir = os.path.dirname(os.path.realpath(__file__))
    input_path = os.path.join(this_dir, "roberta_inputs.npy")
    inputs = np.load(input_path, allow_pickle=True).tolist()
    model = roberta_base(load_weights)

    inputs = {k: ivy.asarray(v) for k, v in inputs.items()}
    logits = model(**inputs)["pooler_output"]
    assert logits.shape == tuple([ivy.to_scalar(batch_shape), num_dims])

    if load_weights:
        ref_logits_path = os.path.join(this_dir, "roberta_pooled_output.npy")
        ref_logits = np.load(ref_logits_path)
        assert np.allclose(ref_logits, ivy.to_numpy(logits), rtol=0.005, atol=0.005)
```

Review comment (left on the import lines): could you please quickly ref
Review comment: the value tests are unfortunately not passing when I checked, could you please verify this again?

Review comment: should be `to_scalar` instead of `item`
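Regarding the failing value check above: since the test compares against saved fixtures, one way to re-verify the reference values is to regenerate them directly from the Hugging Face implementation. The sketch below is not part of this PR; it assumes the `transformers` and `torch` packages are installed, uses a placeholder sentence, and writes files in the format the test expects, so it should not blindly overwrite the fixtures actually used here:

```python
# Hypothetical fixture-regeneration script (not part of this PR).
import numpy as np
import torch
from transformers import RobertaModel, RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("roberta-base")
model.eval()

# Placeholder input; the real fixtures were presumably built from a specific sentence.
encoded = tokenizer("Hello RoBERTa!", return_tensors="pt")

with torch.no_grad():
    outputs = model(**encoded)

# The test loads the inputs with np.load(..., allow_pickle=True).tolist(),
# i.e. a pickled dict of arrays keyed by the model's keyword arguments.
inputs = {k: v.numpy() for k, v in encoded.items()}
np.save("roberta_inputs.npy", inputs, allow_pickle=True)

# Reference pooled output for the np.allclose comparison.
np.save("roberta_pooled_output.npy", outputs.pooler_output.numpy())
```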