Commit 63850d7d authored by schaper's avatar schaper
Browse files

Add evaluation and first steps of fine-tuning on paws

parent 2157877f
Loading
Loading
Loading
Loading
+96 −8
Original line number Diff line number Diff line
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
from sentence_transformers import SentenceTransformer, util, losses, InputExample
from scipy.stats import spearmanr, pearsonr
from sklearn import metrics
from datetime import datetime
from torch.utils.data import DataLoader
import math
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
import torch

class SBERT_Model:
    def __init__(self, name, filepath, dataset):
@@ -46,12 +51,95 @@ class SBERT_Model:
        print(f"{self.name}: MSE:{self.get_MSE()}; Pearson:{self.get_pearson()}; Spearman:{self.get_spearman()}")


# Load the labelled PAWS test split and switch it to pandas output, so the
# full slice below hands the evaluation wrapper a single DataFrame.
dataset = load_dataset('paws', 'labeled_final', split='test')
dataset.set_format(type='pandas')
dataset = dataset[:]
# The same split loaded again under a second name; it is passed to the wrapper
# for the PAWS fine-tuned model at the end of the script.
# NOTE(review): unlike `dataset`, this object is never switched to pandas format
# (the conversion below is commented out) -- confirm SBERT_Model accepts both.
test_dataset = load_dataset('paws', 'labeled_final', split='test')
# test_dataset.set_format(type='pandas')
# test_dataset = test_dataset[:]
#
# zero_model = SBERT_Model("Zero Model", 'models/nli-bert-large/', test_dataset)
# ft_model = SBERT_Model("Fine-tuned Model", 'models/stsb-bert-large/', test_dataset)
#
# zero_model.print_statistics()
# ft_model.print_statistics()

# Wrap the two locally stored checkpoints (NLI-only and STS-b) around the PAWS
# test DataFrame so their statistics can be compared before any PAWS training.
zero_model = SBERT_Model("Zero Model", 'models/nli-bert-large/', dataset)
ft_model = SBERT_Model("Fine-tuned Model", 'models/stsb-bert-large/', dataset)
# Here, we fine-tune our stsb-bert-large model on PAWS

# Print MSE, Pearson and Spearman for each wrapped checkpoint.
zero_model.print_statistics()
ft_model.print_statistics()
# # Check if the dataset exists. If not, download and extract it
# sts_dataset_path = 'datasets/stsbenchmark.tsv.gz'
#
# if not os.path.exists(sts_dataset_path):
#     util.http_get('https://sbert.net/datasets/stsbenchmark.tsv.gz', sts_dataset_path)


# Training configuration
# NOTE(review): in the visible script model_name is only used to build the
# output directory name, while the checkpoint actually fine-tuned below is
# 'models/stsb-bert-large' -- confirm the 'bert-base-nli-mean-tokens' label
# is intentional.
model_name = 'bert-base-nli-mean-tokens'
train_batch_size = 128
num_epochs = 1
# The timestamp suffix gives each run its own output directory.
model_save_path = 'output/training_paws_continue_training-'+model_name+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


# Load the pre-trained sentence transformer model that will be fine-tuned
model = SentenceTransformer('models/stsb-bert-large')

# # Convert the dataset to a DataLoader ready for training
# logging.info("Read STSbenchmark train dataset")

train_samples = []
dev_samples = []
# with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
#     reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
#     for row in reader:
#         score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1
#         inp_example = InputExample(texts=[row['sentence1'], row['sentence2']], label=score)
#
#         if row['split'] == 'dev':
#             dev_samples.append(inp_example)
#         elif row['split'] == 'test':
#             test_samples.append(inp_example)
#         else:
#             train_samples.append(inp_example)

# Build the training and development examples from the PAWS 'labeled_final'
# configuration. Each example is a sentence pair plus its gold label.
#
# The label is cast to float: CosineSimilarityLoss compares the cosine score of
# the two embeddings with the label via a regression (MSE) objective, which
# needs a floating-point target, whereas the value read from the DataFrame is
# an integer class id. Columns are iterated directly instead of using
# DataFrame.iterrows(), which builds a Series per row and is much slower.
train_dataset = load_dataset('paws', 'labeled_final', split='train')
train_dataset.set_format(type='pandas')
train_dataset = train_dataset[:]  # full slice in pandas format -> DataFrame

train_samples.extend(
    InputExample(texts=[first, second], label=float(gold))
    for first, second, gold in zip(
        train_dataset['sentence1'],
        train_dataset['sentence2'],
        train_dataset['label'],
    )
)

dev_dataset = load_dataset('paws', 'labeled_final', split='validation')
dev_dataset.set_format(type='pandas')
dev_dataset = dev_dataset[:]  # full slice in pandas format -> DataFrame

dev_samples.extend(
    InputExample(texts=[first, second], label=float(gold))
    for first, second, gold in zip(
        dev_dataset['sentence1'],
        dev_dataset['sentence2'],
        dev_dataset['label'],
    )
)


# Batch the training pairs (reshuffled by the DataLoader) and train with a loss
# on the cosine similarity of the two sentence embeddings against the label.
train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=train_batch_size)
train_loss = losses.CosineSimilarityLoss(model=model)


# Development set: Measure correlation between cosine score and gold labels
evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='paws-dev')


# Configure the training. The dev-set evaluator above is passed to fit() below.
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 10% of the total training steps (batches * epochs) for warm-up


# Train the model and write the result to model_save_path
model.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=num_epochs,
          evaluation_steps=1000,
          warmup_steps=warmup_steps,
          output_path=model_save_path)


##############################################################################
#
# Load the stored model and evaluate its performance on the PAWS test split
#
##############################################################################

# Wrap the checkpoint stored at model_save_path and print its MSE, Pearson and
# Spearman statistics for the PAWS test split.
# NOTE(review): the chained assignment also rebinds `ft_model`, which earlier
# referred to the 'models/stsb-bert-large/' wrapper -- looks like a copy/paste
# leftover; confirm before relying on `ft_model` after this line.
paws_ft_model = ft_model = SBERT_Model("PAWS fine-tuned Model", model_save_path, test_dataset)
paws_ft_model.print_statistics()