Commit 5b0b57c9 authored by schaper's avatar schaper
Browse files

Rename paws.py to paws_evaluation.py

parent f29a0a6f
Loading
Loading
Loading
Loading

paws.py

deleted 100644 → 0
+0 −113
Original line number Diff line number Diff line
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util, losses, InputExample
from scipy.stats import spearmanr, pearsonr
from sklearn import metrics
from datetime import datetime
from torch.utils.data import DataLoader
import math
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
import torch
import probing

class SBERT_Model:
    """Evaluate a SentenceTransformer model on a paired-sentence dataset.

    The dataset is expected to be a pandas-style frame with columns
    'sentence1', 'sentence2' and 'label'. On construction, both sentence
    columns are embedded and the cosine similarity of each pair is computed.
    """

    def __init__(self, name, filepath, dataset):
        self.name = name
        self.filepath = filepath
        self.dataset = dataset
        self.model = SentenceTransformer(filepath)
        self.sentences1 = dataset['sentence1'].tolist()
        self.sentences2 = dataset['sentence2'].tolist()
        self.labels = dataset['label'].tolist()
        self.embeddings1 = self.get_embeddings(self.sentences1)
        self.embeddings2 = self.get_embeddings(self.sentences2)
        # Full n x n similarity matrix between the two sentence lists;
        # only its diagonal (the actual pairs) is used by get_preds().
        self.cosine_scores = self.get_cosine_scores()
        self.preds = self.get_preds()

    def get_embeddings(self, sentences):
        """Encode a list of sentences into an (n, dim) embedding tensor."""
        return self.model.encode(sentences, convert_to_tensor=True)

    def get_preds(self):
        """Return the cosine similarity of each (sentence1[i], sentence2[i]) pair.

        cosine_scores holds similarities between ALL cross-list sentence
        combinations; the paired scores sit on the matrix diagonal.
        """
        # torch.diagonal extracts the pair scores in one C-level call,
        # replacing the original per-element Python indexing loop;
        # .tolist() converts them to plain Python floats as before.
        return torch.diagonal(self.cosine_scores).tolist()

    def get_cosine_scores(self):
        """Return the n x n cosine-similarity matrix of the two embedding sets."""
        return util.pytorch_cos_sim(self.embeddings1, self.embeddings2)

    def get_pearson(self):
        """Pearson correlation between gold labels and predicted similarities."""
        return pearsonr(self.labels, self.preds)[0]

    def get_spearman(self):
        """Spearman rank correlation between gold labels and predicted similarities."""
        return spearmanr(self.labels, self.preds)[0]

    def get_MSE(self):
        """Mean squared error between gold labels and predicted similarities."""
        return metrics.mean_squared_error(self.labels, self.preds)

    def print_statistics(self):
        """Print MSE, Pearson and Spearman for this model on one line."""
        print(f"{self.name}: MSE:{self.get_MSE()}; Pearson:{self.get_pearson()}; Spearman:{self.get_spearman()}")


# Evaluate the models on the PAWS "labeled_final" test split.
# The dataset is materialized as a pandas frame so SBERT_Model can read
# its columns with .tolist().
test_dataset = load_dataset('paws', 'labeled_final', split='test')
test_dataset.set_format(type='pandas')
test_dataset = test_dataset[:]

zero_model = SBERT_Model("Zero Model", 'models/nli-bert-large/', test_dataset)
ft_model = SBERT_Model("Fine-tuned Model", 'models/stsb-bert-large/', test_dataset)
# paws_ft_model = SBERT_Model("Fine-tuned on PAWS Model", "models/paws-stsb-bert-large", test_dataset)

# Report MSE / Pearson / Spearman for each evaluated model.
for evaluated_model in (zero_model, ft_model):
    evaluated_model.print_statistics()
# paws_ft_model.print_statistics()

# # Here, we fine-tune our stsb-bert-large model on PAWS
#
# # Read the dataset
# model_name = 'bert-base-nli-mean-tokens'
# train_batch_size = 128
# num_epochs = 1
# model_save_path = 'output/training_paws_continue_training-'+model_name+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
#
# # # Load a pre-trained sentence transformer model
# model = SentenceTransformer('models/stsb-bert-large')
#
# # # Convert the dataset to a DataLoader ready for training
# train_samples = []
# dev_samples = []
#
# train_dataset = load_dataset('paws', 'labeled_final', split='train')
# train_dataset.set_format(type='pandas')
# train_dataset = train_dataset[:]
# for index, row in train_dataset.iterrows():
#     x = torch.FloatTensor([row['label']])
#     train_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=x))
#
# dev_dataset = load_dataset('paws', 'labeled_final', split='validation')
# dev_dataset.set_format(type='pandas')
# dev_dataset = dev_dataset[:]
#
#
# for index, row in dev_dataset.iterrows():
#     dev_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=row['label']))
#
#
# train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=train_batch_size)
# train_loss = losses.CosineSimilarityLoss(model=model)
#
# # Development set: Measure correlation between cosine score and gold labels
# evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='paws-dev')
#
# # Configure the training. We skip evaluation in this example
# warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 10% of train data for warm-up
#
# # Train the model
# model.fit(train_objectives=[(train_dataloader, train_loss)],
#           evaluator=evaluator,
#           epochs=num_epochs,
#           evaluation_steps=1000,
#           warmup_steps=warmup_steps,
#           output_path=model_save_path)
+2 −1
Original line number Diff line number Diff line
from datasets import load_dataset
from SBERT_Model import SBERT_Model
# import probing

test_dataset = load_dataset('paws', 'labeled_final', split='test')
test_dataset.set_format(type='pandas')
@@ -15,3 +14,5 @@ ft_model.print_statistics()
paws_ft_model.print_statistics()

zero_model.visualize_preds()
ft_model.visualize_preds()
paws_ft_model.visualize_preds()