Commit 924968e1 authored by schaper's avatar schaper
Browse files

Fix get_preds function

parent 5bd3787a
Loading
Loading
Loading
Loading
+56 −89
Original line number Diff line number Diff line
@@ -14,9 +14,9 @@ class SBERT_Model:
        self.filepath = filepath
        self.dataset = dataset
        self.model = SentenceTransformer(filepath)
        self.sentences1 = dataset.head(100)['sentence1'].tolist()
        self.sentences2 = dataset.head(100)['sentence2'].tolist()
        self.labels = dataset.head(100)['label'].tolist()
        self.sentences1 = dataset['sentence1'].tolist()
        self.sentences2 = dataset['sentence2'].tolist()
        self.labels = dataset['label'].tolist()
        self.embeddings1 = self.get_embeddings(self.sentences1)
        self.embeddings2 = self.get_embeddings(self.sentences2)
        self.cosine_scores = self.get_cosine_scores()
@@ -32,7 +32,7 @@ class SBERT_Model:
        """
        preds = []
        for i in range(len(self.cosine_scores[0])):
            preds.append(self.cosine_scores[i][i])
            preds.append(float(self.cosine_scores[i][i]))
        return preds

    def get_cosine_scores(self):
@@ -52,94 +52,61 @@ class SBERT_Model:


test_dataset = load_dataset('paws', 'labeled_final', split='test')
# test_dataset.set_format(type='pandas')
# test_dataset = test_dataset[:]
#
# zero_model = SBERT_Model("Zero Model", 'models/nli-bert-large/', test_dataset)
# ft_model = SBERT_Model("Fine-tuned Model", 'models/stsb-bert-large/', test_dataset)
#
# zero_model.print_statistics()
# ft_model.print_statistics()

# Here, we fine-tune our stsb-bert-large model on PAWS

# # Check if the dataset exists. If not, download and extract it
# sts_dataset_path = 'datasets/stsbenchmark.tsv.gz'
#
# if not os.path.exists(sts_dataset_path):
#     util.http_get('https://sbert.net/datasets/stsbenchmark.tsv.gz', sts_dataset_path)


# Training configuration.
# NOTE(review): in this excerpt model_name is only used to build the output
# directory name below; the checkpoint that is actually loaded and trained is
# 'models/stsb-bert-large' — confirm the name in the path is intended.
model_name = 'bert-base-nli-mean-tokens'
train_batch_size = 128
num_epochs = 1
# Output directory is timestamped so repeated runs do not overwrite each other.
model_save_path = 'output/training_paws_continue_training-'+model_name+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Switch the PAWS test split to pandas output and take a full slice of it.
# Presumably this yields a DataFrame, which is what SBERT_Model indexes by
# column ('sentence1', 'sentence2', 'label') — TODO confirm.
test_dataset.set_format(type='pandas')
test_dataset = test_dataset[:]

# # Load a pre-trained sentence transformer model
# Base SentenceTransformer that is handed to the loss and to fit() further down.
model = SentenceTransformer('models/stsb-bert-large')
# Wrap two local checkpoints over the test set. SBERT_Model.__init__ loads the
# checkpoint and computes embeddings and cosine scores for the whole dataset
# at construction time, so each of these lines does the full encoding pass.
zero_model = SBERT_Model("Zero Model", 'models/nli-bert-large/', test_dataset)
ft_model = SBERT_Model("Fine-tuned Model", 'models/stsb-bert-large/', test_dataset)
# paws_ft_model = SBERT_Model("Fine-tuned on PAWS Model", "models/paws-stsb-bert-large", test_dataset)

# # Convert the dataset to a DataLoader ready for training
# logging.info("Read STSbenchmark train dataset")
# Report both baselines (print_statistics is defined outside this excerpt).
zero_model.print_statistics()
ft_model.print_statistics()
# paws_ft_model.print_statistics()

# Accumulators for the InputExample objects built from the PAWS train and
# validation splits below.
train_samples = []
dev_samples = []
# with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
#     reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
#     for row in reader:
#         score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1
#         inp_example = InputExample(texts=[row['sentence1'], row['sentence2']], label=score)
# # Here, we fine-tune our stsb-bert-large model on PAWS
#
#         if row['split'] == 'dev':
#             dev_samples.append(inp_example)
#         elif row['split'] == 'test':
#             test_samples.append(inp_example)
#         else:
#             train_samples.append(inp_example)

# Build training examples from the PAWS 'labeled_final' train split.
train_dataset = load_dataset('paws', 'labeled_final', split='train')
train_dataset.set_format(type='pandas')
train_dataset = train_dataset[:]
for index, row in train_dataset.iterrows():
    # The label is wrapped in a one-element float tensor here, unlike the
    # validation loop below, which passes the raw value.
    # NOTE(review): presumably so the loss receives float targets — confirm.
    x = torch.FloatTensor([row['label']])
    train_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=x))

# Build validation examples from the PAWS 'labeled_final' validation split.
dev_dataset = load_dataset('paws', 'labeled_final', split='validation')
dev_dataset.set_format(type='pandas')
dev_dataset = dev_dataset[:]


for index, row in dev_dataset.iterrows():
    # Raw label value is used as the gold score for the evaluator.
    dev_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=row['label']))


# Shuffled batches of the training examples, paired with a cosine-similarity
# loss bound to the model being fine-tuned.
train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=train_batch_size)
train_loss = losses.CosineSimilarityLoss(model=model)


# Development set: Measure correlation between cosine score and gold labels
evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='paws-dev')


# Configure the training. The evaluator above is passed to fit() below and is
# run every `evaluation_steps` training steps.
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 10% of total training steps for warm-up


# Train the model; the result is written to model_save_path.
model.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=num_epochs,
          evaluation_steps=1000,
          warmup_steps=warmup_steps,
          output_path=model_save_path)


##############################################################################
# # Read the dataset
# model_name = 'bert-base-nli-mean-tokens'
# train_batch_size = 128
# num_epochs = 1
# model_save_path = 'output/training_paws_continue_training-'+model_name+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
#
# Load the stored model and evaluate its performance on the PAWS test set
# # # Load a pre-trained sentence transformer model
# model = SentenceTransformer('models/stsb-bert-large')
#
##############################################################################

# Evaluate the checkpoint written to model_save_path on the PAWS test set.
# It is bound to its own name only, so ft_model keeps referring to the
# 'models/stsb-bert-large/' wrapper instead of being silently rebound to the
# PAWS-trained model.
paws_ft_model = SBERT_Model("PAWS fine-tuned Model", model_save_path, test_dataset)
paws_ft_model.print_statistics()
# # # Convert the dataset to a DataLoader ready for training
# train_samples = []
# dev_samples = []
#
# train_dataset = load_dataset('paws', 'labeled_final', split='train')
# train_dataset.set_format(type='pandas')
# train_dataset = train_dataset[:]
# for index, row in train_dataset.iterrows():
#     x = torch.FloatTensor([row['label']])
#     train_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=x))
#
# dev_dataset = load_dataset('paws', 'labeled_final', split='validation')
# dev_dataset.set_format(type='pandas')
# dev_dataset = dev_dataset[:]
#
#
# for index, row in dev_dataset.iterrows():
#     dev_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=row['label']))
#
#
# train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=train_batch_size)
# train_loss = losses.CosineSimilarityLoss(model=model)
#
# # Development set: Measure correlation between cosine score and gold labels
# evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='paws-dev')
#
# # Configure the training; the evaluator is run every `evaluation_steps` steps during fit()
# warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 10% of train data for warm-up
#
# # Train the model
# model.fit(train_objectives=[(train_dataloader, train_loss)],
#           evaluator=evaluator,
#           epochs=num_epochs,
#           evaluation_steps=1000,
#           warmup_steps=warmup_steps,
#           output_path=model_save_path)