Loading probing.py +11 −5 Original line number Diff line number Diff line from STSB_Dataset import STSB_Dataset import pandas as pd import spacy import string import random df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe Loading Loading @@ -54,6 +55,11 @@ for sent2 in sentences2_nlp: dependency_hierarchy2.append(dep_hierarchy2) def get_random_string(length): letters = string.ascii_lowercase result_str = ''.join(random.choice(letters) for i in range(length)) return result_str def mask_token(pos_tag): Loading @@ -65,7 +71,7 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, "[MASK]") df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, get_random_string(random.randrange(1, 9))) break for e in range(len(dependency_hierarchy2)): Loading @@ -76,7 +82,7 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, "[MASK]") df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, get_random_string(random.randrange(1, 9))) break return df Loading @@ -94,7 +100,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], get_random_string(random.randrange(1, 9))) else: if i not in del_rows: del_rows.append(i) Loading @@ -107,7 +113,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], random.randrange(1, 9)) else: if i not in del_rows: del_rows.append(i) Loading Loading
probing.py +11 −5 Original line number Diff line number Diff line from STSB_Dataset import STSB_Dataset import pandas as pd import spacy import string import random df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe Loading Loading @@ -54,6 +55,11 @@ for sent2 in sentences2_nlp: dependency_hierarchy2.append(dep_hierarchy2) def get_random_string(length): letters = string.ascii_lowercase result_str = ''.join(random.choice(letters) for i in range(length)) return result_str def mask_token(pos_tag): Loading @@ -65,7 +71,7 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, "[MASK]") df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, get_random_string(random.randrange(1, 9))) break for e in range(len(dependency_hierarchy2)): Loading @@ -76,7 +82,7 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, "[MASK]") df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, get_random_string(random.randrange(1, 9))) break return df Loading @@ -94,7 +100,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], get_random_string(random.randrange(1, 9))) else: if i not in del_rows: del_rows.append(i) Loading @@ -107,7 +113,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], random.randrange(1, 9)) else: if i not in del_rows: del_rows.append(i) Loading