Loading probing.py +15 −4 Original line number Diff line number Diff line from STSB_Dataset import STSB_Dataset import pandas as pd import spacy import string import random df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe Loading Loading @@ -53,8 +55,15 @@ for sent2 in sentences2_nlp: dependency_hierarchy2.append(dep_hierarchy2) def get_random_string(length): letters = string.ascii_lowercase result_str = ''.join(random.choice(letters) for i in range(length)) return result_str def mask_token(pos_tag): df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe for e in range(len(dependency_hierarchy1)): mask_token = None for layer in dependency_hierarchy1[e]: Loading @@ -63,7 +72,7 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, "[MASK]") df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, get_random_string(random.randrange(1, 9))) break for e in range(len(dependency_hierarchy2)): Loading @@ -74,13 +83,15 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, "[MASK]") df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, get_random_string(random.randrange(1, 9))) break return df def mask_first(pos_tag): df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe i = -1 del_rows = [] Loading @@ -91,7 +102,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], get_random_string(random.randrange(1, 9))) else: if i not in del_rows: del_rows.append(i) Loading @@ -104,7 +115,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], random.randrange(1, 9)) else: if i not in del_rows: del_rows.append(i) Loading Loading
probing.py +15 −4 Original line number Diff line number Diff line from STSB_Dataset import STSB_Dataset import pandas as pd import spacy import string import random df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe Loading Loading @@ -53,8 +55,15 @@ for sent2 in sentences2_nlp: dependency_hierarchy2.append(dep_hierarchy2) def get_random_string(length): letters = string.ascii_lowercase result_str = ''.join(random.choice(letters) for i in range(length)) return result_str def mask_token(pos_tag): df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe for e in range(len(dependency_hierarchy1)): mask_token = None for layer in dependency_hierarchy1[e]: Loading @@ -63,7 +72,7 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, "[MASK]") df.iloc[e, 0] = df.iloc[e, 0].replace(mask_token, get_random_string(random.randrange(1, 9))) break for e in range(len(dependency_hierarchy2)): Loading @@ -74,13 +83,15 @@ def mask_token(pos_tag): for i in range(len(layer)): if layer[i].pos_ == pos_tag: mask_token = layer[i].text df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, "[MASK]") df.iloc[e, 1] = df.iloc[e, 1].replace(mask_token, get_random_string(random.randrange(1, 9))) break return df def mask_first(pos_tag): df = STSB_Dataset('data/stsbenchmark/sts-test.csv').as_dataframe i = -1 del_rows = [] Loading @@ -91,7 +102,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 0] = df.iloc[i, 0].replace(pos_tag_tokens[e], get_random_string(random.randrange(1, 9))) else: if i not in del_rows: del_rows.append(i) Loading @@ -104,7 +115,7 @@ def mask_first(pos_tag): if pos_tag in pos_tags_sent: e = pos_tags_sent.index(pos_tag) df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], "[MASK]") df.iloc[i, 1] = df.iloc[i, 1].replace(pos_tag_tokens[e], random.randrange(1, 9)) else: if i not in del_rows: del_rows.append(i) Loading