Loading srl.py +6 −4 Original line number Diff line number Diff line Loading @@ -3,6 +3,7 @@ import pandas as pd import logging, re import time def load_nameslab(): namfn = 'data/names_labeled.csv' df = pd.read_csv(namfn, index_col=0) Loading @@ -21,6 +22,7 @@ def load_flipper(): namlabs_df, namlabs = load_nameslab() def crude_ner(sent): global namlabs nampat = '\\b' + '\\b|\\b'.join([i for i in namlabs]) + '\\b' Loading @@ -45,9 +47,9 @@ sent = pd.read_csv('data/sent_df.csv') sent = sent[sent.s.apply(crude_ner) == True] #print(max(sent.index)) #vdberg_output_11315.csv.csv.csv.csv 64210 #print([i for i in sent.index if i > 64202]) #exit(0) data = sent.loc[64202:] over = [i for i in sent.index if i > 99000] start_i = over[0] data = sent.loc[start_i:] srl_pred = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz") allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz") Loading @@ -72,7 +74,7 @@ for i, r in data.iterrows(): tagged = pd.DataFrame(zip(*tagged), columns=['w', 'n'] + verbs) persons = tagged[tagged.n.str.endswith('PER')] names = persons[persons.w.apply(crude_ner) == True].copy() names = names.w.str.strip('-') names.w = names.w.str.strip('—') #print('names', names) verbs = names.set_index(['w']).iloc[:,1:] Loading Loading
srl.py +6 −4 Original line number Diff line number Diff line Loading @@ -3,6 +3,7 @@ import pandas as pd import logging, re import time def load_nameslab(): namfn = 'data/names_labeled.csv' df = pd.read_csv(namfn, index_col=0) Loading @@ -21,6 +22,7 @@ def load_flipper(): namlabs_df, namlabs = load_nameslab() def crude_ner(sent): global namlabs nampat = '\\b' + '\\b|\\b'.join([i for i in namlabs]) + '\\b' Loading @@ -45,9 +47,9 @@ sent = pd.read_csv('data/sent_df.csv') sent = sent[sent.s.apply(crude_ner) == True] #print(max(sent.index)) #vdberg_output_11315.csv.csv.csv.csv 64210 #print([i for i in sent.index if i > 64202]) #exit(0) data = sent.loc[64202:] over = [i for i in sent.index if i > 99000] start_i = over[0] data = sent.loc[start_i:] srl_pred = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz") allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz") Loading @@ -72,7 +74,7 @@ for i, r in data.iterrows(): tagged = pd.DataFrame(zip(*tagged), columns=['w', 'n'] + verbs) persons = tagged[tagged.n.str.endswith('PER')] names = persons[persons.w.apply(crude_ner) == True].copy() names = names.w.str.strip('-') names.w = names.w.str.strip('—') #print('names', names) verbs = names.set_index(['w']).iloc[:,1:] Loading