Loading srl.py +26 −20 Original line number Diff line number Diff line Loading @@ -48,10 +48,12 @@ def hardcoded_clean(names): names.w = names.w.str.replace('—', '') names.w = names.w.str.replace('‘s', '') names.w = names.w.str.replace('2.', '') names.w = names.w.str.replace('’a', '') return names parser = ArgumentParser() parser.add_argument("-s", "--start_i", type=int, help="which i to start from") parser.add_argument("-s", "--start_i", type=int, default=157584, help="which i to start from") parser.add_argument("-pre", "--preprocess", action="store_true", default=False, help="whether to preprocess or not") args = parser.parse_args() sent = pd.read_csv('data/sent_df.csv') Loading @@ -64,6 +66,8 @@ data = sent.loc[start_i:] srl_pred = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz") allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz") preprocess = True #args.preprocess times = [] verb_df = pd.DataFrame(columns=['arg', 'verb', 'same', 'id']) for i, r in data.iterrows(): Loading @@ -82,6 +86,8 @@ for i, r in data.iterrows(): #print('verbs', verbs) tagged = pd.DataFrame(zip(*tagged), columns=['w', 'n'] + verbs) verbs.to_csv('outputs/srl_ner_tagged_{}.csv'.format(i)) if preprocess: persons = tagged[tagged.n.str.endswith('PER')] names = persons[persons.w.apply(crude_ner) == True].copy() #names.w = names.w.str.strip('—') Loading Loading
srl.py +26 −20 Original line number Diff line number Diff line Loading @@ -48,10 +48,12 @@ def hardcoded_clean(names): names.w = names.w.str.replace('—', '') names.w = names.w.str.replace('‘s', '') names.w = names.w.str.replace('2.', '') names.w = names.w.str.replace('’a', '') return names parser = ArgumentParser() parser.add_argument("-s", "--start_i", type=int, help="which i to start from") parser.add_argument("-s", "--start_i", type=int, default=157584, help="which i to start from") parser.add_argument("-pre", "--preprocess", action="store_true", default=False, help="whether to preprocess or not") args = parser.parse_args() sent = pd.read_csv('data/sent_df.csv') Loading @@ -64,6 +66,8 @@ data = sent.loc[start_i:] srl_pred = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz") allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz") preprocess = True #args.preprocess times = [] verb_df = pd.DataFrame(columns=['arg', 'verb', 'same', 'id']) for i, r in data.iterrows(): Loading @@ -82,6 +86,8 @@ for i, r in data.iterrows(): #print('verbs', verbs) tagged = pd.DataFrame(zip(*tagged), columns=['w', 'n'] + verbs) verbs.to_csv('outputs/srl_ner_tagged_{}.csv'.format(i)) if preprocess: persons = tagged[tagged.n.str.endswith('PER')] names = persons[persons.w.apply(crude_ner) == True].copy() #names.w = names.w.str.strip('—') Loading