import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
import os
from argparse import ArgumentParser


def load_output(dir='outputs', combine=False):
    """Return the SRL output table as a DataFrame.

    The table is always read from ``data/srl_output.csv``. When *combine*
    is true, that file is first rebuilt from scratch by concatenating every
    file found (recursively) under *dir*.

    Args:
        dir: Root directory walked for per-run output files. Only used when
            *combine* is true.
        combine: If true, regenerate ``data/srl_output.csv`` before reading
            it; if false, read the file as it already exists on disk.

    Returns:
        A DataFrame with the columns ``name``, ``verb``, ``arg``, ``same``
        and ``sent_id``.
    """
    outfile = 'data/srl_output.csv'

    if combine:
        # Mode 'w' truncates any previous combined file, so no separate
        # "empty the file first" pass is needed.
        with open(outfile, 'w', encoding='utf-8') as combined:
            for dirpath, _dirnames, filenames in os.walk(dir):
                for filename in filenames:
                    part_path = os.path.join(dirpath, filename)
                    with open(part_path, encoding='utf-8') as part:
                        # The first line of every part file is dropped
                        # (presumably a header row -- TODO confirm).
                        rows = part.readlines()[1:]
                    # A part file that does not end in a newline would
                    # otherwise fuse its last row with the next file's
                    # first row.
                    if rows and not rows[-1].endswith('\n'):
                        rows[-1] += '\n'
                    combined.writelines(rows)

    # The combined file carries no header row, so column names are supplied
    # explicitly.
    return pd.read_csv(outfile,
                       names=['name', 'verb', 'arg', 'same', 'sent_id'])


# NOTE(review): the source excerpt elides the region between load_output()
# and the script below. That region contains load_nameslab(), of which only
# the final statement is visible:
#     return df, df.name_bias.to_dict()


parser = ArgumentParser()
# Add more options if you like
parser.add_argument("-c", "--combine", action="store_true", default=False,
                    help="whether to combine or not")
# parse_args() with no argument reads sys.argv[1:]. Passing the string 'v'
# made argparse see the argument list ['v'], reject it as an unrecognized
# argument and exit before any data was loaded.
args = parser.parse_args()

df = load_output(dir='outputs', combine=args.combine)

# Attach sentence-level columns to every SRL row via the shared sent_id.
sent = pd.read_csv('data/sent_df.csv', names=['sent_id', 'bias', 'doc_id', 's'])
df = pd.merge(df, sent, on=['sent_id'])

# Attach the per-name columns returned by load_nameslab() via 'name'.
namlabs_df, namlabs = load_nameslab()
df = pd.merge(df, namlabs_df, on=['name'])
#df = df.drop(columns='same')

# Keep only rows whose argument tag starts with 'B-ARG0'.
df = df[df['arg'].str.startswith('B-ARG0')]

pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_columns', 10)
print(df.columns)
print(df.shape)
#print(df.head(n=5))