Loading process_srl.py +6 −4 Original line number Diff line number Diff line Loading @@ -56,7 +56,8 @@ parser = ArgumentParser() parser.add_argument("-c", "--combine", action="store_true", default=False, help="whether to combine or not") parser.add_argument("-a", "--arg", default=None, help="analyze args or not") parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show") parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-min", "--min_freq", type=int, default=100, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-max", "--max_freq", type=int, default=2000, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-l", "--lemmatize", action="store_true", default=False, help="whether to (re)do lemmatization") args = parser.parse_args() MIN_FREQ = args.min_freq Loading @@ -77,9 +78,6 @@ output = output[output['name'] != 'Ryan'] #print('Arg-0 only {}:'.format(df.shape)) # combine output print(output.sent_id.value_counts().head()) output['sent_id'] = output['sent_id'].astype(float) sent['sent_id'] = sent['sent_id'].astype(float) df = pd.merge(output, sent, on=['sent_id']) namlabs_df, namlabs = load_nameslab() df = pd.merge(df, namlabs_df, on=['name']) Loading Loading @@ -117,6 +115,8 @@ if ARG: # compute overall counts overall_freq = df.verb.value_counts() print(overall_freq.head(3)) print(overall_freq.tail(3)) df["across"] = overall_freq.loc[df.verb].values df = df[df.across > MIN_FREQ] binned = pd.DataFrame() Loading @@ -130,6 +130,8 @@ for n, gr in df.groupby(['bias', 'name_bias']): sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(N) sorted_verbs.to_csv("frequent_verbs/{}{}.csv".format(n_edit, '_arg' if ARG else ''), sep="\t") binned[n] = sorted_verbs.index binned[n + 'across'] = sorted_verbs.across binned[n + 'across'] = sorted_verbs.across print(binned.head(N)) binned.to_csv('frequent_verbs/most_freq_verbs{}.csv'.format('_arg' if ARG else '')) Loading Loading
process_srl.py +6 −4 Original line number Diff line number Diff line Loading @@ -56,7 +56,8 @@ parser = ArgumentParser() parser.add_argument("-c", "--combine", action="store_true", default=False, help="whether to combine or not") parser.add_argument("-a", "--arg", default=None, help="analyze args or not") parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show") parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-min", "--min_freq", type=int, default=100, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-max", "--max_freq", type=int, default=2000, help="min verb frequency to eliminate rarest verbs") parser.add_argument("-l", "--lemmatize", action="store_true", default=False, help="whether to (re)do lemmatization") args = parser.parse_args() MIN_FREQ = args.min_freq Loading @@ -77,9 +78,6 @@ output = output[output['name'] != 'Ryan'] #print('Arg-0 only {}:'.format(df.shape)) # combine output print(output.sent_id.value_counts().head()) output['sent_id'] = output['sent_id'].astype(float) sent['sent_id'] = sent['sent_id'].astype(float) df = pd.merge(output, sent, on=['sent_id']) namlabs_df, namlabs = load_nameslab() df = pd.merge(df, namlabs_df, on=['name']) Loading Loading @@ -117,6 +115,8 @@ if ARG: # compute overall counts overall_freq = df.verb.value_counts() print(overall_freq.head(3)) print(overall_freq.tail(3)) df["across"] = overall_freq.loc[df.verb].values df = df[df.across > MIN_FREQ] binned = pd.DataFrame() Loading @@ -130,6 +130,8 @@ for n, gr in df.groupby(['bias', 'name_bias']): sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(N) sorted_verbs.to_csv("frequent_verbs/{}{}.csv".format(n_edit, '_arg' if ARG else ''), sep="\t") binned[n] = sorted_verbs.index binned[n + 'across'] = sorted_verbs.across binned[n + 'across'] = sorted_verbs.across print(binned.head(N)) binned.to_csv('frequent_verbs/most_freq_verbs{}.csv'.format('_arg' if ARG else '')) Loading