Loading process_srl.py +7 −5 Original line number Diff line number Diff line Loading @@ -53,12 +53,13 @@ def crude_ner(sent): # process command line arguments parser = ArgumentParser() parser.add_argument("-c", "--combine", action="store_true", default=False, help="whether to combine or not") parser.add_argument("-a", "--arg", default='ARG0', help="focus on an arg") parser.add_argument("-a", "--arg", default=None, help="analyze args or not") parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show") parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs") args = parser.parse_args() MIN_FREQ = args.min_freq N = args.n ARG = args.arg # load files sent = pd.read_csv('data/sent_df.csv', names=['sent_id','bias','doc_id','s']) Loading Loading @@ -104,7 +105,8 @@ else: #df.verb = df.verb.apply(lemmatizer.lemmatize) # arg df = df[df.arg.str.endswith('0')] if ARG: #df = df[df.arg.str.endswith(ARG)] df.verb += ' (' + df.arg.str[2:] + ')' # compute overall counts Loading @@ -120,11 +122,11 @@ for n, gr in df.groupby(['bias', 'name_bias']): counts['normalized'] = counts.within / counts.across #gr.verb.value_counts(normalize=True) #counts.normalized = counts.normalized.apply(round) sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(N) sorted_verbs.to_csv("frequent_verbs/{}.csv".format(n_edit), sep="\t") sorted_verbs.to_csv("frequent_verbs/{}{}.csv".format(n_edit, '_arg' if ARG else ''), sep="\t") binned[n] = sorted_verbs.index print(binned.head(N)) binned.to_csv('frequent_verbs/most_freq_verbs.csv') binned.to_csv('frequent_verbs/most_freq_verbs{}.csv'.format('_arg' if ARG else '')) exit(0) Loading Loading
process_srl.py +7 −5 Original line number Diff line number Diff line Loading @@ -53,12 +53,13 @@ def crude_ner(sent): # process command line arguments parser = ArgumentParser() parser.add_argument("-c", "--combine", action="store_true", default=False, help="whether to combine or not") parser.add_argument("-a", "--arg", default='ARG0', help="focus on an arg") parser.add_argument("-a", "--arg", default=None, help="analyze args or not") parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show") parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs") args = parser.parse_args() MIN_FREQ = args.min_freq N = args.n ARG = args.arg # load files sent = pd.read_csv('data/sent_df.csv', names=['sent_id','bias','doc_id','s']) Loading Loading @@ -104,7 +105,8 @@ else: #df.verb = df.verb.apply(lemmatizer.lemmatize) # arg df = df[df.arg.str.endswith('0')] if ARG: #df = df[df.arg.str.endswith(ARG)] df.verb += ' (' + df.arg.str[2:] + ')' # compute overall counts Loading @@ -120,11 +122,11 @@ for n, gr in df.groupby(['bias', 'name_bias']): counts['normalized'] = counts.within / counts.across #gr.verb.value_counts(normalize=True) #counts.normalized = counts.normalized.apply(round) sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(N) sorted_verbs.to_csv("frequent_verbs/{}.csv".format(n_edit), sep="\t") sorted_verbs.to_csv("frequent_verbs/{}{}.csv".format(n_edit, '_arg' if ARG else ''), sep="\t") binned[n] = sorted_verbs.index print(binned.head(N)) binned.to_csv('frequent_verbs/most_freq_verbs.csv') binned.to_csv('frequent_verbs/most_freq_verbs{}.csv'.format('_arg' if ARG else '')) exit(0) Loading