Commit 2f288f16 authored by vdberg's avatar vdberg
Browse files

argument analysis added as a command-line option

parent 948dc84e
Loading
Loading
Loading
Loading
+7 −5
Original line number Diff line number Diff line
@@ -53,12 +53,13 @@ def crude_ner(sent):
# process command line arguments
parser = ArgumentParser()
parser.add_argument("-c", "--combine", action="store_true", default=False, help="whether to combine or not")
parser.add_argument("-a", "--arg", default='ARG0', help="focus on an arg")
parser.add_argument("-a", "--arg", default=None, help="analyze args or not")
parser.add_argument("-n", "--n", type=int, default=50, help="nr of top verbs to show")
parser.add_argument("-mf", "--min_freq", type=int, default=5, help="min verb frequency to eliminate rarest verbs")
args = parser.parse_args()
MIN_FREQ = args.min_freq
N = args.n
ARG = args.arg

# load files
sent = pd.read_csv('data/sent_df.csv', names=['sent_id','bias','doc_id','s'])
@@ -104,7 +105,8 @@ else:
#df.verb = df.verb.apply(lemmatizer.lemmatize)

# arg
df = df[df.arg.str.endswith('0')]
if ARG:
    #df = df[df.arg.str.endswith(ARG)]
    df.verb += ' (' + df.arg.str[2:] + ')'

# compute overall counts
@@ -120,11 +122,11 @@ for n, gr in df.groupby(['bias', 'name_bias']):
    counts['normalized'] = counts.within / counts.across #gr.verb.value_counts(normalize=True)
    #counts.normalized = counts.normalized.apply(round)
    sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(N)
    sorted_verbs.to_csv("frequent_verbs/{}.csv".format(n_edit), sep="\t")
    sorted_verbs.to_csv("frequent_verbs/{}{}.csv".format(n_edit, '_arg' if ARG else ''), sep="\t")
    binned[n] = sorted_verbs.index

print(binned.head(N))
binned.to_csv('frequent_verbs/most_freq_verbs.csv')
binned.to_csv('frequent_verbs/most_freq_verbs{}.csv'.format('_arg' if ARG else ''))

exit(0)