Loading process_srl.py +16 −10 Original line number Diff line number Diff line Loading @@ -85,21 +85,27 @@ df.verb = df.verb.apply(lemmatizer.lemmatize) def cust(x): return ' (' + str(round(x, 2)) + ')' return ' (' + str(x) + ')' grs = pd.DataFrame() for n, gr in df.groupby(['bias', 'name_bias']): n_edit = '{}_coverage_of_{}_person'.format(n[0], n[1]) counts = gr.verb.value_counts() counts = counts / overall_counts.loc[counts.index] counts2 = overall_counts.loc[counts.index] counts.index += counts.apply(cust) counts.index += counts2.apply(cust) sorted_verbs = counts.sort_values(ascending=False).head(args.n) sorted_verbs.to_csv("{}.csv".format(n_edit)) counts = pd.DataFrame(gr.verb.value_counts())#, columns=["within"]) #print(counts.columns) counts["across"] = overall_counts.loc[counts.index] #print(counts) counts["normalized"] = counts.verb / counts.across #print(counts) #counts.normalized = counts.normalized.apply(round) #print(counts) sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(args.n) #print(sorted_verbs) #exit(0) sorted_verbs.to_csv("frequent_verbs/{}.csv".format(n_edit), sep="\t") grs[n] = sorted_verbs.index print(grs) grs.to_csv('most_freq_verbs.csv') grs.to_csv('frequent_verbs/most_freq_verbs.csv') exit(0) Loading Loading
process_srl.py +16 −10 Original line number Diff line number Diff line Loading @@ -85,21 +85,27 @@ df.verb = df.verb.apply(lemmatizer.lemmatize) def cust(x): return ' (' + str(round(x, 2)) + ')' return ' (' + str(x) + ')' grs = pd.DataFrame() for n, gr in df.groupby(['bias', 'name_bias']): n_edit = '{}_coverage_of_{}_person'.format(n[0], n[1]) counts = gr.verb.value_counts() counts = counts / overall_counts.loc[counts.index] counts2 = overall_counts.loc[counts.index] counts.index += counts.apply(cust) counts.index += counts2.apply(cust) sorted_verbs = counts.sort_values(ascending=False).head(args.n) sorted_verbs.to_csv("{}.csv".format(n_edit)) counts = pd.DataFrame(gr.verb.value_counts())#, columns=["within"]) #print(counts.columns) counts["across"] = overall_counts.loc[counts.index] #print(counts) counts["normalized"] = counts.verb / counts.across #print(counts) #counts.normalized = counts.normalized.apply(round) #print(counts) sorted_verbs = counts.sort_values(by="normalized", ascending=False).head(args.n) #print(sorted_verbs) #exit(0) sorted_verbs.to_csv("frequent_verbs/{}.csv".format(n_edit), sep="\t") grs[n] = sorted_verbs.index print(grs) grs.to_csv('most_freq_verbs.csv') grs.to_csv('frequent_verbs/most_freq_verbs.csv') exit(0) Loading