Loading process_srl.py +2 −1 Original line number Diff line number Diff line Loading @@ -31,6 +31,7 @@ print(df.shape) nltk.download('wordnet') lemmatizer = WordNetLemmatizer() df['lemma'] = df.verb.apply(lemmatizer.lemmatize) df.arg = df.arg.str[2:] vad = pd.read_csv('data/NRC-VAD-Lexicon.txt', delimiter='\t', index_col=0) df['dom'] = df.lemma.apply(get_dom) Loading @@ -42,7 +43,7 @@ arg_agr = pd.DataFrame(columns=['']) dom_agr = pd.DataFrame(columns=['']) for n, gr in grs: print(n, gr.dom.mean()) arg_agr[n] = gr['arg'].value_counts() arg_agr[n] = gr['arg'].value_counts(normalize=True) #dom_agr[n] = gr.nlargest(n=10, columns=['dom']) print(arg_agr.head(n=10)) Loading Loading
process_srl.py +2 −1 Original line number Diff line number Diff line Loading @@ -31,6 +31,7 @@ print(df.shape) nltk.download('wordnet') lemmatizer = WordNetLemmatizer() df['lemma'] = df.verb.apply(lemmatizer.lemmatize) df.arg = df.arg.str[2:] vad = pd.read_csv('data/NRC-VAD-Lexicon.txt', delimiter='\t', index_col=0) df['dom'] = df.lemma.apply(get_dom) Loading @@ -42,7 +43,7 @@ arg_agr = pd.DataFrame(columns=['']) dom_agr = pd.DataFrame(columns=['']) for n, gr in grs: print(n, gr.dom.mean()) arg_agr[n] = gr['arg'].value_counts() arg_agr[n] = gr['arg'].value_counts(normalize=True) #dom_agr[n] = gr.nlargest(n=10, columns=['dom']) print(arg_agr.head(n=10)) Loading