Commit 9eea7955 authored by vdberg's avatar vdberg
Browse files

processÄ

parent 6b623faf
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@ print(df.shape)
# Download the 'wordnet' resource through NLTK before building the lemmatizer.
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()
# Add a 'lemma' column by passing each value of the 'verb' column to the lemmatizer.
# NOTE(review): lemmatize() receives only the word here, so its default part of
# speech applies (noun in NLTK's documented signature) — confirm whether pos='v'
# was intended for a column of verbs.
df['lemma'] = df.verb.apply(lemmatizer.lemmatize)
# Drop the first two characters of every 'arg' value.
# NOTE(review): presumably strips a fixed two-character prefix — verify against the data.
df.arg = df.arg.str[2:]

# Load the tab-separated NRC VAD lexicon, using its first column as the index.
vad = pd.read_csv('data/NRC-VAD-Lexicon.txt', delimiter='\t', index_col=0)
# Add a 'dom' column by applying get_dom (defined outside this view) to each lemma.
# NOTE(review): presumably get_dom looks the lemma up in `vad` — confirm.
df['dom'] = df.lemma.apply(get_dom)
@@ -42,7 +43,7 @@ arg_agr = pd.DataFrame(columns=[''])
# Start dom_agr as a DataFrame whose only column is named ''.
dom_agr = pd.DataFrame(columns=[''])
# Walk the (key, group) pairs in grs (built outside this view; presumably a
# DataFrame groupby — confirm).
for n, gr in grs:
    # Print the group key next to the mean of the group's 'dom' column.
    print(n, gr.dom.mean())
    # NOTE(review): the raw-count assignment below is immediately overwritten by
    # the normalized one after it. In this diff view the two lines appear to be
    # the removed and added versions of a single line — confirm that only the
    # normalize=True form exists in the actual file.
    arg_agr[n] = gr['arg'].value_counts()
    # Store each 'arg' value's relative frequency within the group, in a column named after the key.
    arg_agr[n] = gr['arg'].value_counts(normalize=True)
    #dom_agr[n] = gr.nlargest(n=10, columns=['dom'])

# Show the first 10 rows of the per-group 'arg' table.
print(arg_agr.head(n=10))