Commit 163c04a2 authored by vdberg
Browse files

process improved

parent d9baec1a
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -34,10 +34,10 @@ df['lemma'] = df.verb.apply(lemmatizer.lemmatize)
# Load the NRC VAD lexicon (tab-separated; the first column becomes the index).
vad = pd.read_csv('data/NRC-VAD-Lexicon.txt', delimiter='\t', index_col=0)
# Score every lemma with get_dom (defined elsewhere; presumably a lookup
# against `vad` -- confirm against its definition).
df['dom'] = df.lemma.apply(get_dom)

# drop_duplicates is a method and must be *called* before grouping. The
# earlier un-deduplicated `df.groupby('same')` assignment was immediately
# overwritten, so only the deduplicated grouping is kept.
grs = df.drop_duplicates().groupby('same')
# symptoms of agency

# For each value of the 'same' column, report the group key, its size, the
# ten rows with the highest 'dom' score, and the group's mean 'dom'.
for n, gr in grs:
    print(n)
    # DataFrame.shape is a (rows, columns) tuple attribute, not a callable.
    print(gr.shape)
    print(gr.nlargest(n=10, columns=['dom']))
    print('\n', gr.dom.mean())