Commit 40e55b37 authored by EstherMaria's avatar EstherMaria
Browse files

merge output

parents 19484eab ad450f69
Loading
Loading
Loading
Loading
+20008 −0

File added.

Preview size limit exceeded, changes collapsed.

process_srl.py

0 → 100644
+26 −0
Original line number Diff line number Diff line
import pandas as pd
from nltk.stem import WordNetLemmatizer


def get_dom(v):
    """Look up the dominance score of a word in the module-level ``vad`` table.

    Args:
        v: Index label to look up in ``vad`` (the script passes verb lemmas).

    Returns:
        The value of the ``'Dominance'`` column for ``v`` rounded to two
        decimals, or ``None`` when the lookup raises ``KeyError``.
    """
    # ``vad`` is only read here, so no ``global`` declaration is required.
    try:
        # Select the single cell first so that only this value is rounded,
        # instead of rounding every column of the row and then discarding
        # all but one of them.
        return round(vad.loc[v, 'Dominance'], 2)
    except KeyError:
        return None

# Load the argument/verb table; it must provide the `verb` and `same`
# columns used below.
df = pd.read_csv('vdberg_output.csv')

# The `verb` column holds inflected verb forms. lemmatize() treats its input
# as a noun unless told otherwise, which would leave forms such as
# "defended" untouched, so the verb part of speech is requested explicitly.
lemmatizer = WordNetLemmatizer()
df['lemma'] = df.verb.apply(lambda verb: lemmatizer.lemmatize(verb, pos='v'))

# Tab-separated lexicon indexed by its first column; get_dom() reads the
# 'Dominance' column of this table, so it has to be loaded before the lookup.
vad = pd.read_csv('data/NRC-VAD-Lexicon.txt', delimiter='\t', index_col=0)
df['dom'] = df.lemma.apply(get_dom)

# Symptoms of agency: compare the mean dominance score between the groups
# defined by the `same` column.
grs = df.groupby('same')

for same_value, group in grs:
    print(same_value)
    print(group)
    print('\n', group.dom.mean())
+9 −9
Original line number Diff line number Diff line
@@ -43,8 +43,9 @@ def ner(t):

sent = pd.read_csv('data/sent_df.csv')
sent = sent[sent.s.apply(crude_ner) == True]
#print([i for i in sent.index if i > 1400])
data = sent.iloc[:]
print(max(sent.index))
exit(0)
data = sent.loc[11237:11243]

srl_pred = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz")
allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz")
@@ -52,7 +53,7 @@ allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/mod
verb_df = pd.DataFrame(columns=['arg', 'verb', 'same'])
for i,r in data.iterrows():
    names = ner(r.s)
    print('Processing: {}'.format(i, r.s))
    print('Processing: {}, {}'.format(i, r.s))

    roles = srl_pred.predict(sentence=r.s)['verbs']
    #print('roles', roles)
@@ -81,7 +82,6 @@ for i,r in data.iterrows():
        print('Found {} verbs'.format(len(verbs)))
        print(verbs)
        print()

        verb_df.arg = verb_df.arg.str[2:]
        verb_df.to_csv('vdberg_output.csv')

vdberg_output.csv

0 → 100644
+5 −0
Original line number Diff line number Diff line
,arg,same,verb
Sessions,ARG0,False,defended
Sessions,ARG0,False,saying
Sessions,ARG2,False,asked
Sessions,ARG1,False,‘