Commit acd306f2 authored by vdberg's avatar vdberg
Browse files

sentencebysentence

parent f0163767
Loading
Loading
Loading
Loading

data/sent_df.csv

0 → 100644
+245858 −0

File added.

Preview size limit exceeded, changes collapsed.

+19 −6
Original line number Diff line number Diff line
from allennlp.predictors.predictor import Predictor
import pandas as pd
import logging
import logging, re


def load_nameslab():
@@ -21,6 +21,12 @@ def load_flipper():

namlabs_df, namlabs = load_nameslab()

def crude_ner(sent=None, names=None):
    """Cheap name spotter: search for any known name as a whole word.

    The candidate names are the index labels of the module-level
    ``namlabs`` table unless ``names`` is supplied.

    Args:
        sent: Text to scan. When omitted, a predicate is returned instead
            of a match (see Returns); this is the form required by the
            ``sent.s.apply(crude_ner())`` filter used in this file.
        names: Optional iterable of names that overrides ``namlabs.index``.

    Returns:
        With ``sent``: the first ``re.Match`` for a known name, or ``None``
        when no name occurs, there are no names, or ``sent`` is not a
        string.
        Without ``sent``: a one-argument function returning ``True`` or
        ``False`` ("contains a known name"), backed by a pattern that is
        compiled only once.
    """
    if names is None:
        names = namlabs.index
    # Escape each name: labels such as "J. Smith" contain regex
    # metacharacters that would otherwise be interpreted as syntax.
    alternatives = [re.escape(str(name)) for name in names if str(name)]
    # With no names the pattern would collapse to r'\b\b', which matches at
    # every word boundary, so treat that case as "nothing can match".
    pattern = None
    if alternatives:
        pattern = re.compile(r'\b(?:' + '|'.join(alternatives) + r')\b')

    def find(text):
        # Non-string cells (e.g. NaN read from a CSV) cannot contain a name.
        if pattern is None or not isinstance(text, str):
            return None
        return pattern.search(text)

    if sent is None:
        return lambda text: find(text) is not None
    return find(sent)


def ner(t):
    global allen_ner
@@ -28,9 +34,16 @@ def ner(t):
    return o


flipper = load_flipper()
flipper['text'] = flipper.original_title +'. '+ flipper.original_body
data = flipper[:4]
### load flipper

#flipper = load_flipper()
#flipper['text'] = flipper.original_title +'. '+ flipper.original_body

### load sent_df

# Load the sentence table and keep only the rows whose `s` column passes the
# crude name filter; the first 50 surviving rows become the working set.
# NOTE(review): this commit adds the file as data/sent_df.csv while the path
# read here is 'sent_df.csv' -- confirm the script's working directory.
sent = pd.read_csv('sent_df.csv')
# crude_ner is *called* here, so apply() receives its return value; that value
# has to be a callable yielding booleans for the `== True` mask to select rows.
sent= sent[sent.s.apply(crude_ner()) == True]
data = sent[:50]

# Build the two AllenNLP predictors from the archive URLs below: `srl_pred` is
# used for the `srl_pred.predict(sentence=...)['verbs']` calls in the loop, and
# `allen_ner` is the global that ner() declares.
# NOTE(review): both paths are remote URLs, so this presumably needs network
# access (or a local cache) at start-up -- confirm.
srl_pred = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz")
allen_ner = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz")
@@ -39,10 +52,10 @@ logging.basicConfig(filename='vdberg_log.log',level=logging.DEBUG)

verb_df = pd.DataFrame(columns=['arg', 'verb', 'same'])
for i,r in data.iterrows():
    names = ner(r.text)
    names = ner(r.s)
    logging.info('Predicted NER for {}'.format(i))

    roles = srl_pred.predict(sentence=r.text)['verbs']
    roles = srl_pred.predict(sentence=r.s)['verbs']
    logging.info('Predicted roles for {}'.format(i))
    #print('roles', roles)