Commit df64fad9 authored by EstherMaria
Browse files
parents 03163455 5fe1688e
Loading
Loading
Loading
Loading

data/srl_output.csv

0 → 100644
+0 −0

Empty file added.

+16 −1
Original line number Diff line number Diff line
import pandas as pd
from nltk.stem import WordNetLemmatizer
import os


def read_csvs(dir):
    """Merge every file found under *dir* into one CSV and load it.

    Walks ``dir`` recursively, drops the first (header) line of each file,
    writes the remaining lines to ``data/srl_output.csv`` and parses that
    file with pandas.

    Args:
        dir: Root directory to search. The name shadows the ``dir`` builtin;
            it is kept so that existing keyword callers keep working.

    Returns:
        A DataFrame with the columns ``w``, ``verb``, ``arg``, ``same`` and
        ``id``. It is empty when no file contributed any line.
    """
    outfile = 'data/srl_output.csv'
    columns = ['w', 'verb', 'arg', 'same', 'id']
    os.makedirs(os.path.dirname(outfile), exist_ok=True)
    outfile_abs = os.path.abspath(outfile)
    wrote_rows = False
    # Open with 'w', not 'a': the merged file is rebuilt from scratch on each
    # call, otherwise every re-run would append the same rows again.
    with open(outfile, 'w', encoding='utf-8') as out:
        for root, dirs, files in os.walk(dir):
            # Sorting in place makes the traversal (and so the row order)
            # reproducible across platforms.
            dirs.sort()
            for name in sorted(files):
                path = os.path.join(root, name)
                # Never feed the merged file back into itself.
                if os.path.abspath(path) == outfile_abs:
                    continue
                with open(path, encoding='utf-8') as src:
                    rows = src.readlines()[1:]  # skip the header line
                if not rows:
                    continue
                # A file without a trailing newline would otherwise have its
                # last row glued to the first row of the next file.
                if not rows[-1].endswith('\n'):
                    rows[-1] += '\n'
                out.writelines(rows)
                wrote_rows = True
    if not wrote_rows:
        # pd.read_csv raises EmptyDataError on an empty file.
        return pd.DataFrame(columns=columns)
    return pd.read_csv(outfile, names=columns)


def get_dom(v):
@@ -9,7 +22,9 @@ def get_dom(v):
    except KeyError:
        return None

# Load the merged SRL output from the per-file CSVs under 'outputs'.
# A previous direct read of 'vdberg_output.csv' was removed: its result was
# overwritten immediately and it failed whenever that file was absent.
df = read_csvs('outputs')
print(df)

# WordNetLemmatizer.lemmatize defaults to pos='n'. The column holds verbs, so
# request verb lemmas explicitly; otherwise inflected forms such as
# "running" or "ate" are returned unchanged.
lemmatizer = WordNetLemmatizer()
df['lemma'] = df.verb.apply(lambda verb: lemmatizer.lemmatize(verb, pos='v'))