Commit 515b0cf5 authored by Aileen Reichelt
Browse files

Delete spacy_pipeline.py which was used as a demonstration

parent 401b14b1
Loading
Loading
Loading
Loading

spacy_pipeline.py

deleted 100644 → 0
+0 −53
Original line number Diff line number Diff line
import spacy
from STSB_Dataset import STSB_Dataset

"""
Read more here:
https://spacy.io/usage/linguistic-features
"""

def print_nlp(text, mode="pos"):
    """Print one annotation per token of *text*, space-separated on one line.

    Args:
        text: Iterable of token-like objects (in this script, a spaCy doc
            produced by ``nlp(...)``). Each token must expose the string
            attribute selected by ``mode``.
        mode: Which annotation to print. ``"pos"`` reads ``token.pos_``,
            ``"lemma"`` reads ``token.lemma_``, ``"tag"`` reads
            ``token.tag_`` and ``"dep"`` reads ``token.dep_``.

    Raises:
        ValueError: If ``mode`` is not one of the supported names.
    """
    attr_by_mode = {
        "pos": "pos_",
        "lemma": "lemma_",
        "tag": "tag_",
        "dep": "dep_",
    }
    try:
        attr_name = attr_by_mode[mode]
    except KeyError:
        # Fail with a clear message instead of hitting an unbound local below.
        supported = ", ".join(repr(name) for name in attr_by_mode)
        raise ValueError(
            f"Unsupported mode {mode!r}; expected one of: {supported}"
        ) from None

    print(" ".join(getattr(token, attr_name) for token in text))
        

# Load the spaCy pipeline used to annotate every sentence below.
nlp = spacy.load("en_core_web_sm")

dataset = STSB_Dataset('data/stsbenchmark/sts-test.csv')

# spacy representation
# nlp.pipe processes the texts as a batch and yields the docs in input order,
# so each list lines up index-for-index with dataset._examples.
# NOTE(review): this reads the private `_examples` attribute of STSB_Dataset;
# prefer a public accessor if the class offers one.
sentences1 = list(nlp.pipe(example['sentence1'] for example in dataset._examples))
sentences2 = list(nlp.pipe(example['sentence2'] for example in dataset._examples))

# Demonstrate the annotations on the second first-sentence (index 1).
doc = sentences1[1]

print("Part of speech:")
print_nlp(doc)

print("\nLemmas:")
print_nlp(doc, "lemma")

print("\nTags (related to part of speech):")
print_nlp(doc, "tag")

print("\nDependency role:")
print_nlp(doc, "dep")


print("\nDependency head/children:")
for token in doc:
    print(f"Token: {token.text}, Head: {token.head.text}, Children: {list(token.children)}")