"""Explore spaCy linguistic features on STS-Benchmark sentences.

Read more here: https://spacy.io/usage/linguistic-features
"""

import spacy

from STSB_Dataset import STSB_Dataset


def print_nlp(text, mode="pos"):
    """Print one linguistic attribute for every token in *text*.

    Parameters
    ----------
    text : iterable of spacy tokens (e.g. a ``Doc``)
        The processed document whose tokens are inspected.
    mode : str
        Which attribute to print: "pos", "lemma", "tag" or "dep".

    Raises
    ------
    ValueError
        If *mode* is not one of the recognized attribute names.
    """
    # Dispatch table mapping each mode to the spaCy token attribute it
    # prints.  The original if-chain left `nlp_list` unbound for any
    # unknown mode, raising a confusing NameError; now we fail fast
    # with an explicit ValueError instead.
    attributes = {
        "pos": "pos_",
        "lemma": "lemma_",
        "tag": "tag_",
        "dep": "dep_",
    }
    try:
        attr = attributes[mode]
    except KeyError:
        raise ValueError(
            f"Unknown mode {mode!r}; expected one of {sorted(attributes)}"
        ) from None
    print(" ".join(getattr(token, attr) for token in text))


def main():
    """Load the STS-B test split, run spaCy over it, and demo one doc."""
    nlp = spacy.load("en_core_web_sm")
    dataset = STSB_Dataset('data/stsbenchmark/sts-test.csv')

    # spaCy representation of both sentence columns.
    # NOTE(review): `_examples` is a private attribute of STSB_Dataset;
    # a public accessor would be preferable — kept for compatibility.
    sentences1 = [nlp(s['sentence1']) for s in dataset._examples]
    # NOTE(review): sentences2 is computed but never used below; kept to
    # preserve the original behavior (and any caching side effects).
    sentences2 = [nlp(s['sentence2']) for s in dataset._examples]

    doc = sentences1[1]

    print("Part of speech:")
    print_nlp(doc)
    print("\nLemmas:")
    print_nlp(doc, "lemma")
    print("\nTags (related to part of speech):")
    print_nlp(doc, "tag")
    print("\nDependency role:")
    print_nlp(doc, "dep")
    print("\nDependency head/children:")
    for token in doc:
        print(f"Token: {token.text}, Head: {token.head.text}, Children: {[child for child in token.children]}")


if __name__ == "__main__":
    main()