Loading allzweckmesser/corpus.py +11 −2 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ import os.path from bs4 import BeautifulSoup from .model import Reading, Syllable, Token from .model import Reading, Syllable, Token, Verse BASE_HTML = """<!DOCTYPE html PUBLIC"-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> Loading Loading @@ -112,12 +112,21 @@ def separate_punctuation(tokens): return tokens def reconstruct_verse_text_from_reading(reading): codepoints = [' ' for _ in range(reading.tokens[-1].span[1])] for token in reading.tokens: codepoints[token.span[0]:token.span[1]] = token.text return ''.join(codepoints) class HypotacticLine: def __init__(self, element): self.element = element self.reading = get_reading_from_line_element(element) reading = get_reading_from_line_element(element) reading.tokens = separate_punctuation(reading.tokens) text = reconstruct_verse_text_from_reading(reading) self.verse = Verse(verse=text, readings=[reading]) class HypotacticDocument: Loading Loading
allzweckmesser/corpus.py +11 −2 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ import os.path from bs4 import BeautifulSoup from .model import Reading, Syllable, Token from .model import Reading, Syllable, Token, Verse BASE_HTML = """<!DOCTYPE html PUBLIC"-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> Loading Loading @@ -112,12 +112,21 @@ def separate_punctuation(tokens): return tokens def reconstruct_verse_text_from_reading(reading): codepoints = [' ' for _ in range(reading.tokens[-1].span[1])] for token in reading.tokens: codepoints[token.span[0]:token.span[1]] = token.text return ''.join(codepoints) class HypotacticLine: def __init__(self, element): self.element = element self.reading = get_reading_from_line_element(element) reading = get_reading_from_line_element(element) reading.tokens = separate_punctuation(reading.tokens) text = reconstruct_verse_text_from_reading(reading) self.verse = Verse(verse=text, readings=[reading]) class HypotacticDocument: Loading