# coding: utf-8 from __future__ import unicode_literals import pytest from ..util import get_doc @pytest.fixture def text(): return """ It was a bright cold day in April, and the clocks were striking thirteen. Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind, slipped quickly through the glass doors of Victory Mansions, though not quickly enough to prevent a swirl of gritty dust from entering along with him. The hallway smelt of boiled cabbage and old rag mats. At one end of it a coloured poster, too large for indoor display, had been tacked to the wall. It depicted simply an enormous face, more than a metre wide: the face of a man of about forty-five, with a heavy black moustache and ruggedly handsome features. Winston made for the stairs. It was no use trying the lift. Even at the best of times it was seldom working, and at present the electric current was cut off during daylight hours. It was part of the economy drive in preparation for Hate Week. The flat was seven flights up, and Winston, who was thirty-nine and had a varicose ulcer above his right ankle, went slowly, resting several times on the way. On each landing, opposite the lift-shaft, the poster with the enormous face gazed from the wall. It was one of those pictures which are so contrived that the eyes follow you about when you move. BIG BROTHER IS WATCHING YOU, the caption beneath it ran. """ @pytest.fixture def heads(): # fmt: off return [1, 1, 0, 3, 2, 1, -4, -1, -1, -7, -8, 1, 2, 1, -12, -1, -2, -1, 1, 4, 3, 1, 1, 0, -1, 1, -2, -4, 1, -2, 1, -2, 3, -1, 1, -4, -13, -14, -1, -2, 2, 1, -3, -1, 1, -2, -9, -1, -11, 1, 1, -14, 1, -2, 1, -2, -1, 1, -2, -6, -1, -1, -2, -1, -1, -42, -1, 1, 1, 0, -1, 1, -2, -1, 2, 1, -4, -8, 18, 1, -2, -1, -1, 3, -1, 1, 10, 9, 1, 7, -1, 1, -2, 3, 2, 1, 0, -1, 1, -2, -4, -1, 1, 0, -1, 2, 1, -4, -1, 2, 1, 1, 1, -6, -11, 1, 20, -1, 2, -1, -3, -1, 3, 2, 1, -4, -10, -11, 3, 2, 1, -4, -1, 1, -3, -1, 0, -1, 1, 0, -1, 1, -2, -4, 1, 0, 1, -2, -1, 1, -2, -6, 1, 9, -1, 1, 6, -1, -1, 3, 2, 1, 0, -1, -2, 7, -1, 2, 1, 3, -1, 1, -10, -1, -2, 1, -2, -5, 1, 0, -1, -1, 1, -2, -5, -1, -1, -2, -1, 1, -2, -12, 1, 1, 0, 1, -2, -1, -4, -5, 18, -1, 2, -1, -4, 2, 1, -3, -4, -5, 2, 1, -3, -1, 2, 1, -3, -17, -24, -1, -2, -1, -4, 1, -2, -3, 1, -2, -10, 17, 1, -2, 14, 13, 3, 2, 1, -4, 8, -1, 1, 5, -1, 2, 1, -3, 0, -1, 1, -2, -4, 1, 0, -1, -1, 2, -1, -3, 1, -2, 1, -2, 3, 1, 1, -4, -1, -2, 2, 1, -3, -19, -1, 1, 1, 0, 0, 6, 5, 1, 3, -1, -1, 0, -1, -1] # fmt: on def test_parser_parse_navigate_consistency(en_tokenizer, text, heads): tokens = en_tokenizer(text) doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads) for head in doc: for child in head.lefts: assert child.head == head for child in head.rights: assert child.head == head def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads): tokens = en_tokenizer(text) doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads) lefts = {} rights = {} for head in doc: assert head.i not in lefts lefts[head.i] = set() for left in head.lefts: lefts[head.i].add(left.i) assert head.i not in rights rights[head.i] = set() for right in head.rights: rights[head.i].add(right.i) for head in doc: assert head.n_rights == len(rights[head.i]) assert head.n_lefts == len(lefts[head.i]) for child in doc: if child.i < child.head.i: assert child.i in lefts[child.head.i] assert child.i not in rights[child.head.i] lefts[child.head.i].remove(child.i) elif child.i > child.head.i: assert child.i in rights[child.head.i] assert child.i not in lefts[child.head.i] rights[child.head.i].remove(child.i) for head_index, children in lefts.items(): assert not children for head_index, children in rights.items(): assert not children def test_parser_parse_navigate_edges(en_tokenizer, text, heads): tokens = en_tokenizer(text) doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads) for token in doc: subtree = list(token.subtree) debug = "\t".join((token.text, token.left_edge.text, subtree[0].text)) assert token.left_edge == subtree[0], debug debug = "\t".join( ( token.text, token.right_edge.text, subtree[-1].text, token.right_edge.head.text, ) ) assert token.right_edge == subtree[-1], debug