import zipfile
import xml.etree.ElementTree as ET

from .utils import BaseParser


class Parser(BaseParser):
    """Extract text from open document files.
    """

    def extract(self, filename, **kwargs):
        # Inspiration from
        # https://github.com/odoo/odoo/blob/master/addons/document/odt2txt.py
        with open(filename, 'rb') as stream:
            zip_stream = zipfile.ZipFile(stream)
            self.content = ET.fromstring(zip_stream.read("content.xml"))
        return self.to_string()

    def to_string(self):
        """ Converts the document to a string. """
        buff = u""
        for child in self.content.iter():
            if child.tag in [self.qn('text:p'), self.qn('text:h')]:
                buff += self.text_to_string(child) + "\n"
        # remove last newline char
        if buff:
            buff = buff[:-1]
        return buff

    def text_to_string(self, element):
        buff = u""
        if element.text is not None:
            buff += element.text
        for child in element:
            if child.tag == self.qn('text:tab'):
                buff += "\t"
                if child.tail is not None:
                    buff += child.tail
            elif child.tag == self.qn('text:s'):
                buff += u" "
                if child.get(self.qn('text:c')) is not None:
                    buff += u" " * (int(child.get(self.qn('text:c'))) - 1)
                if child.tail is not None:
                    buff += child.tail
            else:
                buff += self.text_to_string(child)
        if element.tail is not None:
            buff += element.tail
        return buff

    def qn(self, namespace):
        """Connect tag prefix to longer namespace"""
        nsmap = {
            'text': 'urn:oasis:names:tc:opendocument:xmlns:text:1.0',
        }
        spl = namespace.split(':')
        return '{{{}}}{}'.format(nsmap[spl[0]], spl[1])