# $Id: selftest.py 2213 2005-01-11 18:49:47Z fredrik $
# elementtree selftest program

# this test script uses Python's "doctest" module to check that the
# *test script* works as expected.

# NOTE(review): the XML markup and character references inside the string
# literals and doctest expectations of this module were reconstructed from
# the behaviour the doctests themselves describe (element names, nesting,
# quoting of the reprs).  Places where a detail could not be derived from
# this file alone carry their own NOTE(review) comment -- please confirm.

import sys

try:
    from StringIO import StringIO
    BytesIO = StringIO
except ImportError:
    from io import BytesIO, StringIO

from lxml import etree as ElementTree


def stdout():
    """Return a file-like object suitable for ``tree.write()`` in doctests.

    On Python 2 this is ``sys.stdout`` itself.  On Python 3 it is a small
    wrapper whose ``write()`` decodes ``bytes`` as ISO-8859-1 before
    forwarding the text to ``sys.stdout``.
    """
    if sys.version_info[0] < 3:
        return sys.stdout

    class bytes_stdout(object):
        def write(self, data):
            # The serializer may hand us bytes; sys.stdout wants text.
            if isinstance(data, bytes):
                data = data.decode('ISO8859-1')
            sys.stdout.write(data)

    return bytes_stdout()


def unserialize(text):
    """Parse the XML document in *text* and return its root element."""
    source = StringIO(text)
    tree = ElementTree.parse(source)
    return tree.getroot()


def serialize(elem, encoding=None):
    """Serialize *elem* and return the result as a native string.

    If *encoding* is given it is passed on to ``tree.write()``; otherwise
    the writer's default is used.  On Python 3 the written bytes are decoded
    as ISO-8859-1.  The result is normalised by replacing ``' />'`` with
    ``'/>'`` and by dropping a single trailing newline.
    """
    buf = BytesIO()
    tree = ElementTree.ElementTree(elem)
    if encoding:
        tree.write(buf, encoding=encoding)
    else:
        tree.write(buf)
    result = buf.getvalue()
    if sys.version_info[0] >= 3:
        result = result.decode('ISO8859-1')
    result = result.replace(' />', '/>')
    if result[-1:] == '\n':
        result = result[:-1]
    return result


def summarize(elem):
    """Return the tag of *elem*."""
    return elem.tag


def summarize_list(seq):
    """Return the list of tags of the elements in *seq*."""
    return list(map(summarize, seq))


# Shape required by the find() doctests: the root has the children
# tag ("text"), tag (empty) and section, and section holds one tag
# ("subtext").
# NOTE(review): the root element name ("body") and the indentation are
# assumed; the doctests do not depend on either -- confirm.
SAMPLE_XML = unserialize("""
<body>
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
""")

# Same shape as SAMPLE_XML, but in the "http://effbot.org/ns" default
# namespace (the namespace the find() doctests look elements up in).
SAMPLE_XML_NS = unserialize("""
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
""")


# interface tests

def check_string(string):
    """Exercise the string interface of *string*; print a message on failure."""
    len(string)
    for char in string:
        if len(char) != 1:
            print("expected one-character string, got %r" % char)
    # The results are unused on purpose: the operations only have to work.
    new_string = string + ""
    new_string = string + " "
    string[:0]


def check_mapping(mapping):
    """Exercise the mapping interface of *mapping*.

    Note that this stores a ``"key"`` entry in *mapping*.
    """
    len(mapping)
    keys = mapping.keys()
    # The results are unused on purpose: the operations only have to work.
    items = mapping.items()
    for key in keys:
        item = mapping[key]
    mapping["key"] = "value"
    if mapping["key"] != "value":
        print("expected value string, got %r" % mapping["key"])


def check_element(element):
    """Check that *element* offers tag, attrib, text and tail members."""
    if not hasattr(element, "tag"):
        print("no tag member")
    if not hasattr(element, "attrib"):
        print("no attrib member")
    if not hasattr(element, "text"):
        print("no text member")
    if not hasattr(element, "tail"):
        print("no tail member")
    check_string(element.tag)
    check_mapping(element.attrib)
    if element.text is not None:
        check_string(element.text)
    if element.tail is not None:
        check_string(element.tail)


def check_element_tree(tree):
    """Run check_element() on the root element of *tree*."""
    check_element(tree.getroot())


def element():
    """
    Test element tree interface.

    >>> element = ElementTree.Element("tag")
    >>> check_element(element)
    >>> tree = ElementTree.ElementTree(element)
    >>> check_element_tree(tree)
    """


# NOTE(review): the expected output below mirrors samples/simple.xml and
# samples/simple-ns.xml, which are not part of this file.  The element
# order and text content are backed by the doctests in this module; the
# key="value" attribute, the indentation and the "http://namespace/"
# namespace are assumed -- verify against the sample files.
def parsefile():
    """
    Test parsing from file.  Note that we're opening the files in
    here; by default, the 'parse' function opens the file in binary
    mode, and doctest doesn't filter out carriage returns.

    >>> file = open("samples/simple.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    >>> file = open("samples/simple-ns.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root xmlns="http://namespace/">
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    """


def writefile():
    """
    >>> elem = ElementTree.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ElementTree.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'
    """


def encoding():
    r"""
    Test encoding issues.

    >>> elem = ElementTree.Element("tag")
    >>> elem.text = u'abc'
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, "utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, "us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, "iso-8859-1").lower()
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, "utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, "us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, "iso-8859-1").lower()
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;"/>'
    >>> serialize(elem, "utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;"/>'
    >>> serialize(elem, "us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;"/>'
    >>> serialize(elem, "iso-8859-1").lower()
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;"/>'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, "utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, "us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, "iso-8859-1").lower()
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;"/>'
    >>> serialize(elem, "utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;"/>'
    >>> serialize(elem, "us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;"/>'
    >>> serialize(elem, "iso-8859-1").lower()
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;"/>'

    """

# Python 3 has no need for (and historically rejected) the u'' prefix, so
# strip it from the doctest source before doctest collects it.
if sys.version_info[0] >= 3:
    encoding.__doc__ = encoding.__doc__.replace("u'", "'")


def qname():
    """
    Test QName handling.

    1) decorated tags

    >>> elem = ElementTree.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri"/>'

##     2) decorated attributes

##     >>> elem.attrib["{uri}key"] = "value"
##     >>> serialize(elem) # 2.1
##     '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'

    """


def cdata():
    """
    Test CDATA handling (etc).

    >>> serialize(unserialize("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(unserialize("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(unserialize("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'

    """


def find():
    """
    Test find methods (including xpath syntax).

    >>> elem = SAMPLE_XML
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ElementTree.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag']
    >>> summarize_list(elem.findall("section//*"))
    ['tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag']
    >>> summarize_list(elem.findall("*//*"))
    ['tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(ElementTree.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']
    >>> summarize_list(ElementTree.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']
    >>> elem = SAMPLE_XML_NS
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """


# XXX only deep copying is supported

def copy():
    """
    Test copy handling (etc).

    >>> import copy
    >>> e1 = unserialize("<tag>hello<foo/></tag>")
    >>> # e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"

    >>> serialize(e1).replace(' ', '')
    '<tag>hello<bar/></tag>'

##     >>> serialize(e2).replace(' ', '')
##     '<tag>hello<bar/></tag>'

    >>> serialize(e3).replace(' ', '')
    '<tag>hello<foo/></tag>'

    """


def attrib():
    """
    Test attribute handling.

    >>> elem = ElementTree.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ElementTree.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> elem = ElementTree.Element("tag", {"key": "value"})
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> elem = ElementTree.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    """


def makeelement():
    """
    Test makeelement handling.

    >>> elem = ElementTree.Element("tag")
    >>> subelem = elem.makeelement("subtag", {"key": "value"})
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value"/></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag/>'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value"/></tag>'

    """


## def observer():
##     """
##     Test observers.

##     >>> def observer(action, elem):
##     ...     print("%s %s" % (action, elem.tag))
##     >>> builder = ElementTree.TreeBuilder()
##     >>> builder.addobserver(observer)
##     >>> parser = ElementTree.XMLParser(builder)
##     >>> file = open("samples/simple.xml", "rb")
##     >>> parser.feed(file.read())
##     start root
##     start element
##     end element
##     start element
##     end element
##     start empty-element
##     end empty-element
##     end root
##     >>> file.close()

##     """

# Only referenced by the disabled entity() test below.
# NOTE(review): the DOCTYPE / ENTITY declaration lines are assumed; only
# "%user-entities;", "]>" and "&entity;" are certain -- confirm.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

## def entity():
##     """
##     Test entity handling.

##     1) bad entities

##     >>> ElementTree.XML("<document>&entity;</document>")
##     Traceback (most recent call last):
##     SyntaxError: undefined entity: line 1, column 10

##     2) custom entity

##     >>> parser = ElementTree.XMLParser()
##     >>> parser.entity["entity"] = "text"
##     >>> parser.feed(ENTITY_XML)
##     >>> root = parser.close()
##     >>> serialize(root)
##     '<document>text</document>'

##     """

if __name__ == "__main__":
    import doctest
    import selftest2

    # Run every doctest in the module; exit non-zero if any example failed.
    failed, tested = doctest.testmod(selftest2)
    print("%d tests ok." % (tested - failed))
    if failed > 0:
        print("%d tests failed. Exiting with non-zero return code." % failed)
        sys.exit(1)