#!/usr/bin/python -u
#
# The exercise of rewriting xsltproc on top of the python
# bindings, not complete yet and shows up the things missing
# from the existing python interfaces
#
# Every print below uses the single-argument parenthesised form so the
# output is unchanged under Python 2 while the file also parses under
# Python 3.
#
import sys
import time
import os
import string
import libxml2
# Memory debug specific: enabled right after importing libxml2 and before
# libxslt is imported, so the leak report at the bottom of the file covers
# the whole run.
libxml2.debugMemory(1)
import libxslt

# Command line state, filled in by main() and read by xsltProcess().
debug = 0
repeat = 0
timing = 0
novalid = 0
noout = 0
docbook = 0
html = 0
xinclude = 0
profile = 0
params = {}
output = None
errorno = 0

#
# timing
#
begin = 0
endtime = 0


def startTimer():
    """Record the current time in the global `begin`."""
    global begin

    begin = time.time()


def endTimer(msg):
    """Print how many milliseconds elapsed since the last startTimer().

    msg is the label printed in front of the duration.
    """
    global begin
    global endtime

    endtime = time.time()
    print("%s took %d ms" % (msg, (endtime - begin) * 1000))


def xsltProcess(doc, cur, filename):
    """Apply the stylesheet `cur` to the document `doc` and emit the result.

    doc      -- parsed input document; it is always freed by this function
    cur      -- compiled stylesheet object
    filename -- path `doc` was parsed from; used in messages and to reparse
                the input when --repeat is active

    The result is dumped as a debug tree (--debug), dropped (--noout) or
    serialized to the --output file, defaulting to "-" as in the original
    stdout path.  Nothing is returned.
    """
    if xinclude:
        if timing:
            startTimer()
        doc.XIncludeProcess()
        if timing:
            endTimer("XInclude processing %s" % (filename))

    if timing:
        startTimer()

    if repeat != 0:
        # Warm-up rounds: transform, throw the result away and reparse the
        # input so the final round below starts from a fresh document.
        for _ in range(1, repeat):
            scratch = cur.applyStylesheet(doc, params)
            if scratch is not None:
                scratch.freeDoc()
            doc.freeDoc()
            if html == 1:
                doc = libxml2.htmlParseFile(filename, None)
            else:
                # One-argument form, matching the parseFile() calls in main().
                doc = libxml2.parseFile(filename)

    # res stays None on the profiling path so that the "no result" report
    # below is reached instead of a NameError.
    res = None
    if profile:
        print("TODO: Profiling not yet supported")
    else:
        res = cur.applyStylesheet(doc, params)

    if timing:
        if repeat != 0:
            endTimer("Applying stylesheet %d times" % (repeat))
        else:
            endTimer("Applying stylesheet")
    doc.freeDoc()

    if res is None:
        print("no result for %s" % (filename))
        return
    if noout != 0:
        res.freeDoc()
        return

    if debug == 1:
        res.debugDumpDocument(None)
    else:
        if output is None:
            target = "-"
        else:
            target = output
        if timing:
            startTimer()
        cur.saveResultToFilename(target, res, 0)
        if timing:
            endTimer("Saving result")
    res.freeDoc()


def usage(name = 'pyxsltproc'):
    """Print the command line help, using `name` as the program name."""
    lines = [
        "Usage: %s [options] stylesheet file [file ...]" % (name),
        "a reimplementation of xsltproc(1) on top of libxslt-python",
        " Options:",
        "\t--version or -V: show the version of libxml and libxslt used",
        "\t--verbose or -v: show logs of what's happening",
        "\t--output file or -o file: save to a given file",
        "\t--timing: display the time used",
        "\t--repeat: run the transformation 20 times",
        "\t--debug: dump the tree of the result instead",
        "\t--novalid skip the Dtd loading phase",
        "\t--noout: do not dump the result",
        "\t--maxdepth val : increase the maximum depth",
        "\t--html: the input document is(are) an HTML file(s)",
        "\t--param name value : pass a (parameter,value) pair",
        "\t value is an XPath expression.",
        "\t string values must be quoted like \"'string'\"",
        "\t or use stringparam to avoid it",
        "\t--stringparam name value : pass a (parameter,string value) pair",
        "\t--nonet refuse to fetch DTDs or entities over network",
        "\t--catalogs : use SGML catalogs from $SGML_CATALOG_FILES",
        "\t otherwise XML Catalogs starting from ",
        "\t file:///etc/xml/catalog are activated by default",
        "\t--xinclude : do XInclude processing on document input",
        "\t--profile or --norman : dump profiling information ",
        "\nProject libxslt home page: http://xmlsoft.org/XSLT/",
        "To report bugs and get help: http://xmlsoft.org/XSLT/bugs.html",
    ]
    print("\n".join(lines))


def main(args = None):
    """Parse the command line and run the requested transformations.

    args -- list of command line words; sys.argv[1:] is used when it is
            None or empty.

    The first non-option word is the stylesheet, every following word is an
    input document.  Returns 3 when an option is unknown or lacks its
    arguments, None otherwise.  Processing failures are recorded in the
    global `errorno` (4: stylesheet unparsable, 5: stylesheet not
    compilable, 6: input document unparsable).
    """
    global debug
    global repeat
    global timing
    global novalid
    global noout
    global docbook
    global html
    global xinclude
    global profile
    global params
    global output
    global errorno

    done = 0
    cur = None

    if not args:
        args = sys.argv[1:]
        if len(args) <= 0:
            usage(sys.argv[0])

    # First pass: options only.  Plain words (stylesheet, documents, option
    # values not consumed below) are stepped over.
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "-":
            break
        if not arg.startswith('-'):
            i = i + 1
            continue
        if arg in ("-timing", "--timing"):
            timing = 1
        elif arg in ("-debug", "--debug"):
            debug = 1
        elif arg in ("-verbose", "--verbose", "-v"):
            print("TODO: xsltSetGenericDebugFunc() mapping missing")
        elif arg in ("-version", "--version", "-V"):
            print("TODO: version information mapping missing")
        elif arg in ("-repeat", "--repeat"):
            # A second --repeat raises the count from 20 to 100.
            if repeat == 0:
                repeat = 20
            else:
                repeat = 100
        elif arg in ("-o", "-output", "--output"):
            if i + 1 >= len(args):
                print("Option %s requires a file name" % (arg))
                usage()
                return(3)
            i = i + 1
            output = args[i]
        elif arg in ("-novalid", "--novalid"):
            print("TODO: xmlLoadExtDtdDefaultValue mapping missing")
            novalid = 1
        elif arg in ("-noout", "--noout"):
            noout = 1
        elif arg in ("-html", "--html"):
            html = 1
        elif arg in ("-nonet", "--nonet"):
            print("TODO: xmlSetExternalEntityLoader mapping missing")
        elif arg in ("-catalogs", "--catalogs"):
            catalogs = os.environ.get('SGML_CATALOG_FILES')
            if catalogs is not None:
                # NOTE(review): call kept as in the original; the generated
                # bindings may expose this as libxml2.loadCatalogs instead.
                # Confirm which name exists before relying on --catalogs.
                libxml2.xmlLoadCatalogs(catalogs)
            else:
                print("Variable $SGML_CATALOG_FILES not set")
        elif arg in ("-xinclude", "--xinclude"):
            xinclude = 1
            libxslt.setXIncludeDefault(1)
        elif arg in ("-param", "--param"):
            if i + 2 >= len(args):
                print("Option %s requires a name and a value" % (arg))
                usage()
                return(3)
            params[args[i + 1]] = args[i + 2]
            i = i + 2
        elif arg in ("-stringparam", "--stringparam"):
            if i + 2 >= len(args):
                print("Option %s requires a name and a value" % (arg))
                usage()
                return(3)
            # Quote the value so it reaches the stylesheet as a string.
            params[args[i + 1]] = "'%s'" % (args[i + 2])
            i = i + 2
        elif arg in ("-maxdepth", "--maxdepth"):
            print("TODO: xsltMaxDepth mapping missing")
            # Step over the value, as the stylesheet scan below does.
            if i + 1 < len(args):
                i = i + 1
        else:
            print("Unknown option %s" % (arg))
            usage()
            return(3)
        i = i + 1

    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)
    # TODO: xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS
    # if novalid:
    # TODO: xmlLoadExtDtdDefaultValue = 0

    # TODO libxslt.exsltRegisterAll();
    libxslt.registerTestModule()

    # Second pass: locate the stylesheet, i.e. the first word that is
    # neither an option nor an option value.
    i = 0
    while i < len(args) and done == 0:
        arg = args[i]
        if arg in ("-maxdepth", "--maxdepth", "-o", "-output", "--output"):
            i = i + 2
            continue
        if arg in ("-param", "--param", "-stringparam", "--stringparam"):
            i = i + 3
            continue
        if arg != "-" and arg.startswith('-'):
            i = i + 1
            continue

        if timing:
            startTimer()
        style = libxml2.parseFile(arg)
        if timing:
            endTimer("Parsing stylesheet %s" % (arg))
        if style is None:
            print("cannot parse %s" % (arg))
            cur = None
            errorno = 4
            done = 1
        else:
            cur = libxslt.loadStylesheetPI(style)
            if cur is not None:
                # The document names its own stylesheet through a
                # processing instruction: transform it with that one.
                xsltProcess(style, cur, arg)
                cur.freeStylesheet()
                cur = None
            else:
                cur = libxslt.parseStylesheetDoc(style)
                if cur is None:
                    style.freeDoc()
                    errorno = 5
                    done = 1
        i = i + 1
        break

    # Every remaining word is an input document.
    while i < len(args) and done == 0 and cur is not None:
        if timing:
            startTimer()
        if html:
            doc = libxml2.htmlParseFile(args[i], None)
        else:
            doc = libxml2.parseFile(args[i])
        if doc is None:
            print("unable to parse %s" % (args[i]))
            errorno = 6
            i = i + 1
            continue
        if timing:
            endTimer("Parsing document %s" % (args[i]))
        xsltProcess(doc, cur, args[i])
        i = i + 1

    if cur is not None:
        cur.freeStylesheet()
    params = None


if __name__ == "__main__":
    main()

# Memory debug specific
libxslt.cleanup()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()

sys.exit(errorno)