[Subversion] / PEAK / src / peak / util / SOX.py  

Diff of /PEAK/src/peak/util/SOX.py

Parent Directory | Revision Log

version 333, Sat Feb 16 03:14:46 2002 UTC version 1799, Thu Aug 19 21:25:11 2004 UTC
Line 4 
Line 4 
     events into a hierarchy of objects.  E.g., stack handling,      events into a hierarchy of objects.  E.g., stack handling,
     delegation to node classes, etc.      delegation to node classes, etc.
   
     Note: I originally wrote this module because the Python default DOM      If all you need is to read an XML file and turn it into objects, you
     for XML (way back in the XML-SIG for Python 1.5.2 days) was horrendously      came to the right place.  If you need an actual model of the XML file
     slow for processing (typically enormous) XMI files.  If 2.2 minidom      that you can manipulate, with absolute fidelity to the original, you
     is fast enough and easy enough to use, this package may go away once      might be better off with a DOM, since this doesn't retain processing
     I've ported the XMI support to minidom.  So let me know if you're using      instructions or comments.
     this module for anything;  I have some non-TransWarp uses for it myself,  
     but depending on how easy using minidom turns out to be...  I may switch      SOX is faster than 'minidom' or any other DOM that I know of.  On the
     them over too!      other hand, SOX is slower than PyRXP, but SOX handles Unicode correctly.
 """  
       To use this module, you will need a "document" object that implements
       either 'ISOXNode' or 'ISOXNode_NS', depending on whether you want
 from xml.sax import ContentHandler, parse      namespace support.  The interfaces are very similar, except that
 import Interface      the 'NS' version has some enhancements/simplifications that can't be
       added to the non-namespace version for backward-compatibility reasons.
   
       Once you have your document object, just call
       'SOX.load(filenameOrStream,documentObject,namespaces=flag)' to get back
       the result of your document object's '_finish()' method after it has
       absorbed all of the XML data supplied.
   
   
   
   
   
   
   
   
   
   
   
       If you need a simple document or node class, 'Document', 'Document_NS',
       'Node', and 'Node_NS' are available for subclassing or idea-stealing.
   """
   
   
   from xml.sax.saxutils import XMLGenerator, quoteattr, escape
   from protocols import Interface, advise, Adapter
   from kjbuckets import kjGraph
   
   __all__ = [
       'load', 'ISOXNode', 'ISOXNode_NS', 'IXMLBuilder', 'ExpatBuilder',
       'Node', 'Node_NS', 'Document', 'Document_NS', 'IndentedXML',
   ]
   
   
 class ISOXNode(Interface.Base):  class ISOXNode(Interface):
   
     """Object mapping from an XML element      """Object mapping from an XML element
   
Line 53 
Line 53 
         processing guided by the XML structures, like an interpretive parser.          processing guided by the XML structures, like an interpretive parser.
     """      """
   
     def _newNode(self,name,attributeMap):      def _newNode(name,attributeMap):
         """Create new child node from 'name' and 'attributeMap'          """Create new child node from 'name' and 'attributeMap'
   
            Child node must implement the 'ISOXNode' interface."""             Child node must implement the 'ISOXNode' interface."""
   
     def _acquireFrom(self,parentNode):      def _acquireFrom(parentNode):
         """Parent-child relationship hook          """Parent-child relationship hook
   
            Called on newly created nodes to give them a chance to acquire             Called on newly created nodes to give them a chance to acquire
            context information from their parent node"""             context information from their parent node"""
   
     def _addText(self,text):      def _addText(text):
         """Add text string 'text' to node"""          """Add text string 'text' to node"""
   
     def _addNode(self,subObj):      def _addNode(name,subObj):
         """Add sub-node 'subObj' to node"""          """Add finished sub-node 'subObj' to node"""
   
     def _finish(self):      def _finish():
         """Return an object to be used in place of this node in call to the          """Return an object to be used in place of this node in call to the
             parent's '_addNode()' method.  Returning 'None' will result in              parent's '_addNode()' method.  Returning 'None' will result in
             nothing being added to the parent."""              nothing being added to the parent."""
Line 80 
Line 80 
   
   
   
 class ObjectMakingHandler(ContentHandler):  class ISOXNode_NS(Interface):
   
     """SAX handler that makes a pseudo-DOM"""      def _newNode(name, attributeMap):
   
     def __init__(self,documentRoot):          """Create new child node from 'name' and 'attributeMap'
         self.stack = [documentRoot]  
         ContentHandler.__init__(self)  
   
     def startElement(self, name, atts):  
         top = self.stack[-1]  
         node = top._newNode(name,atts)  
         node._acquireFrom(top)  
         self.stack.append(node)  
   
      def characters(self, ch):             Child node must implement the 'ISOXNode_NS' interface."""
         self.stack[-1]._addText(ch)  
   
     def endElement(self, name):      def _setNS(ns2uri, uri2ns):
         stack = self.stack          """Set namespace declaration maps"""
         top = stack.pop()  
       def _addText(text):
           """Add text string 'text' to node

           This is what the 'IXMLBuilder' adapter for namespace-aware nodes
           forwards character data to."""
   
   
       def _addNode(name,subObj):
           """Add finished sub-node 'subObj' to node

           'name' is the element name of the sub-node, and 'subObj' is the
           value that the sub-node's '_finish()' method returned."""
   
   
       def _finish():
           """Return an object to be used in place of this node in call to the
               parent's '_addNode()' method.  Returning 'None' will result in
               nothing being added to the parent.

               For the document (root) object, the return value is what the
               parser hands back as the result of the parse."""
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   class IXMLBuilder(Interface):

       """Object that 'ExpatBuilder' can assemble an XML element into

       'ExpatBuilder' adapts the root node and every node returned from
       '_xml_newTag()' to this interface, and then drives the result with
       the methods below as parse events arrive."""

       def _xml_addChild(data):
           """Add 'data' to element's children

           'data' is the value returned by a child's '_xml_finish()'."""

       def _xml_finish():
           """Return finished value to be passed to parent's '_xml_addChild()'

           For the root node, this is the value 'ExpatBuilder.parseFile()'
           returns."""

       def _xml_newTag(name,attrs,newPrefixes,parser):
           """Create and return a subnode for a tag

           'name' is the tag name, 'attrs' is a list of '(name,value)' tuples
           in document order ('xmlns' declarations included), 'newPrefixes'
           is the list of namespace prefixes declared by this tag ('' for the
           default namespace), and 'parser' is the 'ExpatBuilder' doing the
           parsing.  The returned object is adapted to 'IXMLBuilder' by the
           parser."""

       def _xml_addText(xml):
           """Handle character data 'xml' for this element

           'ExpatBuilder' installs this method as the expat parser's
           'CharacterDataHandler' while the element is the innermost one open.

           NOTE(review): this was documented as "Return a new subnode for
           text", but the adapters in this module return nothing here --
           confirm the intended contract."""

       def _xml_addLiteral(xml):
           """Handle literal markup 'xml', such as comments, PIs, etc.

           NOTE(review): this was documented as returning a new subnode, but
           'ExpatBuilder.buildLiteral()' discards the result and the adapters
           in this module ignore literals entirely -- confirm the intended
           contract."""
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   class SoxNodeAsXMLBuilder(Adapter):
   
       advise(
           instancesProvide=[IXMLBuilder],
           asAdapterForProtocols=[ISOXNode]
       )
   
       def _xml_addText(self,text):
           self.subject._addText(text)
   
       def _xml_addLiteral(self,text):
           pass
   
       def _xml_finish(self):
           return self.subject._finish()
   
       def _xml_addChild(self,node):
           self.subject._addNode(self.lastName,node)    # XXX
   
       def _xml_newTag(self,name,attrs,newPrefixes,parser):
           node = self.subject._newNode(name,dict(attrs))
           node._acquireFrom(self.subject)
           self.lastName = name
           return node
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   class NSNodeAsXMLBuilder(Adapter):
   
       advise(
           instancesProvide=[IXMLBuilder],
           asAdapterForProtocols=[ISOXNode_NS]
       )
   
       def _xml_addText(self,text):
           self.subject._addText(text)
   
       def _xml_addLiteral(self,text):
           pass
   
       def _xml_finish(self):
           return self.subject._finish()
   
       def _xml_addChild(self,node):
           self.subject._addNode(self.lastName,node)    # XXX
   
       def _xml_newTag(self,name,attrs,newPrefixes,parser):
           node = self.subject._newNode(name,dict(attrs))
           if newPrefixes:
               ns2uri = dict(
                   [(prefix,stack[-1]) for prefix,stack in parser.nsInfo.items()]
               )
               node._setNS(ns2uri, ~kjGraph(ns2uri.items()))
           self.lastName = name
           return node
   
         if top._name != name:  
             raise SyntaxError,"End tag '%s' found when '%s' was wanted" % (name, top._name)  
   
         out = top._finish()  
   
         if out is not None:  
             stack[-1]._addNode(name,out)  
   
     def endDocument(self):  
         self.document = self.stack[0]._finish()  
         del self.stack  
   
   
   
Line 125 
Line 248 
   
     """Simple, DOM-like ISOXNode implementation"""      """Simple, DOM-like ISOXNode implementation"""
   
     __implements__ = ISOXNode      advise( instancesProvide = [ISOXNode] )
   
     def __init__(self,name='',atts={},**kw):      def __init__(self,name='',atts={},**kw):
         self._name = name          self._name = name
         self._subNodes = []          self._subNodes = []
         self._allNodes = []          self._allNodes = []
         d=self.__dict__          self.__dict__.update(atts)
         for a in atts.keys():  
             d[a]=atts[a]  
   
         self.__dict__.update(kw)          self.__dict__.update(kw)
   
     def _addNode(self,name,node):      def _addNode(self,name,node):
Line 161 
Line 281 
                 d = n._findFirst(name)                  d = n._findFirst(name)
                 if d: return d                  if d: return d
   
       def _finish(self):
           return self
   
     def _finish(self): return self  
   
     _acquiredAttrs = ()      _acquiredAttrs = ()
   
Line 182 
Line 303 
     def _newNode(self,name,atts):      def _newNode(self,name,atts):
         return Node(name,atts)          return Node(name,atts)
   
   class Node_NS(Node):
   
       advise( instancesProvide = [ISOXNode_NS] )
       ns2uri = {}
       uri2ns = kjGraph()
   
       def _newNode(self,name,atts):
           node = self.__class__(
               name, atts, ns2uri=self.ns2uri, uri2ns=self.uri2ns
           )
           return node
   
       def _setNS(self, ns2uri, uri2ns):
           self.ns2uri, self.uri2ns = ns2uri, uri2ns
   
   
   class Document_NS(Node_NS):

       """Namespace-aware counterpart of 'Document', for use as a root node"""

       # Reuse Document's '_finish()' without inheriting from Document;
       # 'im_func' extracts the plain function from the unbound method so it
       # can be rebound to this class.
       _finish = Document._finish.im_func

       def _newNode(self,name,atts):
           # Children are plain 'Node_NS' objects rather than instances of our
           # own class (as 'Node_NS._newNode()' would create), and they start
           # out with the class-level default namespace maps.
           return Node_NS(name, atts)
   
 def load(filename_or_stream, documentObject=None):  def load(filename_or_stream, documentObject=None, namespaces=False):
   
     """Build a tree from a filename/stream, rooted in a document object"""      """Build a tree from a filename/stream, rooted in a document object"""
   
       if namespaces:
   
           if documentObject is None:
               documentObject = Document_NS()
   
       else:
     if documentObject is None:      if documentObject is None:
         documentObject = Document()          documentObject = Document()
   
     handler = ObjectMakingHandler(documentObject)  
     parse(filename_or_stream, handler)      if isinstance(filename_or_stream,str):
     return handler.document          filename_or_stream = open(filename_or_stream,'rt')
   
       elif hasattr(filename_or_stream,'getByteStream'):
           filename_or_stream = filename_or_stream.getByteStream()
   
       return ExpatBuilder().parseFile(filename_or_stream,documentObject)
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   class IndentedXML(XMLGenerator):
   
       """SAX handler that writes its output to an IndentedStream"""
   
       def __init__(self, out=None, encoding="iso-8859-1"):
           if out is None:
               from IndentedStream import IndentedStream
               out = IndentedStream()
           XMLGenerator.__init__(self,out,encoding)
   
       def startElement(self,name,attrs):
           XMLGenerator.startElement(self,name,attrs)
           self._out.push(1)
   
       def startElementNS(self,name,qname,attrs):
           XMLGenerator.startElementNS(self,name,qname,attrs)
           self._out.push(1)
   
       def characters(self,content):
           self._out.push()
           self._out.setMargin(absolute=0)
           XMLGenerator.characters(self,content)
           self._out.pop()
   
       def endElement(self,name):
           self._out.pop()
           XMLGenerator.endElement(self,name)
   
       def endElementNS(self,name,qname):
           self._out.pop()
           XMLGenerator.endElementNS(self,name,qname)
   
   
   
   
   
   
   
   
   
   
   class ExpatBuilder:
   
       """Parser that assembles a document"""
   
       def __init__(self):
           self.parser = self.makeParser()
           self.stack   = []   # "object being assembled" stack
           self.nsStack = []
           self.nsInfo  = {}   # URI stack for each NS prefix
   
       def makeParser(self):
           from xml.parsers.expat import ParserCreate
           p = ParserCreate()
           p.ordered_attributes = True
           p.returns_unicode = True
           p.specified_attributes = True
           p.StartElementHandler = self.startElement
           p.EndElementHandler = self.endElement
           p.CommentHandler = self.comment
           p.DefaultHandler = self.buildLiteral
           # We don't use:
           # .StartDoctypeDeclHandler
           # .StartNamespaceDeclHandler
           # .EndNamespaceDeclHandler
           # .XmlDeclHandler(version, encoding, standalone)
           # .ElementDeclHandler(name, model)
           # .AttlistDeclHandler(elname, attname, type, default, required)
           # .EndDoctypeDeclHandler()
           # .ProcessingInstructionHandler(target, data)
           # .UnparsedEntityDeclHandler(entityN,base,systemId,publicId,notationN)
           # .EntityDeclHandler(
           #      entityName, is_parameter_entity, value, base,
           #      systemId, publicId, notationName)
           # .NotationDeclHandler(notationName, base, systemId, publicId)
           # .StartCdataSectionHandler()
           # .EndCdataSectionHandler()
           # .NotStandaloneHandler()
           return p
   
   
   
       def parseFile(self, stream, rootNode):
           self.__init__()
           self.stack.append(IXMLBuilder(rootNode))
           self.parser.CharacterDataHandler = self.stack[-1]._xml_addText
           self.parser.ParseFile(stream)
           return self.stack[-1]._xml_finish()
   
   
       def comment(self,data):
           self.buildLiteral(u'<!--%s-->' % data)
   
       def buildLiteral(self,xml):
           self.stack[-1]._xml_addLiteral(xml)
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
       def startElement(self, name, attrs):
   
           prefixes = []; a = []
           pop = attrs.pop
           append = a.append
   
           while attrs:
               k = pop(0); v=pop(0)
               append((k,v))
   
               if not k.startswith('xmlns'):
                   continue
   
               rest = k[5:]
               if not rest:
                   ns = ''
               elif rest.startswith(':'):
                   ns = rest[1:]
               else:
                   continue
   
               self.nsInfo.setdefault(ns,[]).append(v)
               prefixes.append(ns)
   
           self.nsStack.append(prefixes)
           element = self.stack[-1]._xml_newTag(name, a, prefixes, self)
           self.stack.append(IXMLBuilder(element))
           self.parser.CharacterDataHandler = self.stack[-1]._xml_addText
   
       def endElement(self, name):
           last = self.stack.pop()
           self.parser.CharacterDataHandler = self.stack[-1]._xml_addText
           self.stack[-1]._xml_addChild(last._xml_finish())
           for prefix in self.nsStack.pop():
               self.nsInfo[prefix].pop()
   
   
   
   
   
   


Generate output suitable for use with a patch program
Legend:
Removed from v.333  
changed lines
  Added in v.1799

cvs-admin@eby-sarna.com

Powered by ViewCVS 1.0-dev

ViewCVS and CVS Help