1  """ 
  2  SAX-based adapter to copy trees from/to the Python standard library. 
  3   
  4  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  5  SAX events. 
  6   
  7  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
  8  the SAX events of an ElementTree against a SAX ContentHandler. 
  9   
 10  See http://codespeak.net/lxml/sax.html 
 11  """ 
 12   
 13  from xml.sax.handler import ContentHandler 
 14  from lxml import etree 
 15  from lxml.etree import ElementTree, SubElement 
 16  from lxml.etree import Comment, ProcessingInstruction 
 17   
 19      """General SAX error. 
 20      """ 
 21      pass 
  22   
 24      if tag[0] == '{': 
 25          return tuple(tag[1:].split('}', 1)) 
 26      else: 
 27          return (None, tag) 
  28   
 29   
 30 -class ElementTreeContentHandler(ContentHandler): 
  31      """Build an lxml ElementTree from SAX events. 
 32      """ 
 33 -    def __init__(self, makeelement=None): 
  34          ContentHandler.__init__(self) 
 35          self._root = None 
 36          self._root_siblings = [] 
 37          self._element_stack = [] 
 38          self._default_ns = None 
 39          self._ns_mapping = { None : [None] } 
 40          self._new_mappings = {} 
 41          if makeelement is None: 
 42              makeelement = etree.Element 
 43          self._makeelement = makeelement 
  44   
 45 -    def _get_etree(self): 
  46          "Contains the generated ElementTree after parsing is finished." 
 47          return ElementTree(self._root) 
  48   
 49      etree = property(_get_etree, doc=_get_etree.__doc__) 
 50   
 51 -    def setDocumentLocator(self, locator): 
  53   
 54 -    def startDocument(self): 
  56   
 57 -    def endDocument(self): 
  59   
 60 -    def startPrefixMapping(self, prefix, uri): 
  61          self._new_mappings[prefix] = uri 
 62          try: 
 63              self._ns_mapping[prefix].append(uri) 
 64          except KeyError: 
 65              self._ns_mapping[prefix] = [uri] 
 66          if prefix is None: 
 67              self._default_ns = uri 
  68   
 69 -    def endPrefixMapping(self, prefix): 
  70          ns_uri_list = self._ns_mapping[prefix] 
 71          ns_uri_list.pop() 
 72          if prefix is None: 
 73              self._default_ns = ns_uri_list[-1] 
  74   
 75 -    def _buildTag(self, ns_name_tuple): 
  76          ns_uri, local_name = ns_name_tuple 
 77          if ns_uri: 
 78              el_tag = "{%s}%s" % ns_name_tuple 
 79          elif self._default_ns: 
 80              el_tag = "{%s}%s" % (self._default_ns, local_name) 
 81          else: 
 82              el_tag = local_name 
 83          return el_tag 
  84   
 85 -    def startElementNS(self, ns_name, qname, attributes=None): 
  86          el_name = self._buildTag(ns_name) 
 87          if attributes: 
 88              attrs = {} 
 89              try: 
 90                  iter_attributes = attributes.iteritems() 
 91              except AttributeError: 
 92                  iter_attributes = attributes.items() 
 93   
 94              for name_tuple, value in iter_attributes: 
 95                  if name_tuple[0]: 
 96                      attr_name = "{%s}%s" % name_tuple 
 97                  else: 
 98                      attr_name = name_tuple[1] 
 99                  attrs[attr_name] = value 
100          else: 
101              attrs = None 
102   
103          element_stack = self._element_stack 
104          if self._root is None: 
105              element = self._root = \ 
106                        self._makeelement(el_name, attrs, self._new_mappings) 
107              if self._root_siblings and hasattr(element, 'addprevious'): 
108                  for sibling in self._root_siblings: 
109                      element.addprevious(sibling) 
110              del self._root_siblings[:] 
111          else: 
112              element = SubElement(element_stack[-1], el_name, 
113                                   attrs, self._new_mappings) 
114          element_stack.append(element) 
115   
116          self._new_mappings.clear() 
 117   
118 -    def processingInstruction(self, target, data): 
 119          pi = ProcessingInstruction(target, data) 
120          if self._root is None: 
121              self._root_siblings.append(pi) 
122          else: 
123              self._element_stack[-1].append(pi) 
 124   
125 -    def endElementNS(self, ns_name, qname): 
 126          element = self._element_stack.pop() 
127          el_tag = self._buildTag(ns_name) 
128          if el_tag != element.tag: 
129              raise SaxError("Unexpected element closed: " + el_tag) 
 130   
131 -    def startElement(self, name, attributes=None): 
 132          if attributes: 
133              attributes = dict( 
134                      [((None, k), v) for k, v in attributes.items()] 
135                  ) 
136          self.startElementNS((None, name), name, attributes) 
 137   
138 -    def endElement(self, name): 
 139          self.endElementNS((None, name), name) 
 140   
141 -    def characters(self, data): 
 142          last_element = self._element_stack[-1] 
143          try: 
144               
145              last_element = last_element[-1] 
146              last_element.tail = (last_element.tail or '') + data 
147          except IndexError: 
148               
149              last_element.text = (last_element.text or '') + data 
 150   
151      ignorableWhitespace = characters 
 152   
153   
155      """Produces SAX events for an element and children. 
156      """ 
157 -    def __init__(self, element_or_tree, content_handler): 
 158          try: 
159              element = element_or_tree.getroot() 
160          except AttributeError: 
161              element = element_or_tree 
162          self._element = element 
163          self._content_handler = content_handler 
164          from xml.sax.xmlreader import AttributesNSImpl as attr_class 
165          self._attr_class = attr_class 
166          self._empty_attributes = attr_class({}, {}) 
 167   
169          self._content_handler.startDocument() 
170   
171          element = self._element 
172          if hasattr(element, 'getprevious'): 
173              siblings = [] 
174              sibling = element.getprevious() 
175              while getattr(sibling, 'tag', None) is ProcessingInstruction: 
176                  siblings.append(sibling) 
177                  sibling = sibling.getprevious() 
178              for sibling in siblings[::-1]: 
179                  self._recursive_saxify(sibling, {}) 
180   
181          self._recursive_saxify(element, {}) 
182   
183          if hasattr(element, 'getnext'): 
184              sibling = element.getnext() 
185              while getattr(sibling, 'tag', None) is ProcessingInstruction: 
186                  self._recursive_saxify(sibling, {}) 
187                  sibling = sibling.getnext() 
188   
189          self._content_handler.endDocument() 
 190   
192          content_handler = self._content_handler 
193          tag = element.tag 
194          if tag is Comment or tag is ProcessingInstruction: 
195              if tag is ProcessingInstruction: 
196                  content_handler.processingInstruction( 
197                      element.target, element.text) 
198              if element.tail: 
199                  content_handler.characters(element.tail) 
200              return 
201   
202          new_prefixes = [] 
203          build_qname = self._build_qname 
204          attribs = element.items() 
205          if attribs: 
206              attr_values = {} 
207              attr_qnames = {} 
208              for attr_ns_name, value in attribs: 
209                  attr_ns_tuple = _getNsTag(attr_ns_name) 
210                  attr_values[attr_ns_tuple] = value 
211                  attr_qnames[attr_ns_tuple] = build_qname( 
212                      attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes) 
213              sax_attributes = self._attr_class(attr_values, attr_qnames) 
214          else: 
215              sax_attributes = self._empty_attributes 
216   
217          ns_uri, local_name = _getNsTag(tag) 
218          qname = build_qname(ns_uri, local_name, prefixes, new_prefixes) 
219   
220          for prefix, uri in new_prefixes: 
221              content_handler.startPrefixMapping(prefix, uri) 
222          content_handler.startElementNS((ns_uri, local_name), 
223                                         qname, sax_attributes) 
224          if element.text: 
225              content_handler.characters(element.text) 
226          for child in element: 
227              self._recursive_saxify(child, prefixes) 
228          content_handler.endElementNS((ns_uri, local_name), qname) 
229          for prefix, uri in new_prefixes: 
230              content_handler.endPrefixMapping(prefix) 
231          if element.tail: 
232              content_handler.characters(element.tail) 
 233   
234 -    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes): 
 235          if ns_uri is None: 
236              return local_name 
237          try: 
238              prefix = prefixes[ns_uri] 
239          except KeyError: 
240              prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes) 
241              new_prefixes.append( (prefix, ns_uri) ) 
242          return prefix + ':' + local_name 
  243   
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that creates an `ElementTreeProducer` for the
    given arguments and returns the result of its ``saxify()`` method.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
 249