source: ZMS/trunk/_builder.py @ 1761

Revision 1761, 18.6 KB checked in by zmsdev, 8 weeks ago (diff)

applied minor performance-fixes (2)

Line 
1################################################################################
2# _builder.py
3#
4# Implementation of class Builder (see below).
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the terms of the GNU General Public License
8# as published by the Free Software Foundation; either version 2
9# of the License, or (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, write to the Free Software
18# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19################################################################################
20
21# Imports
22import pyexpat
23import time
24import Globals
25# Product Imports.
26import _globals
27
28################################################################################
29# class ParseError(Exception):
30#
31# General exception class to indicate parsing errors.
32################################################################################
class ParseError(Exception):
    """General exception class to indicate parsing errors."""
34
35
36################################################################################
37# class Builder
38#
39# Implements a builder class (cp. design pattern "BUILDER") to build a tree of ZOPE objects
40# out of an XML formatted document. Uses the class "pyexpat" (cp. module "Shared.DC.xml") for
41# parsing the XML document. The general approach of the XML parser "pyexpat" is event driven,
42# where handler methods are called on occurrence of XML tags. Builder redirects these events to
43# a set of own handler methods (see below). To build up the object tree, Builder provides the
44# following functionality:
45#
46# 1. Usually, the occurrence of an XML tag induces the instantiation of a new node object. Therefore,
47#    Builder contains a mapping table ("dGlobalAttrs"), that maps XML tags to python classes. The
48#    handler method "Builder.OnStartElement()" creates a node object of the corresponding class.
49#    This node object is then made current.
50#
51# 2. In general, events are directed to the current node object. Therefore, node objects have to
52#    provide a set of interface methods (see below). The node objects are responsible for handling these
53#    events. This includes the insertion into the object tree as well as the interpretation of
54#    XML tag parameters.
55#
56# 3. A dedicated root object is managed by Builder. The root object may be predefined or created
57#    during the parsing process.
58#
59# Builder is usually used as a mix-in base class for other classes. For usage, the following
60# issues must be taken into consideration:
61#
62# 1. Overwrite "dGlobalAttrs" with a dictionary, that maps XML-Tags to python classes.
63# 2. Call "Builder.parse()" to initiate the parsing and building process.
64# 3. Equip all python classes with the following interface methods:
65#
66#    - xmlOnStartElement(self, sTagName, dTagAttrs, oParentNode, oRoot)
67#    - xmlOnCharacterData(self, sData, bInCData)
68#    - xmlOnEndElement(self)
69#    - xmlOnUnknownStartTag(self, sTagName, dTagAttrs)
70#    - xmlOnUnknownEndTag(self, sTagName)
71#    - xmlGetTagName(self)
72#    - xmlGetParent(self)
73#
74################################################################################
class Builder:
    """ Builder: mix-in that builds an object tree out of an XML document. """

    ######## class variables ########
    iBufferSize=1028 * 32   # size passed to input.read() per chunk when parsing a file object


    ############################################################################
    # Builder.__init__(self):
    #
    # Constructor. Resets the complete parsing state; the same attributes are
    # reset again at the start of every call to "Builder.parse()".
    ############################################################################
    def __init__(self):
        """ Builder.__init__ """
        self.oRoot           = None   # root node of object tree
        self.oRootNode       = None   # first node that became current (result of parse)
        self.oCurrNode       = None   # current node
        self.bInRootTag      = False  # inside root tag?
        self.bInCData        = False  # inside CDATA section?
        self._unknownTagName = None   # first start-tag that the current node did not accept


    ############################################################################
    # Builder.parse(self, input, root=None, bInRootTag=0):
    #
    # Parse a given XML document and build a recursive object tree via event handler.
    #
    # IN:  input      = XML document as string
    #                 = XML document as file object (anything providing "read(size)")
    #      root       = pre-set root node for object tree (prevents the creation of a root
    #                   object, when the first root tag appears in XML-document)
    #                 = None, if no root object is given
    #      bInRootTag = true, if parsing shall start as if the root tag had already been
    #                   seen; in this case "root" is made the current node right away
    #
    # OUT: the node that was current after the first start tag was handled
    #      None, if nothing was parsed
    #
    # RAISES: ParseError, if the parser reports failure via its return value.
    #         NOTE(review): pyexpat itself may raise pyexpat.ExpatError from Parse() on
    #         malformed input instead of returning a false value - confirm which of the
    #         two exceptions callers actually see.
    ############################################################################
    def parse(self, input, root=None, bInRootTag=0):
        """ Builder.parse """

        # prepare builder
        self._unknownTagName  = None
        self.oRoot            = root
        self.oRootNode        = None
        self.oCurrNode        = None
        self.bInRootTag       = bInRootTag
        self.bInCData         = False
        if bInRootTag:
            self.oCurrNode = root

        # create parser object
        p = pyexpat.ParserCreate()

        # connect parser object with handler methods
        p.StartElementHandler = self.OnStartElement
        p.EndElementHandler = self.OnEndElement
        p.CharacterDataHandler = self.OnCharacterData
        p.StartCdataSectionHandler = self.OnStartCData
        p.EndCdataSectionHandler = self.OnEndCData
        p.ProcessingInstructionHandler = self.OnProcessingInstruction
        p.CommentHandler = self.OnComment
        p.StartNamespaceDeclHandler = self.OnStartNamespaceDecl
        p.EndNamespaceDeclHandler = self.OnEndNamespaceDecl

        #### parsing ####
        _globals.writeLog( self, "#### parsing ####")
        if type(input) is str:
            # input is a string! -> parse it in one go and finalize
            rv = p.Parse(input, 1)
        else:
            # input is a file object! -> feed it to the parser chunk by chunk
            while True:
                v = input.read(self.iBufferSize)
                if not v:
                    # End of input ("" as well as b""): finalize the parser, so that a
                    # truncated document is reported exactly like in the string case.
                    # The empty chunk itself is passed on to keep the data type stable.
                    rv = p.Parse(v, 1)
                    break

                rv = p.Parse(v, 0)
                if not rv:
                    break

        # raise parser exception
        if not rv:
            raise ParseError('%s at line %s' % (pyexpat.ErrorString(p.ErrorCode), p.ErrorLineNumber))
        ####

        return self.oRootNode


    ############################################################################
    # Builder._hasObjClass(self, name):
    #
    # Helper: tells, whether the mapping table "dGlobalAttrs" knows the given tag
    # name AND maps it to a python class (entry 'obj_class' is not None).
    #
    # IN:  name = element name (=tag name)
    # OUT: True/False
    ############################################################################
    def _hasObjClass(self, name):
        """ Builder._hasObjClass """
        return name in self.dGlobalAttrs and \
               self.dGlobalAttrs[name]['obj_class'] is not None


    ############################################################################
    # Builder.OnStartElement(self, name, attrs):
    #
    # Handler of XML-Parser:
    # Called at the start of an XML element (resp. on occurrence of an XML start tag).
    # Usually, the occurrence of an XML tag induces the instantiation of a new node object.
    # Therefore, Builder contains a mapping table ("dGlobalAttrs"), that maps XML tags to python
    # classes. The newly created node object is then made current. If no matching class is found
    # for an XML tag, the event handler "xmlOnUnknownStartTag()" is called on the current object.
    #
    # IN: name  = element name (=tag name)
    #     attrs = dictionary of element attributes
    #
    # RAISES: ParseError, if the tag has no class and there is no current object to offer it to.
    ############################################################################
    def OnStartElement(self, name, attrs):
        """ Builder.OnStartElement """
        _globals.writeLog( self, "[Builder.OnStartElement(" + str(name) + ")]")

        name = _globals.unencode( name)
        attrs = _globals.unencode( attrs)

        # handle alias: a tag that names a meta-object but has no entry of its own in the
        # mapping table is treated as 'ZMSCustom'; the original name travels on as 'meta_id'
        if name in self.getMetaobjIds(sort=0) and name not in self.dGlobalAttrs:
            attrs['meta_id'] = name
            name = 'ZMSCustom'

        if self.bInRootTag or \
           self.oRoot is None or \
           (self.oRoot.id == self.getDocumentElement().id and \
            self._hasObjClass(name)):

            _globals.writeLog( self, "[Builder.OnStartElement]: " + \
                "We are inside the XML root tag OR no root object is set" + \
                "-> instanciate node object in any case")

            if self._hasObjClass(name):

                # class defined for tag!
                if self.oCurrNode is None and self.oRoot is not None and self.oRoot.id==self.getDocumentElement().id:
                    self.oCurrNode = self.oRoot

                # NOTE(review): if no current node is available at this point (no root given
                # and first tag of the document), the calls on self.oCurrNode below fail -
                # confirm whether parsing without a pre-set root is a supported use case.

                # create node instance
                _globals.writeBlock( self, "[Builder.OnStartElement]: create new object <" + str(name) + "> in " + str(self.oCurrNode))
                newNode = None
                if 'id_fix' in attrs:
                    # fixed id: re-use an object of that id, if the current node has one
                    node_id = attrs.get( 'id_fix')
                    newNode = getattr(self.oCurrNode,node_id,None)
                elif 'id_prefix' in attrs:
                    prefix = attrs.get( 'id_prefix')
                    node_id = self.oCurrNode.getNewId(prefix)
                elif 'id' in attrs:
                    # only the prefix of the given id is kept, the id itself is generated anew
                    node_id = attrs.get( 'id')
                    prefix = _globals.id_prefix(node_id)
                    node_id = self.oCurrNode.getNewId(prefix)
                else:
                    node_id = self.oCurrNode.getNewId()
                sort_id = self.oCurrNode.getNewSortId()

                ##### Create ####
                if newNode is None:
                    newNode = self.dGlobalAttrs[name]['obj_class'](node_id,sort_id)
                    self.oCurrNode._setObject(newNode.id, newNode)
                    # fetch the object again via its container
                    newNode = getattr(self.oCurrNode,newNode.id)

                ##### Identify Content-Object ####
                if newNode.meta_type == 'ZMSCustom':
                    meta_id = attrs.get( 'meta_id')
                    if meta_id not in self.getMetaobjIds( sort=0):
                        _globals.writeError(newNode,'[_builder.OnStartElement]: no object-definition available ('+str(meta_id)+')!')
                    newNode.meta_id = meta_id

                ##### Object State ####
                newNode.initializeWorkVersion()
                obj_attrs = newNode.getObjAttrs()
                langs = self.getLangIds()
                for lang in langs:
                    req = {'lang':lang,'preview':'preview'}
                    ##### Object State ####
                    newNode.setObjStateNew(req)
                    ##### Init Properties ####
                    if 'active' in obj_attrs.keys():
                        newNode.setObjProperty('active',1,lang)
                    if len( langs) == 1:
                        newNode.setObjProperty('change_uid','xml',lang)
                        newNode.setObjProperty('change_dt',time.time(),lang)

                _globals.writeLog( self, "[Builder.OnStartElement]: object with id " + str(newNode.id) + " of class " + str(newNode.__class__) + " created in " + str(self.oCurrNode.__class__))

                if self.oRoot is None:   # root object set?
                    self.oRoot = newNode # -> set root node

                # notify new node
                newNode.xmlOnStartElement(name, attrs, self.oCurrNode, self.oRoot)

                # set new node as current node
                self.oCurrNode = newNode

            else:
                # no class defined for tag
                # -> offer to current object
                if self.oCurrNode is None:
                    raise ParseError("Unknown tag (" + name + "): no current object available!")  # no current object available!

                if not self.oCurrNode.xmlOnUnknownStartTag(name, attrs):
                    # current object did not accept tag! This is only logged and
                    # deliberately not treated as a parse error.
                    if self._unknownTagName is None:
                        self._unknownTagName = name
                    _globals.writeLog( self, "[Builder.OnStartElement]: Unknown start-tag (" + name + "): current object did not accept tag!")

        else:
            _globals.writeLog( self, "[Builder.OnStartElement]: " +
                "we have encountered the XML root tag and a root object is predefined" + \
                "-> simply notify root object")
            self.oRoot.xmlOnStartElement(name, attrs, None, self.oRoot)

            # set root node as current node
            self.oCurrNode = self.oRoot

        # we are inside the XML root now!
        self.bInRootTag=1
        if self.oRootNode is None:
            # remember the first current node: this is what "parse()" returns
            self.oRootNode = self.oCurrNode


    ############################################################################
    # Builder.OnEndElement(self, name):
    #
    # Handler of XML-Parser:
    # Called at the end of an XML element (resp. on occurrence of an XML end tag).
    #
    # IN: name  = element name (=tag name)
    #
    # RAISES: ParseError, if there is no current node, or if the end tag belongs to a
    #         known class but does not match the (also known) tag of the current node.
    ############################################################################
    def OnEndElement(self, name):
        """ Builder.OnEndElement """
        _globals.writeLog( self, "[Builder.OnEndElement(" + str(name) + ")]")

        # do we have a current node?
        if self.oCurrNode is None:
            raise ParseError("Unmatching end tag (" + name + ")")

        # is this the right tag name?
        if name==self.oCurrNode.xmlGetTagName() or name==self.oCurrNode.meta_type:

            ##### VersionManager ####
            self.oCurrNode.resetObjStates()

            # notify current node
            self.oCurrNode.xmlOnEndElement()

            # set parent node as current node
            self.oCurrNode = self.oCurrNode.xmlGetParent()

        elif self._hasObjClass(name):
            if self._hasObjClass(self.oCurrNode.xmlGetTagName()):
                # tag name is known, but not valid at this place!
                raise ParseError("Unmatching end tag (/" + name + "), expected(/" + self.oCurrNode.xmlGetTagName() + ")")

        else:
            # tag name is unknown -> offer it to current object
            if not self.oCurrNode.xmlOnUnknownEndTag(name):
                # current object did not accept tag! This is only logged and
                # deliberately not treated as a parse error.
                if name == self._unknownTagName:
                    self._unknownTagName = None
                _globals.writeLog( self, "[Builder.OnEndElement]: Unknown end-tag (/" + name + ")")


    ############################################################################
    # Builder.OnCharacterData(self, data):
    #
    # Handler of XML-Parser:
    # Called after plain character data was parsed. Forwards the character data to the current
    # node. The attribute "bInCData" determines, whether the character data is nested in a
    # CDATA block.
    #
    # IN: data = character data string
    #
    # RAISES: ParseError, if there is no current node to receive the data.
    ############################################################################
    def OnCharacterData(self, data):
        """ Builder.OnCharacterData """
        _globals.writeLog( self, "[Builder.OnCharacterData]")

        # do we have a current node?
        if self.oCurrNode is None:
            raise ParseError("Unexpected character data found")

        # notify current node
        self.oCurrNode.xmlOnCharacterData(data, self.bInCData)


    ############################################################################
    # Builder.OnStartCData(self):
    #
    # Handler of XML-Parser:
    # Called at the start of a CDATA block (resp. on occurrence of "<![CDATA[").
    ############################################################################
    def OnStartCData(self):
        """ Builder.OnStartCData """
        self.bInCData=1


    ############################################################################
    # Builder.OnEndCData(self):
    #
    # Handler of XML-Parser:
    # Called at the end of a CDATA block (resp. on occurrence of "]]>").
    ############################################################################
    def OnEndCData(self):
        """ Builder.OnEndCData """
        self.bInCData=0


    ############################################################################
    # Builder.OnProcessingInstruction(self, target, data):
    #
    # Handler of XML-Parser:
    # Called on occurrence of a processing instruction.
    #
    # IN: target = target (processing instruction)
    #     data   = data of the processing instruction
    ############################################################################
    def OnProcessingInstruction(self, target, data):
        """ Builder.OnProcessingInstruction """
        pass  # ignored


    ############################################################################
    # Builder.OnComment(self, data):
    #
    # Handler of XML-Parser:
    # Called on occurrence of a comment.
    #
    # IN: data = comment string
    ############################################################################
    def OnComment(self, data):
        """ Builder.OnComment """
        pass  # ignored


    ############################################################################
    # Builder.OnStartNamespaceDecl(self, prefix, uri):
    #
    # Handler of XML-Parser:
    # Called at the start of a namespace declaration.
    #
    # IN: prefix = prefix of namespace
    #     uri    = namespace identifier
    ############################################################################
    def OnStartNamespaceDecl(self, prefix, uri):
        """ Builder.OnStartNamespaceDecl """
        pass  # ignored


    ############################################################################
    # Builder.OnEndNamespaceDecl(self, prefix):
    #
    # Handler of XML-Parser:
    # Called at the end of a namespace declaration.
    #
    # IN: prefix = prefix of namespace
    ############################################################################
    def OnEndNamespaceDecl(self, prefix):
        """ Builder.OnEndNamespaceDecl """
        pass  # ignored
431
432################################################################################
Note: See TracBrowser for help on using the repository browser.