source: trunk/CifFile/YappsStarParser_1_1.py @ 469

Last change on this file since 469 was 469, checked in by toby, 11 years ago

rework phase import

  • Property svn:executable set to *
File size: 14.2 KB
Line 
1from StarFile import *
2from types import *
3import copy
4# An alternative specification for the Cif Parser, based on Yapps2
5# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
6#
7# helper code: we define our match tokens
# repr() of the most recent value passed through monitor(); kept as a
# module-level global so it can be inspected after a parse.
lastval = ''


def monitor(location, value):
    """Record ``repr(value)`` in the global ``lastval`` and return ``value``.

    The function is a pass-through hook used inside grammar actions: it
    never alters ``value``.

    Parameters:
        location: label identifying the call site; accepted for tracing
            purposes but not otherwise used.
        value: any object.

    Returns:
        ``value`` itself, unchanged.
    """
    global lastval
    # repr() is the portable spelling of the old backtick operator.
    lastval = repr(value)
    return value
14
15# Strip extras gets rid of leading and trailing whitespace, and
16# semicolons.
def stripextras(value):
    """Strip delimiters and surrounding whitespace from a scanned data value.

    Two shapes of input are handled:

    * Semicolon-delimited text: ``value`` starts with optional whitespace,
      a line terminator and an opening ``;``.  Everything up to and
      including that ``;`` is removed, as are the final two characters
      (presumably the last line terminator and the closing ``;`` as
      assembled by ``sc_lines_of_text`` -- verify against the caller).
      A trailing ``'\\r'`` left over from a ``'\\r\\n'`` terminator is
      removed as well.
    * Anything else: leading whitespace is removed and the remainder is
      passed to ``stripstring`` to drop matching surrounding quotes.

    Parameters:
        value: the raw string produced by the scanner.

    Returns:
        The cleaned string.
    """
    import re

    whitespace = "\n\r\f \t\v"
    semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
    cut = semis.match(value)
    if cut is None:
        # Not a semicolon-delimited field: only leading whitespace and
        # enclosing quotes need to go.
        return stripstring(value.lstrip(whitespace))
    # Semicolon-delimited field: drop the opening delimiter and the final
    # two characters.
    nv = value[cut.end():len(value) - 2]
    if nv.endswith('\r'):
        nv = nv[:-1]
    return nv
35
36# helper function to get rid of inverted commas etc.
37
def stripstring(value):
    """Remove one pair of matching quotes enclosing ``value``, if present.

    When the first and last characters are both ``'`` or both ``"`` they
    are dropped; otherwise ``value`` is returned untouched.  Falsy input
    (empty string, ``None``) is returned as-is.
    """
    if not value:
        return value
    first, last = value[0], value[-1]
    for quote in ('\'', '"'):
        if first == quote and last == quote:
            return value[1:-1]
    return value
45
46# helper function to populate a nested LoopBlock structure given an
47# empty structure together with listed values.   The values are
48# organised into a list of lists, where each time 'stop' was
49# encountered one list terminates and a new one starts.
50# For a correctly constructed loop, the final 'popout' will pop out
51# of the iteration completely and raise a StopIteration error.
52#
53# Note that there may be an empty list at the very end of our itemlists,
54# so we remove that if necessary.
55#
56# We optimise for CIF files by loading differently if we have a flat loop
57
def makeloop(loopstructure, itemlists):
    """Fill an empty loop structure with the values read for it.

    Parameters:
        loopstructure: object exposing ``dimension``, ``loops`` and the
            iterator factories ``fast_load_iter()`` / ``load_iter()``.
            Each step of the iterator must yield a ``(loop, target)``
            pair where ``target`` supports ``append``.
        itemlists: list of lists of values; a new inner list starts each
            time a ``stop_`` token was seen.  A trailing empty inner list
            is removed in place.

    Returns:
        ``loopstructure``, after its targets have been appended to.

    Raises:
        StopIteration: if the storage iterator runs out while values are
            still being distributed (a diagnostic line is printed first).
    """
    # Guard against a completely empty itemlists before peeking at the end.
    if itemlists and itemlists[-1] == []:
        itemlists.pop()
    # Flat, one-dimensional loops take the faster iterator.
    if loopstructure.dimension == 1 and loopstructure.loops == []:
        storage_iter = loopstructure.fast_load_iter()
    else:
        storage_iter = loopstructure.load_iter()
    nowloop = loopstructure
    for datalist in itemlists:
        for datavalue in datalist:
            try:
                nowloop, target = next(storage_iter)
            except StopIteration:
                print("StopIter at %s/%s" % (datavalue, datalist))
                raise
            target.append(datavalue)
        # The end of each inner list is equivalent to a stop_ token: flag
        # the current loop and advance the iterator once so it can act on
        # the flag.  The pair yielded by that step is discarded.
        nowloop.popout = True
        nowloop, _blank = next(storage_iter)
    return loopstructure
83
84# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    """Return an empty list wrapped in ``nestlevel - 1`` enclosing lists.

    ``make_empty(1)`` is ``[]``, ``make_empty(2)`` is ``[[]]`` and so on;
    levels below 1 also give ``[]``.
    """
    nested = []
    for _ in range(nestlevel - 1):
        nested = [nested]
    return nested
90
91# this function updates a dictionary first checking for name collisions,
92# which imply that the CIF is invalid.  We need case insensitivity for
93# names.
94
95# Unfortunately we cannot check loop item contents against non-loop contents
96# in a non-messy way during parsing, as we may not have easy access to previous
97# key value pairs in the context of our call (unlike our built-in access to all
98# previous loops).
99# For this reason, we don't waste time checking looped items against non-looped
100# names during parsing of a data block.  This would only match a subset of the
101# final items.   We do check against ordinary items, however.
102#
103# Note the following situations:
104# (1) new_dict is empty -> we have just added a loop; do no checking
105# (2) new_dict is not empty -> we have some new key-value pairs
106#
def cif_update(old_dict, new_dict, loops):
    """Merge ``new_dict`` into ``old_dict``, rejecting duplicate names.

    Names are compared case-insensitively, both against the keys already
    in ``old_dict`` and against the other keys of ``new_dict``.

    Parameters:
        old_dict: mapping updated in place.
        new_dict: mapping of new key/value pairs; when empty nothing is
            checked or changed.
        loops: unused; kept so existing callers keep working.

    Raises:
        CifError: if a key of ``new_dict`` duplicates an existing or
            already-merged key, ignoring case.  Keys merged before the
            duplicate was found remain in ``old_dict``.
    """
    if not new_dict:    # nothing to merge (e.g. a loop was just added)
        return
    seen = set(key.lower() for key in old_dict.keys())
    for new_key in new_dict.keys():
        folded = new_key.lower()
        if folded in seen:
            raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
        # Remember the key so later entries of new_dict are checked
        # against it too.
        seen.add(folded)
        old_dict[new_key] = new_dict[new_key]
115#
116# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array, new_name):
    """Append ``new_name`` to ``order_array`` and hand ``new_name`` back.

    Returning the name lets the call be embedded in a single expression.
    """
    order_array.append(new_name)
    return new_name
120
121
122# Begin -- grammar generated by Yapps
123import sys, re
124import yapps3_compiled_rt as yappsrt
125
class StarParserScanner(yappsrt.Scanner):
    """Yapps scanner carrying the token definitions of the STAR grammar.

    ``patterns`` pairs each token name with its compiled regular
    expression.  The first two entries (whitespace and ``#`` comments) are
    named by their own pattern text and are the tokens handed to the base
    class as the ignore list.
    """

    patterns = [
        ('([ \t\n\r](?!;))|[ \t]', re.compile('([ \t\n\r](?!;))|[ \t]')),
        ('(#.*[\n\r](?!;))|(#.*)', re.compile('(#.*[\n\r](?!;))|(#.*)')),
        ('LBLOCK', re.compile('(L|l)(O|o)(O|o)(P|p)_')),
        ('GLOBAL', re.compile('(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_')),
        ('STOP', re.compile('(S|s)(T|t)(O|o)(P|p)_')),
        ('save_heading', re.compile('(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        ('save_end', re.compile('(S|s)(A|a)(V|v)(E|e)_')),
        ('data_name', re.compile('_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        ('data_heading', re.compile('(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        ('start_sc_line', re.compile('(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+')),
        ('sc_line_of_text', re.compile('[^;\r\n]([^\r\n])*(\r\n|\r|\n)+')),
        ('end_sc_line', re.compile(';')),
        ('data_value_1', re.compile('((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\(\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+')),
        ('END', re.compile('$')),
    ]

    def __init__(self, str):
        """Set up the scanner over the input text ``str``.

        ``None`` is passed where the base class takes its patterns;
        presumably this makes it use the precompiled class-level
        ``patterns`` -- confirm against the yapps runtime.
        """
        ignored_tokens = ['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)']
        yappsrt.Scanner.__init__(self, None, ignored_tokens, str)
145
class StarParser(yappsrt.Parser):
    """Recursive-descent parser for STAR/CIF text, generated by Yapps.

    Each method implements one grammar rule.  ``_parent`` is the context
    of the calling rule and is only used to build the ``Context`` chain
    attached to syntax errors.
    """
    Context = yappsrt.Context

    def input(self, _parent=None):
        """Parse zero or more data blocks followed by end of input.

        Returns:
            A ``StarFile`` with one block added per ``data_`` heading
            (empty when the input holds no data block).

        Raises:
            yappsrt.SyntaxError: if the token after the last block is
                neither END nor another data heading.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(_context)
            allblocks = StarFile()
            allblocks.NewBlock(dblock[0], blockcontents=dblock[1], fix=False, replace=False)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(_context)
                allblocks.NewBlock(dblock[0], blockcontents=monitor('input', dblock[1]), fix=False, replace=False)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            self._scan('END')
        else:  # == 'END'
            self._scan('END')
            allblocks = StarFile()
        return allblocks

    def dblock(self, _parent=None):
        """Parse one data block: a heading, then data items and save frames.

        Returns:
            ``(heading, block)`` where ``heading`` is the heading token
            without its five-character ``data_`` prefix and ``block`` is
            the populated ``StarBlock``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:]
        thisblock = StarBlock(overwrite=False)
        while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                self.dataseq(thisblock, _context)
            else:  # == 'save_heading'
                save_frame = self.save_frame(_context)
                thisblock["saves"].NewBlock(save_frame[0], save_frame[1], fix=False, replace=True)
        if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        return (heading, monitor('dblock', thisblock))

    def dataseq(self, starblock, _parent=None):
        """Parse one or more consecutive data items into ``starblock``."""
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        """Parse a single loop or name/value pair and add it to ``currentblock``."""
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            currentblock.insert_loop(top_loop, audit=False)
        else:  # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddLoopItem(datakvpair[:2], precheck=True)

    def datakvpair(self, _parent=None):
        """Parse a data name followed by its value; return ``[name, value]``."""
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name, data_value]

    def data_value(self, _parent=None):
        """Parse one data value and return it with its delimiters removed.

        Single-token values go through ``stripstring``; semicolon-delimited
        text goes through ``stripextras``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else:  # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value', thisval)

    def sc_lines_of_text(self, _parent=None):
        """Parse a semicolon-delimited text field.

        Returns:
            The raw text of the opening line, every following line and the
            closing ``;`` concatenated, delimiters included.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = start_sc_line
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines = lines + sc_line_of_text
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        return monitor('sc_line_of_text', lines + end_sc_line)

    def top_loop(self, _parent=None):
        """Parse a complete ``loop_`` construct and return the filled loop."""
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return makeloop(loopfield, loopvalues)

    def loopfield(self, _parent=None):
        """Parse the header of a loop: data names and nested ``loop_`` tokens.

        Returns:
            The outermost ``LoopBlock``.  Every data name seen is given an
            empty list in the loop that was current at that point, and
            every nested ``loop_`` inserts a new ``LoopBlock`` one
            dimension deeper into the current loop.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop = LoopBlock(dimension=1, overwrite=False)
        # Stack of open loops, outermost first; its length is the current
        # nesting dimension.  A stack (rather than a single remembered
        # parent) lets consecutive stop_ tokens unwind one level each.
        loopstack = [toploop]
        while self._peek('data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line') not in ['data_value_1', 'start_sc_line']:
            _token = self._peek('data_name', 'LBLOCK', 'STOP')
            if _token == 'data_name':
                data_name = self._scan('data_name')
                loopstack[-1][data_name] = []
            elif _token == 'LBLOCK':
                self._scan('LBLOCK')
                newloop = LoopBlock(dimension=len(loopstack) + 1, overwrite=False)
                loopstack[-1].insert_loop(newloop, audit=False)
                loopstack.append(newloop)
            else:  # == 'STOP'
                self._scan('STOP')
                # Never unwind past the outermost loop: a surplus stop_
                # leaves the top loop current instead of leaving no loop.
                if len(loopstack) > 1:
                    loopstack.pop()
        if self._peek() not in ['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']))
        return toploop

    def loopvalues(self, _parent=None):
        """Parse the values of a loop.

        Returns:
            A list of lists of values; a new (initially empty) inner list
            is started each time a ``stop_`` token is read.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop = [[data_value]]
        while self._peek('data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'STOP', 'start_sc_line']:
            _token = self._peek('data_value_1', 'STOP', 'start_sc_line')
            if _token != 'STOP':
                data_value = self.data_value(_context)
                dataloop[-1].append(monitor('loopval', data_value))
            else:  # == 'STOP'
                self._scan('STOP')
                dataloop.append([])
        if self._peek() not in ['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, _parent=None):
        """Parse a save frame: heading, data items, closing ``save_``.

        Returns:
            ``(name, block)`` where ``name`` is the heading token without
            its five-character ``save_`` prefix and ``block`` is the
            populated ``StarBlock``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:]
        savebody = StarBlock(overwrite=False)
        while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            self.dataseq(savebody, _context)
        if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']))
        self._scan('save_end')
        return (savehead, monitor('save_frame', savebody))
278
279
def parse(rule, text):
    """Run grammar rule ``rule`` over ``text`` and return the result.

    A ``StarParser`` is built over a ``StarParserScanner`` for ``text`` and
    handed, with the rule name, to ``yappsrt.wrap_error_reporter``.
    """
    scanner = StarParserScanner(text)
    parser = StarParser(scanner)
    return yappsrt.wrap_error_reporter(parser, rule)
283
284# End -- grammar generated by Yapps
285
286
Note: See TracBrowser for help on using the repository browser.