1 | from StarFile import * |
---|
2 | from types import * |
---|
3 | import copy |
---|
4 | # An alternative specification for the Cif Parser, based on Yapps2 |
---|
5 | # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) |
---|
6 | # |
---|
7 | # helper code: we define our match tokens |
---|
# repr() of the value most recently passed through monitor(); handy when
# working out how far a parse got before it failed.
lastval = ''


def monitor(location, value):
    """Remember ``repr(value)`` in the module-level ``lastval``; return value.

    The function is a pass-through used inside grammar actions: it hands
    ``value`` back unchanged so it can wrap any expression.

    :param location: label of the call site.  It is not used by the code
        below (only a debugging print, now removed, referred to it).
    :param value: any object.
    :returns: ``value`` itself, unmodified.
    """
    global lastval
    # repr() is the portable spelling of the old backtick operator.
    lastval = repr(value)
    return value
---|
14 | |
---|
15 | # Strip extras gets rid of leading and trailing whitespace, and |
---|
16 | # semicolons. |
---|
import re

# Compiled once at import time instead of on every call.
# Any run of whitespace (possibly empty) at the start of the value.
_LEADING_WHITESPACE = re.compile("[\n\r\f \t\v]*")
# Optional whitespace, a line terminator, then the opening semicolon of a
# semicolon-delimited text field.
_SEMICOLON_OPENER = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")


def stripextras(value):
    """Return the payload of a raw data value with its delimiters removed.

    Two cases are handled:

    * The value starts (after optional whitespace) with a line terminator
      followed by a semicolon.  Everything up to and including that
      semicolon is dropped, the final two characters (the closing
      terminator and semicolon) are dropped, and one trailing carriage
      return, if present, is dropped as well.
    * Otherwise leading whitespace is removed and the remainder is passed
      to ``stripstring`` to take off a surrounding pair of quotes.

    :param value: raw text of the data value.
    :returns: the stripped string.
    """
    opener = _SEMICOLON_OPENER.match(value)
    if opener:
        # We have a semicolon-delimited string.
        body = value[opener.end():len(value) - 2]
        # endswith() copes with an empty body, so no IndexError guard is
        # needed for an empty data value.
        if body.endswith('\r'):
            body = body[:-1]
        return body
    # The pattern accepts the empty string, so this match always succeeds.
    leading = _LEADING_WHITESPACE.match(value)
    return stripstring(value[leading.end():])
---|
35 | |
---|
36 | # helper function to get rid of inverted commas etc. |
---|
37 | |
---|
def stripstring(value):
    """Remove one matching pair of surrounding quotes from ``value``.

    If ``value`` begins and ends with the same quote character (single or
    double) the text between them is returned.  Anything else, including
    an empty or false value, is returned unchanged.

    A value must be at least two characters long to count as quoted: a
    lone quote character is its own first and last character, but it is
    not a quoted string and is returned as is.

    :param value: string to inspect (may be empty or None).
    :returns: ``value`` without its enclosing quotes, or ``value`` itself.
    """
    if value and len(value) >= 2:
        opening = value[0]
        if opening in ('\'', '"') and value[-1] == opening:
            return value[1:-1]
    return value
---|
45 | |
---|
46 | # helper function to populate a nested LoopBlock structure given an |
---|
47 | # empty structure together with listed values. The values are |
---|
48 | # organised into a list of lists, where each time 'stop' was |
---|
49 | # encountered one list terminates and a new one starts. |
---|
50 | # For a correctly constructed loop, the final 'popout' will pop out |
---|
51 | # of the iteration completely and raise a StopIteration error. |
---|
52 | # |
---|
53 | # Note that there may be an empty list at the very end of our itemlists, |
---|
54 | # so we remove that if necessary. |
---|
55 | # |
---|
56 | # We optimise for CIF files by loading differently if we have a flat loop |
---|
57 | |
---|
def makeloop(loopstructure, itemlists):
    """Distribute parsed loop values into an empty loop structure.

    ``itemlists`` is a list of lists of values; each inner list is filled
    in order, and the end of an inner list is treated like a stop_ token.
    An empty final list is removed first (``itemlists`` is modified in
    place when that happens).  An entirely empty ``itemlists`` leaves the
    structure untouched.

    Values are stored by drawing ``(loop, target)`` pairs from an iterator
    supplied by ``loopstructure`` and appending each value to ``target``.
    ``fast_load_iter()`` is used when the structure has ``dimension == 1``
    and no entries in ``loops``; ``load_iter()`` is used otherwise.

    :param loopstructure: object providing ``dimension``, ``loops``,
        ``fast_load_iter()`` and ``load_iter()``; the loops it yields get
        their ``popout`` attribute set to True at the end of each list.
    :param itemlists: list of lists of data values.
    :returns: ``loopstructure`` (the same object, now populated).
    :raises StopIteration: if the iterator runs out while values remain;
        a diagnostic line is printed before the exception is re-raised.
    """
    # A trailing stop_ leaves an empty list at the end; discard it.
    if itemlists and itemlists[-1] == []:
        itemlists.pop(-1)
    if loopstructure.dimension == 1 and loopstructure.loops == []:
        storage_iter = loopstructure.fast_load_iter()
    else:
        storage_iter = loopstructure.load_iter()
    nowloop = loopstructure
    for datalist in itemlists:
        for datavalue in datalist:
            try:
                nowloop, target = next(storage_iter)
            except StopIteration:
                # Single-argument form prints identically on Python 2 and 3.
                print("StopIter at %s/%s" % (datavalue, datalist))
                # Bare raise keeps the traceback of the original failure.
                raise
            target.append(datavalue)
        # The end of each list is the same as a stop_ token.
        nowloop.popout = True
        # Advance once so the iterator acts on popout; the pair is unused.
        nowloop, blank = next(storage_iter)
    return loopstructure
---|
83 | |
---|
84 | # return an object with the appropriate amount of nesting |
---|
def make_empty(nestlevel):
    """Build an empty list wrapped in ``nestlevel - 1`` enclosing lists.

    ``make_empty(1)`` gives ``[]``, ``make_empty(2)`` gives ``[[]]`` and so
    on; a level below 1 also gives ``[]``.
    """
    nested = []
    depth = 1
    while depth < nestlevel:
        nested = [nested]
        depth += 1
    return nested
---|
90 | |
---|
91 | # this function updates a dictionary first checking for name collisions, |
---|
92 | # which imply that the CIF is invalid. We need case insensitivity for |
---|
93 | # names. |
---|
94 | |
---|
95 | # Unfortunately we cannot check loop item contents against non-loop contents |
---|
96 | # in a non-messy way during parsing, as we may not have easy access to previous |
---|
97 | # key value pairs in the context of our call (unlike our built-in access to all |
---|
98 | # previous loops). |
---|
99 | # For this reason, we don't waste time checking looped items against non-looped |
---|
100 | # names during parsing of a data block. This would only match a subset of the |
---|
101 | # final items. We do check against ordinary items, however. |
---|
102 | # |
---|
103 | # Note the following situations: |
---|
104 | # (1) new_dict is empty -> we have just added a loop; do no checking |
---|
105 | # (2) new_dict is not empty -> we have some new key-value pairs |
---|
106 | # |
---|
def cif_update(old_dict, new_dict, loops):
    """Copy ``new_dict`` into ``old_dict``, rejecting case-insensitive clashes.

    Each key of ``new_dict`` is compared, lower-cased, with the lower-cased
    keys already in ``old_dict`` and with the keys added earlier in this
    same call.  Keys are copied one at a time, so entries processed before
    a clash is found remain in ``old_dict``.  An empty ``new_dict`` leaves
    ``old_dict`` unchanged.

    :param old_dict: dictionary updated in place; keys must have ``lower()``.
    :param new_dict: key-value pairs to add.
    :param loops: not used by this function.
    :raises CifError: if a new key matches an existing or just-added key
        when case is ignored.
    """
    # A set gives constant-time lookups and, unlike a map() result, can be
    # tested repeatedly on any Python version.
    seen = set(key.lower() for key in old_dict.keys())
    for new_key in new_dict.keys():
        folded = new_key.lower()
        if folded in seen:
            raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
        # Record the key so a later one differing only by case is caught.
        seen.add(folded)
        old_dict[new_key] = new_dict[new_key]
---|
115 | # |
---|
116 | # this takes two lines, so we couldn't fit it into a one line execution statement... |
---|
def order_update(order_array, new_name):
    """Append ``new_name`` to ``order_array`` and hand the name back.

    Returning the name lets the call be used inline as an expression
    while still recording the order in which names were seen.
    """
    record = order_array.append
    record(new_name)
    return new_name
---|
120 | |
---|
121 | |
---|
122 | # Begin -- grammar generated by Yapps |
---|
123 | import sys, re |
---|
124 | import yapps3_compiled_rt as yappsrt |
---|
125 | |
---|
class StarParserScanner(yappsrt.Scanner):
    """Token table for the STAR grammar, in the form yappsrt.Scanner expects.

    This class is part of the Yapps-generated section of the file; the
    regular expressions below come from the grammar and are best changed
    there rather than edited by hand.
    """
    # Each entry is (token name, compiled regular expression).
    # NOTE(review): whether the order of entries affects which token wins
    # depends on yappsrt.Scanner -- confirm before reordering.
    patterns = [
        # Whitespace: a blank/tab/newline/CR not followed by ';', or a blank/tab.
        ('([ \t\n\r](?!;))|[ \t]', re.compile('([ \t\n\r](?!;))|[ \t]')),
        # Comment: '#' to end of line, taking the line end only when no ';' follows.
        ('(#.*[\n\r](?!;))|(#.*)', re.compile('(#.*[\n\r](?!;))|(#.*)')),
        # Reserved words, matched in any mix of upper and lower case.
        ('LBLOCK', re.compile('(L|l)(O|o)(O|o)(P|p)_')),
        ('GLOBAL', re.compile('(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_')),
        ('STOP', re.compile('(S|s)(T|t)(O|o)(P|p)_')),
        # save_ followed by at least one name character opens a save frame...
        ('save_heading', re.compile('(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        # ...while a bare save_ closes it.
        ('save_end', re.compile('(S|s)(A|a)(V|v)(E|e)_')),
        # Data names start with an underscore.
        ('data_name', re.compile('_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        # data_ followed by at least one name character.
        ('data_heading', re.compile('(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        # Semicolon text field: opening line (line end, ';', rest of line),
        # body lines that do not start with ';', and the closing ';'.
        ('start_sc_line', re.compile('(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+')),
        ('sc_line_of_text', re.compile('[^;\r\n]([^\r\n])*(\r\n|\r|\n)+')),
        ('end_sc_line', re.compile(';')),
        # Ordinary value: an unquoted word that is not one of the reserved
        # save_/global_/stop_/data_ forms and does not start with one of
        # the excluded characters, or a single- or double-quoted string in
        # which a quote followed by non-whitespace does not end the string.
        ('data_value_1', re.compile('((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\(\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+')),
        # End of input.
        ('END', re.compile('$')),
    ]
    def __init__(self, str):
        """Set up the base scanner to read the text ``str``.

        The list argument names the first two entries of ``patterns`` (the
        whitespace and comment patterns).
        NOTE(review): presumably these are the tokens the base scanner
        skips -- confirm against yappsrt.Scanner.
        """
        yappsrt.Scanner.__init__(self,None,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],str)
---|
145 | |
---|
class StarParser(yappsrt.Parser):
    """Recursive-descent parser for STAR files, one method per grammar rule.

    The methods were generated by Yapps.  Each one builds a ``Context`` for
    error reporting, looks ahead with ``self._peek`` and consumes tokens
    with ``self._scan``.  The only hand change is in ``loopfield``, which
    now tracks enclosing loops with a stack; mirror that change in the
    grammar file if the parser is ever regenerated.
    """
    Context = yappsrt.Context

    def input(self, _parent=None):
        """Parse a whole file: zero or more data blocks, then END.

        :returns: a ``StarFile`` holding one block per data block parsed
            (empty when the input contains no data heading).
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(_context)
            allblocks = StarFile()
            allblocks.NewBlock(dblock[0],blockcontents=dblock[1],fix=False,replace=False)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(_context)
                allblocks.NewBlock(dblock[0],blockcontents=monitor('input',dblock[1]),fix=False,replace=False)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = StarFile()
        return allblocks

    def dblock(self, _parent=None):
        """Parse one data block: a data heading, then data and save frames.

        :returns: ``(heading, block)`` where ``heading`` is the heading
            token minus its first five characters (the ``data_`` prefix)
            and ``block`` is the populated ``StarBlock``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:]
        thisblock = StarBlock(overwrite=False)
        while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisblock, _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(_context)
                thisblock["saves"].NewBlock(save_frame[0],save_frame[1],fix=False,replace=True)
        if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        return (heading,monitor('dblock',thisblock))

    def dataseq(self, starblock, _parent=None):
        """Parse one or more data items, storing each into ``starblock``."""
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        """Parse a single loop or a single name-value pair into ``currentblock``.

        A loop is added with ``insert_loop``; a name-value pair is added
        with ``AddLoopItem``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            currentblock.insert_loop(top_loop,audit=False)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddLoopItem(datakvpair[:2],precheck=True)

    def datakvpair(self, _parent=None):
        """Parse a data name followed by its value.

        :returns: the two-element list ``[data_name, data_value]``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        """Parse one value, either an ordinary token or a semicolon text field.

        Ordinary tokens go through ``stripstring``; semicolon-delimited
        text goes through ``stripextras``.

        :returns: the cleaned value.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else: # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value',thisval)

    def sc_lines_of_text(self, _parent=None):
        """Parse a semicolon-delimited text field.

        :returns: the opening line, every following text line and the
            closing semicolon concatenated into one string, delimiters
            still included.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = start_sc_line
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines = lines+sc_line_of_text
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        return monitor('sc_line_of_text',lines+end_sc_line)

    def top_loop(self, _parent=None):
        """Parse a complete loop: the loop keyword, its header and its values.

        :returns: the result of ``makeloop`` applied to the parsed header
            structure and the parsed value lists.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return makeloop(loopfield,loopvalues)

    def loopfield(self, _parent=None):
        """Parse a loop header into an empty, possibly nested, ``LoopBlock``.

        Data names are added to the current loop with an empty list as
        value.  A nested loop keyword creates a ``LoopBlock`` one dimension
        deeper, inserts it into the current loop and makes it current.  A
        stop token returns to the loop that encloses the current one.

        Enclosing loops are kept on a stack so that several stop tokens in
        a row climb one level each; remembering only a single parent left
        the second stop pointing at the same loop as the first.  A stop
        token with nothing left to return to leaves the current loop
        unchanged.

        :returns: the outermost ``LoopBlock``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop = LoopBlock(dimension=1,overwrite=False)
        curloop = toploop
        enclosing = []      # loops we have descended from, innermost last
        dim = 1
        while self._peek('data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line') not in ['data_value_1', 'start_sc_line']:
            _token = self._peek('data_name', 'LBLOCK', 'STOP')
            if _token == 'data_name':
                data_name = self._scan('data_name')
                curloop[data_name]=[]
            elif _token == 'LBLOCK':
                LBLOCK = self._scan('LBLOCK')
                dim = dim+1
                newloop = LoopBlock(dimension=dim,overwrite=False)
                enclosing.append(curloop)
                curloop.insert_loop(newloop,audit=False)
                curloop = newloop
            else: # == 'STOP'
                STOP = self._scan('STOP')
                if enclosing:
                    curloop = enclosing.pop()
                dim = dim-1
        if self._peek() not in ['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']))
        return toploop

    def loopvalues(self, _parent=None):
        """Parse the values of a loop into a list of lists.

        At least one value is required.  Values are appended to the last
        inner list; each stop token starts a new, empty inner list.

        :returns: the list of value lists (the last one may be empty).
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[[data_value]]
        while self._peek('data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'STOP', 'start_sc_line']:
            _token = self._peek('data_value_1', 'STOP', 'start_sc_line')
            if _token != 'STOP':
                data_value = self.data_value(_context)
                dataloop[-1].append(monitor('loopval',data_value))
            else: # == 'STOP'
                STOP = self._scan('STOP')
                dataloop.append([])
        if self._peek() not in ['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, _parent=None):
        """Parse a save frame: heading, data items, closing save token.

        :returns: ``(name, body)`` where ``name`` is the heading token
            minus its first five characters (the ``save_`` prefix) and
            ``body`` is the populated ``StarBlock``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:]
        savebody = StarBlock(overwrite=False)
        while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            dataseq = self.dataseq(savebody, _context)
        if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return (savehead,monitor('save_frame',savebody))
---|
278 | |
---|
279 | |
---|
def parse(rule, text):
    """Run grammar rule ``rule`` over ``text`` and return what it produces.

    A scanner is built over ``text``, a parser over the scanner, and the
    pair is handed to ``yappsrt.wrap_error_reporter`` together with the
    rule name.
    """
    scanner = StarParserScanner(text)
    parser = StarParser(scanner)
    return yappsrt.wrap_error_reporter(parser, rule)
---|
283 | |
---|
284 | # End -- grammar generated by Yapps |
---|
285 | |
---|
286 | |
---|