1 | # To maximize python3/python2 compatibility |
---|
2 | from __future__ import print_function |
---|
3 | from __future__ import unicode_literals |
---|
4 | from __future__ import division |
---|
5 | from __future__ import absolute_import |
---|
6 | |
---|
7 | from .StarFile import StarBlock,StarFile,StarList,StarDict |
---|
8 | from io import StringIO |
---|
9 | # An alternative specification for the Cif Parser, based on Yapps2 |
---|
10 | # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) |
---|
11 | # |
---|
12 | # helper code: we define our match tokens |
---|
lastval = ''


def monitor(location, value):
    """Remember the repr of *value* in the module-level ``lastval`` and return *value*.

    ``location`` is a label supplied by the caller (the grammar actions pass
    a rule name); this function does not use it.
    """
    global lastval
    lastval = repr(value)
    return value
---|
19 | |
---|
20 | # Strip extras gets rid of leading and trailing whitespace, and |
---|
21 | # semicolons. |
---|
def stripextras(value):
    """Strip the delimiters from a raw data value.

    If *value* starts (after optional whitespace) with a line terminator
    followed by ``;``, it is treated as a semicolon-delimited text field:
    the opening delimiter and the final two characters are removed, one
    trailing carriage return is dropped, and the line-prefix and
    line-folding protocols are undone.  Otherwise leading whitespace is
    skipped and the remainder is passed to ``stripstring``.
    """
    from .StarFile import remove_line_folding, remove_line_prefix
    import re

    opening = re.match("[\n\r\f \t\v]*[\n\r\f]\n*;", value)
    if opening is None:
        # not a semicolon-delimited string: skip leading whitespace only
        leading = re.match("[\n\r\f \t\v]*", value)
        if leading:
            return stripstring(value[leading.end():])
        return value

    # semicolon-delimited string: cut the opener and the last two characters
    text = value[opening.end():len(value) - 2]
    if text.endswith('\r'):
        text = text[:-1]
    # apply protocols
    return remove_line_folding(remove_line_prefix(text))
---|
44 | |
---|
45 | # helper function to get rid of inverted commas etc. |
---|
46 | |
---|
def stripstring(value):
    """Return *value* without one enclosing pair of matching quotes.

    A value whose first and last characters are both ``'`` or both ``"``
    loses those two characters; anything else (including empty or falsy
    input) is returned unchanged.
    """
    if not value:
        return value
    first, last = value[0], value[-1]
    for quote in ('\'', '"'):
        if first == quote and last == quote:
            return value[1:-1]
    return value
---|
54 | |
---|
55 | # helper function to get rid of triple quotes |
---|
def striptriple(value):
    """Return *value* without an enclosing pair of matching triple quotes.

    Both the first three and the last three characters must be the same
    delimiter (``\"\"\"`` or ``'''``); otherwise *value* is returned as is.
    """
    if not value:
        return value
    head, tail = value[:3], value[-3:]
    for delimiter in ('"""', "'''"):
        if head == delimiter and tail == delimiter:
            return value[3:-3]
    return value
---|
63 | |
---|
# Helper function to populate a StarBlock given a list of names
# and values.
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#
---|
70 | |
---|
def makeloop(target_block, loopdata):
    """Add the columns of a parsed loop to *target_block* and join them in a loop.

    *loopdata* is a ``(names, values)`` pair.  ``values`` is a flat list in
    row-major order; a trailing empty list, if present, is removed from it
    in place.  Each name is added with every ``len(names)``-th value
    starting at its own column index, then ``CreateLoop`` is called.

    Raises ValueError (after printing the same message to stderr) when
    ``CreateLoop`` raises ValueError.
    """
    loop_seq, itemlists = loopdata
    if itemlists[-1] == []:
        del itemlists[-1]
    step_size = len(loop_seq)
    for col_no, name in enumerate(loop_seq):
        column = itemlists[col_no::step_size]
        target_block.AddItem(name, column, precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  # will raise ValueError on problem
    except ValueError:
        error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)
---|
86 | |
---|
87 | # return an object with the appropriate amount of nesting |
---|
def make_empty(nestlevel):
    """Return an empty list wrapped in ``nestlevel - 1`` further lists.

    A *nestlevel* of 1 or less yields a plain ``[]``.
    """
    nested = []
    for _ in range(nestlevel - 1):
        nested = [nested]
    return nested
---|
93 | |
---|
94 | # this function updates a dictionary first checking for name collisions, |
---|
95 | # which imply that the CIF is invalid. We need case insensitivity for |
---|
96 | # names. |
---|
97 | |
---|
98 | # Unfortunately we cannot check loop item contents against non-loop contents |
---|
99 | # in a non-messy way during parsing, as we may not have easy access to previous |
---|
100 | # key value pairs in the context of our call (unlike our built-in access to all |
---|
101 | # previous loops). |
---|
102 | # For this reason, we don't waste time checking looped items against non-looped |
---|
103 | # names during parsing of a data block. This would only match a subset of the |
---|
104 | # final items. We do check against ordinary items, however. |
---|
105 | # |
---|
106 | # Note the following situations: |
---|
107 | # (1) new_dict is empty -> we have just added a loop; do no checking |
---|
108 | # (2) new_dict is not empty -> we have some new key-value pairs |
---|
109 | # |
---|
def cif_update(old_dict, new_dict, loops):
    """Merge *new_dict* into *old_dict*, rejecting case-insensitive name clashes.

    Parameters:
        old_dict: mapping updated in place; its keys must support ``lower()``.
        new_dict: key/value pairs to add.  An empty mapping means a loop was
            just added and nothing is checked or changed.
        loops: accepted for interface compatibility; not used here.

    Raises:
        CifError: a key of *new_dict* equals, ignoring case, a key already in
            *old_dict* or an earlier key of *new_dict*.  Keys processed
            before the clash have already been inserted.

    NOTE(review): ``CifError`` is not defined or imported in the part of the
    file visible here; confirm it is in scope before relying on this path.
    """
    if new_dict == {}:  # we have a new loop: nothing to compare
        return
    # A real set, not map(): on Python 3 map() is a one-shot iterator, so the
    # first membership test would exhaust it and later duplicates would be
    # missed.
    seen = set(key.lower() for key in old_dict.keys())
    for new_key in new_dict.keys():
        lowered = new_key.lower()
        if lowered in seen:
            raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
        old_dict[new_key] = new_dict[new_key]
        # track names added in this call so '_A' and '_a' also collide
        seen.add(lowered)
---|
118 | # |
---|
119 | # this takes two lines, so we couldn't fit it into a one line execution statement... |
---|
def order_update(order_array, new_name):
    """Append *new_name* to *order_array* in place and hand *new_name* back.

    Returning the name lets the call be used inside a single expression.
    """
    order_array.append(new_name)
    return new_name
---|
123 | |
---|
124 | # and finally...turn a sequence into a python dict (thanks to Stackoverflow) |
---|
def pairwise(iterable):
    """Yield consecutive, non-overlapping ``(first, second)`` pairs from *iterable*.

    ``dict(pairwise([k1, v1, k2, v2]))`` therefore builds ``{k1: v1, k2: v2}``.
    A trailing unpaired element is silently dropped.
    """
    it = iter(iterable)
    while True:
        # Catch exhaustion explicitly: under PEP 479 (Python 3.7+) a
        # StopIteration escaping a generator body becomes RuntimeError.
        try:
            first = next(it)
            second = next(it)
        except StopIteration:
            return
        yield first, second
---|
129 | |
---|
130 | |
---|
131 | # Begin -- grammar generated by Yapps |
---|
132 | import sys, re |
---|
133 | from . import yapps3_compiled_rt as yappsrt |
---|
134 | |
---|
class StarParserScanner(yappsrt.Scanner):
    """Scanner holding the token patterns for the STAR grammar.

    The first two patterns (whitespace and comments) are registered under
    their own regular expression as token name and are also passed to the
    base scanner as the list of tokens to ignore.
    """

    def __init__(self, *args, **kwargs):
        # whitespace, but a line terminator only when not followed by ';'
        whitespace = '([ \t\n\r](?!;))|[ \t]'
        # '#' comments, with the same restriction on the line terminator
        comment = '(#.*[\n\r](?!;))|(#.*)'
        ignored = [whitespace, comment]
        patterns = [
            (whitespace, whitespace),
            (comment, comment),
            ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
            ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
            ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
            ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
            ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
            ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
            ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
            ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
            ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
            ('end_sc_line', ';'),
            ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'),
            ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self, patterns, ignored, *args, **kwargs)
---|
154 | |
---|
class StarParser(yappsrt.Parser):
    """Parser for the STAR grammar, generated by Yapps.

    Each method implements one grammar rule.  Every rule builds a Context
    describing itself (rule name and arguments) that is handed to the rules
    it calls and attached to any SyntaxError it raises.
    """
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        """Top-level rule: parse every data block up to END.

        Each parsed block is merged into ``prepared`` with ``merge_fast``;
        ``prepared`` itself is returned.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared;allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            # no data blocks at all: return ``prepared`` untouched
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        """Parse one data block and return it inside a new StarFile.

        The block name is the ``data_heading`` token minus its first five
        characters.  Data items are added to the new block; each save frame
        is merged in with the new block as parent.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        # [5:] drops the five-character heading prefix matched by the token
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,StarBlock(overwrite=False));act_block=thisbc[newname]
        while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisbc[heading], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(_context)
                thisbc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        """Parse one or more consecutive data items into ``starblock``.

        Returns None; the items are stored by the ``data`` rule.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        """Parse a single loop or a single name/value pair into ``currentblock``.

        A loop is stored through ``makeloop``; a pair through ``AddItem``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)

    def datakvpair(self, _parent=None):
        """Parse a data name followed by its value; return ``[name, value]``."""
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        """Parse one data value and return it with its delimiters removed.

        A ``data_value_1`` token goes through ``stripstring``; a
        semicolon-delimited text field goes through ``stripextras``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else: # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value',thisval)

    def sc_lines_of_text(self, _parent=None):
        """Parse a semicolon-delimited text field and return its raw text.

        The opening line, every following text line and the closing ``;``
        are concatenated unchanged; delimiters are stripped by the caller.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        # accumulate the pieces in a StringIO buffer
        lines = StringIO();lines.write(start_sc_line)
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines.write(sc_line_of_text)
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def top_loop(self, _parent=None):
        """Parse a loop: the LBLOCK token, the data names, then the values.

        Returns the tuple ``(names, values)`` in the form ``makeloop`` expects.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        """Parse the data names of a loop header; return them as a list.

        The list is empty when no data name follows.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop=[]
        while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name':
            data_name = self._scan('data_name')
            toploop.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line']))
        return toploop

    def loopvalues(self, _parent=None):
        """Parse the values of a loop; return them as a flat list.

        At least one value is required; parsing continues while the next
        token starts another value.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, _parent=None):
        """Parse one save frame and return it inside a new StarFile.

        The frame name is the ``save_heading`` token minus its first five
        characters.  Nested save frames are parsed recursively and merged
        with this frame as parent.  The frame must end with ``save_end``.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [])
        save_heading = self._scan('save_heading')
        # [5:] drops the five-character heading prefix matched by the token
        savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,StarBlock(overwrite=False));act_block=savebc[newname]
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(_context)
                savebc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)
---|
279 | |
---|
280 | |
---|
def parse(rule, text):
    """Run grammar rule *rule* over *text* and return the result.

    A StarParser is built on a StarParserScanner for *text* and handed,
    together with *rule*, to ``yappsrt.wrap_error_reporter``.
    """
    scanner = StarParserScanner(text)
    parser = StarParser(scanner)
    return yappsrt.wrap_error_reporter(parser, rule)
---|
284 | |
---|
285 | # End -- grammar generated by Yapps |
---|
286 | |
---|
287 | |
---|
288 | |
---|