[469] | 1 | from StarFile import * |
---|
| 2 | from types import * |
---|
| 3 | import copy |
---|
| 4 | # An alternative specification for the Cif Parser, based on Yapps2 |
---|
| 5 | # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) |
---|
| 6 | # |
---|
| 7 | # helper code: we define our match tokens |
---|
| 8 | lastval = '' |
---|
def monitor(location,value):
    """Remember the repr of ``value`` in the global ``lastval``; return ``value``.

    The grammar actions wrap intermediate results in this call so that the
    most recently produced value can be inspected after a parse.

    location -- label of the calling grammar rule; not used by the body,
                kept so existing call sites keep working.
    value    -- any object; it is returned unchanged (same object).
    """
    global lastval
    # repr() is the portable spelling of the old backtick operator.
    lastval = repr(value)
    return value
---|
| 14 | |
---|
| 15 | # Strip extras gets rid of leading and trailing whitespace, and |
---|
| 16 | # semicolons. |
---|
def stripextras(value):
    """Remove the delimiters surrounding a raw data value.

    If ``value`` opens with optional whitespace, a line terminator and a
    semicolon, it is treated as a semicolon-delimited text field: the opening
    is dropped, the final two characters are dropped, and one trailing
    carriage return (if present) is removed.

    Otherwise leading whitespace is skipped and the remainder is handed to
    stripstring() so that enclosing quotes are removed.
    """
    import re
    leading_space = re.compile("[\n\r\f \t\v]*")
    semicolon_opener = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
    opener = semicolon_opener.match(value)
    if not opener:
        # Ordinary value.  The whitespace pattern can match the empty
        # string, so this match normally succeeds.
        gap = leading_space.match(value)
        if gap:
            return stripstring(value[gap.end():])
        return value
    # Semicolon-delimited text: cut the opener and the last two characters.
    body = value[opener.end():len(value)-2]
    if body.endswith('\r'):
        body = body[:-1]
    return body
---|
| 35 | |
---|
| 36 | # helper function to get rid of inverted commas etc. |
---|
| 37 | |
---|
def stripstring(value):
    """Return ``value`` without its enclosing quote characters.

    When the first and last characters are both single quotes, or both
    double quotes, they are removed.  Anything else, including an empty
    value, is returned unchanged.
    """
    if not value:
        return value
    first, last = value[0], value[-1]
    for quote in ('\'', '"'):
        if first == quote and last == quote:
            return value[1:-1]
    return value
---|
| 45 | |
---|
| 46 | # helper function to populate a nested LoopBlock structure given an |
---|
| 47 | # empty structure together with listed values. The values are |
---|
| 48 | # organised into a list of lists, where each time 'stop' was |
---|
| 49 | # encountered one list terminates and a new one starts. |
---|
| 50 | # For a correctly constructed loop, the final 'popout' will pop out |
---|
| 51 | # of the iteration completely and raise a StopIteration error. |
---|
| 52 | # |
---|
| 53 | # Note that there may be an empty list at the very end of our itemlists, |
---|
| 54 | # so we remove that if necessary. |
---|
| 55 | # |
---|
| 56 | # We optimise for CIF files by loading differently if we have a flat loop |
---|
| 57 | |
---|
def makeloop(loopstructure,itemlists):
    """Load the parsed loop values into ``loopstructure`` and return it.

    loopstructure -- loop object providing ``dimension``, ``loops``,
                     ``fast_load_iter()`` and ``load_iter()``.  The chosen
                     iterator must yield ``(loop, target)`` pairs where
                     ``target`` supports ``append``.
    itemlists     -- list of lists of values; each inner list ends where a
                     stop_ token was seen.  A trailing empty list is removed
                     in place.

    Raises StopIteration (after printing the offending value) if the
    iterator runs out while values remain.
    """
    # A stop_ at the very end leaves an empty list behind; drop it.  The
    # emptiness check also keeps an empty ``itemlists`` from raising.
    if itemlists and itemlists[-1] == []:
        itemlists.pop(-1)
    # Flat loops (the CIF case) use the faster loading iterator.
    if loopstructure.dimension == 1 and loopstructure.loops == []:
        storage_iter = loopstructure.fast_load_iter()
    else:
        storage_iter = loopstructure.load_iter()
    nowloop = loopstructure
    for datalist in itemlists:
        for datavalue in datalist:
            try:
                # builtin next() works with both the Python 2 and the
                # Python 3 iterator protocol.
                nowloop,target = next(storage_iter)
            except StopIteration:
                print("StopIter at %s/%s" % (datavalue,datalist))
                # bare raise keeps the original traceback
                raise
            target.append(datavalue)
        # the end of each list is the same as a stop_ token
        nowloop.popout = True
        nowloop,blank = next(storage_iter)     # execute the pop
    return loopstructure
---|
| 83 | |
---|
| 84 | # return an object with the appropriate amount of nesting |
---|
def make_empty(nestlevel):
    """Return an empty list wrapped in ``nestlevel - 1`` further lists.

    ``make_empty(1)`` gives ``[]``, ``make_empty(2)`` gives ``[[]]`` and so
    on; levels below 1 also give ``[]``.
    """
    nested = []
    for _depth in range(nestlevel - 1):
        nested = [nested]
    return nested
---|
| 90 | |
---|
| 91 | # this function updates a dictionary first checking for name collisions, |
---|
| 92 | # which imply that the CIF is invalid. We need case insensitivity for |
---|
| 93 | # names. |
---|
| 94 | |
---|
| 95 | # Unfortunately we cannot check loop item contents against non-loop contents |
---|
| 96 | # in a non-messy way during parsing, as we may not have easy access to previous |
---|
| 97 | # key value pairs in the context of our call (unlike our built-in access to all |
---|
| 98 | # previous loops). |
---|
| 99 | # For this reason, we don't waste time checking looped items against non-looped |
---|
| 100 | # names during parsing of a data block. This would only match a subset of the |
---|
| 101 | # final items. We do check against ordinary items, however. |
---|
| 102 | # |
---|
| 103 | # Note the following situations: |
---|
| 104 | # (1) new_dict is empty -> we have just added a loop; do no checking |
---|
| 105 | # (2) new_dict is not empty -> we have some new key-value pairs |
---|
| 106 | # |
---|
def cif_update(old_dict,new_dict,loops):
    """Copy ``new_dict`` into ``old_dict``, rejecting case-insensitive duplicates.

    old_dict -- mapping updated in place.
    new_dict -- new key/value pairs; when empty nothing is checked or copied.
    loops    -- accepted for interface compatibility; not used here.

    Raises CifError when a new key, compared case-insensitively, equals a
    key already in ``old_dict`` or an earlier key of ``new_dict``.  Keys
    copied before the clash stay in ``old_dict``.
    """
    if not new_dict:        # nothing to merge (a loop was just added)
        return
    # A set gives constant-time lookups and, unlike a map() result, can be
    # tested repeatedly on any Python version.
    seen = set(key.lower() for key in old_dict.keys())
    for new_key in new_dict.keys():
        folded = new_key.lower()
        if folded in seen:
            raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
        # Track the key so that a later new key differing only in case is
        # caught as well.
        seen.add(folded)
        old_dict[new_key] = new_dict[new_key]
---|
| 115 | # |
---|
| 116 | # this takes two lines, so we couldn't fit it into a one line execution statement... |
---|
def order_update(order_array,new_name):
    """Append ``new_name`` to ``order_array`` in place and return ``new_name``.

    Returning the name lets the call be used inside a single expression.
    """
    order_array.append(new_name)
    return new_name
---|
| 120 | |
---|
| 121 | |
---|
| 122 | # Begin -- grammar generated by Yapps |
---|
| 123 | import sys, re |
---|
| 124 | import yapps3_compiled_rt as yappsrt |
---|
| 125 | |
---|
class StarParserScanner(yappsrt.Scanner):
    # Token table for the STAR grammar: (token name, compiled regex) pairs.
    # The first two entries have no symbolic name; they are keyed by their
    # own pattern text and are the same two patterns passed to the base
    # class in __init__ below.
    patterns = [
        # whitespace: a blank/newline not followed by ';', or a space/tab
        ('([ \t\n\r](?!;))|[ \t]', re.compile('([ \t\n\r](?!;))|[ \t]')),
        # comment: '#' to end of line (newline consumed unless ';' follows)
        ('(#.*[\n\r](?!;))|(#.*)', re.compile('(#.*[\n\r](?!;))|(#.*)')),
        # reserved words, matched in any letter case
        ('LBLOCK', re.compile('(L|l)(O|o)(O|o)(P|p)_')),
        ('GLOBAL', re.compile('(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_')),
        ('STOP', re.compile('(S|s)(T|t)(O|o)(P|p)_')),
        # 'save_' followed by a name opens a save frame; bare 'save_' closes it
        ('save_heading', re.compile('(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        ('save_end', re.compile('(S|s)(A|a)(V|v)(E|e)_')),
        # data names start with an underscore
        ('data_name', re.compile('_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        ('data_heading', re.compile('(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        # semicolon-delimited text: opening line, body lines, closing ';'
        ('start_sc_line', re.compile('(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+')),
        ('sc_line_of_text', re.compile('[^;\r\n]([^\r\n])*(\r\n|\r|\n)+')),
        ('end_sc_line', re.compile(';')),
        # single-token value: an unquoted word that does not start with a
        # reserved word, or a single-quoted or double-quoted string
        ('data_value_1', re.compile('((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\(\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+')),
        # end of input
        ('END', re.compile('$')),
    ]
    def __init__(self, str):
        # ``str`` is the text to scan.  The list repeats the whitespace and
        # comment patterns from the table above.
        # NOTE(review): presumably None means "use the class-level patterns"
        # and the list is the set of tokens to ignore -- confirm against
        # yapps3_compiled_rt.Scanner.
        yappsrt.Scanner.__init__(self,None,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],str)
---|
| 145 | |
---|
class StarParser(yappsrt.Parser):
    # Parser generated by Yapps: one method per grammar rule.  Every rule
    # method creates a Context for itself (passed to yappsrt.SyntaxError
    # when a rule fails), peeks at the next token to choose a branch, and
    # scans the tokens it consumes.
    Context = yappsrt.Context

    # input: zero or more data blocks followed by END.
    # Returns a StarFile with one block added per data block parsed.
    def input(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(_context)
            allblocks = StarFile(); allblocks.NewBlock(dblock[0],blockcontents=dblock[1],fix=False,replace=False)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(_context)
                allblocks.NewBlock(dblock[0],blockcontents=monitor('input',dblock[1]),fix=False,replace=False)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            # empty input: return an empty StarFile
            END = self._scan('END')
            allblocks = StarFile()
        return allblocks

    # dblock: a data_heading followed by any mix of data sequences and
    # save frames.  Returns (block name, StarBlock).
    def dblock(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [])
        data_heading = self._scan('data_heading')
        # [5:] drops the five-character 'data_' prefix matched by the token
        heading = data_heading[5:];thisblock=StarBlock(overwrite=False)
        while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisblock, _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(_context)
                # save frames are stored under the block's "saves" entry
                thisblock["saves"].NewBlock(save_frame[0],save_frame[1],fix=False,replace=True)
        if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        return (heading,monitor('dblock',thisblock))

    # dataseq: one or more data items, each added to ``starblock`` by
    # data().  Returns nothing.
    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))

    # data: either a loop (inserted with insert_loop) or a single
    # name/value pair (added with AddLoopItem) placed in ``currentblock``.
    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            currentblock.insert_loop(top_loop,audit=False)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddLoopItem(datakvpair[:2],precheck=True)

    # datakvpair: a data name followed by its value.
    # Returns [data_name, data_value].
    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    # data_value: a single-token value (quotes removed by stripstring) or a
    # semicolon-delimited text field (delimiters removed by stripextras).
    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else: # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value',thisval)

    # sc_lines_of_text: the opening line, any number of body lines and the
    # closing ';', concatenated into one raw string (delimiters included).
    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = start_sc_line
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines = lines+sc_line_of_text
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        return monitor('sc_line_of_text',lines+end_sc_line)

    # top_loop: 'loop_', then the loop header, then the loop values.
    # Returns the loop structure after makeloop() has filled it.
    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return makeloop(loopfield,loopvalues)

    # loopfield: the loop header.  Data names become empty columns of the
    # current loop, a nested 'loop_' opens a LoopBlock one dimension deeper,
    # and 'stop_' steps back out.  Returns the outermost LoopBlock.
    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop=LoopBlock(dimension=1,overwrite=False);curloop=toploop;poploop=None;dim=1
        while self._peek('data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line') not in ['data_value_1', 'start_sc_line']:
            _token = self._peek('data_name', 'LBLOCK', 'STOP')
            if _token == 'data_name':
                data_name = self._scan('data_name')
                curloop[data_name]=[]
            elif _token == 'LBLOCK':
                LBLOCK = self._scan('LBLOCK')
                dim=dim+1;newloop=LoopBlock(dimension=dim,overwrite=False);poploop=curloop;curloop.insert_loop(newloop,audit=False);curloop=newloop
            else: # == 'STOP'
                STOP = self._scan('STOP')
                # NOTE(review): poploop holds only the most recent parent,
                # so two stop_ tokens in a row leave curloop on the same
                # loop while dim keeps decreasing -- confirm whether headers
                # nested more than two deep need to be supported.
                curloop=poploop;dim=dim-1
        if self._peek() not in ['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']))
        return toploop

    # loopvalues: one or more values, with 'stop_' tokens splitting them
    # into separate lists.  Returns a list of lists of values; a stop_ in
    # final position leaves an empty list at the end.
    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[[data_value]]
        while self._peek('data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'STOP', 'start_sc_line']:
            _token = self._peek('data_value_1', 'STOP', 'start_sc_line')
            if _token != 'STOP':
                data_value = self.data_value(_context)
                dataloop[-1].append(monitor('loopval',data_value))
            else: # == 'STOP'
                STOP = self._scan('STOP')
                # start a new list for the values that follow
                dataloop.append([])
        if self._peek() not in ['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
        return dataloop

    # save_frame: a save_heading, any number of data sequences, then a bare
    # 'save_'.  Returns (frame name, StarBlock).
    def save_frame(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [])
        save_heading = self._scan('save_heading')
        # [5:] drops the five-character 'save_' prefix matched by the token
        savehead = save_heading[5:];savebody = StarBlock(overwrite=False)
        while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            dataseq = self.dataseq(savebody, _context)
        if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return (savehead,monitor('save_frame',savebody))
---|
| 278 | |
---|
| 279 | |
---|
def parse(rule, text):
    """Parse ``text`` starting from grammar rule ``rule``.

    A StarParserScanner is built over the text, wrapped in a StarParser,
    and the pair is handed to yappsrt.wrap_error_reporter together with the
    rule name; whatever that call returns is returned.
    """
    scanner = StarParserScanner(text)
    parser = StarParser(scanner)
    return yappsrt.wrap_error_reporter(parser, rule)
---|
| 283 | |
---|
| 284 | # End -- grammar generated by Yapps |
---|
| 285 | |
---|
| 286 | |
---|