Ignore:
Timestamp:
Oct 24, 2017 11:53:41 AM (4 years ago)
Author:
vondreele
Message:

replace old CifFile with new py 2.7/3.6 compliant code
fix cif file import phase & powder file
fix CemComp restraint editing

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/CifFile/YappsStarParser_1_1.py

    r469 r3137  
    1 from StarFile import *
    2 from types import *
    3 import copy
     1# To maximize python3/python2 compatibility
     2from __future__ import print_function
     3from __future__ import unicode_literals
     4from __future__ import division
     5from __future__ import absolute_import
     6
     7from .StarFile import StarBlock,StarFile,StarList,StarDict
     8from io import StringIO
    49# An alternative specification for the Cif Parser, based on Yapps2
    510# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
     
    914def monitor(location,value):
    1015    global lastval
    11     # print 'At %s: %s' % (location,`value`)
    12     lastval = `value`
     16    #print 'At %s: %s' % (location,repr(value))
     17    lastval = repr(value)
    1318    return value
    1419
     
    1621# semicolons.
    1722def stripextras(value):
    18     # we get rid of semicolons and leading/trailing terminators etc.
     23     from .StarFile import remove_line_folding, remove_line_prefix
     24     # we get rid of semicolons and leading/trailing terminators etc.
    1925     import re
    2026     jj = re.compile("[\n\r\f \t\v]*")
     
    2733          except IndexError:    #empty data value
    2834             pass
    29           return nv
    30      else:
     35          # apply protocols
     36          nv = remove_line_prefix(nv)
     37          nv = remove_line_folding(nv)
     38          return nv
     39     else:
    3140          cut = jj.match(value)
    3241          if cut:
     
    4453     return value
    4554
    46 # helper function to populate a nested LoopBlock structure given an
    47 # empty structure together with listed values.   The values are
    48 # organised into a list of lists, where each time 'stop' was
    49 # encountered one list terminates and a new one starts.
    50 # For a correctly constructed loop, the final 'popout' will pop out
    51 # of the iteration completely and raise a StopIteration error.
     55# helper function to get rid of triple quotes
     56def striptriple(value):
     57    if value:
     58        if value[:3] == '"""' and value[-3:] == '"""':
     59            return value[3:-3]
     60        if value[:3] == "'''" and value[-3:] == "'''":
     61            return value[3:-3]
     62    return value
     63
     64# helper function to populate a StarBlock given a list of names
     65# and values .
    5266#
    5367# Note that there may be an empty list at the very end of our itemlists,
    5468# so we remove that if necessary.
    5569#
    56 # We optimise for CIF files by loading differently if we have a flat loop
    57 
    58 def makeloop(loopstructure,itemlists):
     70
     71def makeloop(target_block,loopdata):
     72    loop_seq,itemlists = loopdata
    5973    if itemlists[-1] == []: itemlists.pop(-1)
    60     # print 'Making loop with %s' % `itemlists`
    61     if loopstructure.dimension == 1 and loopstructure.loops == []:
    62         storage_iter = loopstructure.fast_load_iter()
    63     else:
    64         storage_iter = loopstructure.load_iter()
    65     nowloop = loopstructure
    66     for datalist in itemlists:
    67        for datavalue in datalist:
    68            try:
    69                nowloop,target = storage_iter.next()
    70            except StopIteration:
    71                print "StopIter at %s/%s" % (datavalue,datalist)
    72                raise StopIteration
    73            # print 'Got %s %s ->' % (`nowloop`,`target`),
    74            target.append(datavalue)
    75            # print '%s' % `target`
    76        # the end of each list is the same as a stop_ token
    77        # print 'Saw end of list'
    78        nowloop.popout = True
    79        nowloop,blank = storage_iter.next()  #execute the pop
    80        # print 'discarding %s/%s' % (`nowloop`,`blank`)
    81     # print 'Makeloop returning %s' % `loopstructure`
    82     return loopstructure
     74    # print 'Making loop with %s' % repr(itemlists)
     75    step_size = len(loop_seq)
     76    for col_no in range(step_size):
     77       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
     78    # print 'Makeloop constructed %s' % repr(loopstructure)
     79    # now construct the loop
     80    try:
     81        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
     82    except ValueError:
     83        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
     84        print(error_string, file=sys.stderr)
     85        raise ValueError(error_string)
    8386
    8487# return an object with the appropriate amount of nesting
     
    9194# this function updates a dictionary first checking for name collisions,
    9295# which imply that the CIF is invalid.  We need case insensitivity for
    93 # names. 
     96# names.
    9497
    9598# Unfortunately we cannot check loop item contents against non-loop contents
    9699# in a non-messy way during parsing, as we may not have easy access to previous
    97 # key value pairs in the context of our call (unlike our built-in access to all 
     100# key value pairs in the context of our call (unlike our built-in access to all
    98101# previous loops).
    99102# For this reason, we don't waste time checking looped items against non-looped
     
    108111    old_keys = map(lambda a:a.lower(),old_dict.keys())
    109112    if new_dict != {}:    # otherwise we have a new loop
    110         #print 'Comparing %s to %s' % (`old_keys`,`new_dict.keys()`)
     113        #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys()))
    111114        for new_key in new_dict.keys():
    112115            if new_key.lower() in old_keys:
    113                 raise CifError, "Duplicate dataname or blockname %s in input file" % new_key
     116                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
    114117            old_dict[new_key] = new_dict[new_key]
    115118#
    116119# this takes two lines, so we couldn't fit it into a one line execution statement...
    117120def order_update(order_array,new_name):
    118     order_array.append(new_name) 
     121    order_array.append(new_name)
    119122    return new_name
     123
     124# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
     125def pairwise(iterable):
     126    it = iter(iterable)
     127    while 1:
     128        yield next(it), next(it)
    120129
    121130
    122131# Begin -- grammar generated by Yapps
    123132import sys, re
    124 import yapps3_compiled_rt as yappsrt
     133from . import yapps3_compiled_rt as yappsrt
    125134
    126135class StarParserScanner(yappsrt.Scanner):
    127     patterns = [
    128         ('([ \t\n\r](?!;))|[ \t]', re.compile('([ \t\n\r](?!;))|[ \t]')),
    129         ('(#.*[\n\r](?!;))|(#.*)', re.compile('(#.*[\n\r](?!;))|(#.*)')),
    130         ('LBLOCK', re.compile('(L|l)(O|o)(O|o)(P|p)_')),
    131         ('GLOBAL', re.compile('(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_')),
    132         ('STOP', re.compile('(S|s)(T|t)(O|o)(P|p)_')),
    133         ('save_heading', re.compile('(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
    134         ('save_end', re.compile('(S|s)(A|a)(V|v)(E|e)_')),
    135         ('data_name', re.compile('_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
    136         ('data_heading', re.compile('(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
    137         ('start_sc_line', re.compile('(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+')),
    138         ('sc_line_of_text', re.compile('[^;\r\n]([^\r\n])*(\r\n|\r|\n)+')),
    139         ('end_sc_line', re.compile(';')),
    140         ('data_value_1', re.compile('((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\(\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+')),
    141         ('END', re.compile('$')),
    142     ]
    143     def __init__(self, str):
    144         yappsrt.Scanner.__init__(self,None,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],str)
     136    def __init__(self, *args,**kwargs):
     137        patterns = [
     138         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
     139         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
     140         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
     141         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
     142         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
     143         ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
     144         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
     145         ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
     146         ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
     147         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
     148         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
     149         ('end_sc_line', ';'),
     150         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\(\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'),
     151         ('END', '$'),
     152        ]
     153        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)
    145154
    146155class StarParser(yappsrt.Parser):
    147156    Context = yappsrt.Context
    148     def input(self, _parent=None):
    149         _context = self.Context(_parent, self._scanner, self._pos, 'input', [])
     157    def input(self, prepared, _parent=None):
     158        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
    150159        _token = self._peek('END', 'data_heading')
    151160        if _token == 'data_heading':
    152             dblock = self.dblock(_context)
    153             allblocks = StarFile(); allblocks.NewBlock(dblock[0],blockcontents=dblock[1],fix=False,replace=False)
     161            dblock = self.dblock(prepared, _context)
     162            allblocks = prepared;allblocks.merge_fast(dblock)
    154163            while self._peek('END', 'data_heading') == 'data_heading':
    155                 dblock = self.dblock(_context)
    156                 allblocks.NewBlock(dblock[0],blockcontents=monitor('input',dblock[1]),fix=False,replace=False)
     164                dblock = self.dblock(prepared, _context)
     165                allblocks.merge_fast(dblock)
    157166            if self._peek() not in ['END', 'data_heading']:
    158167                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
     
    160169        else: # == 'END'
    161170            END = self._scan('END')
    162             allblocks = StarFile()
     171            allblocks = prepared
    163172        return allblocks
    164173
    165     def dblock(self, _parent=None):
    166         _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [])
     174    def dblock(self, prepared, _parent=None):
     175        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
    167176        data_heading = self._scan('data_heading')
    168         heading = data_heading[5:];thisblock=StarBlock(overwrite=False)
    169         while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
     177        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,StarBlock(overwrite=False));act_block=thisbc[newname]
     178        while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
    170179            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
    171180            if _token != 'save_heading':
    172                 dataseq = self.dataseq(thisblock, _context)
     181                dataseq = self.dataseq(thisbc[heading], _context)
    173182            else: # == 'save_heading'
    174183                save_frame = self.save_frame(_context)
    175                 thisblock["saves"].NewBlock(save_frame[0],save_frame[1],fix=False,replace=True)
    176         if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']:
     184                thisbc.merge_fast(save_frame,parent=act_block)
     185        if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']:
    177186            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
    178         return (heading,monitor('dblock',thisblock))
     187        thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))
    179188
    180189    def dataseq(self, starblock, _parent=None):
    181190        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
    182191        data = self.data(starblock, _context)
    183         while self._peek('LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
     192        while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
    184193            data = self.data(starblock, _context)
    185         if self._peek() not in ['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
    186             raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
     194        if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
     195            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
    187196
    188197    def data(self, currentblock, _parent=None):
     
    191200        if _token == 'LBLOCK':
    192201            top_loop = self.top_loop(_context)
    193             currentblock.insert_loop(top_loop,audit=False)
     202            makeloop(currentblock,top_loop)
    194203        else: # == 'data_name'
    195204            datakvpair = self.datakvpair(_context)
    196             currentblock.AddLoopItem(datakvpair[:2],precheck=True)
     205            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)
    197206
    198207    def datakvpair(self, _parent=None):
     
    216225        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
    217226        start_sc_line = self._scan('start_sc_line')
    218         lines = start_sc_line
     227        lines = StringIO();lines.write(start_sc_line)
    219228        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
    220229            sc_line_of_text = self._scan('sc_line_of_text')
    221             lines = lines+sc_line_of_text
     230            lines.write(sc_line_of_text)
    222231        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
    223232            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
    224233        end_sc_line = self._scan('end_sc_line')
    225         return monitor('sc_line_of_text',lines+end_sc_line)
     234        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())
    226235
    227236    def top_loop(self, _parent=None):
     
    230239        loopfield = self.loopfield(_context)
    231240        loopvalues = self.loopvalues(_context)
    232         return makeloop(loopfield,loopvalues)
     241        return loopfield,loopvalues
    233242
    234243    def loopfield(self, _parent=None):
    235244        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
    236         toploop=LoopBlock(dimension=1,overwrite=False);curloop=toploop;poploop=None;dim=1
    237         while self._peek('data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line') not in ['data_value_1', 'start_sc_line']:
    238             _token = self._peek('data_name', 'LBLOCK', 'STOP')
    239             if _token == 'data_name':
    240                 data_name = self._scan('data_name')
    241                 curloop[data_name]=[]
    242             elif _token == 'LBLOCK':
    243                 LBLOCK = self._scan('LBLOCK')
    244                 dim=dim+1;newloop=LoopBlock(dimension=dim,overwrite=False);poploop=curloop;curloop.insert_loop(newloop,audit=False);curloop=newloop
    245             else: # == 'STOP'
    246                 STOP = self._scan('STOP')
    247                 curloop=poploop;dim=dim-1
    248         if self._peek() not in ['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']:
    249             raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'LBLOCK', 'STOP', 'data_value_1', 'start_sc_line']))
     245        toploop=[]
     246        while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name':
     247            data_name = self._scan('data_name')
     248            toploop.append(data_name)
     249        if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']:
     250            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line']))
    250251        return toploop
    251252
     
    253254        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
    254255        data_value = self.data_value(_context)
    255         dataloop=[[data_value]]
    256         while self._peek('data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'STOP', 'start_sc_line']:
    257             _token = self._peek('data_value_1', 'STOP', 'start_sc_line')
    258             if _token != 'STOP':
    259                 data_value = self.data_value(_context)
    260                 dataloop[-1].append(monitor('loopval',data_value))
    261             else: # == 'STOP'
    262                 STOP = self._scan('STOP')
    263                 dataloop.append([])
    264         if self._peek() not in ['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
    265             raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'STOP', 'start_sc_line', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
     256        dataloop=[data_value]
     257        while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']:
     258            data_value = self.data_value(_context)
     259            dataloop.append(monitor('loopval',data_value))
     260        if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
     261            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
    266262        return dataloop
    267263
     
    269265        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [])
    270266        save_heading = self._scan('save_heading')
    271         savehead = save_heading[5:];savebody = StarBlock(overwrite=False)
    272         while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
    273             dataseq = self.dataseq(savebody, _context)
    274         if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']:
    275             raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']))
     267        savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,StarBlock(overwrite=False));act_block=savebc[newname]
     268        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
     269            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
     270            if _token != 'save_heading':
     271                dataseq = self.dataseq(savebc[savehead], _context)
     272            else: # == 'save_heading'
     273                save_frame = self.save_frame(_context)
     274                savebc.merge_fast(save_frame,parent=act_block)
     275        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
     276            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
    276277        save_end = self._scan('save_end')
    277         return (savehead,monitor('save_frame',savebody))
     278        return monitor('save_frame',savebc)
    278279
    279280
Note: See TracChangeset for help on using the changeset viewer.