Ignore:
Timestamp:
Oct 24, 2017 11:53:41 AM (8 years ago)
Author:
vondreele
Message:

replace old CifFile with new py 2.7/3.6 compliant code
fix CIF file import for phase & powder files
fix ChemComp restraint editing

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified trunk/CifFile/StarFile.py

    r469 r3137  
     1# To maximize python3/python2 compatibility
     2from __future__ import print_function
     3from __future__ import unicode_literals
     4from __future__ import division
     5from __future__ import absolute_import
     6
     7__copyright = """
     8PYCIFRW License Agreement (Python License, Version 2)
     9-----------------------------------------------------
     10
     111. This LICENSE AGREEMENT is between the Australian Nuclear Science
     12and Technology Organisation ("ANSTO"), and the Individual or
     13Organization ("Licensee") accessing and otherwise using this software
     14("PyCIFRW") in source or binary form and its associated documentation.
     15
     162. Subject to the terms and conditions of this License Agreement,
     17ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
     18license to reproduce, analyze, test, perform and/or display publicly,
     19prepare derivative works, distribute, and otherwise use PyCIFRW alone
     20or in any derivative version, provided, however, that this License
     21Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
     222001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
     23in any derivative version prepared by Licensee.
     24
     253. In the event Licensee prepares a derivative work that is based on
     26or incorporates PyCIFRW or any part thereof, and wants to make the
     27derivative work available to others as provided herein, then Licensee
     28hereby agrees to include in any such work a brief summary of the
     29changes made to PyCIFRW.
     30
     314. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
     32basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
     33IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
     34DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
     35FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
     36INFRINGE ANY THIRD PARTY RIGHTS.
     37
     385. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
     39FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
     40RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
     41DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
     42
     436. This License Agreement will automatically terminate upon a material
     44breach of its terms and conditions.
     45
     467. Nothing in this License Agreement shall be deemed to create any
     47relationship of agency, partnership, or joint venture between ANSTO
     48and Licensee. This License Agreement does not grant permission to use
     49ANSTO trademarks or trade name in a trademark sense to endorse or
     50promote products or services of Licensee, or any third party.
     51
     528. By copying, installing or otherwise using PyCIFRW, Licensee agrees
     53to be bound by the terms and conditions of this License Agreement.
     54
    155"""
    2 1.This Software copyright \u00A9 Australian Synchrotron Research Program Inc, ("ASRP").
    3 
    4 2.Subject to ensuring that this copyright notice and licence terms
    5 appear on all copies and all modified versions, of PyCIFRW computer
    6 code ("this Software"), a royalty-free non-exclusive licence is hereby
    7 given (i) to use, copy and modify this Software including the use of
    8 reasonable portions of it in other software and (ii) to publish,
    9 bundle and otherwise re-distribute this Software or modified versions
    10 of this Software to third parties, provided that this copyright notice
    11 and terms are clearly shown as applying to all parts of software
    12 derived from this Software on each occasion it is published, bundled
    13 or re-distributed.  You are encouraged to communicate useful
    14 modifications to ASRP for inclusion for future versions.
    15 
    16 3.No part of this Software may be sold as a standalone package.
    17 
    18 4.If any part of this Software is bundled with Software that is sold,
    19 a free copy of the relevant version of this Software must be made
    20 available through the same distribution channel (be that web server,
    21 tape, CD or otherwise).
    22 
    23 5.It is a term of exercise of any of the above royalty free licence
    24 rights that ASRP gives no warranty, undertaking or representation
    25 whatsoever whether express or implied by statute, common law, custom
    26 or otherwise, in respect of this Software or any part of it.  Without
    27 limiting the generality of the preceding sentence, ASRP will not be
    28 liable for any injury, loss or damage (including consequential loss or
    29 damage) or other loss, loss of profits, costs, charges or expenses
    30 however caused which may be suffered, incurred or arise directly or
    31 indirectly in respect of this Software.
    32 
    33 6. This Software is not licenced for use in medical applications.
    34 """
    35 
    36 from types import *
    37 from urllib import *         # for arbitrary opening
    38 import re
     56
     57
     58# Python 2,3 compatibility
     59try:
     60    from urllib import urlopen         # for arbitrary opening
     61    from urlparse import urlparse, urlunparse
     62except:
     63    from urllib.request import urlopen
     64    from urllib.parse import urlparse,urlunparse
     65import re,os
    3966import copy
     67import textwrap
     68
     69try:
     70    from StringIO import StringIO #not cStringIO as we cannot subclass
     71except ImportError:
     72    from io import StringIO
     73
     74if isinstance(u"abc",str):   #Python 3
     75    unicode = str
     76   
     77try:
     78    import numpy
     79    have_numpy = True
     80except ImportError:
     81    have_numpy = False
     82
    4083class StarList(list):
    41     pass
    42 
    43 # Because DDLm makes a tuple from a tuple...
    44 class StarTuple(tuple):
    45     def __new__(cls,*arglist):
    46         return tuple.__new__(cls,arglist)
     84    def __getitem__(self,args):
     85        if isinstance(args,(int,slice)):
     86            return super(StarList,self).__getitem__(args)
     87        elif isinstance(args,tuple) and len(args)>1:   #extended comma notation
     88            return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:])
     89        else:
     90            return super(StarList,self).__getitem__(args[0])
     91
     92    def __str__(self):
     93        return "SL("+super(StarList,self).__str__() + ")"
    4794
    4895class StarDict(dict):
    4996    pass
    5097
    51 class LoopBlock:
    52     def __init__(self,data = (), dimension = 0, maxoutlength=2048, wraplength=80, overwrite=True):
    53         # print 'Creating new loop block, dimension %d' % dimension
    54         self.block = {}
    55         self.loops = []
    56         self.no_packets = 0
    57         self.item_order = []
    58         self.lower_keys = []    #for efficiency
    59         self.comment_list = {}
    60         self.dimension = dimension
    61         self.popout = False         #used during load iteration
    62         self.curitem = -1           #used during iteration
    63         self.maxoutlength = maxoutlength
    64         self.wraplength = wraplength
    65         self.overwrite = overwrite
    66         if not hasattr(self,'loopclass'):  #in case are derived class
    67             self.loopclass = LoopBlock  #when making new loops
    68         self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
    69         if isinstance(data,(TupleType,ListType)):
    70             for item in data:
    71                 self.AddLoopItem(item)
    72         elif isinstance(data,LoopBlock):
    73             self.block = data.block.copy()
    74             self.item_order = data.item_order[:]
    75             self.lower_keys = data.lower_keys[:]
    76             self.comment_list = data.comment_list.copy()
    77             self.dimension = data.dimension
    78             # loops as well; change loop class
    79             for loopno in range(len(data.loops)):
    80                 try:
    81                     placeholder = self.item_order.index(data.loops[loopno])
    82                 except ValueError:
    83                     print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`data.loops[loopno]`,str(data.loops[loopno]),`self.item_order`)
    84                     placeholder = -1
    85                 self.item_order.remove(data.loops[loopno])   #gone
    86                 newobject = self.loopclass(data.loops[loopno])
    87                 # print "Recasting and adding loop %s -> %s" % (`data.loops[loopno]`,`newobject`)
    88                 self.insert_loop(newobject,position=placeholder)
    89 
    90     def __str__(self):
    91         return self.printsection()
    92 
    93     def __setitem__(self,key,value):
    94         # catch a one member loop, for convenience
    95         # we assume the key is a string value only
    96         self.AddLoopItem((key,value))
    97 
    98     def __getitem__(self,key):
    99         if isinstance(key,IntType):   #return a packet!!
    100             return self.GetPacket(key)       
    101         return self.GetLoopItem(key)
    102 
    103     def __delitem__(self,key):
    104         self.RemoveLoopItem(key)
    105 
    106     def __len__(self):
    107         blen = len(self.block)
    108         for aloop in self.loops:
    109             # print 'Aloop is %s' % `aloop`
    110             blen = blen + len(aloop)  # also a LoopBlock
    111         return blen   
    112 
    113     def __nonzero__(self):
    114         if self.__len__() > 0: return 1
    115         return 0
    116 
    117     # keys returns all internal keys
     98
     99class LoopBlock(object):
     100    def __init__(self,parent_block,dataname):
     101        self.loop_no = parent_block.FindLoop(dataname)
     102        if self.loop_no < 0:
     103            raise KeyError('%s is not in a loop structure' % dataname)
     104        self.parent_block = parent_block
     105
    118106    def keys(self):
    119         thesekeys = self.block.keys()
    120         for aloop in self.loops:
    121             thesekeys.extend(aloop.keys())
    122         return thesekeys
     107        return self.parent_block.loops[self.loop_no]
    123108
    124109    def values(self):
    125         ourkeys = self.keys()
    126         return map(lambda a:self[a],ourkeys)
    127 
     110        return [self.parent_block[a] for a in self.keys()]
     111
     112    #Avoid iterator even though that is Python3-esque
    128113    def items(self):
    129         ourkeys = self.keys()
    130         return map(lambda a,b:(a,b),self.keys(),self.values())
     114        return list(zip(self.keys(),self.values()))
     115
     116    def __getitem__(self,dataname):
     117        if isinstance(dataname,int):   #a packet request
     118            return self.GetPacket(dataname)
     119        if dataname in self.keys():
     120            return self.parent_block[dataname]
     121        else:
     122            raise KeyError('%s not in loop block' % dataname)
     123
     124    def __setitem__(self,dataname,value):
     125        self.parent_block[dataname] = value
     126        self.parent_block.AddLoopName(self.keys()[0],dataname)
     127
     128    def __contains__(self,key):
     129        return key in self.parent_block.loops[self.loop_no]
    131130
    132131    def has_key(self,key):
    133         if key.lower() in self.lower_keys:
    134             return 1
    135         for aloop in self.loops:
    136             if aloop.has_key(key): return 1
    137         return 0
    138 
    139     def get(self,key,default=None):
    140         if self.has_key(key):
    141             retval = self.GetLoopItem(key)
    142         else:
    143             retval = default
    144         return retval
    145 
    146     def clear(self):
    147         self.block = {}
    148         self.loops = []
    149         self.item_order = []
    150         self.lower_keys = []
    151         self.no_packets = 0
    152 
    153     # doesn't appear to work
    154     def copy(self):
    155         newcopy = self.copy.im_class(dimension = self.dimension)
    156         newcopy.block = self.block.copy()
    157         newcopy.loops = []
    158         newcopy.no_packets = self.no_packets
    159         newcopy.item_order = self.item_order[:]
    160         newcopy.lower_keys = self.lower_keys[:]
    161         for loop in self.loops:
    162             try:
    163                 placeholder = self.item_order.index(loop)
    164             except ValueError:
    165                 print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`loop`,str(loop),`self.item_order`)
    166                 placeholder = -1
    167             newcopy.item_order.remove(loop)   #gone
    168             newobject = loop.copy()
    169             # print "Adding loop %s -> %s" % (`loop`,`newobject`)
    170             newcopy.insert_loop(newobject,position=placeholder)
    171         return newcopy
    172 
    173     # this is not appropriate for subloops.  Instead, the loop block
    174     # should be accessed directly for update
    175      
    176     def update(self,adict):
    177         for key in adict.keys():
    178             self.AddLoopItem((key,adict[key]))
     132        return key in self
     133
     134    def __iter__(self):
     135        packet_list = zip(*self.values())
     136        names = self.keys()
     137        for p in packet_list:
     138            r = StarPacket(p)
     139            for n in range(len(names)):
     140                setattr(r,names[n].lower(),r[n])
     141            yield r
     142
     143    # for compatibility
     144    def __getattr__(self,attname):
     145        return getattr(self.parent_block,attname)
    179146
    180147    def load_iter(self,coords=[]):
    181         count = 0        #to create packet index 
     148        count = 0        #to create packet index
    182149        while not self.popout:
    183150            # ok, we have a new packet:  append a list to our subloops
     
    217184            for iname in self.keys():  #includes lower levels
    218185                target_list = self[iname]
    219                 for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 
     186                for i in range(3,self.dimension): #dim 2 upwards are lists of lists of...
    220187                    target_list = target_list[-1]
    221188                target_list.append([])
     
    232199                drill_values=drill_values[0]   #drill in
    233200            else:
    234                 raise StarError("Malformed loop packet %s" % `top_items[0]`)
    235         my_length = len(drill_values)
     201                raise StarError("Malformed loop packet %s" % repr( top_items[0] ))
     202        my_length = len(drill_values[0])       #length of 'string' entry
    236203        if self.dimension == 0:                #top level
    237204            for aloop in self.loops:
    238205                for apacket in aloop.recursive_iter():
    239                     # print "Recursive yielding %s" % `dict(top_items + apacket.items())`
     206                    # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) )
    240207                    prep_yield = StarPacket(top_values+apacket.values())  #straight list
    241208                    for name,value in top_items + apacket.items():
     
    246213                kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys())
    247214                kvvals = map(lambda a:a[1],kvpairs)   #just values
    248                 # print "Recursive kvpairs at %d: %s" % (i,`kvpairs`)
     215                # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs ))
    249216                if self.loops:
    250217                  for aloop in self.loops:
    251218                    for apacket in aloop.recursive_iter(coord=coord+[i]):
    252                         # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`
     219                        # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) )
    253220                        prep_yield = StarPacket(kvvals+apacket.values())
    254221                        for name,value in kvpairs + apacket.items():
     
    256223                        yield prep_yield
    257224                else:           # we're at the bottom of the tree
    258                     # print "Recursive yielding %s" % `dict(kvpairs)`
     225                    # print "Recursive yielding %s" % repr( dict(kvpairs) )
    259226                    prep_yield = StarPacket(kvvals)
    260227                    for name,value in kvpairs:
     
    262229                    yield prep_yield
    263230
    264     # small function to use the coordinates. 
     231    # small function to use the coordinates.
    265232    def coord_to_group(self,dataname,coords):
    266           if not isinstance(dataname,StringType):
     233          if not isinstance(dataname,unicode):
    267234             return dataname     # flag inner loop processing
    268235          newm = self[dataname]          # newm must be a list or tuple
    269236          for c in coords:
    270               # print "Coord_to_group: %s ->" % (`newm`),
     237              # print "Coord_to_group: %s ->" % (repr( newm )),
    271238              newm = newm[c]
    272               # print `newm`
    273           return newm 
     239              # print repr( newm )
     240          return newm
    274241
    275242    def flat_iterator(self):
    276         if self.dimension == 0:   
    277             yield copy.copy(self)
    278         else:
    279243            my_length = 0
    280244            top_keys = self.block.keys()
     
    283247            for pack_no in range(my_length):
    284248                yield(self.collapse(pack_no))
    285            
    286 
    287     def insert_loop(self,newloop,position=-1,audit=True):
    288         # check that new loop is kosher
    289         if newloop.dimension != self.dimension + 1:
    290             raise StarError( 'Insertion of loop of wrong nesting level %d, should be %d' % (newloop.dimension, self.dimension+1))
    291         self.loops.append(newloop)
    292         if audit:
    293             dupes = self.audit()
    294             if dupes:
    295                 dupenames = map(lambda a:a[0],dupes)
    296                 raise StarError( 'Duplicate names: %s' % `dupenames`)
    297         if position >= 0:
    298             self.item_order.insert(position,newloop)
    299         else:
    300             self.item_order.append(newloop)
    301         # print "Insert loop: item_order now" + `self.item_order`
    302 
    303     def remove_loop(self,oldloop):
    304         # print "Removing %s: item_order %s" % (`oldloop`,self.item_order)
    305         # print "Length %d" % len(oldloop)
    306         self.item_order.remove(oldloop)
    307         self.loops.remove(oldloop)
    308      
    309     def AddComment(self,itemname,comment):
    310         self.comment_list[itemname.lower()] = comment
    311 
    312     def RemoveComment(self,itemname):
    313         del self.comment_list[itemname.lower()]
    314 
    315     def GetLoopItem(self,itemname):
    316         # assume case is correct first
    317         try:
    318             return self.block[itemname]
    319         except KeyError:
    320             for loop in self.loops:
    321                 try:
    322                     return loop[itemname]
    323                 except KeyError:
    324                     pass
    325         if itemname.lower() not in self.lower_keys:
    326             raise KeyError, 'Item %s not in block' % itemname
    327         # it is there somewhere, now we need to find it
    328         real_keys = self.block.keys()
    329         lower_keys = map(lambda a:a.lower(),self.block.keys())
    330         try:
    331             k_index = lower_keys.index(itemname.lower())
    332         except ValueError:
    333             raise KeyError, 'Item %s not in block' % itemname
    334         return self.block[real_keys[k_index]]
     249
     250
     251    def RemoveItem(self,itemname):
     252        """Remove `itemname` from the block."""
     253        # first check any loops
     254        loop_no = self.FindLoop(itemname)
     255        testkey = itemname.lower()
     256        if testkey in self:
     257            del self.block[testkey]
     258            del self.true_case[testkey]
     259            # now remove from loop
     260            if loop_no >= 0:
     261                self.loops[loop_no].remove(testkey)
     262                if len(self.loops[loop_no])==0:
     263                    del self.loops[loop_no]
     264                    self.item_order.remove(loop_no)
     265            else:  #will appear in order list
     266                self.item_order.remove(testkey)
    335267
    336268    def RemoveLoopItem(self,itemname):
    337         if self.has_key(itemname):
    338             testkey = itemname.lower()
    339             real_keys = self.block.keys()
    340             lower_keys = map(lambda a:a.lower(),real_keys)
    341             try:
    342                 k_index = lower_keys.index(testkey)
    343             except ValueError:    #must be in a lower loop
    344                 for aloop in self.loops:
    345                     if aloop.has_key(itemname):
    346                         # print "Deleting %s (%s)" % (itemname,aloop[itemname])
    347                         del aloop[itemname]
    348                         if len(aloop)==0:  # all gone
    349                            self.remove_loop(aloop)
    350                         break
    351             else:
    352               del self.block[real_keys[k_index]]
    353               self.lower_keys.remove(testkey)
    354               # now remove the key in the order list
    355               for i in range(len(self.item_order)):
    356                 if isinstance(self.item_order[i],StringType): #may be loop
    357                     if self.item_order[i].lower()==testkey:
    358                         del self.item_order[i]
    359                         break
    360             if len(self.block)==0:    #no items in loop, length -> 0
    361                 self.no_packets = 0
    362             return        #no duplicates, no more checking needed
    363 
    364     def AddLoopItem(self,data,precheck=False,maxlength=-1):
    365         # print "Received data %s" % `data`
    366         # we accept only tuples, strings and lists!!
    367         if isinstance(data[0],(TupleType,ListType)):
    368            # internal loop
    369            # first we remove any occurences of these datanames in
    370            # other loops
    371            for one_item in data[0]:
    372                if self.has_key(one_item):
    373                    if not self.overwrite:
    374                        raise StarError( 'Attempt to insert duplicate item name %s' % data[0])
    375                    else:
    376                        del self[one_item]
    377            newloop = self.loopclass(dimension = self.dimension+1)
    378            keyvals = zip(data[0],data[1])
    379            for key,val in keyvals:
    380                newloop.AddLoopItem((key,val))
    381            self.insert_loop(newloop)
    382         elif not isinstance(data[0],StringType):
    383                   raise TypeError, 'Star datanames are strings only (got %s)' % `data[0]`
    384         else:
    385            if data[1] == [] or get_dim(data[1])[0] == self.dimension:
    386                if not precheck:
    387                    self.check_data_name(data[0],maxlength)    # make sure no nasty characters   
    388                # check that we can replace data
    389                if not self.overwrite:
    390                    if self.has_key(data[0]):
    391                        raise StarError( 'Attempt to insert duplicate item name %s' % data[0])
    392                # now make sure the data is OK type
    393                regval = self.regularise_data(data[1])
    394                if not precheck:
    395                    try:
    396                        self.check_item_value(regval)
    397                    except StarError, errmes:
    398                        raise StarError( "Item name " + data[0] + " " + `errmes`)
    399                if self.dimension > 0:
    400                    if self.no_packets <= 0:
    401                        self.no_packets = len(data[1])  #first item in this loop
    402                    if len(data[1]) != self.no_packets:
    403                        raise StarLengthError, 'Not enough values supplied for %s' % (data[0])
    404                try:
    405                    oldpos = self.GetItemPosition(data[0])
    406                except ValueError:
    407                    oldpos = len(self.item_order)#end of list
    408                self.RemoveLoopItem(data[0])     # may be different case, so have to do this
    409                self.block.update({data[0]:regval})  # trust the data is OK
    410                self.lower_keys.insert(oldpos,data[0].lower())
    411                self.item_order.insert(oldpos,data[0])
    412                #    self.lower_keys.append(data[0].lower())
    413                #    self.item_order.append(data[0])
    414                
    415            else:            #dimension mismatch
    416                raise StarLengthError, "input data dim %d != required dim %d: %s %s" % (get_dim(data[1])[0],self.dimension,data[0],`data[1]`)
    417 
    418     def check_data_name(self,dataname,maxlength=-1):
    419         if maxlength > 0:
    420             if len(dataname)>maxlength:
    421                 raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
    422         if dataname[0]!='_':
    423             raise StarError( 'Dataname ' + dataname + ' does not begin with _')
    424         if len (filter (lambda a: ord(a) < 33 or ord(a) > 126, dataname)) > 0:
    425             raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
    426 
    427     def check_item_value(self,item):
    428         test_item = item
    429         if type(item) != TupleType and type(item) != ListType:
    430            test_item = [item]         #single item list
    431         def check_one (it):
    432             if type(it) == StringType:
    433                 if it=='': return
    434                 me = self.char_check.match(it)           
    435                 if not me:
    436                     raise StarError( 'Bad character in %s' % it)
    437                 else:
    438                     if me.span() != (0,len(it)):
    439                         raise StarError('Data item "' + it + '"... contains forbidden characters')
    440         map(check_one,test_item)
    441 
    442     def regularise_data(self,dataitem):
    443         alrighttypes = [IntType, LongType,
    444                         FloatType, StringType]
    445         okmappingtypes = [TupleType, ListType]
    446         thistype = type(dataitem)
    447         if thistype in alrighttypes or thistype in okmappingtypes:
    448             return dataitem
    449         if isinstance(dataitem,StarTuple) or \
    450            isinstance(dataitem,StarList) or \
    451            isinstance(dataitem,StarDict):
    452             return dataitem
    453         # so try to make into a list
    454         try:
    455             regval = list(dataitem)
    456         except TypeError, value:
    457             raise StarError( str(dataitem) + ' is wrong type for data value\n' )
    458         return regval
    459        
     269        """*Deprecated*. Use `RemoveItem` instead"""
     270        self.RemoveItem(itemname)
     271
    460272    def GetLoop(self,keyname):
    461         if keyname in self.block:        #python 2.2 or above
    462             return self
    463         for aloop in self.loops:
    464             try:
    465                 return aloop.GetLoop(keyname)
    466             except KeyError:
    467                 pass
    468         raise KeyError, 'Item %s does not exist' % keyname
     273        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
     274        `keyname` is only significant as a way to specify the loop."""
     275        return LoopBlock(self,keyname)
    469276
    470277    def GetPacket(self,index):
    471278        thispack = StarPacket([])
    472         for myitem in self.item_order:
    473             if isinstance(myitem,LoopBlock):
    474                 pack_list = map(lambda b:myitem[b][index],myitem.item_order)
    475                 # print 'Pack_list -> %s' % `pack_list`
    476                 thispack.append(pack_list)
    477             elif self.dimension==0:
    478                 thispack.append(self[myitem])
    479             else:
    480                 thispack.append(self[myitem][index])
    481                 setattr(thispack,myitem,thispack[-1])
    482         return thispack
     279        for myitem in self.parent_block.loops[self.loop_no]:
     280            thispack.append(self[myitem][index])
     281            setattr(thispack,myitem,thispack[-1])
     282        return thispack
    483283
    484284    def AddPacket(self,packet):
    485         if self.dimension==0:
    486             raise StarError,"Attempt to add packet to top level block"
    487         for myitem in self.item_order:
    488             self[myitem] = list(self[myitem])   #in case we have stored a tuple
    489             self[myitem].append(packet.__getattribute__(myitem))
    490         self.no_packets +=1
    491             # print "%s now %s" % (myitem,`self[myitem]`)
    492        
    493     def RemoveKeyedPacket(self,keyname,keyvalue):
    494         packet_coord = list(self[keyname]).index(keyvalue)
    495         loophandle = self.GetLoop(keyname)
    496         for packet_entry in loophandle.item_order:
    497             loophandle[packet_entry] = list(loophandle[packet_entry])
    498             del loophandle[packet_entry][packet_coord]
    499         self.no_packets -= 1
    500        
    501     def GetKeyedPacket(self,keyname,keyvalue):
    502         #print "Looking for %s in %s" % (keyvalue, self[keyname])
    503         one_pack= filter(lambda a:getattr(a,keyname)==keyvalue,self)
    504         if len(one_pack)!=1:
    505             raise KeyError, "Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))
    506         #print "Keyed packet: %s" % one_pack[0]
    507         return one_pack[0]
     285        for myitem in self.parent_block.loops[self.loop_no]:
     286            old_values = self.parent_block[myitem]
     287            old_values.append(packet.__getattribute__(myitem))
     288            self.parent_block[myitem] = old_values
    508289
    509290    def GetItemOrder(self):
    510         return self.item_order[:]
     291        """Return a list of datanames in this `LoopBlock` in the order that they will be
     292        printed"""
     293        return self.parent_block.loops[self.loop_no][:]
     294
     295
    def GetItemOrder(self):
        """Return a fresh list of the datanames in this `LoopBlock`, in the
        order in which they will be printed."""
        loop_names = self.parent_block.loops[self.loop_no]
        return loop_names[:]
    511300
    512301    def ChangeItemOrder(self,itemname,newpos):
    513         testpos = self.GetItemPosition(itemname)
    514         del self.item_order[testpos]
    515         # so we have an object ready for action
    516         self.item_order.insert(newpos,itemname)
     302        """Change the position at which `itemname` appears when printing out to `newpos`."""
     303        self.parent_block.loops[self.loop_no].remove(itemname.lower())
     304        self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())
    517305
    518306    def GetItemPosition(self,itemname):
     307        """A utility function to get the numerical order in the printout
     308        of `itemname`.  An item has coordinate `(loop_no,pos)` with
     309        the top level having a `loop_no` of -1.  If an integer is passed to
     310        the routine then it will return the position of the loop
     311        referenced by that number."""
    519312        import string
    520         def low_case(item):
    521             try:
    522                 return string.lower(item)
    523             except AttributeError:
    524                 return item
    525         try:
    526             testname = string.lower(itemname)
    527         except AttributeError:
    528             testname = itemname
    529         lowcase_order = map(low_case,self.item_order)
    530         return lowcase_order.index(testname)
    531 
    532     def collapse(self,packet_no):
    533         if self.dimension == 0:
    534             raise StarError( "Attempt to select non-existent packet")
    535         newlb = LoopBlock(dimension=self.dimension-1)
    536         for one_item in self.item_order:
    537             if isinstance(one_item,LoopBlock):
    538                 newlb.insert_loop(one_item.collapse(packet_no))
    539             else:
    540                 # print "Collapse: %s -> %s" % (one_item,`self[one_item][packet_no]`)
    541                 newlb[one_item] = self[one_item][packet_no]
    542         return newlb
    543        
    544     def audit(self):
    545         import sets
    546         allkeys = self.keys()
    547         uniquenames = sets.Set(allkeys)
    548         if len(uniquenames) == len(allkeys): return []
    549         else:             
    550             keycount = map(lambda a:(a,allkeys.count(a)),uniquenames)
    551             return filter(lambda a:a[1]>1,keycount)
    552        
     313        if isinstance(itemname,int):
     314            # return loop position
     315            return (-1, self.item_order.index(itemname))
     316        if not itemname in self:
     317            raise ValueError('No such dataname %s' % itemname)
     318        testname = itemname.lower()
     319        if testname in self.item_order:
     320            return (-1,self.item_order.index(testname))
     321        loop_no = self.FindLoop(testname)
     322        loop_pos = self.loops[loop_no].index(testname)
     323        return loop_no,loop_pos
     324
    553325    def GetLoopNames(self,keyname):
    554326        if keyname in self:
    555327            return self.keys()
    556328        for aloop in self.loops:
    557             try: 
     329            try:
    558330                return aloop.GetLoopNames(keyname)
    559331            except KeyError:
    560332                pass
    561         raise KeyError, 'Item does not exist'
     333        raise KeyError('Item does not exist')
     334
    def GetLoopNames(self,keyname):
        """Return the list of datanames that share a loop with `keyname`.

        Raises `KeyError` when `keyname` is not a member of any loop."""
        loop_no = self.FindLoop(keyname)
        if loop_no < 0:
            raise KeyError('%s is not in any loop' % keyname)
        return self.loops[loop_no]
    562342
    563343    def AddToLoop(self,dataname,loopdata):
     
    566346            thisloop[itemname] = itemvalue
    567347
    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Extend the loop that holds `dataname` with extra columns.  `loopdata`
        maps each new dataname to its list of values; every list must have as
        many entries as the existing column `dataname`, otherwise a
        `StarLengthError` is raised and nothing is added."""
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        # collect every (name, values) pair whose length disagrees with the loop
        bad_vals = [pair for pair in loopdata.items() if len(pair[1]) != loop_len]
        if bad_vals:
            raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
                % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        self.loops[thisloop] += loopdata.keys()
     363
     364
     365class StarBlock(object):
     366    def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
     367                 characterset='ascii',maxnamelength=-1):
     368        self.block = {}    #the actual data storage (lower case keys)
     369        self.loops = {}    #each loop is indexed by a number and contains a list of datanames
     370        self.item_order = []  #lower case, loops referenced by integer
     371        self.formatting_hints = {}
     372        self.true_case = {} #transform lower case to supplied case
     373        self.provide_value = False  #prefer string version always
     374        self.dictionary = None      #DDLm dictionary
     375        self.popout = False         #used during load iteration
     376        self.curitem = -1           #used during iteration
     377        self.cache_vals = True      #store all calculated values
     378        self.maxoutlength = maxoutlength
     379        self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
     380        self.set_characterset(characterset)   #to check input names
     381        self.wraplength = wraplength
     382        self.overwrite = overwrite
     383        self.string_delimiters = ["'",'"',"\n;"]   #universal CIF set
     384        self.list_delimiter = "  "                 #CIF2 default
     385        self.wrapper = textwrap.TextWrapper()
     386        if isinstance(data,(tuple,list)):
     387            for item in data:
     388                self.AddLoopItem(item)
     389        elif isinstance(data,StarBlock):
     390            self.block = data.block.copy()
     391            self.item_order = data.item_order[:]
     392            self.true_case = data.true_case.copy()
     393            # loops as well
     394            self.loops = data.loops.copy()
     395
     396    def setmaxnamelength(self,maxlength):
     397        """Set the maximum allowable dataname length (-1 for no check)"""
     398        self.maxnamelength = maxlength
     399        if maxlength > 0:
     400            bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
     401            if len(bad_names)>0:
     402                raise StarError('Datanames too long: ' + repr( bad_names ))
     403
     404    def set_characterset(self,characterset):
     405        """Set the characterset for checking datanames: may be `ascii` or `unicode`"""
     406        import sys
     407        self.characterset = characterset
     408        if characterset == 'ascii':
     409            self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
     410        elif characterset == 'unicode':
     411            if sys.maxunicode < 1114111:
     412               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
     413            else:
     414               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)
     415
    def __str__(self):
        """The string form of a block is whatever `printsection` returns."""
        rendered = self.printsection()
        return rendered
     418
    def __setitem__(self,key,value):
        """`block[key] = value` is shorthand for `AddItem(key,value)`.

        The legacy key "saves" is rejected with a `StarError`."""
        if key != "saves":
            self.AddItem(key,value)
            return
        raise StarError("""Setting the saves key is deprecated. Add the save block to
    an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
     424
    def __getitem__(self,key):
        """Return the value of `key`, deriving it via the attached dictionary
        where necessary.

        * The legacy key "saves" raises `StarError`.
        * If `key` is not stored and a dictionary is attached, the dictionary
          is asked to derive it (first without, then with defaults);
          `KeyError` is raised if that fails or no dictionary is attached.
        * If `key` is stored but unknown to the dictionary (or there is no
          dictionary) the stored value is returned as is.
        * Otherwise the result depends on `self.provide_value`: calculated
          values are returned directly or converted to strings, and string
          values are type-converted, or re-derived when they are all '?'.
        """
        if key == "saves":
            raise StarError("""The saves key is deprecated. Access the save block from
    the enclosing block collection (e.g. CIF or STAR file object)""")
        try:
            rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
            if not self.dictionary:
                raise KeyError('No such item: %s' % key)
            # send the dictionary the required key and a pointer to us
            try:
                new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False)
            except StarDerivationFailure:   #try now with defaults included
                try:
                    new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True)
                except StarDerivationFailure as s:
                    print("In StarBlock.__getitem__, " + repr(s))
                    raise KeyError('No such item: %s' % key)
            print('Set %s to derived value %s' % (key, repr(new_value)))
            return new_value
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not key in self.dictionary: return rawitem
        print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem )))
        if is_value:
            if self.provide_value: return rawitem
            print('Turning %s into string' % repr( rawitem ))
            return self.convert_to_string(key)
        # from here on rawitem is a string (or list of strings)
        if not self.provide_value:
            return rawitem
        if (not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \
           (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem):
            return self.dictionary.change_type(key,rawitem)
        # catch the question marks: only recalculate when nothing is known
        do_calculate = False
        if isinstance(rawitem,(list,tuple)):
            known = [a for a in rawitem if a != '?']
            if len(known) == 0:   #all questions
                do_calculate = True
        elif rawitem == '?':
            do_calculate = True
        if do_calculate:
            # remove old value so the derivation does not see the placeholder
            # NOTE(review): if both derivation attempts fail the item stays
            # removed from the block - confirm this is intended.
            del self[key]
            try:
                new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False)
            except StarDerivationFailure:
                try:
                    new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True)
                except StarDerivationFailure as s:
                    print("Could not turn %s into a value: %s" % (key, repr(s)))
                    return rawitem
            # reached whether the first or the fallback derivation succeeded
            print('Set %s to derived value %s' % (key, repr( new_value )))
            return new_value
        return rawitem   #can't do anything
     483
    def __delitem__(self,key):
        """`del block[key]` delegates to `RemoveItem`."""
        return self.RemoveItem(key)
     486
     487    def __len__(self):
     488        blen = len(self.block)
     489        return blen
     490
     491    def __nonzero__(self):
     492        if self.__len__() > 0: return 1
     493        return 0
     494
     495    # keys returns all internal keys
     496    def keys(self):
     497        return list(self.block.keys())    #always lower case
     498
     499    def values(self):
     500        return [self[a] for a in self.keys()]
     501
     502    def items(self):
     503        return list(zip(self.keys(),self.values()))
     504
    def __contains__(self,key):
        """Case-insensitive membership test.  Anything that is not a string
        is never contained."""
        return isinstance(key,(unicode,str)) and key.lower() in self.keys()
     509
    def has_key(self,key):
        """Dictionary-style spelling of `key in self`."""
        found = key in self
        return found
     512
    def has_key_or_alias(self,key):
        """Return True if `key` itself, or any alias listed for `key` in the
        attached dictionary's `alias_table`, is present in the block."""
        if key in self:
            return True
        if not self.dictionary:
            return False
        for alias in self.dictionary.alias_table.get(key,[]):
            if self.has_key(alias):
                return True
        return False
     522       
    def get(self,key,default=None):
        """Return `self[key]` when `key` is in the block, otherwise `default`."""
        if key not in self:
            return default
        return self.__getitem__(key)
     529
     530    def clear(self):
     531        self.block = {}
     532        self.loops = {}
     533        self.item_order = []
     534        self.true_case = {}
     535
     536    # doesn't appear to work
     537    def copy(self):
     538        newcopy = StarBlock()
     539        newcopy.block = self.block.copy()
     540        newcopy.loops = []
     541        newcopy.item_order = self.item_order[:]
     542        newcopy.true_case = self.true_case.copy()
     543        newcopy.loops = self.loops.copy()
     544    #    return self.copy.im_class(newcopy)   #catch inheritance
     545        return newcopy
     546
     547    def update(self,adict):
     548        for key in adict.keys():
     549            self.AddItem(key,adict[key])
     550
     551    def GetItemPosition(self,itemname):
     552        """A utility function to get the numerical order in the printout
     553        of `itemname`.  An item has coordinate `(loop_no,pos)` with
     554        the top level having a `loop_no` of -1.  If an integer is passed to
     555        the routine then it will return the position of the loop
     556        referenced by that number."""
     557        import string
     558        if isinstance(itemname,int):
     559            # return loop position
     560            return (-1, self.item_order.index(itemname))
     561        if not itemname in self:
     562            raise ValueError('No such dataname %s' % itemname)
     563        testname = itemname.lower()
     564        if testname in self.item_order:
     565            return (-1,self.item_order.index(testname))
     566        loop_no = self.FindLoop(testname)
     567        loop_pos = self.loops[loop_no].index(testname)
     568        return loop_no,loop_pos
     569
    def ChangeItemOrder(self,itemname,newpos):
        """Move the printout order of `itemname` to `newpos`. If `itemname` is
        in a loop, `newpos` refers to the order within the loop.

        `itemname` may also be a non-string loop reference as stored in
        the item order; such a reference is always moved within the top level."""
        if isinstance(itemname,(unicode,str)):
            true_name = itemname.lower()
            loopno = self.FindLoop(true_name)
        else:
            # a loop reference is a top-level entry and is never itself a
            # member of a loop; FindLoop only understands datanames
            true_name = itemname
            loopno = -1
        if loopno < 0:  #top level
            self.item_order.remove(true_name)
            self.item_order.insert(newpos,true_name)
        else:
            self.loops[loopno].remove(true_name)
            self.loops[loopno].insert(newpos,true_name)
     584
     585    def GetItemOrder(self):
     586        """Return a list of datanames in the order in which they will be printed.  Loops are
     587        referred to by numerical index"""
     588        return self.item_order[:]
     589
    def AddItem(self,key,value,precheck=False):
        """Store `value` under dataname `key`.

        `value` may be a single value, a list or a tuple.  With `precheck`
        False (the default) the name is validated and the value is
        regularised and character-checked first; with `precheck` True the
        value is stored exactly as given and treated as a string value (this
        is what a file parser can use, since its values are already in the
        right form).  Raises `TypeError` for a non-string `key` and
        `StarError` when `key` already exists and overwriting is disabled."""
        if not isinstance(key,(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
        key = unicode(key)    #everything is unicode internally
        if not precheck:
             self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
        # refuse duplicates unless overwriting is allowed
        if key in self and not self.overwrite:
            raise StarError( 'Attempt to insert duplicate item name %s' % key)
        if precheck:
            regval,empty_val = value,None
            pure_string = True
        else:   #need to sanitise
            regval,empty_val = self.regularise_data(value)
            pure_string = check_stringiness(regval)
            self.check_item_value(regval)
        # bookkeeping is updated before the value itself is stored
        lower_key = key.lower()
        if lower_key not in self and self.FindLoop(lower_key)<0:      #need to add to order
            self.item_order.append(lower_key)
        # drop any earlier spelling so the newly supplied case wins
        self.true_case.pop(lower_key,None)
        self.true_case[lower_key] = key
        # slot 0 holds the string form, slot 1 the non-string form
        if pure_string:
            stored = [regval,empty_val]
        else:
            stored = [empty_val,regval]
        self.block[lower_key] = stored
     626
    def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
        """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
        necessary.

        `incomingdata` is either `(name, value)` for a single item, or
        `(names, values)` with `names` a tuple/list, in which case every
        name/value pair is added and the names are then looped together.
        `precheck` and `maxlength` are accepted but not used here."""
        first = incomingdata[0]
        if isinstance(first,(tuple,list)):
            # a whole loop
            for key,value in zip(first,incomingdata[1]):
                self.AddItem(key,value)
            self.CreateLoop(first)
            return
        if not isinstance(first,(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( first ))
        self.AddItem(first,incomingdata[1])
     644
     645    def check_data_name(self,dataname,maxlength=-1):
     646        if maxlength > 0:
     647            self.check_name_length(dataname,maxlength)
     648        if dataname[0]!='_':
     649            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
     650        if self.characterset=='ascii':
     651            if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
     652                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
     653        else:
     654            # print 'Checking %s for unicode characterset conformance' % dataname
     655            if len ([a for a in dataname if ord(a) < 33]) > 0:
     656                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
     657            if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
     658                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
     659            if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
     660                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
     661            if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
     662                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
     663            if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
     664                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
     665            if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xE == 0xE)]) > 0:
     666                print('%s fails' % dataname)
     667                for a in dataname: print('%x' % ord(a),end="")
     668                print()
     669                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')
     670
     671    def check_name_length(self,dataname,maxlength):
     672        if len(dataname)>maxlength:
     673            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
     674        return
     675
     676    def check_item_value(self,item):
     677        test_item = item
     678        if not isinstance(item,(list,dict,tuple)):
     679           test_item = [item]         #single item list
     680        def check_one (it):
     681            if isinstance(it,unicode):
     682                if it=='': return
     683                me = self.char_check.match(it)
     684                if not me:
     685                    print("Fail value check: %s" % it)
     686                    raise StarError('Bad character in %s' % it)
     687                else:
     688                    if me.span() != (0,len(it)):
     689                        print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
     690                        raise StarError('Data item "' + repr( it ) +  u'"... contains forbidden characters')
     691        [check_one(a) for a in test_item]
     692
    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary.

        Returns a pair `(value, companion)`.  For a scalar (`str`, number,
        unicode, `StarList`, `StarDict`) the companion is None and a `str`
        is converted to unicode.  Tuples, lists and other iterables are
        regularised element by element and returned as two parallel lists;
        an empty sequence yields two empty lists.  Anything that cannot be
        iterated raises `StarError`."""
        from numbers import Number
        if isinstance(dataitem,str):
            return unicode(dataitem),None
        if isinstance(dataitem,(Number,unicode,StarList,StarDict)):
            return dataitem,None  #assume StarList/StarDict contain unicode if necessary
        if isinstance(dataitem,(tuple,list)):
            elements = dataitem
        else:
            # so try to make into a list
            try:
                elements = list(dataitem)
            except TypeError:
                raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        if len(elements) == 0:
            # zip(*[]) produces nothing to unpack, so handle a column with no
            # entries explicitly
            return [],[]
        v,s = zip(*[self.regularise_data(a) for a in elements])
        return list(v),list(s)
     711
    def RemoveItem(self,itemname):
        """Remove `itemname` from the block.

        Nothing happens if the name is absent.  A loop that loses its last
        dataname is deleted and dropped from the item order."""
        # find out about loop membership before anything is deleted
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if testkey not in self:
            return
        del self.block[testkey]
        del self.true_case[testkey]
        if loop_no < 0:
            # a top-level item appears directly in the order list
            self.item_order.remove(testkey)
            return
        self.loops[loop_no].remove(testkey)
        if len(self.loops[loop_no])==0:
            del self.loops[loop_no]
            self.item_order.remove(loop_no)
     728
    def RemoveLoopItem(self,itemname):
        """*Deprecated* alias kept for old callers; simply calls `RemoveItem`."""
        self.RemoveItem(itemname)
        return None
     732
     733    def GetItemValue(self,itemname):
     734        """Return value of `itemname`.  If `itemname` is looped, a list
     735        of all values will be returned."""
     736        return self.GetFullItemValue(itemname)[0]
     737
     738    def GetFullItemValue(self,itemname):
     739        """Return the value associated with `itemname`, and a boolean flagging whether
     740        (True) or not (False) it is in a form suitable for calculation.  False is
     741        always returned for strings and `StarList` objects."""
     742        try:
     743            s,v = self.block[itemname.lower()]
     744        except KeyError:
     745            raise KeyError('Itemname %s not in datablock' % itemname)
     746        # prefer string value unless all are None
     747        # are we a looped value?
     748        if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
     749            if not_none(s):
     750                return s,False    #a string value
     751            else:
     752                return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
     753        elif not_none(s):
     754            return s,False         #a list of string values
     755        else:
     756            if len(v)>0:
     757                return v,not isinstance(v[0],StarList)
     758            return v,True
     759
     760    def CreateLoop(self,datanames,order=-1,length_check=True):
     761           """Create a loop in the datablock. `datanames` is a list of datanames that
     762           together form a loop.  If length_check is True, they should have been initialised in the block
     763           to have the same number of elements (possibly 0). If `order` is given,
     764           the loop will appear at this position in the block when printing
     765           out. A loop counts as a single position."""
     766
     767           if length_check:
     768               # check lengths: these datanames should exist
     769               listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
     770               if len(listed_values) == len(datanames):
     771                   len_set = set([len(self[a]) for a in datanames])
     772                   if len(len_set)>1:
     773                       raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
     774               elif len(listed_values) != 0:
     775                   raise ValueError('Request to loop datanames where some are single values and some are not')
     776           # store as lower case
     777           lc_datanames = [d.lower() for d in datanames]
     778           # remove these datanames from all other loops
     779           [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
     780           # remove empty loops
     781           empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
     782           for a in empty_loops:
     783               self.item_order.remove(a)
     784               del self.loops[a]
     785           if len(self.loops)>0:
     786               loopno = max(self.loops.keys()) + 1
     787           else:
     788               loopno = 1
     789           self.loops[loopno] = list(lc_datanames)
     790           if order >= 0:
     791               self.item_order.insert(order,loopno)
     792           else:
     793               self.item_order.append(loopno)
     794           # remove these datanames from item ordering
     795           self.item_order = [a for a in self.item_order if a not in lc_datanames]
     796
     797    def AddLoopName(self,oldname, newname):
     798        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
     799        error is raised.  If `newname` is in a different loop, it is removed from that loop.
     800        The number of values associated with `newname` must match the number of values associated
     801        with all other columns of the new loop or a `ValueError` will be raised."""
     802        lower_newname = newname.lower()
     803        loop_no = self.FindLoop(oldname)
     804        if loop_no < 0:
     805            raise KeyError('%s not in loop' % oldname)
     806        if lower_newname in self.loops[loop_no]:
     807            return
     808        # check length
     809        old_provides = self.provide_value
     810        self.provide_value = False
     811        loop_len = len(self[oldname])
     812        self.provide_value = old_provides
     813        if len(self[newname]) != loop_len:
     814            raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
     815        # remove from any other loops
     816        [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
     817        # and add to this loop
     818        self.loops[loop_no].append(lower_newname)
     819        # remove from item_order if present
     820        try:
     821            self.item_order.remove(lower_newname)
     822        except ValueError:
     823            pass
     824
     825    def FindLoop(self,keyname):
     826        """Find the loop that contains `keyname` and return its numerical index or
     827        -1 if not present. The numerical index can be used to refer to the loop in
     828        other routines."""
     829        loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
     830        if len(loop_no)>0:
     831            return loop_no[0]
     832        else:
     833            return -1
     834
    def GetLoop(self,keyname):
        """Build and return a `StarFile.LoopBlock` for the loop that holds
        `keyname`; the name serves only to pick out the loop."""
        loop_view = LoopBlock(self,keyname)
        return loop_view
     839
     840    def GetLoopNames(self,keyname):
     841        if keyname in self:
     842            return self.keys()
     843        for aloop in self.loops:
     844            try:
     845                return aloop.GetLoopNames(keyname)
     846            except KeyError:
     847                pass
     848        raise KeyError('Item does not exist')
     849
    def GetLoopNames(self,keyname):
        """Return the list of datanames that share a loop with `keyname`.

        The list stored in `self.loops` is handed back as-is (not a copy).
        `KeyError` is raised when `FindLoop` reports that `keyname` is in no loop."""
        which_loop = self.FindLoop(keyname)
        if which_loop < 0:
            raise KeyError('%s is not in any loop' % keyname)
        return self.loops[which_loop]
     857
    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised.  If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of values associated
        with all other columns of the new loop or a `ValueError` will be raised.

        `KeyError` is raised if `oldname` is not in any loop.  Loop membership is
        recorded using the lower-cased form of `newname`."""
        lower_newname = newname.lower()
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError('%s not in loop' % oldname)
        if lower_newname in self.loops[loop_no]:
            return
        # check length; provide_value is switched off only while the reference
        # column is measured, and is restored even if that lookup fails
        old_provides = self.provide_value
        self.provide_value = False
        try:
            loop_len = len(self[oldname])
        finally:
            self.provide_value = old_provides
        if len(self[newname]) != loop_len:
            raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
        # remove from any other loops
        for other_no in self.loops:
            if lower_newname in self.loops[other_no]:
                self.loops[other_no].remove(lower_newname)
        # and add to this loop
        self.loops[loop_no].append(lower_newname)
        # remove from item_order if present
        try:
            self.item_order.remove(lower_newname)
        except ValueError:
            pass
     885
    def AddToLoop(self,dataname,loopdata):
        """Store every (name, value) pair of `loopdata` into the loop object returned
        by `GetLoop(dataname)`, using item assignment on that object."""
        # NOTE(review): another `AddToLoop` is defined directly below at the same
        # indentation.  If both belong to the same class, the later definition
        # replaces this one and this body never runs - confirm and consider removal.
        thisloop = self.GetLoop(dataname)
        for itemname,itemvalue in loopdata.items():
            thisloop[itemname] = itemvalue
     890
    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname.

        `KeyError` is raised, before anything is changed, if `dataname` is not in a
        loop.  `StarLengthError` is raised if any new column differs in length from
        the `dataname` column.  New names are recorded in the loop in lower case,
        and a name already listed in the loop is not listed twice."""
        thisloop = self.FindLoop(dataname)
        if thisloop < 0:
            # fail before touching any data rather than on self.loops[-1] afterwards
            raise KeyError('%s not in loop' % dataname)
        # check lengths
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        # loop membership is looked up by lower-cased name elsewhere (FindLoop,
        # AddLoopName), so store the names the same way
        loop_names = self.loops[thisloop]
        for new_name in loopdata.keys():
            lower_name = new_name.lower()
            if lower_name not in loop_names:
                loop_names.append(lower_name)
     906
    def RemoveKeyedPacket(self,keyname,keyvalue):
        """Delete one packet (row) from the loop containing `keyname`.

        The row removed is the first one in which `keyname` equals `keyvalue`.
        For every dataname of that loop, both sequences held in
        `self.block[dataname]` (positions 0 and 1) are turned into lists and the
        row's entry is deleted from each."""
        row = list(self[keyname]).index(keyvalue)
        for column in self.GetLoopNames(keyname):
            for slot in (0, 1):
                self.block[column][slot] = list(self.block[column][slot])
                del self.block[column][slot][row]
     918
    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Fetch the single loop packet (a `StarPacket` object) whose `keyname`
        attribute equals `keyvalue`.

        With `no_case` set, both sides are lower-cased before comparing.  A
        `ValueError` is raised unless exactly one packet matches.  The packet found
        is also printed."""
        if no_case:
            def is_match(packet):
                return getattr(packet,keyname).lower()==keyvalue.lower()
        else:
            def is_match(packet):
                return getattr(packet,keyname)==keyvalue
        found = [packet for packet in self.GetLoop(keyname) if is_match(packet)]
        if len(found)!=1:
            raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(found)))
        chosen = found[0]
        print("Keyed packet: %s" % chosen)
        return chosen
     935
    def GetCompoundKeyedPacket(self,keydict):
        """Fetch the single loop packet (a `StarPacket` object) matching every entry
        of `keydict`, a mapping of `{key:(value,caseless)}`.

        The loop is taken from the first key of `keydict` and then narrowed key by
        key.  When `caseless` is True both sides are converted with `str` and
        lower-cased before comparing.  A `ValueError` is raised unless exactly one
        packet survives.  The packet found is also printed."""
        names = list(keydict.keys())
        candidates = self.GetLoop(names[0])
        for name in names:
            wanted,caseless = keydict[name]
            if caseless:
                candidates = [pkt for pkt in candidates
                              if str(getattr(pkt,name)).lower()==str(wanted).lower()]
            else:
                candidates = [pkt for pkt in candidates if getattr(pkt,name)==wanted]
        if len(candidates)!=1:
            raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(candidates)))
        chosen = candidates[0]
        print("Compound keyed packet: %s" % chosen)
        return chosen
     953
    def GetKeyedSemanticPacket(self,keyvalue,cat_id):
        """Return a complete packet for category `cat_id` where the
        category key for the category equals `keyvalue`.  This routine
        will understand any joined loops, so if separate loops in the
        datafile belong to the
        same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
        the returned `StarPacket` object will contain datanames from
        both categories.

        The candidate keys come from `self.dictionary.cat_key_table[cat_id]`.  A key
        that yields no packet (missing, not generable, or matching zero or several
        packets) is skipped.  `ValueError` is raised if none of the keys ends up as
        an attribute of the merged packet."""
        target_keys = self.dictionary.cat_key_table[cat_id]
        target_keys = [k[0] for k in target_keys] #one only in each list
        p = StarPacket()
        # set case-sensitivity flag
        lcase = False
        if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
            lcase = True
        for cat_key in target_keys:
            try:
                extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except KeyError:        #missing key
                try:
                    test_key = self[cat_key]  #generate key if possible
                    print('Test key is %s' % repr( test_key ))
                    usable = test_key is not None and\
                    not (isinstance(test_key,list) and (None in test_key or len(test_key)==0))
                    if not usable:
                        # nothing was generated for this key: skip it rather than
                        # merge a packet left over from an earlier key
                        continue
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
                except Exception:   #cannot be generated
                    continue
            except ValueError:      #none/more than one, assume none
                continue
                #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
            p.merge_packet(extra_packet)
        # the following attributes used to calculate missing values
        for keyname in target_keys:
            if hasattr(p,keyname):
                p.key = [keyname]
                break
        if not hasattr(p,"key"):
            raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p
     996
    def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
        """Return a complete packet for category `cat_id` where the keyvalues are
        provided as a dictionary of key:(value,caseless) pairs
        This routine
        will understand any joined loops, so if separate loops in the
        datafile belong to the
        same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
        the returned `StarPacket` object will contain datanames from
        the requested category and any children.

        `keydict` is modified in place: extra entries are added to it before the
        lookup.  Compound keys for which not every dataname is present, or for which
        `GetCompoundKeyedPacket` raises `KeyError` or `ValueError`, are skipped.
        Progress messages are printed."""
        #if len(keyvalues)==1:   #simplification
        #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
        target_keys = self.dictionary.cat_key_table[cat_id]
        # update the dictionary passed to us with all equivalents, for
        # simplicity.
        parallel_keys = list(zip(*target_keys))  #transpose
        print('Parallel keys:' + repr(parallel_keys))
        print('Keydict:' + repr(keydict))
        start_keys = list(keydict.keys())
        for one_name in start_keys:
            key_set = [a for a in parallel_keys if one_name in a]
            # NOTE(review): each `one_key` here is a whole tuple from `parallel_keys`,
            # so the new `keydict` entry is keyed by that tuple, not by the individual
            # equivalent datanames - confirm this is what the later keydict[k] lookup expects.
            for one_key in key_set:
                keydict[one_key] = keydict[one_name]
        # target_keys is a list of lists, each of which is a compound key
        p = StarPacket()
        # a little function to return the dataname for a key
        def find_key(key):
            # try the equivalents listed in the dictionary first, then the key itself
            for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
                if self.has_key(one_key):
                    return one_key
            return None
        for one_set in target_keys: #loop down the categories
            true_keys = [find_key(k) for k in one_set]
            true_keys = [k for k in true_keys if k is not None]
            # only look the packet up when every part of the compound key was found
            if len(true_keys)==len(one_set):
                truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
                try:
                    extra_packet = self.GetCompoundKeyedPacket(truekeydict)
                except KeyError:     #one or more are missing
                    continue         #should try harder?
                except ValueError:
                    continue
            else:
                continue
            print('Merging packet for keys ' + repr(one_set))
            p.merge_packet(extra_packet)
        # the following attributes used to calculate missing values
        # NOTE(review): `true_keys` is whatever the last loop iteration left behind
        # (possibly a partial list), and is unbound if `target_keys` is empty - confirm.
        p.key = true_keys
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p
     1047
     1048
    def set_grammar(self,new_grammar):
        """Configure the output delimiters for grammar `new_grammar`.

        `self.string_delimiters` is always reset; 'STAR2' and '2.0' additionally
        allow the triple-quote delimiters.  `self.list_delimiter` is set for '2.0'
        and 'STAR2' and left untouched for '1.0' and '1.1'.  Any other grammar
        raises `StarError` (after `self.string_delimiters` has been reset)."""
        delimiters = ["'",'"',"\n;",None]
        if new_grammar in ['STAR2','2.0']:
            delimiters += ['"""',"'''"]
        self.string_delimiters = delimiters
        for grammar_name,separator in (('2.0',"  "),('STAR2',", ")):
            if new_grammar == grammar_name:
                self.list_delimiter = separator
                return
        if new_grammar not in ['1.0','1.1']:
            raise StarError('Request to set unknown grammar %s' % new_grammar)
     1059
    def SetOutputLength(self,wraplength=80,maxoutlength=2048):
        """Record the output line limits on this object.

        `maxoutlength` is the maximum output line length and `wraplength` the
        length to wrap at; the wrap length is a target only and may not always be
        achievable.  `StarError` is raised if `wraplength` exceeds `maxoutlength`."""
        if maxoutlength < wraplength:
            raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
        self.wraplength, self.maxoutlength = wraplength, maxoutlength
    573         for loop in self.loops:
    574             loop.SetOutputLength(wraplength,maxoutlength)
    575 
    576     def printsection(self,instring='',blockstart="",blockend="",indent=0,coord=[]):
    577         import cStringIO
     1068
     1069    def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''):
    5781070        import string
     1071        self.provide_value = False
    5791072        # first make an ordering
    580         order = self.item_order[:]
     1073        self.create_ordering(finish_at,start_from)  #create self.output_order
    5811074        # now do it...
    5821075        if not instring:
    583             outstring = cStringIO.StringIO()       # the returned string
     1076            outstring = CIFStringIO(target_width=80)       # the returned string
    5841077        else:
    5851078            outstring = instring
    586         if not coord:
    587             coords = [0]*(self.dimension-1)
    588         else:
    589             coords = coord
    590         if(len(coords)<self.dimension-1):
    591             raise StarError("Not enough block packet coordinates to uniquely define data")
    592         # print loop delimiter
    593         outstring.write(blockstart)
    594         while len(order)>0:
    595             # print "Order now: " + `order`
    596             itemname = order.pop(0)
    597             if self.dimension == 0:            # ie value next to tag
    598                 if not isinstance(itemname,LoopBlock):  #no loop
    599                    # grab any comment
    600                    thiscomment = self.comment_list.get(itemname.lower(),'')
     1079        # print block delimiter
     1080        outstring.write(blockstart,canbreak=True)
     1081        while len(self.output_order)>0:
     1082           #print "Remaining to output " + `self.output_order`
     1083           itemname = self.output_order.pop(0)
     1084           if not isinstance(itemname,int):  #no loop
     1085                   item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()]
     1086                   if len(item_spec)>0:
     1087                       item_spec = item_spec[0]
     1088                       col_pos = item_spec.get('column',-1)
     1089                       name_pos = item_spec.get('name_pos',-1)
     1090                   else:
     1091                       col_pos = -1
     1092                       item_spec = {}
     1093                       name_pos = -1
     1094                   if col_pos < 0: col_pos = 40
     1095                   outstring.set_tab(col_pos)
    6011096                   itemvalue = self[itemname]
    602                    if isinstance(itemvalue,StringType):  #need to sanitize
    603                          thisstring = self._formatstring(itemvalue)
    604                    else: thisstring = str(itemvalue)
    605                    # try for a tabstop at 40
    606                    if len(itemname)<40 and (len(thisstring)-40 < self.wraplength-1):
    607                        itemname = itemname + ' '*(40-len(itemname))
    608                    else: itemname = itemname + ' '
    609                    if len(thisstring) + len(itemname) < (self.wraplength-1):
    610                          outstring.write('%s%s' % (itemname,thisstring))
    611                          if thiscomment:
    612                              if len(thiscomment)+len(thisstring)+len(itemname)< (self.wraplength-3):
    613                                  outstring.write(' #'+thiscomment)
    614                    else:
    615                          outstring.write('%s\n %s' % (itemname, thisstring))
    616                          if thiscomment:
    617                              if len(thiscomment)+len(thisstring)<(self.wraplength-3):
    618                                  outstring.write(' #'+thiscomment)
    619                              else:
    620                                  outstring.write('\n#'+thiscomment)
    621                    outstring.write('\n')
    622                 else:   # we are asked to print an internal loop block
    623                     #first make sure we have sensible coords.  Length should be one
    624                     #less than the current dimension
    625                     outstring.write(' '*indent); outstring.write('loop_\n')
    626                     itemname.format_names(outstring,indent+2)
    627                     itemname.format_packets(outstring,coords,indent+2)
    628             else:   # we are a nested loop
    629                 outstring.write(' '*indent); outstring.write('loop_\n')
    630                 self.format_names(outstring,indent+2)
    631                 self.format_packets(outstring,coords,indent+2)
    632         if instring: return   #inside a recursion
     1097                   outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos)
     1098                   outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)    #space after itemname
     1099                   self.format_value(itemvalue,outstring,hints=item_spec)
     1100           else:# we are asked to print a loop block
     1101                    outstring.set_tab(10)       #guess this is OK?
     1102                    loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop']
     1103                    if loop_spec:
     1104                        loop_indent = max(loop_spec[0],0)
     1105                    else:
     1106                        loop_indent = indent
     1107                    outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent)
     1108                    self.format_names(outstring,indent+2,loop_no=itemname)
     1109                    self.format_packets(outstring,indent+2,loop_no=itemname)
    6331110        else:
    6341111            returnstring = outstring.getvalue()
     
    6361113        return returnstring
    6371114
    638     def format_names(self,outstring,indent=0):
    639         temp_order = self.item_order[:]
    def format_names(self,outstring,indent=0,loop_no=-1):
        """Write the datanames of loop `loop_no` to `outstring`, one per line.

        Each name is written in its `self.true_case` spelling, preceded by
        `indent` spaces unless a formatting hint for that dataname supplies a
        `name_pos` value, which is then used as the number of spaces."""
        names = self.loops[loop_no][:]   #work on a copy
        hints_by_name = {}
        for hint in self.formatting_hints:
            if hint['dataname'] in names:
                hints_by_name[hint['dataname']] = hint
        for name in names:
            lead = hints_by_name.get(name,{}).get('name_pos',indent)
            outstring.write(' ' * lead,do_tab=False)
            outstring.write(self.true_case[name],do_tab=False)
            outstring.write("\n",do_tab=False)
     1125
    def format_packets(self,outstring,indent=0,loop_no=-1):
       """Write every packet (row) of loop `loop_no` to `outstring`.

       The columns named in `self.loops[loop_no]` are zipped into rows.  Each
       value is passed to `format_packet_item` together with the formatting hint
       for its dataname (an empty dict if there is none) and is followed by a
       breakable space.  A newline is written before every row except the first."""
       column_names = self.loops[loop_no]
       columns = [self[name] for name in column_names]
       rows = list(zip(*columns))
       #dictionary for quick lookup of formatting requirements
       hints_by_name = {}
       for hint in self.formatting_hints:
           if hint['dataname'] in column_names:
               hints_by_name[hint['dataname']] = hint
       for row_no,row in enumerate(rows):
           if row_no > 0:
               outstring.write("\n")    #new line each packet except first
           for name,value in zip(column_names,row):
               self.format_packet_item(value,indent,outstring,hints_by_name.get(name,{}))
               outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)
     1143
    def format_packet_item(self,pack_item,indent,outstring,format_hint):
        """Write one loop value to `outstring`.

        Unicode strings are passed through `_formatstring` using the `delimiter`
        hint and written at the `column` hint (-1 if absent).  Every other value is
        handed to `format_value` with the same hints.  A `str` that is not unicode
        raises `StarError`.  `indent` is accepted but not used here."""
        # temporary check for any non-unicode items
        if isinstance(pack_item,str) and not isinstance(pack_item,unicode):
            raise StarError("Item {0!r} is not unicode".format(pack_item))
        if not isinstance(pack_item,unicode):
            self.format_value(pack_item,outstring,hints = format_hint)
            return
        wanted_delimiter = format_hint.get('delimiter',None)
        wanted_column = format_hint.get('column',-1)
        text = self._formatstring(pack_item,delimiter=wanted_delimiter)
        outstring.write(text,startcol=wanted_column)
     1155
    def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}):
        """Return `instring` wrapped in a delimiter that is legal for its content.

        `delimiter` is the preferred delimiter (None meaning no delimiter at all);
        it is used only if it survives the checks below, otherwise an apostrophe,
        then a double quote, then a semicolon-delimited text field is chosen.
        If `hints` has a true `reformat` entry and the string contains a newline,
        the string is first re-wrapped with `do_wrapping` using
        `hints["reformat_indent"]`.  `standard` and `indent` are not used in this
        body, and `hints` is only read, never modified."""
        import string   # not referenced anywhere in this body
        if hints.get("reformat",False) and "\n" in instring:
            instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"])
        # start from every delimiter the current grammar allows, then strike out
        # those the content rules out
        allowed_delimiters = set(self.string_delimiters)
        # an empty string cannot be written bare
        if len(instring)==0: allowed_delimiters.difference_update([None])
        # long or multi-line strings need a delimiter that can span lines
        if len(instring) > (self.maxoutlength-2) or '\n' in instring:
                allowed_delimiters.intersection_update(["\n;","'''",'"""'])
        # whitespace, a comma, or one of these leading characters forbids a bare string
        if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring:
                allowed_delimiters.difference_update([None])
        # strings starting with data/save/global (any case) may not be written bare
        if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'):
                allowed_delimiters.difference_update([None])
        if len(instring)>5 and instring[:6].lower()=='global':
                allowed_delimiters.difference_update([None])
        # a quote character in the content rules out that quote and the bare form
        if '"' in instring: allowed_delimiters.difference_update(['"',None])
        if "'" in instring: allowed_delimiters.difference_update(["'",None])
        out_delimiter = "\n;"  #default (most conservative)
        if delimiter in allowed_delimiters:
            out_delimiter = delimiter
        elif "'" in allowed_delimiters: out_delimiter = "'"
        elif '"' in allowed_delimiters: out_delimiter = '"'
        if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter
        elif out_delimiter is None: return instring
        # we are left with semicolon strings
        # use our protocols:
        maxlinelength = max([len(a) for a in instring.split('\n')])
        # fold only when some line is longer than the maximum output length
        if maxlinelength > self.maxoutlength:
            protocol_string = apply_line_folding(instring)
        else:
            protocol_string = instring
        # now check for embedded delimiters
        if "\n;" in protocol_string:
            # lengthen the prefix with ':' until it does not occur in the text
            prefix = "CIF:"
            while prefix in protocol_string: prefix = prefix + ":"
            protocol_string = apply_line_prefix(protocol_string,prefix+"> ")
        return "\n;" + protocol_string + "\n;"
     1192
    def format_value(self,itemvalue,stringsink,compound=False,hints={}):
        """Write a Star data value to `stringsink`.

        Unicode strings go through `_formatstring`; lists and array-like objects
        (anything with both `dtype` and `__iter__`) are written between square
        brackets and dicts between braces, with members separated by
        `self.list_delimiter` and formatted recursively; floats, ints and numpy
        numbers are written with `str`.  `hints` may supply `delimiter` and
        `column`.  `compound` is passed as `mustbreak` when a bracket or brace is
        opened.  A non-unicode `str` raises `StarError`; any other type raises
        `ValueError`."""
        wanted_delimiter = hints.get('delimiter',None)
        wanted_column = hints.get('column',-1)
        if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed
            raise StarError("Non-unicode value {0} found in block".format(itemvalue))
        if isinstance(itemvalue,unicode):  #need to sanitize
            text = self._formatstring(itemvalue,delimiter=wanted_delimiter,hints=hints)
            stringsink.write(text,canbreak = True,startcol=wanted_column)
            return
        if isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy
            stringsink.set_tab(0)
            stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=wanted_column)
            if len(itemvalue)>0:
                # the first member follows the bracket directly; later members
                # are preceded by the list delimiter and flagged as compound
                self.format_value(itemvalue[0],stringsink)
                for member in itemvalue[1:]:
                    stringsink.write(self.list_delimiter,do_tab=False)
                    self.format_value(member,stringsink,compound=True)
            stringsink.write(']',unindent=True)
            return
        if isinstance(itemvalue,dict):
            stringsink.set_tab(0)
            stringsink.write('{',newindent=True,mustbreak=compound,startcol=wanted_column)  #start a new line inside
            pairs = list(itemvalue.items())
            for pair_no,(key,value) in enumerate(pairs):
                if pair_no > 0:
                    stringsink.write(self.list_delimiter)
                stringsink.write("'"+key+"'"+":",canbreak=True)
                self.format_value(value,stringsink)   #never break between key and value
            stringsink.write('}',unindent=True)
            return
        if isinstance(itemvalue,(float,int)) or \
           (have_numpy and isinstance(itemvalue,(numpy.number))):  #TODO - handle uncertainties
            stringsink.write(str(itemvalue),canbreak=True,startcol=wanted_column)   #numbers
            return
        raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue ))
     1229
    def create_ordering(self,finish_at,start_from):
        """Create a canonical ordering that includes loops using our formatting hints dictionary

        The result is stored in `self.output_order`; as a side effect the name
        order inside each loop mentioned in the hints is rewritten in
        `self.loops`.  `start_from` and `finish_at` are datanames used to trim
        the front and back of the ordering; an empty string means no trimming
        at that end.  If `start_from` is found neither in the hints nor among
        the remaining items, the output order becomes empty."""
        # datanames in the order the formatting hints list them ('loop' entries skipped)
        requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop'])
        new_order = []
        for item in requested_order:
           if isinstance(item,unicode) and item.lower() in self.item_order:
               new_order.append(item.lower())
           elif item in self:    #in a loop somewhere
               target_loop = self.FindLoop(item)
               if target_loop not in new_order:
                   new_order.append(target_loop)
                   # adjust loop name order
                   loopnames = self.loops[target_loop]
                   loop_order = [i for i in requested_order if i in loopnames]
                   unordered = [i for i in loopnames if i not in loop_order]
                   self.loops[target_loop] = loop_order + unordered
        # anything not mentioned in the hints keeps its existing relative order at the end
        extras = list([i for i in self.item_order if i not in new_order])
        self.output_order = new_order + extras
        # now handle partial output
        if start_from != '':
            if start_from in requested_order:
                sfi = requested_order.index(start_from)
                loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0]
                candidates = list([k for k in self.output_order if k in requested_order[sfi:]])
                # default position is just past the hinted items
                cand_pos = len(new_order)
                if len(candidates)>0:
                    cand_pos = self.output_order.index(candidates[0])
                if len(loop_order)>0:
                    cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
                if cand_pos < len(self.output_order):
                    print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from))
                    self.output_order = self.output_order[cand_pos:]
                else:
                    print('Start is beyond end of output list')
                    self.output_order = []
            elif start_from in extras:
               self.output_order = self.output_order[self.output_order.index(start_from):]
            else:
               self.output_order = []
        if finish_at != '':
            if finish_at in requested_order:
                fai = requested_order.index(finish_at)
                loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0])
                candidates = list([k for k in self.output_order if k in requested_order[fai:]])
                cand_pos = len(new_order)
                if len(candidates)>0:
                    cand_pos = self.output_order.index(candidates[0])
                if len(loop_order)>0:
                    cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
                if cand_pos < len(self.output_order):
                    print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at))
                    # the item at cand_pos itself is excluded
                    self.output_order = self.output_order[:cand_pos]
                else:
                    print('All of block output')
            elif finish_at in extras:
               self.output_order = self.output_order[:self.output_order.index(finish_at)]
        #print('Final order: ' + repr(self.output_order))
     1287
    def convert_to_string(self,dataname):
        """Return the value fork of `dataname` in string form.

        Conversion is not implemented yet: whatever `GetFullItemValue`
        supplies is handed back unchanged on every path."""
        value,is_value = self.GetFullItemValue(dataname)
        if is_value:
            # the stringiness check is still performed, but its outcome does
            # not alter what is returned
            check_stringiness(value)
            # TODO...something else
        return value
     1296
    def do_wrapping(self,instring,indent=3):
        """Return `instring` re-wrapped by `self.wrapper` with every line
        indented by `indent` spaces.

        A string that already contains a run of three spaces is taken to be
        pre-formatted and is returned untouched.  Otherwise the text is
        stripped, split on blank lines, each paragraph is filled separately and
        the results are joined with single newlines."""
        if "   " in instring:   #already formatted
            return instring
        margin = ' '*indent
        self.wrapper.initial_indent = margin
        self.wrapper.subsequent_indent = margin
        paragraphs = instring.strip().split("\n\n")
        return "\n".join(map(self.wrapper.fill,paragraphs))
     1309
     1310
    def merge(self,new_block,mode="strict",match_att=[],match_function=None,
                   rel_keys = []):
        """Merge the contents of `new_block` into this block.

        mode:
            'strict'  - datanames listed in `match_att` are skipped; any other
                        dataname already present here raises `StarError`,
                        otherwise it is copied across.
            'replace' - every unicode dataname of `new_block` is assigned here.
                        Note that `match_att` is not consulted in this mode.
            'overlay' - not implemented; always raises `StarError`.
            Any other mode value leaves this block untouched.

        In the two working modes the loops of `new_block` are then recreated
        here and the true-case table is updated from `new_block`.
        `match_function` and `rel_keys` are accepted for interface
        compatibility but are not used by any implemented mode."""
        if mode == 'strict':
           for key in new_block.keys():
               if key in match_att:
                   continue
               if key in self:
                  raise StarError( "Identical keys %s in strict merge mode" % key)
               self[key] = new_block[key]           #a new dataname
           # we get here if there are no keys in common, so we can now copy
           # the loops and not worry about overlaps
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'replace':
           for key in new_block.keys():
               if isinstance(key,unicode):
                   self[key] = new_block[key]
           # creating the loop will remove items from other loops
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'overlay':
           print('Overlay mode, current overwrite is %s' % self.overwrite)
           # The packet-by-packet overlay algorithm was never completed, so
           # this mode is rejected outright.
           raise StarError('Overlay block merge mode not implemented')
     1392
    def assign_dictionary(self,dic):
        """Attach `dic` as this block's dictionary.

        Only dictionaries whose `diclang` is "DDLm" are accepted; anything
        else is reported with a printed warning and otherwise ignored."""
        if dic.diclang=="DDLm":
            self.dictionary = dic
            return
        print("Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri)
     1397        self.dictionary = dic
     1398
    def unassign_dictionary(self):
        """Remove dictionary-dependent behaviour by resetting `self.dictionary` to None."""
        self.dictionary = None
     1402
    7781403
    7791404
    7801405class StarPacket(list):
    781     pass
    782 
    783 class BlockCollection:
    784     def __init__(self,datasource=None,element_class=StarBlock,type_tag=''):
     1406    def merge_packet(self,incoming):
     1407        """Merge contents of incoming packet with this packet"""
     1408        new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
     1409        self.extend(incoming)
     1410        for na in new_attrs:
     1411            setattr(self,na,getattr(incoming,na))
     1412
     1413    def __getattr__(self,att_name):
     1414        """Derive a missing attribute"""
     1415        if att_name.lower() in self.__dict__:
     1416            return getattr(self,att_name.lower())
     1417        if att_name in ('cif_dictionary','fulldata','key'):
     1418            raise AttributeError('Programming error: can only assign value of %s' % att_name)
     1419        d = self.cif_dictionary
     1420        c = self.fulldata
     1421        k = self.key
     1422        assert isinstance(k,list)
     1423        d.derive_item(att_name,c,store_value=True)
     1424        #
     1425        # now pick out the new value
     1426        # self.key is a list of the key values
     1427        keydict = dict([(v,(getattr(self,v),True)) for v in k])
     1428        full_pack = c.GetCompoundKeyedPacket(keydict)
     1429        return getattr(full_pack,att_name)
     1430
     1431class BlockCollection(object):
     1432    """A container for StarBlock objects. The constructor takes
     1433    one non-keyword argument `datasource` to set the initial data.  If
     1434    `datasource` is a Python dictionary, the values must be `StarBlock`
     1435    objects and the keys will be blocknames in the new object. Keyword
     1436    arguments:
     1437
     1438    standard:
     1439        `CIF` or `Dic`.  `CIF` enforces 75-character blocknames, and will
     1440        print block contents before that block's save frame.
     1441
     1442    blocktype:
     1443        The type of blocks held in this container. Normally `StarBlock`
     1444        or `CifBlock`.
     1445
     1446    characterset:
     1447        `ascii` or `unicode`.  Blocknames and datanames appearing within
     1448        blocks are restricted to the appropriate characterset. Note that
     1449        only characters in the basic multilingual plane are accepted. This
     1450        restriction will be lifted when PyCIFRW is ported to Python3.
     1451
     1452    scoping:
     1453        `instance` or `dictionary`: `instance` implies that save frames are
     1454        hidden from save frames lower in the hierarchy or in sibling
     1455        hierarchies. `dictionary` makes all save frames visible everywhere
     1456        within a data block.  This setting is only relevant for STAR2 dictionaries and
     1457        STAR2 data files, as save frames are currently not used in plain CIF data
     1458        files.
     1459
     1460"""
    def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
                 characterset='ascii',scoping='instance',**kwargs):
        """Set up empty bookkeeping, then load `datasource` if one is given.

        A `BlockCollection` datasource is merged in with `merge_fast`; a
        dictionary datasource has each key/value pair added through item
        assignment.  Any other datasource is ignored.  Extra keyword arguments
        are accepted but not used in this method."""
        import collections
        self.dictionary = {}
        self.standard = standard
        self.lower_keys = set()           # short_cuts
        self.renamed = {}
        # (printed block name, lower-case parent key) record stored in child_table
        self.PC = collections.namedtuple('PC',['block_id','parent'])
        self.child_table = {}
        self.visible_keys = []            # for efficiency
        self.block_input_order = []       # to output in same order
        self.scoping = scoping  #will trigger setting of child table
        self.blocktype = blocktype
        self.master_template = {}   #for outputting
        self.set_grammar('2.0')
        self.set_characterset(characterset)
        if isinstance(datasource,BlockCollection):
            self.merge_fast(datasource)
            self.scoping = scoping   #reset visibility
        elif isinstance(datasource,dict):
            for key,value in datasource.items():
                 self[key]= value
        self.header_comment = ''
    796      
     1484
    def set_grammar(self,new_grammar):
        """Select `new_grammar` as the syntax and grammar used for output.

        Raises `StarError` unless it is one of '1.1', '1.0', '2.0' or 'STAR2'."""
        known_grammars = ('1.1','1.0','2.0','STAR2')
        if new_grammar in known_grammars:
            self.grammar = new_grammar
        else:
            raise StarError('Unrecognised output grammar %s' % new_grammar)
     1490
    def set_characterset(self,characterset):
        """Record `characterset` (`ascii` or `unicode`) as the allowed character
        set for datanames and datablocks, and pass it on to every block held.
        Datanames already present in the blocks are not re-checked here."""
        self.characterset = characterset
        for blockname in self.lower_keys:
            block = self[blockname]
            block.set_characterset(characterset)
     1497
    def unlock(self):
        """Switch on overwriting for every block held in this collection"""
        for blockname in self.lower_keys:
            block = self[blockname]
            block.overwrite = True
     1502
    def lock(self):
        """Switch off overwriting for every block held in this collection"""
        for blockname in self.lower_keys:
            block = self[blockname]
            block.overwrite = False
     1507
    def __str__(self):
        """Return the text produced by `WriteOut` for this collection."""
        return self.WriteOut()
    7991510
    def __setitem__(self,key,value):
        """Add `value` as a top-level block named `key` by delegating to `NewBlock`."""
        self.NewBlock(key,value,parent=None)
     1513
    def __getitem__(self,key):
        """Return the block stored under `key`, matched case-insensitively.

        Any block can be fetched this way regardless of visibility.  Raises
        `KeyError` when there is no such block; a key that is not a string can
        never match and so also raises `KeyError` instead of silently
        returning None."""
        if isinstance(key,(unicode,str)):
           lowerkey = key.lower()
           if lowerkey in self.lower_keys:
               return self.dictionary[lowerkey]
        # tuple-wrap so that a tuple key cannot break the format operation
        raise KeyError('No such item %s' % (key,))
     1523
     1524    # we have to get an ordered list of the current keys,
     1525    # as we'll have to delete one of them anyway.
     1526    # Deletion will delete any key regardless of visibility
     1527
    def __delitem__(self,key):
        """Delete the block named `key` (case-insensitive) together with all
        of its child blocks, regardless of visibility.

        Raises `KeyError` if no such block is present."""
        self[key]   #raise error if not present
        lowerkey = key.lower()
        # get rid of all children recursively as well
        children = [name for name,info in self.child_table.items() if info.parent == lowerkey]
        for child in children:
            del self[child]   #recursive call
        del self.dictionary[lowerkey]
        # a block added under a non-existent parent has no child_table entry
        self.child_table.pop(lowerkey,None)
        try:
            self.visible_keys.remove(lowerkey)
        except (KeyError,ValueError):
            # visible_keys is a list, so a hidden block gives ValueError here
            pass
        self.lower_keys.remove(lowerkey)
        self.block_input_order.remove(lowerkey)
     1543
    def __len__(self):
        """Return the number of visible blocks."""
        return len(self.visible_keys)
     1546
    def __contains__(self,item):
        """Support the 'in' operator: true only for a string whose lower-case
        form is one of the visible keys"""
        return isinstance(item,(unicode,str)) and item.lower() in self.visible_keys
     1553
     1554    # We iterate over all visible
    def __iter__(self):
        """Yield each visible block (the block objects, not their names)."""
        visible = self.keys()
        for blockname in visible:
            yield self[blockname]
     1558
     1559    # TODO: handle different case
    def keys(self):
        """Return the visible block keys (the `visible_keys` list itself, not a copy)."""
        return self.visible_keys
     1562
     1563    # Note that has_key does not exist in 3.5
    def has_key(self,key):
        """Return True if `key` is a visible block name; same test as the `in` operator."""
        return key in self
    8471566
    8481567    def get(self,key,default=None):
    849         if self.dictionary.has_key(key):
    850             return self.dictionary[key]
    851         elif self.has_key(key):     # take account of case
     1568        if key in self:     # take account of case
    8521569            return self.__getitem__(key)
    8531570        else:
     
    def clear(self):
        """Remove all blocks and reset the key, hierarchy, visibility and
        input-order bookkeeping to empty."""
        self.dictionary.clear()
        self.lower_keys = set()
        self.child_table = {}
        self.visible_keys = []
        self.block_input_order = []
     1579
    def copy(self):
        """Return a new `BlockCollection` holding copies of all blocks.

        Each block is duplicated with its own `copy` method.  The block
        hierarchy, key set, input order, character set, output template and
        scoping of this collection are carried over to the new one."""
        block_copies = dict((k,v.copy()) for k,v in self.dictionary.items())  #all blocks
        newcopy = BlockCollection(block_copies)
        newcopy.child_table = self.child_table.copy()
        newcopy.lower_keys = self.lower_keys.copy()
        # list() rather than list.copy(), which Python 2.7 does not provide
        newcopy.block_input_order = list(self.block_input_order)
        newcopy.characterset = self.characterset
        newcopy.SetTemplate(self.master_template.copy())
        newcopy.scoping = self.scoping  #this sets visible keys
        return newcopy
     1592
    def update(self,adict):
        """Add every entry of `adict` through item assignment, so that each
        one is processed by `__setitem__`."""
        for key in adict.keys():
            self[key] = adict[key]
    8681596
    def items(self):
        """Return a list of (key, block) pairs for the visible blocks."""
        return [(a,self[a]) for a in self.keys()]
    8711599
    def first_block(self):
        """Return the block belonging to the first key in `keys()`, or None
        when there are no keys.  This is not necessarily the first block in
        the file."""
        if not self.keys():
            return None
        leading_key = self.keys()[0]
        return self[leading_key]
    8751604
    876     def NewBlock(self,blockname,blockcontents=(),replace=False,fix=True):
    877         if not blockcontents:
    878             blockcontents = self.element_class()
    879         elif isinstance(blockcontents,DictType):
    880             blockcontents = self.element_class(blockcontents)
    881         if not isinstance(blockcontents,self.element_class):
    882             raise StarError( 'Block is not of required type %s, is %s' % self.element_class.__name__,blockcontents.__class__.__name__)
    def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
        """Add a new block named `blockname` with contents `blockcontents`. If `fix`
        is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
        allows a parent block to be set so that block hierarchies can be created.  Depending on
        the output standard, these blocks will be printed out as nested save frames or
        ignored.

        Returns the lower-case key under which the block was stored, which may
        differ from `blockname` if '+' characters had to be appended to make it
        unique.  Raises `StarError` for a block name longer than 75 characters,
        or for a name clash that cannot be resolved by renaming."""
        if blockcontents is None:
            blockcontents = StarBlock()
        if self.standard == "CIF":
            blockcontents.setmaxnamelength(75)
        # the length limit is applied whatever the standard is
        if len(blockname)>75:
                 raise StarError('Blockname %s is longer than 75 characters' % blockname)
        if fix:
            newblockname = re.sub('[  \t]','_',blockname)
        else: newblockname = blockname
        new_lowerbn = newblockname.lower()
        if new_lowerbn in self.lower_keys:   #already there
            if self.standard is not None:
               toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
               if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
                  while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
               elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
                  replace_name = new_lowerbn
                  while replace_name in self.lower_keys: replace_name = replace_name + '+'
                  self._rekey(new_lowerbn,replace_name)
                  # now continue on to add in the new block
                  if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                      parent = replace_name
               else:
                  raise StarError( "Attempt to replace existing block " + blockname)
            else:
               # with no standard set, an existing block of this name is simply replaced
               del self[new_lowerbn]
        self.dictionary.update({new_lowerbn:blockcontents})
        self.lower_keys.add(new_lowerbn)
        self.block_input_order.append(new_lowerbn)
        if parent is None:
           self.child_table[new_lowerbn]=self.PC(newblockname,None)
           self.visible_keys.append(new_lowerbn)
        else:
           if parent.lower() in self.lower_keys:
              # child blocks are only made visible when scoping is not 'instance'
              if self.scoping == 'instance':
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
              else:
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
                 self.visible_keys.append(new_lowerbn)
           else:
               # NOTE(review): the block has already been stored above, but no
               # child_table entry is made for it on this path
               print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname))
        self[new_lowerbn].set_grammar(self.grammar)
        self[new_lowerbn].set_characterset(self.characterset)
        self[new_lowerbn].formatting_hints = self.master_template
        return new_lowerbn  #in case calling routine wants to know
     1656
    def _rekey(self,oldname,newname,block_id=''):
        """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name
           does not change unless [[block_id]] is given.  Prefer [[rename]] for a safe version.

           Children of the block are re-pointed at the new key, the block keeps its
           position in the input order, and its visibility is preserved.  No check is
           made here that [[newname]] is unused."""
        move_block = self[oldname]    #old block
        is_visible = oldname in self.visible_keys
        move_block_info = self.child_table[oldname]    #old info
        move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname]
        # now rewrite the necessary bits
        self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children]))
        oldpos = self.block_input_order.index(oldname)
        del self[oldname]   #do this after updating child table so we don't delete children
        self.dictionary.update({newname:move_block})
        self.lower_keys.add(newname)
        #print 'Block input order was: ' + `self.block_input_order`
        # re-insert at the position the old key occupied before deletion
        self.block_input_order[oldpos:oldpos]=[newname]
        if block_id == '':
           self.child_table.update({newname:move_block_info})
        else:
           self.child_table.update({newname:self.PC(block_id,move_block_info.parent)})
        # the new key goes to the end of the visible list
        if is_visible: self.visible_keys += [newname]
     1677
    def rename(self,oldname,newname):
        """Change both the key and the printed name of a datablock from
           [[oldname]] to [[newname]].  Names are compared case-insensitively and
           no conformance checks are made.  Raises `StarError` if the new name is
           already in use and `KeyError` if the old one cannot be found."""
        realoldname,realnewname = oldname.lower(),newname.lower()
        known_keys = self.lower_keys
        if realnewname in known_keys:
            raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname))
        if realoldname not in known_keys:
            raise KeyError('Cannot find old block %s' % realoldname)
        self._rekey(realoldname,realnewname,block_id=newname)
     1688
    def makebc(self,namelist,scoping='dictionary'):
        """Build a new block collection from the blocks named in `namelist`.

        The block objects are shared with this collection, not copied.  A
        selected block whose parent is not itself selected becomes a top-level
        block in the new collection."""
        newbc = BlockCollection()
        wanted = [n.lower() for n in namelist]
        selected = [(name,info) for name,info in self.child_table.items() if name in wanted]
        newbc.child_table = dict(selected)
        # detach blocks whose parent has been left behind
        for name,info in list(newbc.child_table.items()):
            if info.parent not in wanted:
                newbc.child_table[name] = self.PC(info.block_id,None)
        newbc.lower_keys = set(name for name,_ in selected)
        newbc.dictionary = dict((name,self.dictionary[name]) for name,_ in selected)
        newbc.scoping = scoping
        newbc.block_input_order = wanted
        return newbc
     1702
     1703
     1704    def merge_fast(self,new_bc,parent=None):
     1705        """Do a fast merge. WARNING: this may change one or more of its frame headers in order to
     1706        remove duplicate frames.  Please keep a handle to the block object instead of the text of
     1707        the header."""
     1708        if self.standard is None:
     1709            mode = 'replace'
     1710        else:
     1711            mode = 'strict'
     1712        overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
     1713        if parent is not None:
     1714            parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent]
     1715            if len(parent_name)==0 or len(parent_name)>1:
     1716                raise StarError("Unable to find unique parent block name: have %s" % str(parent_name))
     1717            parent_name = parent_name[0]
     1718        else:
     1719            parent_name = None  #an error will be thrown if we treat as a string
     1720        if overlap_flag and mode != 'replace':
     1721            double_keys = self.lower_keys.intersection(new_bc.lower_keys)
     1722            for dup_key in double_keys:
     1723                  our_parent = self.child_table[dup_key].parent
     1724                  their_parent = new_bc.child_table[dup_key].parent
     1725                  if (our_parent is None and their_parent is not None and parent is None) or\
     1726                      parent is not None:  #rename our block
     1727                    start_key = dup_key
     1728                    while start_key in self.lower_keys: start_key = start_key+'+'
     1729                    self._rekey(dup_key,start_key)
     1730                    if parent_name.lower() == dup_key:  #we just renamed the prospective parent!
     1731                        parent_name = start_key
     1732                  elif our_parent is not None and their_parent is None and parent is None:
     1733                    start_key = dup_key
     1734                    while start_key in new_bc.lower_keys: start_key = start_key+'+'
     1735                    new_bc._rekey(dup_key,start_key)
     1736                  else:
     1737                    raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
     1738        self.dictionary.update(new_bc.dictionary)
     1739        self.lower_keys.update(new_bc.lower_keys)
     1740        self.visible_keys += (list(new_bc.lower_keys))
     1741        self.block_input_order += new_bc.block_input_order
     1742        #print('Block input order now:' + repr(self.block_input_order))
     1743        self.child_table.update(new_bc.child_table)
     1744        if parent_name is not None:     #redo the child_table entries
     1745              reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
     1746              reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list]
     1747              self.child_table.update(dict(reparent_dict))
     1748
     1749    def merge(self,new_bc,mode=None,parent=None,single_block=[],
    9001750                   idblock="",match_att=[],match_function=None):
     1751        if mode is None:
     1752            if self.standard is None:
     1753               mode = 'replace'
     1754            else:
     1755               mode = 'strict'
    9011756        if single_block:
    902             self.dictionary[single_block[0]].merge(new_bc[single_block[1]],mode,
     1757            self[single_block[0]].merge(new_bc[single_block[1]],mode,
    9031758                                                   match_att=match_att,
    9041759                                                   match_function=match_function)
    9051760            return None
    906         base_keys = self.keys()
     1761        base_keys = [a[1].block_id for a in self.child_table.items()]
    9071762        block_to_item = base_keys   #default
    908         new_keys = new_bc.keys()
     1763        new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
    9091764        if match_att:
    9101765            #make a blockname -> item name map
    9111766            if match_function:
    912                 block_to_item = map(lambda a:match_function(self[a]),self.keys())
     1767                block_to_item = [match_function(self[a]) for a in self.keys()]
    9131768            else:
    914                 block_to_item = map(lambda a:self[a].get(match_att[0],None),self.keys())
     1769                block_to_item = [self[a].get(match_att[0],None) for a in self.keys()]
    9151770            #print `block_to_item`
    916         for key in new_keys:
    917             if key == idblock: continue
    918             basekey = key        #default value
    919             attval = new_bc[key].get(match_att[0],0)
     1771        for key in new_keys:        #run over incoming blocknames
     1772            if key == idblock: continue    #skip dictionary id
     1773            basekey = key           #default value
     1774            if len(match_att)>0:
     1775               attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
     1776            else:
     1777               attval = 0
    9201778            for ii in range(len(block_to_item)):  #do this way to get looped names
    921                 thisatt = block_to_item[ii]
     1779                thisatt = block_to_item[ii]       #keyname in old block
    9221780                #print "Looking for %s in %s" % (attval,thisatt)
    9231781                if attval == thisatt or \
    924                    (isinstance(thisatt,ListType) and attval in thisatt):
     1782                   (isinstance(thisatt,list) and attval in thisatt):
    9251783                      basekey = base_keys.pop(ii)
    9261784                      block_to_item.remove(thisatt)
    9271785                      break
    928             if not self.dictionary.has_key(basekey) or mode=="replace":
    929                 self.dictionary[basekey] = new_bc[key]
     1786            if not basekey in self or mode=="replace":
     1787                new_parent = new_bc.get_parent(key)
     1788                if parent is not None and new_parent is None:
     1789                   new_parent = parent
     1790                self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
    9301791            else:
    9311792                if mode=="strict":
     
    9331794                elif mode=="overlay":
    9341795                    # print "Merging block %s with %s" % (basekey,key)
    935                     self.dictionary[basekey].merge(new_bc[key],mode,match_att=match_att)
    936                 else: 
     1796                    self[basekey].merge(new_bc[key],mode,match_att=match_att)
     1797                else:
    9371798                    raise StarError( "Merge called with unknown mode %s" % mode)
    9381799
     1800    def checknamelengths(self,target_block,maxlength=-1):
     1801        if maxlength < 0:
     1802            return
     1803        else:
     1804            toolong = [a for a in target_block.keys() if len(a)>maxlength]
     1805        outstring = ""
     1806        if toolong:
     1807           outstring = "\n".join(toolong)
     1808           raise StarError( 'Following data names too long:' + outstring)
     1809
    9391810    def get_all(self,item_name):
    940         raw_values = map(lambda a:self[a].get(item_name),self.dictionary.keys())
    941         raw_values = filter(lambda a:a != None, raw_values)
     1811        raw_values = [self[a].get(item_name) for a in self.keys()]
     1812        raw_values = [a for a in raw_values if a != None]
    9421813        ret_vals = []
    9431814        for rv in raw_values:
    944             if isinstance(rv,ListType):
     1815            if isinstance(rv,list):
    9451816                for rvv in rv:
    9461817                    if rvv not in ret_vals: ret_vals.append(rvv)
     
    9491820        return ret_vals
    9501821
    951     def WriteOut(self,comment='',wraplength=80,maxoutlength=2048):
    952         import cStringIO
     1822    def __setattr__(self,attr_name,newval):
     1823        if attr_name == 'scoping':
     1824            if newval not in ('dictionary','instance'):
     1825                raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval)
     1826            if newval == 'dictionary':
     1827                self.visible_keys = [a for a in self.lower_keys]
     1828            else:
     1829                #only top-level datablocks visible
     1830                self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None]
     1831        object.__setattr__(self,attr_name,newval)
     1832
     1833    def get_parent(self,blockname):
     1834        """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
     1835        possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
     1836        try:
     1837            first = next(possibles)   #get first one
     1838        except:
     1839            raise StarError('no parent for %s' % blockname)
     1840        try:
     1841           second = next(possibles)
     1842        except StopIteration:
     1843           return first[1].parent
     1844        raise StarError('More than one parent for %s' % blockname)
     1845
     1846    def get_roots(self):
     1847        """Get the top-level blocks"""
     1848        return [a for a in self.child_table.items() if a[1].parent==None]
     1849
     1850    def get_children(self,blockname,include_parent=False,scoping='dictionary'):
     1851        """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
     1852        True, the parent block will also be included in the block collection as the root."""
     1853        newbc = BlockCollection()
     1854        block_lower = blockname.lower()
     1855        proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
     1856        newbc.child_table = dict(proto_child_table)
     1857        if not include_parent:
     1858           newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
     1859        newbc.lower_keys = set([a[0] for a in proto_child_table])
     1860        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
     1861        if include_parent:
     1862            newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
     1863            newbc.lower_keys.add(block_lower)
     1864            newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
     1865        newbc.scoping = scoping
     1866        return newbc
     1867
     1868    def get_immediate_children(self,parentname):
     1869        """Get the next level of children of the given block as a list, without nested levels"""
     1870        child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
     1871        return child_handles
     1872
     1873    # This takes time
     1874    def get_child_list(self,parentname):
     1875        """Get a list of all child categories in alphabetical order"""
     1876        child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
     1877        child_handles.sort()
     1878        return child_handles
     1879
     1880    def is_child_of_parent(self,parentname,blockname):
     1881        """Return `True` if `blockname` is a child of `parentname`"""
     1882        checkname = parentname.lower()
     1883        more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
     1884        if blockname.lower() in more_children:
     1885           return True
     1886        else:
     1887           for one_child in more_children:
     1888               if self.is_child_of_parent(one_child,blockname): return True
     1889        return False
     1890
     1891    def set_parent(self,parentname,childname):
     1892        """Set the parent block"""
     1893        # first check that both blocks exist
     1894        if parentname.lower() not in self.lower_keys:
     1895            raise KeyError('Parent block %s does not exist' % parentname)
     1896        if childname.lower() not in self.lower_keys:
     1897            raise KeyError('Child block %s does not exist' % childname)
     1898        old_entry = self.child_table[childname.lower()]
     1899        self.child_table[childname.lower()]=self.PC(old_entry.block_id,
     1900               parentname.lower())
     1901        self.scoping = self.scoping #reset visibility
     1902
     1903    def SetTemplate(self,template_file):
     1904            """Use `template_file` as a template for all block output"""
     1905            self.master_template = process_template(template_file)
     1906            for b in self.dictionary.values():
     1907                b.formatting_hints = self.master_template
     1908
     1909    def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
     1910        """Return the contents of this file as a string, wrapping if possible at `wraplength`
     1911        characters and restricting maximum line length to `maxoutlength`.  Delimiters and
     1912        save frame nesting are controlled by `self.grammar`. If `blockorder` is
     1913        provided, blocks are output in this order unless nested save frames have been
     1914        requested (STAR2). The default block order is the order in which blocks were input.
     1915        `saves_after` inserts all save frames after the given dataname,
     1916        which allows less important items to appear later.  Useful in conjunction with a
     1917        template for dictionary files."""
     1918        if maxoutlength != 0:
     1919            self.SetOutputLength(maxoutlength)
    9531920        if not comment:
    9541921            comment = self.header_comment
    955         outstring = cStringIO.StringIO()
     1922        outstring = StringIO()
     1923        if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
     1924            outstring.write(r"#\#CIF_2.0" + "\n")
    9561925        outstring.write(comment)
    957         for datablock in self.dictionary.keys():
    958             outstring.write('\n' + self.type_tag +datablock+'\n')
    959             self.dictionary[datablock].SetOutputLength(wraplength,maxoutlength)
    960             outstring.write(str(self.dictionary[datablock]))
     1926        # prepare all blocks
     1927        for b in self.dictionary.values():
     1928            b.set_grammar(self.grammar)
     1929            b.formatting_hints = self.master_template
     1930            b.SetOutputLength(wraplength,self.maxoutlength)
     1931        # loop over top-level
     1932        # monitor output
     1933        all_names = list(self.child_table.keys())   #i.e. lower case
     1934        if blockorder is None:
     1935            blockorder = self.block_input_order
     1936        top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
     1937        for blockref,blockname in top_block_names:
     1938            print('Writing %s, ' % blockname + repr(self[blockref]))
     1939            outstring.write('\n' + 'data_' +blockname+'\n')
     1940            all_names.remove(blockref)
     1941            if self.standard == 'Dic':              #put contents before save frames
     1942                outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
     1943            if self.grammar == 'STAR2':  #nested save frames
     1944                child_refs = self.get_immediate_children(blockref)
     1945                for child_ref,child_info in child_refs:
     1946                    child_name = child_info.block_id
     1947                    outstring.write('\n\n' + 'save_' + child_name + '\n')
     1948                    self.block_to_string_nested(child_ref,child_name,outstring,4)
     1949                    outstring.write('\n' + 'save_'+ '\n')
     1950            elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
     1951                child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
     1952                for child_ref in child_refs:
     1953                    child_name = self.child_table[child_ref].block_id
     1954                    outstring.write('\n\n' + 'save_' + child_name + '\n')
     1955                    outstring.write(str(self[child_ref]))
     1956                    outstring.write('\n\n' + 'save_' + '\n')
     1957                    all_names.remove(child_ref.lower())
     1958            else:
     1959                raise StarError('Grammar %s is not recognised for output' % self.grammar)
     1960            if self.standard != 'Dic':              #put contents after save frames
     1961                outstring.write(str(self[blockref]))
     1962            else:
     1963                outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    9611964        returnstring =  outstring.getvalue()
    9621965        outstring.close()
     1966        if len(all_names)>0:
     1967            print('WARNING: following blocks not output: %s' % repr(all_names))
     1968        else:
     1969            print('All blocks output.')
    9631970        return returnstring
    9641971
     1972    def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0):
     1973        """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children,
     1974           and syntactically nesting save frames"""
     1975        child_refs = self.get_immediate_children(block_ref)
     1976        self[block_ref].set_grammar(self.grammar)
     1977        if self.standard == 'Dic':
     1978            outstring.write(str(self[block_ref]))
     1979        for child_ref,child_info in child_refs:
     1980            child_name = child_info.block_id
     1981            outstring.write('\n' + 'save_' + child_name + '\n')
     1982            self.block_to_string_nested(child_ref,child_name,outstring,indentlevel)
     1983            outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
     1984        if self.standard != 'Dic':
     1985            outstring.write(str(self[block_ref]))
     1986
    9651987
    9661988class StarFile(BlockCollection):
    967     def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,blocktype=StarBlock,**kwargs):
    968         BlockCollection.__init__(self,datasource=datasource,element_class=blocktype,type_tag='data_')
    969         if isinstance(datasource, StarFile):
    970             self.my_uri = datasource.my_uri
    971         self.maxinlength = maxinlength      #no restriction
     1989    def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
     1990                scoping='instance',grammar='1.1',scantype='standard',
     1991                **kwargs):
     1992        super(StarFile,self).__init__(datasource=datasource,**kwargs)
     1993        self.my_uri = getattr(datasource,'my_uri','')
    9721994        if maxoutlength == 0:
    973             self.maxoutlength = 2048 
     1995            self.maxoutlength = 2048
    9741996        else:
    9751997            self.maxoutlength = maxoutlength
    976         if type(datasource) is StringType or hasattr(datasource,"read"):
    977             newself = ReadStar(datasource,self.maxinlength,**kwargs)
    978             # print "Reinjecting by calling %s.__init__ with kwargs %s" % (`self.__init__.im_class`,kwargs)
    979             self.__init__.im_class.__init__(self,datasource=newself,maxoutlength=maxoutlength,**kwargs)
     1998        self.scoping = scoping
     1999        if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"):
     2000            ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
     2001            maxlength = maxinlength)
    9802002        self.header_comment = \
    9812003"""#\\#STAR
    9822004##########################################################################
    983 #               STAR Format file 
     2005#               STAR Format file
    9842006#               Produced by PySTARRW module
    985 # 
     2007#
    9862008#  This is a STAR file.  STAR is a superset of the CIF file type.  For
    9872009#  more information, please refer to International Tables for Crystallography,
     
    9932015
    9942016
     2017import math
     2018class CIFStringIO(StringIO):
     2019    def __init__(self,target_width=80,**kwargs):
     2020        StringIO.__init__(self,**kwargs)
     2021        self.currentpos = 0
     2022        self.target_width = target_width
     2023        self.tabwidth = -1
     2024        self.indentlist = [0]
     2025        self.last_char = ""
     2026
     2027    def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,
     2028                             delimiter=False,startcol=-1):
     2029        """Write a string with correct linebreak, tabs and indents"""
     2030        # do we need to break?
     2031        if delimiter:
     2032            if len(outstring)>1:
     2033                raise ValueError('Delimiter %s is longer than one character' % repr( outstring ))
     2034            output_delimiter = True
     2035        if mustbreak:    #insert a new line and indent
     2036            temp_string = '\n' + ' ' * self.indentlist[-1]
     2037            StringIO.write(self,temp_string)
     2038            self.currentpos = self.indentlist[-1]
     2039            self.last_char = temp_string[-1]
     2040        if self.currentpos+len(outstring)>self.target_width: #try to break
     2041            if not delimiter and outstring[0]!='\n':          #ie <cr>;
     2042              if canbreak:
     2043                temp_string = '\n' + ' ' * self.indentlist[-1]
     2044                StringIO.write(self,temp_string)
     2045                self.currentpos = self.indentlist[-1]
     2046                self.last_char = temp_string[-1]
     2047            else:        #assume a break will be forced on next value
     2048                output_delimiter = False    #the line break becomes the delimiter
     2049        #try to match requested column
     2050        if startcol > 0:
     2051            if self.currentpos < startcol:
     2052                StringIO.write(self,(startcol - self.currentpos)* ' ')
     2053                self.currentpos = startcol
     2054                self.last_char = ' '
     2055            else:
     2056                print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos))
     2057                startcol = -1   #so that tabbing works as a backup
     2058        #handle tabs
     2059        if self.tabwidth >0 and do_tab and startcol < 0:
     2060            next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
     2061            #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop)
     2062            if self.currentpos < next_stop:
     2063                StringIO.write(self,(next_stop-self.currentpos)*' ')
     2064                self.currentpos = next_stop
     2065                self.last_char = ' '
     2066        #calculate indentation after tabs and col setting applied
     2067        if newindent:           #indent by current amount
     2068            if self.indentlist[-1] == 0:    #first time
     2069                self.indentlist.append(self.currentpos)
     2070                # print 'Indentlist: ' + `self.indentlist`
     2071            else:
     2072                self.indentlist.append(self.indentlist[-1]+2)
     2073        elif unindent:
     2074            if len(self.indentlist)>1:
     2075                self.indentlist.pop()
     2076            else:
     2077                print('Warning: cannot unindent any further')
     2078        #check that we still need a delimiter
     2079        if self.last_char in [' ','\n','\t']:
     2080            output_delimiter = False
     2081        #now output the string - every invocation comes through here
     2082        if (delimiter and output_delimiter) or not delimiter:
     2083            StringIO.write(self,outstring)
     2084        last_line_break = outstring.rfind('\n')
     2085        if last_line_break >=0:
     2086            self.currentpos = len(outstring)-last_line_break
     2087        else:
     2088            self.currentpos = self.currentpos + len(outstring)
     2089        #remember the last character
     2090        if len(outstring)>0:
     2091            self.last_char = outstring[-1]
     2092
     2093    def set_tab(self,tabwidth):
     2094        """Set the tab stop position"""
     2095        self.tabwidth = tabwidth
     2096
    9952097class StarError(Exception):
    9962098    def __init__(self,value):
    9972099        self.value = value
    9982100    def __str__(self):
    999         return '\nStar Format error: '+ self.value 
     2101        return '\nStar Format error: '+ self.value
    10002102
    10012103class StarLengthError(Exception):
     
    10042106    def __str__(self):
    10052107        return '\nStar length error: ' + self.value
    1006 def ReadStar(filename,maxlength=2048,dest=StarFile(),scantype='standard',grammar='1.1'):
     2108
     2109class StarDerivationError(Exception):
     2110    def __init__(self,fail_name):
     2111        self.fail_name = fail_name
     2112    def __str__(self):
     2113        return "Derivation of %s failed, None returned" % self.fail_name
     2114
     2115#
     2116# This is subclassed from AttributeError in order to allow hasattr
     2117# to work.
     2118#
     2119class StarDerivationFailure(AttributeError):
     2120    def __init__(self,fail_name):
     2121        self.fail_name = fail_name
     2122    def __str__(self):
     2123        return "Derivation of %s failed" % self.fail_name
     2124
     2125def ReadStar(filename,prepared = None, maxlength=-1,
     2126             scantype='standard',grammar='STAR2',CBF=False):
     2127
     2128    """ Read in a STAR file, returning the contents in the `prepared` object.
     2129
     2130    * `filename` may be a URL, a file
     2131    path on the local system, or any object with a `read` method.
     2132
     2133    * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename`
     2134    will be added to.
     2135
     2136    * `maxlength` is the maximum allowable line length in the input file. This has been set at
     2137    2048 characters for CIF but is unlimited (-1) for STAR files.
     2138
     2139    * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1`
     2140    is identical except for the exclusion of square brackets as the first characters in
     2141    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
     2142    read files according to the STAR2 publication.  If grammar is `None` or `auto`, autodetection
     2143    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files.
     2144    Note that (nested) save frames are read in all grammar variations and then flagged afterwards if
     2145    they do not match the requested grammar.
     2146
     2147    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
     2148    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
     2149    fast C routines.  Note that running PyCIFRW in Jython uses native Java regular expressions
     2150    to provide a speedup regardless of this argument.
     2151
     2152    * `CBF` flags that the input file is in Crystallographic Binary File format. The binary block is
     2153    excised from the input data stream before parsing and is not available in the returned object.
     2154    """
     2155
    10072156    import string
    1008     if grammar=="1.1":
    1009         import YappsStarParser_1_1 as Y
    1010     elif grammar=="1.0":
    1011         import YappsStarParser_1_0 as Y
    1012     elif grammar=="DDLm":
    1013         import YappsStarParser_DDLm as Y
    1014     if isinstance(filename,basestring):
    1015         filestream = urlopen(filename)
     2157    import codecs
     2158    # save desired scoping
     2159    save_scoping = prepared.scoping
     2160    from . import YappsStarParser_1_1 as Y11
     2161    from . import YappsStarParser_1_0 as Y10
     2162    from . import YappsStarParser_2_0 as Y20
     2163    from . import YappsStarParser_STAR2 as YST
     2164    if prepared is None:
     2165        prepared = StarFile()
     2166    if grammar == "auto" or grammar is None:
     2167        try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)]
     2168    elif grammar == '1.0':
     2169        try_list = [('1.0',Y10)]
     2170    elif grammar == '1.1':
     2171        try_list = [('1.1',Y11)]
     2172    elif grammar == '2.0':
     2173        try_list = [('2.0',Y20)]
     2174    elif grammar == 'STAR2':
     2175        try_list = [('STAR2',YST)]
     2176    else:
     2177        raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar ))
     2178    if isinstance(filename,(unicode,str)):
     2179        # create an absolute URL
     2180        relpath = urlparse(filename)
     2181        if relpath.scheme == "":
     2182            if not os.path.isabs(filename):
     2183                fullpath = os.path.join(os.getcwd(),filename)
     2184            else:
     2185                fullpath = filename
     2186            newrel = list(relpath)
     2187            newrel[0] = "file"
     2188            newrel[2] = fullpath
     2189            my_uri = urlunparse(newrel)
     2190        else:
     2191            my_uri = urlunparse(relpath)
     2192        # print("Full URL is: " + my_uri)
     2193        filestream = urlopen(my_uri)
     2194        text = filestream.read().decode('utf8')
     2195        filestream.close()
    10162196    else:
    10172197        filestream = filename   #already opened for us
    1018     my_uri = ""
    1019     if hasattr(filestream,"geturl"):
    1020         my_uri = filestream.geturl()
    1021     text = filestream.read()
    1022     if isinstance(filename,basestring): #we opened it, we close it
    1023         filestream.close()
     2198        text = filestream.read()
     2199        if not isinstance(text,unicode):
     2200            text = text.decode('utf8')  #CIF is always ascii/utf8
     2201        my_uri = ""
    10242202    if not text:      # empty file, return empty block
    1025         dest.set_uri(my_uri)
    1026         return dest
     2203        return prepared.set_uri(my_uri)
     2204    # filter out non-ASCII characters in CBF files if required.  We assume
     2205    # that the binary is enclosed in a fixed string that occurs
     2206    # nowhere else.
     2207    if CBF:
     2208       text_bits  = text.split("-BINARY-FORMAT-SECTION-")
     2209       text = text_bits[0]
     2210       for section in range(2,len(text_bits),2):
     2211           text = text+" (binary omitted)"+text_bits[section]
    10272212    # we recognise ctrl-Z as end of file
    1028     endoffile = text.find('\x1a')
    1029     if endoffile >= 0: 
     2213    endoffile = text.find(chr(26))
     2214    if endoffile >= 0:
    10302215        text = text[:endoffile]
    1031     split = string.split(text,'\n')
     2216    split = text.split('\n')
    10322217    if maxlength > 0:
    1033         toolong = filter(lambda a:len(a)>maxlength,split)
     2218        toolong = [a for a in split if len(a)>maxlength]
    10342219        if toolong:
    10352220            pos = split.index(toolong[0])
    10362221            raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength))
    1037     try:
    1038         if scantype == 'standard':
     2222    # honour the header string
     2223    if text[:10] != "#\#CIF_2.0" and ('2.0',Y20) in try_list:
     2224        try_list.remove(('2.0',Y20),)
     2225        if not try_list:
     2226            raise StarError('File %s missing CIF2.0 header' % (filename))
     2227    for grammar_name,Y in try_list:
     2228       if scantype == 'standard' or grammar_name in ['2.0','STAR2']:
    10392229            parser = Y.StarParser(Y.StarParserScanner(text))
    1040         else:
     2230       else:
    10412231            parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex'))
    1042         proto_star = getattr(parser,"input")()
    1043     except Y.yappsrt.SyntaxError:
     2232       # handle encoding switch
     2233       if grammar_name in ['2.0','STAR2']:
     2234           prepared.set_characterset('unicode')
     2235       else:
     2236           prepared.set_characterset('ascii')
     2237       proto_star = None
     2238       try:
     2239           proto_star = getattr(parser,"input")(prepared)
     2240       except Y.yappsrt.SyntaxError as e:
     2241           input = parser._scanner.input
     2242           Y.yappsrt.print_error(input, e, parser._scanner)
     2243       except Y.yappsrt.NoMoreTokens:
     2244           print('Could not complete parsing; stopped around here:',file=sys.stderr)
     2245           print(parser._scanner,file=sys.stderr)
     2246       except ValueError:
     2247           print('Unexpected error:')
     2248           import traceback
     2249           traceback.print_exc()
     2250       if proto_star is not None:
     2251           proto_star.set_grammar(grammar_name)   #remember for output
     2252           break
     2253    if proto_star is None:
    10442254        errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval
    1045         errorstring = errorstring + '\nParser status: %s' % `parser._scanner`
     2255        errorstring = errorstring + '\nParser status: %s' % repr( parser._scanner )
    10462256        raise StarError( errorstring)
    1047     # duplication check on all blocks
    1048     audit_result = map(lambda a:(a,proto_star[a].audit()),proto_star.keys())
    1049     audit_result = filter(lambda a:len(a[1])>0,audit_result)
    1050     if audit_result:
    1051         raise StarError( 'Duplicate keys as follows: %s' % `audit_result`)
     2257    # set visibility correctly
     2258    proto_star.scoping = 'dictionary'
    10522259    proto_star.set_uri(my_uri)
     2260    proto_star.scoping = save_scoping
    10532261    return proto_star
    10542262
    10552263def get_dim(dataitem,current=0,packlen=0):
    1056     zerotypes = [IntType, LongType,
    1057                     FloatType, StringType]
     2264    zerotypes = [int, float, str]
    10582265    if type(dataitem) in zerotypes:
    10592266        return current, packlen
     
    10612268       not dataitem.__class__ == [].__class__:
    10622269       return current, packlen
    1063     elif len(dataitem)>0: 
     2270    elif len(dataitem)>0:
    10642271    #    print "Get_dim: %d: %s" % (current,`dataitem`)
    10652272        return get_dim(dataitem[0],current+1,len(dataitem))
    10662273    else: return current+1,0
    1067    
    1068 
    1069 
     2274
     2275def apply_line_folding(instring,minwraplength=60,maxwraplength=80):
     2276    """Insert line folding characters into instring between min/max wraplength"""
     2277    # first check that we need to do this
     2278    lines = instring.split('\n')
     2279    line_len = [len(l) for l in lines]
     2280    if max(line_len) < maxwraplength and re.match("\\[ \v\t\f]*\n",instring) is None:
     2281        return instring
     2282    outstring = "\\\n"   #header
     2283    for l in lines:
     2284        if len(l) < maxwraplength:
     2285            outstring = outstring + l
     2286            if len(l) > 0 and l[-1]=='\\': #who'da thunk it?  A line ending with a backslash
     2287                    outstring = outstring + "\\\n"  #
     2288            outstring = outstring + "\n"  #  put back the split character
     2289        else:
     2290            current_bit = l
     2291            while len(current_bit) > maxwraplength:
     2292                space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:])
     2293                if space_pos is not None and space_pos.start()<maxwraplength-1:
     2294                    outstring = outstring + current_bit[:minwraplength+space_pos.start()] + "\\\n"
     2295                    current_bit = current_bit[minwraplength+space_pos.start():]
     2296                else:    #just blindly insert
     2297                    outstring = outstring + current_bit[:maxwraplength-1] + "\\\n"
     2298                    current_bit = current_bit[maxwraplength-1:]
     2299            outstring = outstring + current_bit
     2300            if current_bit[-1] == '\\':  #a backslash just happens to be here
     2301                outstring = outstring + "\\\n"
     2302            outstring = outstring + '\n'
     2303    outstring = outstring[:-1]  #remove final newline
     2304    return outstring
     2305
     2306def remove_line_folding(instring):
     2307    """Remove line folding from instring"""
     2308    if re.match(r"\\[ \v\t\f]*" +"\n",instring) is not None:
     2309        return re.sub(r"\\[ \v\t\f]*$" + "\n?","",instring,flags=re.M)
     2310    else:
     2311        return instring
     2312
     2313def apply_line_prefix(instring,prefix):
     2314    """Prefix every line in instring with prefix"""
     2315    if prefix[0] != ";" and "\\" not in prefix:
     2316        header = re.match(r"(\\[ \v\t\f]*" +"\n)",instring)
     2317        if header is not None:
     2318            print('Found line folded string for prefixing...')
     2319            not_header = instring[header.end():]
     2320            outstring = prefix + "\\\\\n" + prefix
     2321        else:
     2322            print('No folding in input string...')
     2323            not_header = instring
     2324            outstring = prefix + "\\\n" + prefix
     2325        outstring = outstring + not_header.replace("\n","\n"+prefix)
     2326        return outstring
     2327    raise StarError("Requested prefix starts with semicolon or contains a backslash: " + prefix)
     2328
     2329def remove_line_prefix(instring):
     2330    """Remove prefix from every line if present"""
     2331    prefix_match = re.match("(?P<prefix>[^;\\\n][^\n\\\\]+)(?P<folding>\\\\{1,2}[ \t\v\f]*\n)",instring)
     2332    if prefix_match is not None:
     2333        prefix_text = prefix_match.group('prefix')
     2334        print('Found prefix %s' % prefix_text)
     2335        prefix_end = prefix_match.end('folding')
     2336        # keep any line folding instructions
     2337        if prefix_match.group('folding')[:2]=='\\\\':  #two backslashes
     2338            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
     2339            return "\\" + outstring  #keep line folding first line
     2340        else:
     2341            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
     2342            return outstring[1:]   #drop first line ending, no longer necessary
     2343    else:
     2344        return instring
     2345
     2346
     2347def listify(item):
     2348    if isinstance(item,unicode): return [item]
     2349    else: return item
     2350
     2351#Transpose the list of lists passed to us
     2352def transpose(base_list):
     2353    new_lofl = []
     2354    full_length = len(base_list)
     2355    opt_range = range(full_length)
     2356    for i in range(len(base_list[0])):
     2357       new_packet = []
     2358       for j in opt_range:
     2359          new_packet.append(base_list[j][i])
     2360       new_lofl.append(new_packet)
     2361    return new_lofl
     2362
     2363# This routine optimised to return as quickly as possible
     2364# as it is called a lot.
     2365def not_none(itemlist):
     2366    """Return true only if no values of None are present"""
     2367    if itemlist is None:
     2368        return False
     2369    if not isinstance(itemlist,(tuple,list)):
     2370        return True
     2371    for x in itemlist:
     2372       if not not_none(x): return False
     2373    return True
     2374
     2375
     2376def check_stringiness(data):
     2377   """Check that the contents of data are all strings"""
     2378   if not hasattr(data,'dtype'):   #so not Numpy
     2379       from numbers import Number
     2380       if isinstance(data,Number): return False
     2381       elif isinstance(data,(unicode,str)): return True
     2382       elif data is None:return False  #should be data are None :)
     2383       else:
     2384           for one_item in data:
     2385               if not check_stringiness(one_item): return False
     2386           return True   #all must be strings
     2387   else:   #numerical python
     2388       import numpy
     2389       if data.ndim == 0:    #a bare value
     2390           if data.dtype.kind in ['S','U']: return True
     2391           else: return False
     2392       else:
     2393           for one_item in numpy.nditer(data):
     2394               print('numpy data: ' + repr( one_item ))
     2395               if not check_stringiness(one_item): return False
     2396           return True
     2397
     2398def process_template(template_file):
     2399    """Process a template datafile to formatting instructions"""
     2400    template_as_cif = StarFile(template_file,grammar="2.0").first_block()
     2401    if isinstance(template_file,(unicode,str)):
     2402        template_string = open(template_file).read()
     2403    else:   #a StringIO object
     2404        template_file.seek(0)   #reset
     2405        template_string = template_file.read()
     2406    #template_as_lines = template_string.split("\n")
     2407    #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#']
     2408    #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_']
     2409    #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0])
     2410    form_hints = []   #ordered array of hint dictionaries
     2411    find_indent = "^ +"
     2412    for item in template_as_cif.item_order:  #order of input
     2413        if not isinstance(item,int):    #not nested
     2414            hint_dict = {"dataname":item}
     2415            # find the line in the file
     2416            start_pos = re.search("(^[ \t]*(?P<name>" + item + ")[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M)
     2417            if start_pos.group("spec") != None:
     2418                spec_pos = start_pos.start("spec")-start_pos.start(0)
     2419                spec_char = template_string[start_pos.start("spec"):start_pos.start("spec")+3]
     2420                if spec_char[0] in '\'";':
     2421                    hint_dict.update({"delimiter":spec_char[0]})
     2422                    if spec_char == '"""' or spec_char == "'''":
     2423                        hint_dict.update({"delimiter":spec_char})
     2424                if spec_char[0] != ";":   #so we need to work out the column number
     2425                    hint_dict.update({"column":spec_pos})
     2426                else:                  #need to put in the carriage return
     2427                    hint_dict.update({"delimiter":"\n;"})
     2428                    # can we format the text?
     2429                    text_val = template_as_cif[item]
     2430                    hint_dict["reformat"] = "\n\t" in text_val or "\n  " in text_val
     2431                    if hint_dict["reformat"]:   #find the indentation
     2432                        p = re.search(find_indent,text_val,re.M)
     2433                        if p.group() is not None:
     2434                            hint_dict["reformat_indent"]=p.end() - p.start()
     2435                if start_pos.group('name') != None:
     2436                    name_pos = start_pos.start('name') - start_pos.start(0)
     2437                    hint_dict.update({"name_pos":name_pos})
     2438            #print '%s: %s' % (item,`hint_dict`)
     2439            form_hints.append(hint_dict)
     2440        else:           #loop block
     2441            testnames = template_as_cif.loops[item]
     2442            total_items = len(template_as_cif.loops[item])
     2443            testname = testnames[0]
     2444            #find the loop spec line in the file
     2445            loop_regex = "(^[ \t]*(?P<loop>loop_)[ \t\n\r]+(?P<name>" + testname + ")([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1)
     2446            loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S)
     2447            loop_so_far = loop_line.end()
     2448            packet_text = loop_line.group('packet')
     2449            loop_indent = loop_line.start('loop') - loop_line.start(0)
     2450            form_hints.append({"dataname":'loop','name_pos':loop_indent})
     2451            packet_regex = "[ \t]*(?P<all>(?P<sqqq>'''([^\n\r\f']*)''')|(?P<sq>'([^\n\r\f']*)'+)|(?P<dq>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))"
     2452            packet_pos = re.finditer(packet_regex,packet_text)
     2453            line_end_pos = re.finditer("^",packet_text,re.M)
     2454            next_end = next(line_end_pos).end()
     2455            last_end = next_end
     2456            for loopname in testnames:
     2457                #find the name in the file for name pos
     2458                name_regex = "(^[ \t]*(?P<name>" + loopname + "))"
     2459                name_match = re.search(name_regex,template_string,re.I|re.M|re.S)
     2460                loop_name_indent = name_match.start('name')-name_match.start(0)
     2461                hint_dict = {"dataname":loopname,"name_pos":loop_name_indent}
     2462                #find the value
     2463                thismatch = next(packet_pos)
     2464                while thismatch.start('all') > next_end:
     2465                    try:
     2466                        last_end = next_end
     2467                        next_end = next(line_end_pos).start()
     2468                        print('next end %d' % next_end)
     2469                    except StopIteration:
     2470                        break
     2471                print('Start %d, last_end %d' % (thismatch.start('all'),last_end))
     2472                col_pos = thismatch.start('all') - last_end + 1
     2473                if thismatch.group('none') is None:
     2474                    if thismatch.group('sqqq') is not None:
     2475                        hint_dict.update({'delimiter':"'''"})
     2476                    else:
     2477                        hint_dict.update({'delimiter':thismatch.groups()[0][0]})
     2478                hint_dict.update({'column':col_pos})
     2479                print('%s: %s' % (loopname,repr( hint_dict )))
     2480                form_hints.append(hint_dict)
     2481    return form_hints
     2482
     2483
     2484#No documentation flags
     2485
Note: See TracChangeset for help on using the changeset viewer.