Changeset 3137 for trunk/CifFile


Ignore:
Timestamp:
Oct 24, 2017 11:53:41 AM (4 years ago)
Author:
vondreele
Message:

replace old CifFile? with new py 2.7/3.6 compliant code
fix cif file import phase & powder file
fix ChemComp? restraint editing

Location:
trunk/CifFile
Files:
4 added
3 deleted
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/CifFile/CifFile.py

    r469 r3137  
     1# To maximize python3/python2 compatibility
     2from __future__ import print_function
     3from __future__ import unicode_literals
     4from __future__ import division
     5from __future__ import absolute_import
     6
     7try:
     8    from cStringIO import StringIO
     9except ImportError:
     10    from io import StringIO
     11
     12# Python 2,3 compatibility
     13try:
     14    from urllib import urlopen         # for arbitrary opening
     15    from urlparse import urlparse, urlunparse,urljoin
     16except:
     17    from urllib.request import urlopen
     18    from urllib.parse import urlparse,urlunparse,urljoin
     19
     20# The unicode type does not exist in Python3 as the str type
     21# encompasses unicode.  PyCIFRW tests for 'unicode' would fail
     22# Suggestions for a better approach welcome.
     23
     24if isinstance(u"abc",str):   #Python3
     25    unicode = str
     26   
     27__copyright = """
     28PYCIFRW License Agreement (Python License, Version 2)
     29-----------------------------------------------------
     30
     311. This LICENSE AGREEMENT is between the Australian Nuclear Science
     32and Technology Organisation ("ANSTO"), and the Individual or
     33Organization ("Licensee") accessing and otherwise using this software
     34("PyCIFRW") in source or binary form and its associated documentation.
     35
     362. Subject to the terms and conditions of this License Agreement,
     37ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
     38license to reproduce, analyze, test, perform and/or display publicly,
     39prepare derivative works, distribute, and otherwise use PyCIFRW alone
     40or in any derivative version, provided, however, that this License
     41Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
     422001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
     43in any derivative version prepared by Licensee.
     44
     453. In the event Licensee prepares a derivative work that is based on
     46or incorporates PyCIFRW or any part thereof, and wants to make the
     47derivative work available to others as provided herein, then Licensee
     48hereby agrees to include in any such work a brief summary of the
     49changes made to PyCIFRW.
     50
     514. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
     52basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
     53IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
     54DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
     55FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
     56INFRINGE ANY THIRD PARTY RIGHTS.
     57
     585. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
     59FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
     60RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
     61DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
     62
     636. This License Agreement will automatically terminate upon a material
     64breach of its terms and conditions.
     65
     667. Nothing in this License Agreement shall be deemed to create any
     67relationship of agency, partnership, or joint venture between ANSTO
     68and Licensee. This License Agreement does not grant permission to use
     69ANSTO trademarks or trade name in a trademark sense to endorse or
     70promote products or services of Licensee, or any third party.
     71
     728. By copying, installing or otherwise using PyCIFRW, Licensee agrees
     73to be bound by the terms and conditions of this License Agreement.
     74
    175"""
    2 1.This Software copyright \u00A9 Australian Synchrotron Research Program Inc, ("ASRP").
    3 
    4 2.Subject to ensuring that this copyright notice and licence terms
    5 appear on all copies and all modified versions, of PyCIFRW computer
    6 code ("this Software"), a royalty-free non-exclusive licence is hereby
    7 given (i) to use, copy and modify this Software including the use of
    8 reasonable portions of it in other software and (ii) to publish,
    9 bundle and otherwise re-distribute this Software or modified versions
    10 of this Software to third parties, provided that this copyright notice
    11 and terms are clearly shown as applying to all parts of software
    12 derived from this Software on each occasion it is published, bundled
    13 or re-distributed.  You are encouraged to communicate useful
    14 modifications to ASRP for inclusion for future versions.
    15 
    16 3.No part of this Software may be sold as a standalone package.
    17 
    18 4.If any part of this Software is bundled with Software that is sold,
    19 a free copy of the relevant version of this Software must be made
    20 available through the same distribution channel (be that web server,
    21 tape, CD or otherwise).
    22 
    23 5.It is a term of exercise of any of the above royalty free licence
    24 rights that ASRP gives no warranty, undertaking or representation
    25 whatsoever whether express or implied by statute, common law, custom
    26 or otherwise, in respect of this Software or any part of it.  Without
    27 limiting the generality of the preceding sentence, ASRP will not be
    28 liable for any injury, loss or damage (including consequential loss or
    29 damage) or other loss, loss of profits, costs, charges or expenses
    30 however caused which may be suffered, incurred or arise directly or
    31 indirectly in respect of this Software.
    32 
    33 6. This Software is not licenced for use in medical applications.
    34 """
    35 
    36 from types import *
    37 import re
    38 import StarFile
    39 import sys
    40 class CifLoopBlock(StarFile.LoopBlock):
    41     def __init__(self,data=(),dimension=0,**kwargs):
    42         self.loopclass = CifLoopBlock
    43         if dimension > 1:
    44             raise CifError( 'Attempt to nest loops, loop level %d' % dimension)
    45         StarFile.LoopBlock.__init__(self,data,dimension=dimension,**kwargs)
    46         # self.__iter__ = self.recursive_iter
    47 
    48     def __iter__(self):
    49         return self.recursive_iter()
    50  
    51     def AddLoopItem(self,data,precheck=False):
    52         StarFile.LoopBlock.AddLoopItem(self,data,precheck,maxlength=75)
    53 
    54     def insert_loop(self,newloop,**kwargs):
    55         if newloop.dimension > 1:
    56             raise  CifError( 'Attempt to insert inner loop, loop level %d' % dimension)
    57         StarFile.LoopBlock.insert_loop(self,newloop,**kwargs)
    58 
    59 class CifBlock(CifLoopBlock):
    60     def __init__(self,data = (), strict = 1, maxoutlength=2048,wraplength=80,overwrite=True,dimension=0):
    61         self.strict = strict
    62         CifLoopBlock.__init__(self,data=data,dimension=0,maxoutlength=maxoutlength,wraplength=wraplength,overwrite=overwrite)
    63         if isinstance(data,(StarFile.StarBlock,CifBlock)):
    64             self.saves = StarFile.BlockCollection(datasource=data["saves"],element_class=CifBlock,type_tag="save")
    65         else:
    66             self.saves = StarFile.BlockCollection(element_class=CifLoopBlock,type_tag="save")
    67         if self.strict:
    68             self.checklengths()
    69         self.dictionary = None
    70 
    71     def RemoveCifItem(self,itemname):
    72         CifLoopBlock.RemoveLoopItem(self,itemname)
    73 
    74     def __getitem__(self,key):
    75         if key == "saves":
    76             return self.saves
    77         try:     
    78            rawitem = CifLoopBlock.__getitem__(self,key)
    79         except KeyError:
    80            if self.dictionary:
    81                # send the dictionary the required key and a pointer to us
    82                rawitem = self.dictionary.derive_item(key,self)
    83            else:
    84                raise KeyError, 'No such item: %s' % key
    85         # we now have an item, we can try to convert it to a number if that is appropriate
    86         if not self.dictionary or not self.dictionary.has_key(key): return rawitem
    87         return self.dictionary.change_type(key,rawitem)
     76
     77
     78import re,sys
     79from . import StarFile
     80from .StarFile import StarList  #put in global scope for exec statement
     81try:
     82    import numpy                   #put in global scope for exec statement
     83    from .drel import drel_runtime  #put in global scope for exec statement
     84except ImportError:
     85    pass                       #will fail when using dictionaries for calcs
     86from copy import copy          #must be in global scope for exec statement
     87
     88def track_recursion(in_this_func):
     89    """Keep an eye on a function call to make sure that the key argument hasn't been
     90    seen before"""
     91    def wrapper(*args,**kwargs):
     92        key_arg = args[1]
     93        if key_arg in wrapper.called_list:
     94            print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg)))
     95            raise CifRecursionError( key_arg,wrapper.called_list[:])    #failure
     96        if len(wrapper.called_list) == 0:   #first time
     97            wrapper.stored_use_defaults = kwargs.get("allow_defaults",False)
     98            print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults))
     99        else:
     100            kwargs["allow_defaults"] = wrapper.stored_use_defaults
     101        wrapper.called_list.append(key_arg)
     102        print('Recursion watch: call stack: ' + repr(wrapper.called_list))
     103        try:
     104            result = in_this_func(*args,**kwargs)
     105        except StarFile.StarDerivationError as s:
     106            if len(wrapper.called_list) == 1: #no more
     107                raise StarFile.StarDerivationFailure(wrapper.called_list[0])
     108            else:
     109                raise
     110        finally:
     111            wrapper.called_list.pop()
     112            if len(wrapper.called_list) == 0:
     113                wrapper.stored_used_defaults = 'error'
     114        return result
     115    wrapper.called_list = []
     116    return wrapper
     117
     118class CifBlock(StarFile.StarBlock):
     119    """
     120    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
     121    a Python dictionary, in particular, individual items can be accessed using square
     122    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
     123    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.
     124
     125    ## Initialisation
     126
     127    When provided, `data` should be another `CifBlock` whose contents will be copied to
     128    this block.
     129
     130    * if `strict` is set, maximum name lengths will be enforced
     131
     132    * `maxoutlength` is the maximum length for output lines
     133
     134    * `wraplength` is the ideal length to make output lines
     135
     136    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
     137    is raised).
     138
     139    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
     140    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
     141    after setting the dataitem value.
     142    """
     143    def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
     144        """When provided, `data` should be another CifBlock whose contents will be copied to
     145        this block.
     146
     147        * if `strict` is set, maximum name lengths will be enforced
     148
     149        * `maxoutlength` is the maximum length for output lines
     150
     151        * `wraplength` is the ideal length to make output lines
     152
     153        * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
     154        is raised).
     155
     156        * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
     157        the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
     158        after setting the dataitem value.
     159        """
     160        if strict: maxnamelength=75
     161        else:
     162           maxnamelength=-1
     163        super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
     164        self.dictionary = None   #DDL dictionary referring to this block
     165        self.compat_mode = compat_mode   #old-style behaviour of setitem
     166
     167    def RemoveCifItem(self,itemname):
     168        """Remove `itemname` from the CifBlock"""
     169        self.RemoveItem(itemname)
    88170
    89171    def __setitem__(self,key,value):
    90         if key == "saves":
    91             self.saves[key] = value
    92         else:
    93             self.AddCifItem((key,value))
    94 
    95     def clear(self):
    96         CifLoopBlock.clear(self)
    97         self.saves = StarFile.BlockCollection(element_class=CifLoopBlock,type_tag="save_")
     172        self.AddItem(key,value)
     173        # for backwards compatibility make a single-element loop
     174        if self.compat_mode:
     175            if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
     176                 # single element loop
     177                 self.CreateLoop([key])
    98178
    99179    def copy(self):
    100         newblock = CifLoopBlock.copy(self)
    101         newblock.saves = self.saves.copy()
     180        newblock = super(CifBlock,self).copy()
    102181        return self.copy.im_class(newblock)   #catch inheritance
    103182
    104     def has_key(self,key):
    105         if key == "saves": return 1
    106         else: return CifLoopBlock.has_key(self,key)
    107 
    108     def __str__(self):
    109         retstr = ''
    110         for sb in self.saves.keys():
    111             retstr = retstr + '\nsave_%s\n\n' % sb
    112             self.saves[sb].SetOutputLength(self.wraplength,self.maxoutlength)
    113             retstr = retstr + str(self.saves[sb])
    114             retstr = retstr + '\nsave_\n\n'
    115         return retstr + CifLoopBlock.__str__(self)
    116 
    117     # this is not appropriate for save blocks.  Instead, the save block
    118     # should be accessed directly for update
    119      
    120     def update(self,adict):
    121         loopdone = []
    122         if not isinstance(adict,CifBlock):
    123             raise TypeError
    124         for key in adict.block.keys():
    125             self.AddCifItem((key,adict[key]))
    126         for aloop in adict.loops:
    127             self.insert_loop(aloop,audit=True)
    128 
    129183    def AddCifItem(self,data):
     184        """ *DEPRECATED*. Use `AddItem` instead."""
    130185        # we accept only tuples, strings and lists!!
    131         if not (isinstance(data[0],(StringType,TupleType,ListType))):
    132                   raise TypeError, 'Cif datanames are either a string, tuple or list'
    133         # single items passed straight through to underlying routine
     186        if not (isinstance(data[0],(unicode,tuple,list,str))):
     187                  raise TypeError('Cif datanames are either a string, tuple or list')
    134188        # we catch single item loops as well...
    135         if isinstance(data[0],StringType):
    136             if isinstance(data[1],(TupleType,ListType)) and not isinstance(data[1],(StarFile.StarList,StarFile.StarTuple)):
    137                 CifLoopBlock.AddLoopItem(self,((data[0],),((data[1],))))
    138             else:
    139                 CifLoopBlock.AddLoopItem(self,data)
     189        if isinstance(data[0],(unicode,str)):
     190            self.AddSingleCifItem(data[0],list(data[1]))
     191            if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
     192                self.CreateLoop([data[0]])
    140193            return
    141         # otherwise, we unpack one level and send along.  This is different
    142         # to the StarBlock behaviour, which assumes that any tuples imply an
    143         # inner loop.
    144         keyvals = zip(data[0],data[1])
    145         map(lambda a:CifLoopBlock.AddLoopItem(self,a),keyvals)
    146 
    147     def checklengths(self):
    148         toolong = filter(lambda a:len(a)>75, self.keys())
    149         outstring = ""
    150         for it in toolong: outstring += "\n" + it
    151         if toolong:
    152            raise CifError( 'Following data names too long:' + outstring)
     194        # otherwise, we loop over the datanames
     195        keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
     196        [self.AddSingleCifItem(a,b) for a,b in keyvals]
     197        # and create the loop
     198        self.CreateLoop(data[0][0])
     199
     200    def AddSingleCifItem(self,key,value):
     201        """*Deprecated*. Use `AddItem` instead"""
     202        """Add a single data item. If it is part of a loop, a separate call should be made"""
     203        self.AddItem(key,value)
    153204
    154205    def loopnames(self):
    155         return map(lambda a:a.keys(),self.loops)
    156 
    157     def assign_dictionary(self,dic):
    158         if not dic.diclang=="DDLm":
    159             print "Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri
    160             return
    161         self.dictionary = dic
    162 
    163     def merge(self,new_block,mode="strict",match_att=[],match_function=None,nosaves=False,
    164                    rel_keys = []):
    165         # deal with save frames
    166         if not nosaves:
    167             self["saves"].merge(new_block["saves"],mode,match_att=match_att,
    168                                                         match_function=match_function)
    169         if mode == 'strict':
    170            for key in new_block.item_order:
    171                if self.has_key(key) and key not in match_att:
    172                   raise CifError( "Identical keys %s in strict merge mode" % key)
    173                elif key not in match_att:           #no change otherwise
    174                   if isinstance(key,StringType):
    175                       self[key] = new_block[key]
    176                   else:
    177                       self.insert_loop(key)
    178         elif mode == 'replace':
    179            newkeys = new_block.keys()
    180            for ma in match_att:
    181               try:
    182                    newkeys.remove(ma)        #don't touch the special ones
    183               except ValueError:
    184                    pass
    185            for key in new_block.item_order:
    186                   if isinstance(key,StringType):
    187                       self[key] = new_block[key]
    188                   else:
    189                       self.insert_loop(key)   #assume is a loop
    190         elif mode == 'overlay':
    191            for attribute in new_block.keys():
    192                if attribute in match_att: continue      #ignore this one
    193                new_value = new_block[attribute]
    194                #non-looped items
    195                if isinstance(new_value,StringType):
    196                   self[attribute] = new_value
    197            these_atts = self.keys()
    198            for newloop in new_block.loops:             
    199                newkeys = newloop.keys()
    200                # note that the following line determines packet item order
    201                overlaps = filter(lambda a: a in these_atts,newkeys)
    202                if len(overlaps)< len(newloop):#completely new loop
    203                   self.insert_loop(newloop)
    204                elif len(overlaps)==len(newloop):
    205                   # appending packets
    206                   # print "In overlay merge mode, found extra packet items:"
    207                   # print `overlaps`
    208                   # get key position
    209                   loop_keys = filter(lambda a:a in rel_keys,overlaps)
    210                   try:
    211                      newkeypos = map(lambda a:newkeys.index(a),loop_keys)
    212                      newkeypos = newkeypos[0]      #one key per loop for now
    213                      loop_keys = loop_keys[0]
    214                   except (ValueError,IndexError):
    215                      newkeypos = []
    216                   overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data
    217                   new_data = map(lambda a:new_block[a],overlaps) #new packet data
    218                   packet_data = transpose(overlap_data)
    219                   new_p_data = transpose(new_data)
    220                   # remove any packets for which the keys match between old and new; we
    221                   # make the arbitrary choice that the old data stays
    222                   if newkeypos:
    223                       # get matching values in new list
    224                       print "Old, new data:\n%s\n%s" % (`overlap_data[newkeypos]`,`new_data[newkeypos]`)
    225                       key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos])
    226                       # filter out any new data with these key values
    227                       new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data)
    228                       if new_p_data:
    229                           new_data = transpose(new_p_data)
    230                       else: new_data = []
    231                   # wipe out the old data and enter the new stuff
    232                   byebyeloop = self.GetLoop(overlaps[0])
    233                   # print "Removing '%s' with overlaps '%s'" % (`byebyeloop`,`overlaps`)
    234                   # Note that if, in the original dictionary, overlaps are not
    235                   # looped, GetLoop will return the block itself.  So we check
    236                   # for this case...
    237                   if byebyeloop != self:
    238                       self.remove_loop(byebyeloop)
    239                   self.AddCifItem(((overlaps,),(overlap_data,)))  #adding old packets
    240                   for pd in new_p_data:                             #adding new packets
    241                      if pd not in packet_data:
    242                         for i in range(len(overlaps)):
    243                             #don't do this at home; we are appending
    244                             #to something in place
    245                             self[overlaps[i]].append(pd[i])
    246                              
     206        return [self.loops[a] for a in self.loops]
     207
    247208
    248209class CifFile(StarFile.StarFile):
    249     def __init__(self,datasource=None,strict=1,maxinlength=2048,maxoutlength=0,**kwargs):
    250         StarFile.StarFile.__init__(self,datasource=datasource,maxinlength=maxinlength,maxoutlength=maxoutlength,blocktype=CifBlock,**kwargs)
     210    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
     211        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
    251212        self.strict = strict
    252213        self.header_comment = \
    253 """#\\#CIF1.1
     214"""
    254215##########################################################################
    255 #               Crystallographic Information Format file 
     216#               Crystallographic Information Format file
    256217#               Produced by PyCifRW module
    257 # 
     218#
    258219#  This is a CIF file.  CIF has been adopted by the International
    259 #  Union of Crystallography as the standard for data archiving and 
     220#  Union of Crystallography as the standard for data archiving and
    260221#  transmission.
    261222#
     
    264225##########################################################################
    265226"""
    266     def NewBlock(self,blockname,*nkwargs,**kwargs):
    267        if len(blockname)>75:
    268            raise CifError , 'Blockname %s is longer than 75 characters' % blockname
    269        else:
    270            StarFile.StarFile.NewBlock(self,blockname,*nkwargs,**kwargs)
    271227
    272228
     
    275231        self.value = value
    276232    def __str__(self):
    277         return '\nCif Format error: '+ self.value 
     233        return '\nCif Format error: '+ self.value
    278234
    279235class ValidCifError(Exception):
     
    283239        return '\nCif Validity error: ' + self.value
    284240
    285 class CifDic(StarFile.BlockCollection):
    286     def __init__(self,dic,do_minimum=False,grammar='1.1'):
     241class CifRecursionError(Exception):
     242    def __init__(self,key_value,call_stack):
     243        self.key_value = key_value
     244        self.call_stack = call_stack
     245    def __str__(self):
     246        return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))
     247
     248
     249class CifDic(StarFile.StarFile):
     250    """Create a Cif Dictionary object from the provided source, which can
     251    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
     252    only):
     253
     254    * do_minimum (Boolean):
     255         Do not set up the dREL system for auto-calculation or perform
     256         imports.  This implies do_imports=False and do_dREL=False
     257
     258    * do_imports = No/Full/Contents/All:
     259         If not 'No', replace _import.get statements with the imported contents for
     260         Full mode/Contents mode/Both respectively.
     261
     262    * do_dREL = True/False:
     263         Parse and convert all dREL methods to Python. Implies do_imports=All
     264
     265    """
     266    def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
     267                                                             grammar='auto',**kwargs):
    287268        self.do_minimum = do_minimum
    288         self.dic_as_cif = dic
     269        if do_minimum:
     270            do_imports = 'No'
     271            do_dREL = False
     272        if do_dREL: do_imports = 'All'
    289273        self.template_cache = {}    #for DDLm imports
    290274        self.ddlm_functions = {}    #for DDLm functions
    291         self.switch_numpy(False)    #no Numpy arrays returned
    292         if isinstance(dic,StringType):
    293             self.dic_as_cif = CifFile(dic,grammar=grammar)
    294         (self.dicname,self.diclang,self.defdata) = self.dic_determine(self.dic_as_cif)
    295         StarFile.BlockCollection.__init__(self,element_class=CifBlock,datasource=self.defdata)
    296         self.scopes_mandatory = {"dictionary":[],"category":[],"item":[]}
    297         self.scopes_naughty = {"dictionary":[],"category":[],"item":[]}
     275        self.switch_numpy(False)    #no Numpy arrays returned
     276        super(CifDic,self).__init__(datasource=dic,grammar=grammar,**kwargs)
     277        self.standard = 'Dic'    #for correct output order
     278        self.scoping = 'dictionary'
     279        (self.dicname,self.diclang) = self.dic_determine()
     280        print('%s is a %s dictionary' % (self.dicname,self.diclang))
     281        self.scopes_mandatory = {}
     282        self.scopes_naughty = {}
    298283        # rename and expand out definitions using "_name" in DDL dictionaries
    299284        if self.diclang == "DDL1":
    300285            self.DDL1_normalise()   #this removes any non-definition entries
     286        self.create_def_block_table() #From now on, [] uses definition_id
     287        if self.diclang == "DDL1":
    301288            self.ddl1_cat_load()
    302289        elif self.diclang == "DDL2":
    303290            self.DDL2_normalise()   #iron out some DDL2 tricky bits
    304291        elif self.diclang == "DDLm":
    305             self.ddlm_normalise()
    306             self.ddlm_import()      #recursively calls this routine
    307             if not self.do_minimum:
    308                 print "Doing full dictionary initialisation"
    309                 self.ddlm_parse_valid() #extract validity information from data block
    310                 self.transform_drel()   #parse the drel functions
    311                 self.add_drel_funcs()   #put the drel functions into the namespace
    312         self.add_category_info()
     292            self.scoping = 'dictionary'   #expose all save frames
     293            if do_imports is not 'No':
     294               self.ddlm_import(import_mode=do_imports)#recursively calls this routine
     295            self.create_alias_table()
     296            self.create_cat_obj_table()
     297            self.create_cat_key_table()
     298            if do_dREL:
     299                print('Doing full dictionary initialisation')
     300                self.initialise_drel()
     301        self.add_category_info(full=do_dREL)
    313302        # initialise type information
    314303        self.typedic={}
    315304        self.primdic = {}   #typecode<->primitive type translation
    316305        self.add_type_info()
    317         self.item_validation_funs = [
    318             self.validate_item_type,
    319             self.validate_item_esd,
    320             self.validate_item_enum,   # functions which check conformance
    321             self.validate_enum_range,
    322             self.validate_looping]
    323         self.loop_validation_funs = [
    324             self.validate_loop_membership,
    325             self.validate_loop_key,
    326             self.validate_loop_references]    # functions checking loop values
    327         self.global_validation_funs = [
    328             self.validate_exclusion,
    329             self.validate_parent,
    330             self.validate_child,
    331             self.validate_dependents,
    332             self.validate_uniqueness] # where we need to look at other values
    333         self.block_validation_funs = [  # where only a full block will do
    334             self.validate_mandatory_category]
    335         self.global_remove_validation_funs = [
    336             self.validate_remove_parent_child] # removal is quicker with special checks
    337         self.optimize = False        # default value
    338         self.done_parents = []
    339         self.done_children = []
    340         self.done_keys = []
    341         # debug
    342         # j = open("dic_debug","w")
    343         # j.write(self.__str__())
    344         # j.close()
    345 
    346     def dic_determine(self,cifdic):
    347         if cifdic.has_key("on_this_dictionary"):
    348             self.master_key = "on_this_dictionary"
     306        self.install_validation_functions()
     307
     308    def dic_determine(self):
     309        if "on_this_dictionary" in self:
     310            self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
     311            self.def_id_spec = "_name"
     312            self.cat_id_spec = "_category.id"   #we add this ourselves
    349313            self.type_spec = "_type"
    350314            self.enum_spec = "_enumeration"
     
    354318            self.must_exist_spec = "_list_mandatory"
    355319            self.list_ref_spec = "_list_reference"
     320            self.key_spec = "_list_mandatory"
    356321            self.unique_spec = "_list_uniqueness"
    357322            self.child_spec = "_list_link_child"
     
    362327            self.dep_spec = "xxx"
    363328            self.cat_list = []   #to save searching all the time
    364             name = cifdic["on_this_dictionary"]["_dictionary_name"]
    365             version = cifdic["on_this_dictionary"]["_dictionary_version"]
    366             return (name+version,"DDL1",cifdic)
    367         elif len(cifdic.keys()) == 1:              # DDL2/DDLm
    368             self.master_key = cifdic.keys()[0]     
    369             name = cifdic[self.master_key]["_dictionary.title"]
    370             version = cifdic[self.master_key]["_dictionary.version"]
    371             if name != self.master_key:
    372                 print "Warning: DDL2 blockname %s not equal to dictionary name %s" % (self.master_key,name)
    373             if cifdic[self.master_key].has_key("_dictionary.class"):   #DDLm
    374                 self.unique_spec = "_category_key.generic"
    375                 return(name+version,"DDLm",cifdic[self.master_key]["saves"])
    376             #otherwise DDL2
    377             self.type_spec = "_item_type.code"
    378             self.enum_spec = "_item_enumeration.value"
    379             self.esd_spec = "_item_type_conditions.code"
    380             self.cat_spec = "_item.category_id"
    381             self.loop_spec = "there_is_no_loop_spec!"
    382             self.must_loop_spec = "xxx"
    383             self.must_exist_spec = "_item.mandatory_code"
    384             self.child_spec = "_item_linked.child_name"
    385             self.parent_spec = "_item_linked.parent_name"
    386             self.related_func = "_item_related.function_code"
    387             self.related_item = "_item_related.related_name"
    388             self.unique_spec = "_category_key.name"
    389             self.list_ref_spec = "xxx"
    390             self.primitive_type = "_type"
    391             self.dep_spec = "_item_dependent.dependent_name"
    392             return (name+version,"DDL2",cifdic[self.master_key]["saves"])
    393         else:
    394             raise CifError, "Unable to determine dictionary DDL version"
    395        
     329            name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
     330            version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
     331            return (name+version,"DDL1")
     332        elif len(self.get_roots()) == 1:              # DDL2/DDLm
     333            self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])     
     334            # now change to dictionary scoping
     335            self.scoping = 'dictionary'
     336            name = self.master_block["_dictionary.title"]
     337            version = self.master_block["_dictionary.version"]
     338            if self.master_block.has_key("_dictionary.class"):   #DDLm
     339                self.enum_spec = '_enumeration_set.state'
     340                self.key_spec = '_category.key_id'
     341                self.must_exist_spec = None
     342                self.cat_spec = '_name.category_id'
     343                self.primitive_type = '_type.contents'
     344                self.cat_id_spec = "_definition.id"
     345                self.def_id_spec = "_definition.id"
     346                return(name+version,"DDLm")
     347            else:   #DDL2
     348                self.cat_id_spec = "_category.id"
     349                self.def_id_spec = "_item.name"
     350                self.key_spec = "_category_mandatory.name"
     351                self.type_spec = "_item_type.code"
     352                self.enum_spec = "_item_enumeration.value"
     353                self.esd_spec = "_item_type_conditions.code"
     354                self.cat_spec = "_item.category_id"
     355                self.loop_spec = "there_is_no_loop_spec!"
     356                self.must_loop_spec = "xxx"
     357                self.must_exist_spec = "_item.mandatory_code"
     358                self.child_spec = "_item_linked.child_name"
     359                self.parent_spec = "_item_linked.parent_name"
     360                self.related_func = "_item_related.function_code"
     361                self.related_item = "_item_related.related_name"
     362                self.unique_spec = "_category_key.name"
     363                self.list_ref_spec = "xxx"
     364                self.primitive_type = "_type"
     365                self.dep_spec = "_item_dependent.dependent_name"
     366                return (name+version,"DDL2")
     367        else:
     368            raise CifError("Unable to determine dictionary DDL version")
     369
    396370    def DDL1_normalise(self):
     371        # switch off block name collision checks
     372        self.standard = None
    397373        # add default type information in DDL2 style
    398374        # initial types and constructs
    399375        base_types = ["char","numb","null"]
    400         prim_types = base_types[:] 
     376        prim_types = base_types[:]
    401377        base_constructs = [".*",
    402378            '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
    403379            "\"\" "]
    404         for key,value in self.dictionary.items():
    405            if value.has_key("_name"):
     380        for key,value in self.items():
     381           newnames = [key]  #keep by default
     382           if "_name" in value:
    406383               real_name = value["_name"]
    407                if type(real_name) is ListType:        #looped values
     384               if isinstance(real_name,list):        #looped values
    408385                   for looped_name in real_name:
    409                        new_value = value.copy()
    410                        new_value["_name"] = looped_name  #only looped name
    411                        self.dictionary[looped_name] = new_value
    412                else: self.dictionary[real_name] = value
     386                      new_value = value.copy()
     387                      new_value["_name"] = looped_name  #only looped name
     388                      self[looped_name] = new_value
     389                   newnames = real_name
     390               else:
     391                      self[real_name] = value
     392                      newnames = [real_name]
    413393           # delete the old one
    414            del self.dictionary[key]
     394           if key not in newnames:
     395              del self[key]
    415396        # loop again to normalise the contents of each definition
    416         for key,value in self.dictionary.items():
     397        for key,value in self.items():
     398           #unlock the block
     399           save_overwrite = value.overwrite
     400           value.overwrite = True
    417401           # deal with a missing _list, _type_conditions
    418            if not value.has_key("_list"): value["_list"] = 'no'
    419            if not value.has_key("_type_conditions"): value["_type_conditions"] = 'none'
     402           if "_list" not in value: value["_list"] = 'no'
     403           if "_type_conditions" not in value: value["_type_conditions"] = 'none'
    420404           # deal with enumeration ranges
    421            if value.has_key("_enumeration_range"):
     405           if "_enumeration_range" in value:
    422406               max,min = self.getmaxmin(value["_enumeration_range"])
    423407               if min == ".":
    424                    self.dictionary[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
     408                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
    425409               elif max == ".":
    426                    self.dictionary[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
     410                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
    427411               else:
    428                    self.dictionary[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
     412                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
    429413           #add any type construct information
    430            if value.has_key("_type_construct"):
     414           if "_type_construct" in value:
    431415               base_types.append(value["_name"]+"_type")   #ie dataname_type
    432416               base_constructs.append(value["_type_construct"]+"$")
    433417               prim_types.append(value["_type"])     #keep a record
    434418               value["_type"] = base_types[-1]   #the new type name
    435                
    436        
     419
    437420        #make categories conform with ddl2
    438421        #note that we must remove everything from the last underscore
    439            if value["_category"] == "category_overview":
     422           if value.get("_category",None) == "category_overview":
    440423                last_under = value["_name"].rindex("_")
    441424                catid = value["_name"][1:last_under]
    442425                value["_category.id"] = catid  #remove square bracks
    443                 if catid not in self.cat_list: self.cat_list.append(catid)
     426                if catid not in self.cat_list: self.cat_list.append(catid)
     427           value.overwrite = save_overwrite
    444428        # we now add any missing categories before filling in the rest of the
    445429        # information
    446         for key,value in self.dictionary.items():
    447             if self[key].has_key("_category"):
     430        for key,value in self.items():
     431            #print('processing ddl1 definition %s' % key)
     432            if "_category" in self[key]:
    448433                if self[key]["_category"] not in self.cat_list:
    449434                    # rogue category, add it in
    450435                    newcat = self[key]["_category"]
    451                     fake_name = "_" + newcat + "_[]" 
     436                    fake_name = "_" + newcat + "_[]"
    452437                    newcatdata = CifBlock()
    453438                    newcatdata["_category"] = "category_overview"
     
    457442                    self.cat_list.append(newcat)
    458443        # write out the type information in DDL2 style
    459         self.dic_as_cif[self.master_key].AddLoopItem((
     444        self.master_block.AddLoopItem((
    460445            ("_item_type_list.code","_item_type_list.construct",
    461446              "_item_type_list.primitive_code"),
    462447            (base_types,base_constructs,prim_types)
    463448            ))
    464      
    def DDL2_normalise(self):
        """Regularise DDL2 definitions in place.

        Splits definition blocks that loop several ``_item.name`` entries
        into one block per item, untangles ``_item_linked`` parent/child
        loops so each child records its parent and each parent lists its
        children, flattens single-element ``_item.name`` loops, and clones
        aliased definitions under their alias names.
        """
        listed_defs = [a for a in self.keys()
                       if isinstance(self[a].get('_item.name'), list)]
        # now filter out all the single element lists!
        dodgy_defs = [a for a in listed_defs if len(self[a]['_item.name']) > 1]
        for item_def in dodgy_defs:
            # print("DDL2 norm: processing %s" % item_def)
            thisdef = self[item_def]
            packet_no = thisdef['_item.name'].index(item_def)
            realcat = thisdef['_item.category_id'][packet_no]
            realmand = thisdef['_item.mandatory_code'][packet_no]
            # first add in all the missing categories
            # we don't replace the entry in the list corresponding to the
            # current item, as that would wipe out the information we want
            for child_no in range(len(thisdef['_item.name'])):
                if child_no == packet_no: continue
                child_name = thisdef['_item.name'][child_no]
                child_cat = thisdef['_item.category_id'][child_no]
                child_mand = thisdef['_item.mandatory_code'][child_no]
                if child_name not in self:
                    self[child_name] = CifBlock()
                    self[child_name]['_item.name'] = child_name
                self[child_name]['_item.category_id'] = child_cat
                self[child_name]['_item.mandatory_code'] = child_mand
            self[item_def]['_item.name'] = item_def
            self[item_def]['_item.category_id'] = realcat
            self[item_def]['_item.mandatory_code'] = realmand
        # go through any _item_linked tables
        dodgy_defs = [a for a in self.keys()
                      if isinstance(self[a].get('_item_linked.child_name'), list)]
        dodgy_defs = [a for a in dodgy_defs
                      if len(self[a]['_item_linked.child_name']) > 1]
        for item_def in dodgy_defs:
            thisdef = self[item_def]
            child_list = thisdef.get('_item_linked.child_name', [])
            parents = thisdef.get('_item_linked.parent_name', [])
            # zap the parents, they will confuse us!!
            del thisdef['_item_linked.parent_name']
            if isinstance(child_list, (unicode, str)):
                self[child_list]['_item_linked.parent_name'] = parents
                self[parents]['_item_linked.child_name'] = child_list
            else:
                # for each parent, find the list of children.
                # NOTE(review): the original py2 code used map(None, ...),
                # which pads the shorter list with None; zip truncates.
                # Parent and child loops are expected to have equal length
                # -- confirm against the dictionaries in use.
                family = list(zip(parents, child_list))
                notmychildren = family
                while len(notmychildren):
                    # get all children of first entry
                    mychildren = [a for a in family if a[0] == notmychildren[0][0]]
                    # print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
                    for parent, child in mychildren:   #parent is the same for all
                        self[child]['_item_linked.parent_name'] = parent
                    # put all the children into the parent
                    try:
                        del self[mychildren[0][0]]['_item_linked.child_name']
                    except ValueError:
                        pass
                    self[mychildren[0][0]]['_item_linked.child_name'] = \
                        [a[1] for a in mychildren]
                    # now make a new, smaller list
                    notmychildren = [a for a in notmychildren
                                     if a[0] != mychildren[0][0]]
        # now flatten any single element lists
        single_defs = [a for a in listed_defs if len(self[a]['_item.name']) == 1]
        for flat_def in single_defs:
            flat_keys = self[flat_def].GetLoop('_item.name').keys()
            for flat_key in flat_keys:
                self[flat_def][flat_key] = self[flat_def][flat_key][0]
        # now deal with the multiple lists
        # next we do aliases
        all_aliases = [a for a in self.keys()
                       if '_item_aliases.alias_name' in self[a]]
        for aliased in all_aliases:
            my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
            for alias in my_aliases:
                self[alias] = self[aliased].copy()   #we are going to delete stuff...
                del self[alias]["_item_aliases.alias_name"]
    def ddlm_normalise(self):
        """Re-key DDLm definitions under canonical ``_<category>.<object>`` names.

        Any definition carrying ``_name.category_id`` is stored under the data
        name built from its category and object ids, and the old key removed.
        """
        # iterate over a snapshot: we add and delete keys as we go
        for key, value in list(self.items()):
            if "_name.category_id" in value:
                real_name = "_" + value["_name.category_id"] + "." + value["_name.object_id"]
                # guard: if the key is already canonical, re-assigning and then
                # deleting would destroy the definition
                if real_name != key:
                    self[real_name] = value
                    # delete the old one
                    del self[key]
    def ddlm_parse_valid(self):
        """Read ``_dictionary_valid`` scope rules from the dictionary block.

        Fills ``self.scopes_mandatory`` / ``self.scopes_naughty`` with the
        attributes flagged '+' (required) or '!' (forbidden) for each scope.
        Does nothing when no scope loop is present.
        """
        if "_dictionary_valid.scope" not in self.dic_as_cif[self.master_key]:
            return
        for scope_pack in self.dic_as_cif[self.master_key].GetLoop("_dictionary_valid.scope"):
            scope = getattr(scope_pack, "_dictionary_valid.scope")
            valid_info = getattr(scope_pack, "_dictionary_valid.attributes")
            valid_info = valid_info.split()
            # attributes arrive as whitespace-separated (flag, name) pairs
            for flag, attrib in zip(valid_info[0::2], valid_info[1::2]):
                if flag == "+":
                    self.scopes_mandatory[scope.lower()].append(attrib.lower())
                elif flag == "!":
                    self.scopes_naughty[scope.lower()].append(attrib.lower())
    def ddlm_import(self):
        """Process DDLm ``_import_list.id`` directives.

        Top-level (data-block) directives may merge whole dictionaries,
        categories, category groups or single items; per-definition
        directives may merge attributes, enumeration states or default
        values.  Imported files are fetched by URI (definition-level
        templates are cached per file) and the ``_import_list.id``
        attribute is deleted once processed.

        Raises CifError for an unrecognised definition-level scope.
        """
        #first check the outermost datablocks.  Note we expect our dREL
        #machinery to create _import_list.id only if the individual values are available
        #For this to happen, we need the ddl.dic to have been assigned
        try:
            to_be_imported = self.dic_as_cif[self.master_key]["_import_list.id"]
        except KeyError:
            pass
        else:
            # deal with foreshortened import blocks
            for import_target in to_be_imported:
                if len(import_target)==3:                     #default values have been left off
                    import_target.append('Exit')
                    import_target.append('Exit')
            for scope,dict_block,file_loc,on_dupl,on_miss in to_be_imported:
                scope = scope.lower()                         #work around capitalisation in draft dics
                if scope in ('att','sta','val'):
                    # was "self.master.key" in the original: no such attribute
                    print('Improper import directive at top level in %s: ignored' % self.master_key)
                    continue
                # resolve URI
                full_uri = self.resolve_path(file_loc)
                # urlopen comes from the py2/py3 compatibility imports at file top
                dic_as_cif = CifFile(urlopen(full_uri),grammar="DDLm")
                import_from = CifDic(dic_as_cif,do_minimum=True)  #this will recurse internal imports
                # and now merge these definitions
                if scope == "dic":
                    self.get_whole_dict(import_from,on_dupl,on_miss)
                elif scope=="cat":
                    self.get_one_cat(import_from,dict_block,on_dupl,on_miss)
                elif scope=="grp":
                    self.get_one_cat_with_children(import_from,dict_block,on_dupl,on_miss)
                elif scope=="itm":  #not clear what to do if category is missing
                    # add_one_defn takes no on_miss argument (original passed one)
                    self.add_one_defn(import_from,dict_block,on_dupl)
            # it will never happen again...
            del self.dic_as_cif[self.master_key]["_import_list.id"]
        # next we resolve per-definition imports
        for one_def in self.keys():
            try:
                to_be_imported = self[one_def]["_import_list.id"]
            except KeyError:
                pass
            else:
                if len(to_be_imported) == 5 and len(to_be_imported[0])!=5:
                    #catch an error in earlier versions of the dictionaries where
                    #the outer brackets were missing
                    to_be_imported = [to_be_imported]
                # deal with foreshortened import blocks
                for import_target in to_be_imported:
                    if len(import_target)==3:                 #default values have been left off
                        import_target.append('Exit')
                        import_target.append('Exit')
                for scope,block,file_loc,on_dupl,on_miss in to_be_imported:
                    scope = scope.lower()                     #work around capitalisation in draft dics
                    if scope in ('dic','cat','grp','itm'):
                        print('Improper import directive at definition level in %s: ignored' % self.master_key)
                        continue
                    full_uri = self.resolve_path(file_loc)
                    if full_uri not in self.template_cache:
                        dic_as_cif = CifFile(urlopen(full_uri),grammar="DDLm")
                        self.template_cache[full_uri] = CifDic(dic_as_cif,do_minimum=True)  #this will recurse internal imports
                        print('Added %s to cached dictionaries' % full_uri)
                    import_from = self.template_cache[full_uri]
                    if scope == 'att':
                        self.import_attributes(one_def,import_from,block,on_dupl,on_miss)
                    elif scope == 'sta':
                        self.import_loop(one_def,import_from,block,'_enumeration_set.state',on_miss)
                    elif scope == 'val':
                        self.import_loop(one_def,import_from,block,'_enumeration_default.value',on_miss)
                    else:
                        raise CifError("Unrecognised import scope %s" % scope)
                # remove the import attribute
                del self[one_def]["_import_list.id"]
    def resolve_path(self,file_loc):
        """Return a full URI for *file_loc*.

        An absolute URI (one with a scheme) is returned untouched; a
        relative location is resolved against the URI this dictionary was
        loaded from (``self.dic_as_cif.my_uri``).
        """
        # urlparse/urljoin come from the py2/py3 compatibility imports at file top
        url_comps = urlparse(file_loc)
        if url_comps[0]: return file_loc    #already full URI
        new_url = urljoin(self.dic_as_cif.my_uri,file_loc)
        print("Transformed %s to %s for import " % (file_loc,new_url))
        return new_url
    def get_whole_dict(self,source_dict,on_dupl,on_miss):
        """Merge every category of *source_dict* into this dictionary,
        applying the given duplicate/missing policies to each."""
        # list() so the py3 dict view prints readably
        print("Cat_map: `%s`" % list(source_dict.cat_map.values()))
        for source_cat in source_dict.cat_map.values():
            self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)
    def get_one_cat(self,source_dict,source_cat,on_dupl,on_miss):
        """Import category block *source_cat* and its member definitions
        from *source_dict*.

        *on_miss* of 'Ignore' skips a missing category, otherwise CifError
        is raised; *on_dupl* of 'Ignore' keeps an existing category block,
        'Exit' raises CifError, anything else replaces it.
        """
        ext_cat = source_dict.get(source_cat,"")
        this_cat = self.get(source_cat,"")
        print("Adding category %s" % source_cat)
        if not ext_cat:
            if on_miss == "Ignore":
                pass
            else:
                raise CifError("Missing category %s" % source_cat)
        else:
            all_ext_defns = source_dict.keys()
            cat_list = [a for a in all_ext_defns
                        if source_dict[a].get("_name.category_id","").lower()==source_cat.lower()]
            print("Items: %r" % cat_list)
            if this_cat:     # The category block itself is duplicated
                if on_dupl=="Ignore":
                    pass
                elif on_dupl == "Exit":
                    raise CifError("Duplicate category %s" % source_cat)
                else:
                    self[source_cat] = ext_cat
            else:
                self[source_cat] = ext_cat
            # now do all member definitions
            for cat_defn in cat_list:
                self.add_one_defn(source_dict,cat_defn,on_dupl)
    def add_one_defn(self,source_dict,cat_defn,on_dupl):
        """Copy definition *cat_defn* from *source_dict* into this
        dictionary.  For an existing definition, *on_dupl* of 'Ignore'
        keeps ours, 'Exit' raises CifError, anything else replaces it."""
        if cat_defn in self:
            if on_dupl == "Ignore":
                pass
            elif on_dupl == "Exit":
                raise CifError("Duplicate definition %s" % cat_defn)
            else:
                self[cat_defn] = source_dict[cat_defn]
        else:
            self[cat_defn] = source_dict[cat_defn]
        print("    "+cat_defn)
    def get_one_cat_with_children(self,source_dict,source_cat,on_dupl,on_miss):
        """Import *source_cat* from *source_dict*, then every category
        whose ``_category.parent_id`` matches its ``_definition.id``."""
        self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)
        parent_id = source_dict[source_cat]["_definition.id"]   # invariant: hoist out of the scan
        child_cats = [a for a in source_dict.cat_map.values()
                      if source_dict[a]["_category.parent_id"] == parent_id]
        for child_cat in child_cats:
            self.get_one_cat(source_dict,child_cat,on_dupl,on_miss)
    def import_attributes(self,mykey,source_dict,source_def,on_dupl,on_miss):
        """Merge the attributes of *source_def* (from *source_dict*) into
        our definition *mykey*.  A missing source definition raises
        CifError when *on_miss* is 'Exit', otherwise it is skipped."""
        # process missing
        if source_def not in source_dict:
            if on_miss == 'Exit':
                raise CifError('Missing definition for import %s' % source_def)
            else:
                return          #nothing else to do
        # now do the import
        print('Adding attributes from %s to %s' % (source_def,mykey))
        self[mykey].merge(source_dict[source_def],mode='replace',match_att= \
              ['_definition.id','_name.category_id','_name.object_id'])
    def import_loop(self,mykey,source_dict,source_def,loop_name,on_miss):
        """Copy the loop containing *loop_name* from *source_def* (in
        *source_dict*) into our definition *mykey*.  A missing source
        definition raises CifError when *on_miss* is 'Exit', otherwise it
        is skipped."""
        # process missing
        if source_def not in source_dict:
            if on_miss == 'Exit':
                raise CifError('Missing definition for import %s' % source_def)
            else:
                return          #nothing else to do
        print('Adding %s attributes from %s to %s' % (loop_name,source_def,mykey))
        state_loop = source_dict[source_def].GetLoop(loop_name)
        self[mykey].insert_loop(state_loop)
    703449
    704450    def ddl1_cat_load(self):
     
    725471                if new_unique not in uis: uis.append(new_unique)
    726472                cat_unique_dic[thiscat] = uis
     473
     474        [get_cat_info(a) for a in deflist] # apply the above function
     475        for cat in cat_mand_dic.keys():
     476            self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
     477        for cat in cat_unique_dic.keys():
     478            self[cat]["_category_key.name"] = cat_unique_dic[cat]
     479
    def create_pcloop(self,definition):
        """Ensure the parent/child links of *definition* live in a loop.

        Unlooped ``_item_linked.parent_name`` / ``_item_linked.child_name``
        values are promoted to a loop, padding the missing side with the
        definition's own name.  Nothing is done when no links exist at all,
        or when both sides are already multi-valued.
        """
        target = self[definition]
        children = target.get('_item_linked.child_name',[])
        parents = target.get('_item_linked.parent_name',[])
        # normalise bare strings to single-element lists
        if isinstance(children,unicode):
            children = [children]
        if isinstance(parents,unicode):
            parents = [parents]
        no_links = len(children) == 0 and len(parents) == 0
        both_looped = len(children) > 1 and len(parents) > 1
        if no_links or both_looped:
            return
        # pad the empty side with our own name so the lengths match
        if len(children) == 0:
            children = [definition]*len(parents)
        if len(parents) == 0:
            parents = [definition]*len(children)
        replacement = CifLoopBlock(dimension=1)
        replacement.AddLoopItem(('_item_linked.parent_name',parents))
        replacement.AddLoopItem(('_item_linked.child_name',children))
        # drop any unlooped originals before installing the loop
        try:
            del target['_item_linked.parent_name']
            del target['_item_linked.child_name']
        except KeyError:
            pass
        target.insert_loop(replacement)
     503
     504
     505
     506    def DDL2_normalise(self):
     507       listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys())
     508       # now filter out all the single element lists!
     509       dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs)
     510       for item_def in dodgy_defs:
     511                # print("DDL2 norm: processing %s" % item_def)
     512                thisdef = self[item_def]
     513                packet_no = thisdef['_item.name'].index(item_def)
     514                realcat = thisdef['_item.category_id'][packet_no]
     515                realmand = thisdef['_item.mandatory_code'][packet_no]
     516                # first add in all the missing categories
     517                # we don't replace the entry in the list corresponding to the
     518                # current item, as that would wipe out the information we want
     519                for child_no in range(len(thisdef['_item.name'])):
     520                    if child_no == packet_no: continue
     521                    child_name = thisdef['_item.name'][child_no]
     522                    child_cat = thisdef['_item.category_id'][child_no]
     523                    child_mand = thisdef['_item.mandatory_code'][child_no]
     524                    if child_name not in self:
     525                        self[child_name] = CifBlock()
     526                        self[child_name]['_item.name'] = child_name
     527                    self[child_name]['_item.category_id'] = child_cat
     528                    self[child_name]['_item.mandatory_code'] = child_mand
     529                self[item_def]['_item.name'] = item_def
     530                self[item_def]['_item.category_id'] = realcat
     531                self[item_def]['_item.mandatory_code'] = realmand
     532
     533       target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
     534                                     '_item_linked.parent_name' in self[a]]
     535       # now dodgy_defs contains all definition blocks with more than one child/parent link
     536       for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
     537       for item_def in dodgy_defs:
     538             print('Processing %s' % item_def)
     539             thisdef = self[item_def]
     540             child_list = thisdef['_item_linked.child_name']
     541             parents = thisdef['_item_linked.parent_name']
     542             # for each parent, find the list of children.
     543             family = list(zip(parents,child_list))
     544             notmychildren = family         #We aim to remove non-children
     545             # Loop over the parents, relocating as necessary
     546             while len(notmychildren):
     547                # get all children of first entry
     548                mychildren = [a for a in family if a[0]==notmychildren[0][0]]
     549                print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
     550                for parent,child in mychildren:   #parent is the same for all
     551                         # Make sure that we simply add in the new entry for the child, not replace it,
     552                         # otherwise we might spoil the child entry loop structure
     553                         try:
     554                             childloop = self[child].GetLoop('_item_linked.parent_name')
     555                         except KeyError:
     556                             print('Creating new parent entry %s for definition %s' % (parent,child))
     557                             self[child]['_item_linked.parent_name'] = [parent]
     558                             childloop = self[child].GetLoop('_item_linked.parent_name')
     559                             childloop.AddLoopItem(('_item_linked.child_name',[child]))
     560                             continue
     561                         else:
     562                             # A parent loop already exists and so will a child loop due to the
     563                             # call to create_pcloop above
     564                             pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
     565                             goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
     566                             if len(goodpars)>0:   #no need to add it
     567                                 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
     568                                 continue
     569                             print('Adding %s to %s entry' % (parent,child))
     570                             newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
     571                             setattr(newpacket,'_item_linked.child_name',child)
     572                             setattr(newpacket,'_item_linked.parent_name',parent)
     573                             childloop.AddPacket(newpacket)
     574                #
     575                # Make sure the parent also points to the children.  We get
     576                # the current entry, then add our
     577                # new values if they are not there already
     578                #
     579                parent_name = mychildren[0][0]
     580                old_children = self[parent_name].get('_item_linked.child_name',[])
     581                old_parents = self[parent_name].get('_item_linked.parent_name',[])
     582                oldfamily = zip(old_parents,old_children)
     583                newfamily = []
     584                print('Old parents -> %s' % repr(old_parents))
     585                for jj, childname in mychildren:
     586                    alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
     587                    if len(alreadythere)>0: continue
     588                    'Adding new child %s to parent definition at %s' % (childname,parent_name)
     589                    old_children.append(childname)
     590                    old_parents.append(parent_name)
     591                # Now output the loop, blowing away previous definitions.  If there is something
     592                # else in this category, we are destroying it.
     593                newloop = CifLoopBlock(dimension=1)
     594                newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
     595                newloop.AddLoopItem(('_item_linked.child_name',old_children))
     596                del self[parent_name]['_item_linked.parent_name']
     597                del self[parent_name]['_item_linked.child_name']
     598                self[parent_name].insert_loop(newloop)
     599                print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
     600                # now make a new,smaller list
     601                notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]
     602
     603       # now flatten any single element lists
     604       single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs)
     605       for flat_def in single_defs:
     606           flat_keys = self[flat_def].GetLoop('_item.name').keys()
     607           for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
     608       # now deal with the multiple lists
     609       # next we do aliases
     610       all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
     611       for aliased in all_aliases:
     612          my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
     613          for alias in my_aliases:
     614              self[alias] = self[aliased].copy()   #we are going to delete stuff...
     615              del self[alias]["_item_aliases.alias_name"]
     616 
    def ddlm_parse_valid(self):
        """Read the ``_dictionary_valid.application`` loop of the master block and
        record, per application scope, which attributes are mandatory and which
        are prohibited (stored in ``self.scopes_mandatory`` / ``self.scopes_naughty``)."""
        if "_dictionary_valid.application" not in self.master_block:
            return
        for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
            scope = getattr(scope_pack,"_dictionary_valid.application")
            valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
            # scope appears to be a two-element value: (scope name, validity code)
            # -- TODO confirm against the DDLm attribute dictionary
            if scope[1] == "Mandatory":
                self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
            elif scope[1] == "Prohibited":
                self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)
     627               
    def ddlm_import(self,import_mode='All'):
        """Resolve every ``_import.get`` directive found in this dictionary's
        save frames.  ``import_mode`` selects which directives are honoured:
        'All', or only those whose 'mode' entry matches.  'Contents' imports
        merge attributes into the importing frame; 'Full' imports splice the
        imported category tree into this dictionary both syntactically and
        semantically.  Fetched dictionaries are cached in ``self.template_cache``."""
        import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
        print ('Import mode %s applied to following frames' % import_mode)
        print (str([a[0] for a in import_frames]))
        if import_mode != 'All':
           for i in range(len(import_frames)):
                import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents') == import_mode])
           print('Importing following frames in mode %s' % import_mode)
           print(str(import_frames))
        #resolve all references
        for parent_block,import_list in import_frames:
          for import_ref in import_list:
            file_loc = import_ref["file"]
            full_uri = self.resolve_path(file_loc)
            if full_uri not in self.template_cache:
                dic_as_cif = CifFile(urlopen(full_uri),grammar=self.grammar)
                self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,do_dREL=False)  #this will recurse internal imports
                print('Added %s to cached dictionaries' % full_uri)
            import_from = self.template_cache[full_uri]
            # dupl/miss control behaviour on duplicate / missing frames; default is to raise
            dupl = import_ref.get('dupl','Exit')
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                   raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if target_key in self and mode=='full':  #so blockname will be duplicated
                if dupl == 'Exit':
                    raise CifError('Import frame %s already in dictionary' % target_key)
                elif dupl == 'Ignore':
                    continue
            if mode == 'contents':   #merge attributes only
                self[parent_block].merge(import_target)
            elif mode =="full":
                # Do the syntactic merge
                syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
                from_cat_head = import_target['_name.object_id']
                child_frames = import_from.ddlm_all_children(from_cat_head)
                 # Check for Head merging Head
                if self[parent_block].get('_definition.class','Datum')=='Head' and \
                   import_target.get('_definition.class','Datum')=='Head':
                      head_to_head = True
                else:
                      head_to_head = False
                      child_frames.remove(from_cat_head)
                # As we are in syntax land, we call the CifFile methods
                child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
                child_blocks = super(CifDic,import_from).makebc(child_blocks)
                # Prune out any datablocks that have identical definitions
                from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
                double_defs = list([b for b in from_defs.items() if self.has_key(b[1])])
                print ('Definitions for %s superseded' % repr(double_defs))
                for b in double_defs:
                    del child_blocks[b[0]]
                super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      #
                print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
                   mode,len(self)))
                # Now the semantic merge
                # First expand our definition <-> blockname tree
                self.create_def_block_table()
                merging_cat = self[parent_block]['_name.object_id']      #new parent
                if head_to_head:
                    child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                    #the new parent is the importing category for all old children
                    for f in child_frames:
                        self[f].overwrite = True
                        self[f]['_name.category_id'] = merging_cat
                        self[f].overwrite = False
                    # remove the old head
                    del self[from_cat_head]
                    print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
                else:  #imported category is only child
                    from_frame = import_from[target_key]['_definition.id'] #so we can find it
                    child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                    self[child_frame]['_name.category_id'] = merging_cat
                    print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
            # it will never happen again...
            del self[parent_block]["_import.get"]
     710
    def resolve_path(self,file_loc):
        """Return an absolute URI for `file_loc`.  A location that already
        carries a URI scheme is returned unchanged; anything else is resolved
        relative to the URI this dictionary was loaded from (``self.my_uri``)."""
        if urlparse(file_loc)[0]:
            return file_loc    # scheme present: already a full URI
        return urljoin(self.my_uri,file_loc)
     717
     718
     719
    def create_def_block_table(self):
        """ Create an internal table matching definition to block id """
        # pair each raw datablock with its blockname, bypassing our own __getitem__
        proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
        # now get the actual ids instead of blocks
        # cat_id_spec/def_id_spec name the id datanames for this dictionary
        # language; fall back to the blockname when neither is present
        proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
        # remove non-definitions
        if self.diclang != "DDL1":
            top_blocks = list([a[0].lower() for a in self.get_roots()])
        else:
            top_blocks = ["on_this_dictionary"]
        # catch dodgy duplicates
        uniques = set([a[0] for a in proto_table])
        if len(uniques)<len(proto_table):
            def_names = list([a[0] for a in proto_table])
            dodgy = [a for a in def_names if def_names.count(a)>1]
            raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
        # both keys and values are lower-cased; lookups elsewhere rely on this
        self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])
     737       
    def __getitem__(self,key):
        """Access a datablock by definition id, after the lookup has been created"""
        try:
            blockname = self.block_id_table[key.lower()]
        except (AttributeError,KeyError):
            # table not built yet, or key unknown as a definition id:
            # revert to plain CifFile blockname access
            return super(CifDic,self).__getitem__(key)
        return super(CifDic,self).__getitem__(blockname)
     747
    def __setitem__(self,key,value):
        """Add a new definition block under blockname `key`, registering its
        ``_definition.id`` in the definition -> blockname lookup table when
        that table exists."""
        super(CifDic,self).__setitem__(key,value)
        try:
            # lower-case the id: the table is built and searched with
            # lower-cased keys (see create_def_block_table / __getitem__),
            # so an un-lowered entry would be unfindable
            self.block_id_table[value['_definition.id'].lower()]=key
        except AttributeError:   #block_id_table does not exist yet
            pass
        except KeyError:         #block has no _definition.id; nothing to register
            pass
     755
    def __delitem__(self,key):
        """Remove a definition: delete both the underlying datablock and its
        entry in the definition -> blockname table when that table exists."""
        try:
            super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
            del self.block_id_table[key.lower()]
        except (AttributeError,KeyError):   #block_id_table not present yet
            super(CifDic,self).__delitem__(key)
            return
        # fix other datastructures
        # cat_obj table
    def keys(self):
        """Return all definitions"""
        try:
            defined = self.block_id_table.keys()
        except AttributeError:    # lookup table not built yet
            return super(CifDic,self).keys()
        return defined
     773
    def has_key(self,key):
        """Python2-style membership test; equivalent to ``key in self``."""
        return self.__contains__(key)
     776
    def __contains__(self,key):
        """True when `key` names a known definition (or, before the lookup
        table is built, a plain blockname)."""
        try:
            found = key.lower() in self.block_id_table
        except AttributeError:    # table missing (or key not a string)
            return super(CifDic,self).__contains__(key)
        return found
    727782           
    728         map(get_cat_info,deflist)       # apply the above function
    729         for cat in cat_mand_dic.keys():
    730             cat_entry = self.get_ddl1_entry(cat)
    731             self[cat_entry]["_category_mandatory.name"] = cat_mand_dic[cat]
    732         for cat in cat_unique_dic.keys():
    733             cat_entry = self.get_ddl1_entry(cat)
    734             self[cat_entry]["_category_key.name"] = cat_unique_dic[cat]
    735 
    736     # A helper function get find the entry corresponding to a given category name:
    737     # yes, in DDL1 the actual name is different in the category block due to the
    738     # addition of square brackets which may or may not contain stuff.
    739 
    def get_ddl1_entry(self,cat_name):
        """Find the dictionary entry matching DDL1 category `cat_name`.  In DDL1
        the entry name is the category name followed by ``_[`` plus optional
        trailing text.  Raises ValidCifError unless exactly one entry matches.

        Fixed for Python 3: ``filter`` is lazy (no ``len``), and the Python-2
        ``raise Class, msg`` statement is a syntax error."""
        chop_len = len(cat_name)
        possibles = [a for a in self.keys() if a[1:chop_len+3] == cat_name + "_["]
        if len(possibles) != 1:
            raise ValidCifError("Category name %s can't be matched to category entry" % cat_name)
        return possibles[0]
    def items(self):
        """Return (key,value) pairs"""
        return [(defname, self[defname]) for defname in self.keys()]
     786
    def unlock(self):
        """Allow overwriting of all definitions in this collection"""
        for defname in self.keys():
            self[defname].overwrite = True
     791
    def lock(self):
        """Disallow changes in definitions"""
        for defname in self.keys():
            self[defname].overwrite = False
     796
    def rename(self,oldname,newname,blockname_as_well=True):
        """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
        change the underlying blockname too."""
        if blockname_as_well:
            super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)
            # NOTE(review): the stored value is not lower-cased here, unlike the
            # entries produced by create_def_block_table -- confirm intentional
            self.block_id_table[newname.lower()]=newname
            if oldname.lower() in self.block_id_table: #not removed
               del self.block_id_table[oldname.lower()]
        else:
            # only remap the definition id; the underlying block keeps its name
            self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
            del self.block_id_table[oldname.lower()]
            return
     809                                                 
    def get_root_category(self):
        """Get the single 'Head' category of this dictionary.

        Raises CifError when zero or several Head categories exist.  The
        original format string lacked a %s placeholder, so constructing the
        error message itself raised TypeError instead of CifError."""
        root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
        if len(root_cats) != 1:
            raise CifError("Cannot determine a unique Head category, got %s" % repr(root_cats))
        return root_cats[0]
     816
    def ddlm_immediate_children(self,catname):
        """Return a list of datanames for the immediate children of catname.  These are
        semantic children (i.e. based on _name.category_id), not structural children as
        in the case of StarFile.get_immediate_children"""
        target = catname.lower()
        return [name for name in self.keys()
                if self[name].get('_name.category_id','').lower() == target]
     824
    def ddlm_all_children(self,catname):
        """Return a list of all children, including the `catname`"""
        children = self.ddlm_immediate_children(catname)
        # items first (in discovery order), then each child category's subtree
        plain = [c for c in children if self[c].get('_definition.scope','Item') != 'Category']
        nested = [c for c in children if self[c].get('_definition.scope','Item') == 'Category']
        for subcat in nested:
            plain += self.ddlm_all_children(subcat)
        return plain + [catname]
     833
    def is_semantic_child(self,parent,maybe_child):
        """Return true if `maybe_child` is a child of `parent`"""
        return maybe_child in self.ddlm_all_children(parent)
     838
    def ddlm_danglers(self):
        """Return a list of definitions that do not have a category defined
        for them, or are children of an unattached category"""
        root = self.get_root_category()
        reachable = set(self.ddlm_all_children(root))
        return list(set(self.keys()) - reachable)
     847
    def get_ddlm_parent(self,itemname):
        """Get the parent category of itemname"""
        parent = self[itemname].get('_name.category_id','')
        if parent == '':
            # an unset category is treated as an error rather than defaulting
            raise CifError("%s has no parent" % itemname)
        return parent
     854
    def expand_category_opt(self,name_list):
        """Return a list of all non-category items in a category or return the name
           if the name is not a category"""
        expanded = []
        for candidate in name_list:
            if self.get(candidate,{}).get('_definition.scope','Item') == 'Category':
                # recurse into every definition belonging to this category
                members = [a for a in self.keys()
                           if self[a].get('_name.category_id','').lower() == candidate.lower()]
                expanded += self.expand_category_opt(members)
            else:
                expanded.append(candidate)
        return expanded
     866
    def get_categories(self):
        """Return a list of category names"""
        return [name for name in self.keys()
                if self[name].get("_definition.scope") == 'Category']
     870
    def names_in_cat(self,cat,names_only=False):
        """Return the datanames belonging directly to category `cat`.  With
        `names_only` True, return their object ids instead of full names."""
        members = [name for name in self.keys()
                   if self[name].get('_name.category_id','').lower() == cat.lower()]
        if names_only:
            return [self[name]["_name.object_id"] for name in members]
        return [name for name in members
                if self[name].get('_definition.scope','Item') == 'Item']
     877
     878                           
     879
    def create_alias_table(self):
        """Populate an alias table that we can look up when searching for a dataname"""
        self.alias_table = dict([(name,self[name]['_alias.definition_id'])
                                 for name in self.keys()
                                 if '_alias.definition_id' in self[name]])
     884
    def create_cat_obj_table(self):
        """Populate a table indexed by (cat,obj) and returning the correct dataname"""
        # direct entries: every Item definition keyed by its (category, object)
        base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                           for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
        loopable = self.get_loopable_cats()
        loopers = [self.ddlm_immediate_children(a) for a in loopable]
        print('Loopable cats:' + repr(loopable))
        # loopable categories nested inside other loopable categories
        loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
        expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
        print("Expansion list:" + repr(expand_list))
        extra_table = {}   #for debugging we keep it separate from base_table until the end
        def expand_base_table(parent_cat,child_cats):
            # NOTE(review): if child_cats is empty, `child_names` below is
            # referenced before assignment -- confirm callers never pass []
            extra_names = []
            # first deal with all the child categories
            for child_cat in child_cats:
              nn = []
              if child_cat in expand_list:  # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
                # store child names
                extra_names += nn
              # add all child names to the table
              child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                             for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
              child_names += extra_names
              # NOTE(review): extra_table keys are (cat,obj) pairs but this
              # membership test uses (cat,name) -- verify which was intended
              extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,name) not in extra_table]))
            # and the repeated ones get appended instead
            # NOTE(review): `a` here is an (obj,name) pair while extra_table is
            # keyed by (cat,obj), so this match looks unreachable -- confirm
            repeats = [a for a in child_names if a in extra_table]
            for obj,name in repeats:
                extra_table[(parent_cat,obj)] += [name]
            # and finally, add our own names to the return list
            child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return child_names
        [expand_base_table(parent,child) for parent,child in expand_list.items()]
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list
     928
    def get_loopable_cats(self):
        """A short utility function which returns a list of looped categories. This
        is preferred to a fixed attribute as that fixed attribute would need to be
        updated after any edits"""
        return [name.lower() for name in self.keys()
                if self[name].get('_definition.class','') == 'Loop']
     934
    def create_cat_key_table(self):
        """Create a utility table with a list of keys applicable to each category. A key is
        a compound key, that is, it is a list.

        Bug fix: the recursive call to ``collect_keys`` omitted its second
        positional argument, raising TypeError whenever a loop category was
        nested inside another loop category."""
        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
        def collect_keys(parent_cat,child_cats):
            # gather the compound keys of every (possibly nested) child category
            kk = []
            for child_cat in child_cats:
                if child_cat in self.loop_expand_list:
                    # recurse with the child's own children (was missing arg)
                    kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
                # add these keys to our list
                kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
            self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
            return kk
        for k,v in self.loop_expand_list.items():
            collect_keys(k,v)
        print('Keys for categories' + repr(self.cat_key_table))
    747952
    def add_type_info(self):
        """Read the ``_item_type_list`` loop of the master block and compile a
        regular expression for each type code (``self.typedic``) plus a map of
        type code -> primitive code (``self.primdic``).  This span was a
        corrupted old/new diff interleave; this is the py2/3-compatible form."""
        if "_item_type_list.construct" in self.master_block:
            types = self.master_block["_item_type_list.code"]
            prim_types = self.master_block["_item_type_list.primitive_code"]
            # anchor each construct at the end of the value
            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
            # add in \r wherever we see \n, and change \{ to \\{
            def regex_fiddle(mm_regex):
                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
                fixed_regexp = mm_regex[:]  #copy
                # fix the brackets
                bm = re.match(brack_match,mm_regex)
                if bm != None:
                    fixed_regexp = bm.expand(r"\2\\\\{\4")
                # fix missing \r
                rm = re.match(ret_match,fixed_regexp)
                if rm != None:
                    fixed_regexp = rm.expand(r"\2\3\\r\4")
                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
                return fixed_regexp
            constructs = map(regex_fiddle,constructs)
            for typecode,construct in zip(types,constructs):
                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
            # now make a primitive <-> type construct mapping
            for typecode,primtype in zip(types,prim_types):
                self.primdic[typecode] = primtype
    776979
    def add_category_info(self,full=True):
        """For DDLm dictionaries, build ``self.parent_lookup`` mapping each
        looped category to its ultimate looped ancestor and, when `full` is
        True, ``self.key_equivs`` mapping each category key to the equivalent
        keys of its ancestor categories.  Other dictionary languages get empty
        tables.  This span was a corrupted old/new diff interleave; this is
        the current (py2/3) implementation."""
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                # climb while the parent is itself a looped category
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check: the linked keys must be exactly the parent keys
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                        # now add in our information
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys  #preserves order of start keys
        else:
            self.parent_lookup = {}
            self.key_equivs = {}
     1021
    def change_category_name(self,oldname,newname):
        """Change the category name from [[oldname]] to [[newname]], updating
        the definition id and category references of all immediate children.

        Raises KeyError if [[oldname]] is absent or [[newname]] already exists."""
        # validate before unlocking so a failed rename leaves the lock state intact
        if oldname not in self:
            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
        if newname in self:
            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
        self.unlock()
        child_defs = self.ddlm_immediate_children(oldname)
        self.rename(oldname,newname)   #NB no name integrity checks
        self[newname]['_name.object_id']=newname
        self[newname]['_definition.id']=newname
        for child_def in child_defs:
            self[child_def]['_name.category_id'] = newname
            if self[child_def].get('_definition.scope','Item')=='Item':
                # item definitions are renamed to the canonical _<newname>.<object> form
                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
                self[child_def]['_definition.id']=newid
                self.rename(child_def,newid[1:])  #no underscore at the beginning
        self.lock()
     1040
    def create_catobj_name(self,cat,obj):
        """Return the canonical dataname built from category [[cat]] and object [[obj]]."""
        return '_%s.%s' % (cat, obj)
     1044
    def change_category(self,itemname,catname):
        """Move itemname into catname, return new handle"""
        definition = self[itemname]
        # already a member of the target category: nothing to do
        if definition['_name.category_id'].lower()==catname.lower():
            print('Already in category, no change')
            return itemname
        if catname not in self:    #don't have it
            print('No such category %s' % catname)
            return itemname
        self.unlock()
        obj_part = definition['_name.object_id']
        definition['_name.category_id'] = catname
        if definition.get('_definition.scope','Item') == 'Item':
            # items acquire a new canonical dataname under the new category
            new_handle = self.create_catobj_name(catname,obj_part)
            definition['_definition.id']= new_handle
            self.rename(itemname,new_handle)
        else:
            new_handle = itemname   # categories keep their handle
        self.set_parent(catname,new_handle)
        self.lock()
        return new_handle
     1065
    def change_name(self,one_def,newobj):
        """Change the object_id of one_def to newobj. This is not used for
        categories, but can be used for dictionaries"""
        if '_dictionary.title' in self[one_def]:
            # a dictionary block: simply retitle it
            self.unlock()
            new_handle = newobj
            self.rename(one_def,newobj)
            self[new_handle]['_dictionary.title'] = new_handle
        else:
            # an ordinary definition: rebuild the full dataname from its category
            new_handle = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
            self.unlock()
            self.rename(one_def,new_handle)
            self[new_handle]['_definition.id']=new_handle
            self[new_handle]['_name.object_id']=newobj
        self.lock()
        return new_handle
     1082
    # Note that our semantic parent is given by catparent, but our syntactic parent is
    # always just the root block
    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
        """Add a new category to the dictionary with name [[catname]].
           If [[catparent]] is None, the category will be a child of
           the topmost 'Head' category or else the top data block. If
           [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
           is true, the parent category does not have to exist."""
        if catname in self:
            raise CifError('Attempt to add existing category %s' % catname)
        self.unlock()
        syntactic_root = self.get_roots()[0][0]
        if catparent is not None:
            semantic_root = catparent
        else:
            # no parent given: attach to the Head category, or the top block if none
            head_cats = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
            if len(head_cats)>0:
                semantic_root = head_cats[0]
            else:
                semantic_root = syntactic_root
        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
        self.block_id_table[catname.lower()]=realname
        self[catname]['_name.object_id'] = catname
        if allow_dangler and catparent is not None:
            # dangling parent: record the name even though it may not exist yet
            self[catname]['_name.category_id'] = catparent
        else:
            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
        self[catname]['_definition.id'] = catname
        self[catname]['_definition.scope'] = 'Category'
        self[catname]['_definition.class'] = 'Loop' if is_loop else 'Set'
        self[catname]['_description.text'] = 'No definition provided'
        self.lock()
        return catname
     1119
    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
        """Add itemname to category [[catparent]]. If itemname contains periods,
        all text before the final period is ignored. If [[allow_dangler]] is True,
        no check for a parent category is made.

        Returns the full dataname of the new definition.
        Raises CifError if the parent category is missing (unless dangling allowed)."""
        if '.' in itemname:
            objname = itemname.split('.')[-1]
        else:
            objname = itemname
        objname = objname.strip('_')
        # validate before unlocking so a failure leaves the lock state intact
        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
            raise CifError('No category %s in dictionary' % catparent)
        self.unlock()
        fullname = '_'+catparent.lower()+'.'+objname
        print('New name: %s' % fullname)
        syntactic_root = self.get_roots()[0][0]
        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
        # update our dictionary structures; key lowercased for consistency with add_category
        self.block_id_table[fullname.lower()]=realname
        self[fullname]['_definition.id']=fullname
        self[fullname]['_name.object_id']=objname
        self[fullname]['_name.category_id']=catparent
        self[fullname]['_definition.class']='Datum'
        self[fullname]['_description.text']=def_text
        self.lock()   # restore the lock, matching add_category/change_category
        return fullname
    7971143       
    def remove_definition(self,defname):
        """Remove a definition from the dictionary. A category is removed
        together with all of its immediate children (recursively). Names
        not present in the dictionary are silently ignored."""
        if defname not in self:
            return
        if self[defname].get('_definition.scope')=='Category':
            # remove all children before the category itself
            for child in self.ddlm_immediate_children(defname):
                self.remove_definition(child)
        del self[defname]
     1153
    def get_cat_obj(self,name):
        """Return (cat,obj) tuple. [[name]] must contain only a single period"""
        category, object_part = name.split('.')
        return (category.strip('_'), object_part)
     1158
    def get_name_by_cat_obj(self,category,object,give_default=False):
        """Return the dataname corresponding to the given category and object"""
        if category[0] == '_':    #accidentally left in
            true_cat = category[1:].lower()
        else:
            true_cat = category.lower()
        lookup_key = (true_cat, object.lower())
        try:
            return self.cat_obj_lookup_table[lookup_key][0]
        except KeyError:
            # fall back to constructing a canonical name if requested
            if give_default:
                return '_'+true_cat+'.'+object
        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))
     1171
     1172
    def WriteOut(self,**kwargs):
        """Write the dictionary out, ordering definition blocks parent-first."""
        block_order = self.get_full_child_list()
        self.set_grammar(self.grammar)
        self.standard = 'Dic'
        return super(CifDic,self).WriteOut(blockorder = block_order,**kwargs)
     1178
    def get_full_child_list(self):
        """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
        top_block = self.get_roots()[0][0]
        root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
        if len(root_cat) != 1:
            raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')
        all_names = [top_block] + self.recurse_child_list(root_cat[0])
        unrooted = self.ddlm_danglers()
        double_names = set(unrooted).intersection(set(all_names))
        if len(double_names)>0:
            raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
        remaining = unrooted[:]
        # place each dangling category immediately followed by its children
        for no_root in unrooted:
            if self[no_root].get('_definition.scope','Item')=='Category':
                all_names += [no_root]
                remaining.remove(no_root)
                these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
                all_names += these_children
                for child in these_children:
                    remaining.remove(child)
        # now sort whatever is left by (external) category
        ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
        for one_ext in ext_cats:
            cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == one_ext]
            for item in cat_items:
                remaining.remove(item)
            all_names += cat_items
        if len(remaining)>0:
            print('WARNING: following items do not seem to belong to a category??')
            print(repr(remaining))
            all_names += remaining
        print('Final block order: ' + repr(all_names))
        return all_names
     1210
    def cat_from_name(self,one_name):
        """Guess the category from the name. This should be used only when this is not important semantic information,
        for example, when printing out"""
        cat_part, obj_part = one_name.split(".")
        return cat_part[1:] if cat_part[0] == "_" else cat_part
     1217
    def recurse_child_list(self,parentname):
        """Recursively expand the logical child list of [[parentname]]"""
        parent_lower = parentname.lower()
        # alphabetical order keeps output deterministic
        children = sorted([a for a in self.child_table.keys()
                           if self[a].get('_name.category_id','').lower() == parent_lower])
        expanded = [parentname]
        expanded += [a for a in children if self[a].get('_definition.scope','Item') == 'Item']
        for sub_cat in [a for a in children if self[a].get('_definition.scope','Item') == 'Category']:
            expanded += self.recurse_child_list(sub_cat)
        return expanded
     1229
     1230
     1231
    def get_key_pack(self,category,value,data):
        """Return the packet of [[data]] whose key item for [[category]] equals [[value]]."""
        key_item = self[category][self.unique_spec]
        return data.GetPackKey(key_item,value)
    802      
     1236
    8031237    def get_number_with_esd(numstring):
    8041238        import string
    805         numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' 
     1239        numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    8061240        our_match = re.match(numb_re,numstring)
    8071241        if our_match:
    8081242            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
    809         #    print "Debug: %s -> %s" % (numstring, `our_match.groups()`)
     1243            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
    8101244        else:
    8111245            return None,None
    8121246        if dot or q: return None,None     #a dot or question mark
    813         if exp:          #has exponent 
    814            exp = string.replace(exp,"d","e")     # mop up old fashioned numbers
    815            exp = string.replace(exp,"D","e")
     1247        if exp:          #has exponent
     1248           exp = exp.replace("d","e")     # mop up old fashioned numbers
     1249           exp = exp.replace("D","e")
    8161250           base_num = base_num + exp
    817         #print "Debug: have %s for base_num from %s" % (base_num,numstring)
     1251        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
    8181252        base_num = float(base_num)
    8191253        # work out esd, if present.
     
    8271261
    def getmaxmin(self,rangeexp):
        """Parse a DDL range expression of the form 'min:max' and return
        (maximum, minimum). A missing bound is returned as '.'; present
        bounds are returned as floats. If [[rangeexp]] cannot be matched
        at all, a warning is printed and ('.', '.') is returned instead
        of raising a NameError on the unbound locals."""
        number_pat = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
        full_pat = number_pat + ":" + number_pat
        range_match = re.match(full_pat,rangeexp)
        # pre-initialise so a failed match degrades to ('.','.') below
        minimum = None
        maximum = None
        try:
            minimum = range_match.group(1)   # text before the colon
            maximum = range_match.group(7)   # text after the colon
        except AttributeError:
            print("Can't match %s" % rangeexp)
        if minimum == None: minimum = "."
        else: minimum = float(minimum)
        if maximum == None: maximum = "."
        else: maximum = float(maximum)
        return maximum,minimum
    8421276
    def initialise_drel(self):
        """Parse drel functions and prepare data structures in dictionary"""
        # order matters: validity info, then methods, then library functions
        for preparation_step in (self.ddlm_parse_valid,
                                 self.transform_drel,
                                 self.add_drel_funcs):
            preparation_step()
     1282
    def transform_drel(self):
        """Parse every dREL 'Evaluation' method in the dictionary and store
        the compiled Python source under _method.py_expression. Definitions
        whose methods fail to parse are reported and left unchanged."""
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        import traceback
        parser = drel_ast_yacc.parser
        lexer = drel_ast_yacc.lexer
        # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...}
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        # every definition carrying a method, excluding dREL library functions
        derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                              and self[a].get("_name.category_id","")!= "function"]
        for derivable in derivable_list:
            target_id = derivable
            print("Target id: %s" % derivable)
            drel_exprs = self[derivable]["_method.expression"]
            drel_purposes = self[derivable]["_method.purpose"]
            all_methods = []
            if not isinstance(drel_exprs,list):
                drel_exprs = [drel_exprs]
                drel_purposes = [drel_purposes]
            for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
                if drel_purpose != 'Evaluation':
                    continue     # only Evaluation methods are executable
                drel_expr = "\n".join(drel_expr.splitlines())   #normalise line endings
                try:
                    meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
                except Exception:   # parse failure: report and keep the original text
                    print('Syntax error in method for %s; leaving as is' % derivable)
                    a,b = sys.exc_info()[:2]
                    print((repr(a),repr(b)))
                    print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
                    # reset the lexer
                    lexer.begin('INITIAL')
                    continue
                # Construct the python method; category methods build whole packets
                cat_meth = self[derivable].get('_definition.scope','Item') == 'Category'
                pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                             loopable=loop_info,
                                                             cif_dic = self,cat_meth=cat_meth)
                all_methods.append(pyth_meth)
            if len(all_methods)>0:
                # temporarily allow overwriting so the compiled methods can be stored
                save_overwrite = self[derivable].overwrite
                self[derivable].overwrite = True
                self[derivable]["_method.py_expression"] = all_methods
                self[derivable].overwrite = save_overwrite
            #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))
    8681341
    def add_drel_funcs(self):
        """Compile every dREL library function defined in the dictionary
        (definitions in category 'function') and install the resulting
        Python functions into the module namespace for use during
        derivation. The namespace is exposed as self.ddlm_functions."""
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
        # pair each function name with the body of its Evaluation method
        funcnames = [(self[a]["_name.object_id"],
                      getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
        # create executable python code...
        parser = drel_ast_yacc.parser
        # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...}
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        global_table = globals()   # hoisted: the same table receives every function
        for funcname,funcbody in funcnames:
            newline_body = "\n".join(funcbody.splitlines())   #normalise line endings
            parser.target_id = funcname
            res_ast = parser.parse(newline_body)
            py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
            #print('dREL library function ->\n' + py_function)
            # NOTE: exec of dictionary-supplied code; dictionaries are assumed trusted
            exec(py_function, global_table)    #add to namespace
        #print('Globals after dREL functions added:' + repr(globals()))
        self.ddlm_functions = globals()  #for outside access
     1366
     1367    @track_recursion
     1368    def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
     1369        key = start_key   #starting value
     1370        result = None     #success is a non-None value
     1371        default_result = False #we have not used a default value
     1372        # check for aliases
     1373        # check for an older form of a new value
     1374        found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
     1375        if len(found_it)>0:
     1376            corrected_type = self.change_type(key,cifdata[found_it[0]])
     1377            return corrected_type
     1378        # now do the reverse check - any alternative form
     1379        alias_name = [a for a in self.alias_table.items() if key in a[1]]
     1380        print('Aliases for %s: %s' % (key,repr(alias_name)))
     1381        if len(alias_name)==1:
     1382            key = alias_name[0][0]   #actual definition name
     1383            if key in cifdata: return self.change_type(key,cifdata[key])
     1384            found_it = [k for k in alias_name[0][1] if k in cifdata]
     1385            if len(found_it)>0:
     1386                return self.change_type(key,cifdata[found_it[0]])
     1387        elif len(alias_name)>1:
     1388            raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))
     1389
     1390        the_category = self[key]["_name.category_id"]
     1391        cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
     1392        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
    8941393        # store any default value in case we have a problem
    8951394        def_val = self[key].get("_enumeration.default","")
    8961395        def_index_val = self[key].get("_enumeration.def_index_id","")
    897         the_func = self[key].get("_method.expression","") 
    898         if def_val and not the_func : return def_val
    899         if def_index_val and not the_func:            #derive a default value
    900             index_vals = self[key]["_enumeration_default.index"]
    901             val_to_index = cifdata[def_index_val]     #what we are keying on
    902             # Handle loops
    903             if isinstance(val_to_index,ListType):
    904                 keypos = map(lambda a:index_vals.index(a),val_to_index)
    905                 result = map(lambda a:self[key]["_enumeration_default.value"][a] ,keypos)
    906             else:
    907                 keypos = index_vals.index(val_to_index)   #value error if no such value available
    908                 result = self[key]["_enumeration_default.value"][keypos]
    909             print "Indexed on %s to get %s for %s" % (def_index_val,`result`,`val_to_index`)
    910             return result
     1396        if len(has_cat_names)==0: # try category method
     1397            cat_result = {}
     1398            pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
     1399            pulled_from_cats = [(k,[
     1400                                  self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
     1401                               ) for k in pulled_from_cats]
     1402            pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
     1403            if '_category_construct_local.type' in self[the_category]:
     1404                print("**Now constructing category %s using DDLm attributes**" % the_category)
     1405                try:
     1406                    cat_result = self.construct_category(the_category,cifdata,store_value=True)
     1407                except (CifRecursionError,StarFile.StarDerivationError):
     1408                    print('** Failed to construct category %s (error)' % the_category)
     1409            # Trying a pull-back when the category is partially populated
     1410            # will not work, hence we test that cat_result has no keys
     1411            if len(pulled_to_cats)>0 and len(cat_result)==0:
     1412                print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
     1413                try:
     1414                    cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
     1415                except (CifRecursionError,StarFile.StarDerivationError):
     1416                    print('** Failed to construct category %s from pullback information (error)' % the_category)
     1417            if '_method.py_expression' in self[the_category] and key not in cat_result:
     1418                print("**Now applying category method for %s in search of %s**" % (the_category,key))
     1419                cat_result = self.derive_item(the_category,cifdata,store_value=True)
     1420            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
     1421            # do we now have our value?
     1422            if key in cat_result:
     1423                return cat_result[key]
     1424
     1425        # Recalculate in case it actually worked
     1426        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
     1427        the_funcs = self[key].get('_method.py_expression',"")
     1428        if the_funcs:   #attempt to calculate it
     1429            #global_table = globals()
     1430            #global_table.update(self.ddlm_functions)
     1431            for one_func in the_funcs:
     1432                print('Executing function for %s:' % key)
     1433                #print(one_func)
     1434                exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
     1435                # print('in following global environment: ' + repr(global_table))
     1436                stored_setting = cifdata.provide_value
     1437                cifdata.provide_value = True
     1438                try:
     1439                    result = pyfunc(cifdata)
     1440                except CifRecursionError as s:
     1441                    print(s)
     1442                    result = None
     1443                except StarFile.StarDerivationError as s:
     1444                    print(s)
     1445                    result = None
     1446                finally:
     1447                    cifdata.provide_value = stored_setting
     1448                if result is not None:
     1449                    break
     1450                #print("Function returned {!r}".format(result))
     1451
     1452        if result is None and allow_defaults:   # try defaults
     1453            if def_val:
     1454                result = self.change_type(key,def_val)
     1455                default_result = True
     1456            elif def_index_val:            #derive a default value
     1457                index_vals = self[key]["_enumeration_default.index"]
     1458                val_to_index = cifdata[def_index_val]     #what we are keying on
     1459                if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
     1460                    lcase_comp = True
     1461                    index_vals = [a.lower() for a in index_vals]
     1462                # Handle loops
     1463                if isinstance(val_to_index,list):
     1464                    if lcase_comp:
     1465                        val_to_index = [a.lower() for a in val_to_index]
     1466                    keypos = [index_vals.index(a) for a in val_to_index]
     1467                    result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
     1468                else:
     1469                    if lcase_comp:
     1470                        val_to_index = val_to_index.lower()
     1471                    keypos = index_vals.index(val_to_index)   #value error if no such value available
     1472                    result = self[key]["_enumeration_default.value"][keypos]
     1473                    default_result = True   #flag that it must be extended
     1474                result = self.change_type(key,result)
     1475                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))
     1476
    9111477        # read it in
    912         the_category = self[key]["_name.category_id"]
    913         the_type = self[the_category]["_definition.class"]
    914         global_table = globals()
    915         global_table.update(self.ddlm_functions)
    916         exec the_func in global_table,locals() #will access dREL functions, puts "pyfunc" in scope
    917         print 'Executing following function'
    918         print the_func
    919         print 'With following loop categories:' + `self[key].get("_loop_categories","")`
    920         # print 'in following global environment: ' + `global_table`
    921         if self[key].get("_loop_categories",""):
    922            loop_category = self[key]["_loop_categories"][0]
    923            loop_names = self.names_in_cat(loop_category)
    924            no_of_packs = len(cifdata[loop_names[0]])
    925            packlist = []
    926            for pack_index in range(no_of_packs):
    927               packlist.append(pyfunc(self,cifdata,pack_index))
     1478        if result is None:   #can't do anything else
     1479            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
     1480            raise StarFile.StarDerivationError(start_key)
     1481        is_looped = False
     1482        if self[the_category].get('_definition.class','Set')=='Loop':
     1483            is_looped = True
     1484            if len(has_cat_names)>0:   #this category already exists
     1485                if result is None or default_result: #need to create a list of values
     1486                    loop_len = len(cifdata[has_cat_names[0]])
     1487                    out_result = [result]*loop_len
     1488                    result = out_result
     1489            else:   #nothing exists in this category, we can't store this at all
     1490                print('Resetting result %s for %s to null list as category is empty' % (key,result))
     1491                result = []
     1492
    9281493        # now try to insert the new information into the right place
    9291494        # find if items of this category already appear...
    930            if store_value:
    931                cat_names = filter(lambda a:self[a].get["_name.category_id",None]==the_category,self.keys())
    932                has_cat_names = filter(lambda a:cifdata.has_key(a),cat_names)
    933                if len(has_cat_names)>0:
    934                   target_loop = cifdata.GetLoop(has_cat_names[0])
    935                   target_loop[key] = packlist      #lengths must match or else!!
    936                else:
    937                   cifdata[key] = packlist 
    938            return packlist
    939         else:              # No looped categories
    940            return pyfunc(self,cifdata)
     1495        # Never cache empty values
     1496        if not (isinstance(result,list) and len(result)==0) and\
     1497          store_value:
     1498            if self[key].get("_definition.scope","Item")=='Item':
     1499                if is_looped:
     1500                    result = self.store_new_looped_value(key,cifdata,result,default_result)
     1501                else:
     1502                    result = self.store_new_unlooped_value(key,cifdata,result)
     1503            else:
     1504                self.store_new_cat_values(cifdata,result,the_category)
     1505        return result
     1506
      1507    def store_new_looped_value(self,key,cifdata,result,default_result):
      1508          """Store a looped value from the dREL system into a CifFile"""
          # Values coming back from dREL may be numpy objects; convert them to
          # plain Python / StarList values before insertion into the data block.
      1509          # try to change any matrices etc. to lists
      1510          the_category = self[key]["_name.category_id"]
      1511          out_result = result
      1512          if result is not None and not default_result:
      1513                  # find any numpy arrays
                  # Recursive conversion: anything with a 'dtype' attribute is
                  # treated as a numpy object; multi-element arrays become
                  # nested StarLists, single elements are unwrapped via item(0).
      1514                  def conv_from_numpy(one_elem):
      1515                      if not hasattr(one_elem,'dtype'):
      1516                         if isinstance(one_elem,(list,tuple)):
      1517                            return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
      1518                         return one_elem
      1519                      if one_elem.size > 1:   #so is not a float
      1520                         return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
      1521                      else:
      1522                          try:
      1523                            return one_elem.item(0)
      1524                          except:
      1525                            return one_elem
      1526                  out_result = [conv_from_numpy(a) for a in result]
      1527          # so out_result now contains a value suitable for storage
          # Attach the new dataname to an existing loop of the same category if
          # one is already present in the data block; otherwise start a new loop.
      1528          cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
      1529          has_cat_names = [a for a in cat_names if a in cifdata]
      1530          print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
      1531          if len(has_cat_names)>0:   #this category already exists
      1532              cifdata[key] = out_result      #lengths must match or else!!
      1533              cifdata.AddLoopName(has_cat_names[0],key)
      1534          else:
      1535              cifdata[key] = out_result
      1536              cifdata.CreateLoop([key])
      1537          print('Loop info:' + repr(cifdata.loops))
      1538          return out_result
     1539
      1540    def store_new_unlooped_value(self,key,cifdata,result):
      1541          """Store a single value from the dREL system"""
          # A numpy result (detected via 'dtype') is converted before storage:
          # arrays become StarLists, scalars are unwrapped with item(0).
          # Note: the conversion is stored in cifdata but the original
          # (unconverted) 'result' is what gets returned.
      1542          if result is not None and hasattr(result,'dtype'):
      1543              if result.size > 1:
      1544                  out_result = StarFile.StarList(result.tolist())
      1545                  cifdata[key] = out_result
      1546              else:
      1547                  cifdata[key] = result.item(0)
      1548          else:
      1549              cifdata[key] = result
      1550          return result
     1551
      1552    def construct_category(self,category,cifdata,store_value=True):
      1553        """Construct a category using DDLm attributes"""
        # Only locally-defined 'Pullback' and 'Filter' constructions are
        # handled; anything else returns an empty dict.
      1554        con_type = self[category].get('_category_construct_local.type',None)
      1555        if con_type == None:
      1556            return {}
      1557        if con_type == 'Pullback' or con_type == 'Filter':
      1558            morphisms  = self[category]['_category_construct_local.components']
      1559            morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
      1560            cats = [self[a]['_name.category_id'] for a in morphisms]
      1561            cat_keys = [self[a]['_category.key_id'] for a in cats]
      1562            cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
      1563            if con_type == 'Filter':
                # A filter supplies its own constant list of values as the
                # second 'category'; its key values are simply positions.
      1564                int_filter = self[category].get('_category_construct_local.integer_filter',None)
      1565                text_filter = self[category].get('_category_construct_local.text_filter',None)
      1566                if int_filter is not None:
      1567                    morph_values.append([int(a) for a in int_filter])
      1568                if text_filter is not None:
      1569                    morph_values.append(text_filter)
      1570                cat_values.append(range(len(morph_values[-1])))
      1571            # create the mathematical product filtered by equality of dataname values
            # NOTE(review): this is an O(n*m) product with .index() lookups
            # inside the condition, so effectively cubic for large categories.
      1572            pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
      1573                            if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
      1574            # now prepare for return
      1575            if len(pullback_ids)==0:
      1576                return {}
      1577            newids = self[category]['_category_construct_local.new_ids']
      1578            fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
      1579            if con_type == 'Pullback':
                # Both projections of the pullback, plus any datanames common
                # to source and target categories.
      1580                final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
      1581                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
      1582                final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
      1583            elif con_type == 'Filter':   #simple filter
      1584                final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
      1585                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
      1586            if store_value:
      1587                self.store_new_cat_values(cifdata,final_results,category)
      1588            return final_results
     1589
      1590    def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
      1591        """Each of the categories in source_categories are pullbacks that include
      1592        the target_category"""
        # Reconstructs (pushes) values of target_category by concatenating the
        # relevant projections of each pullback/filter source category.
      1593        target_key = self[target_category]['_category.key_id']
      1594        result = {target_key:[]}
      1595        first_time = True
      1596        # for each source category, determine which element goes to the target
      1597        for sc in source_categories:
      1598            components = self[sc]['_category_construct_local.components']
      1599            comp_cats = [self[c]['_name.category_id'] for c in components]
      1600            new_ids = self[sc]['_category_construct_local.new_ids']
      1601            source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
      1602            if len(components) == 2:  # not a filter
                # Genuine pullback: project the component that maps onto the
                # target category back into the target key column.
      1603                element_pos = comp_cats.index(target_category)
      1604                old_id = source_ids[element_pos]
      1605                print('Using %s to populate %s' % (old_id,target_key))
      1606                result[target_key].extend(cifdata[old_id])
      1607                # project through all identical names
      1608                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
      1609                # we only include keys that are common to all categories
      1610                if first_time:
      1611                    result.update(extra_result)
      1612                else:
      1613                    for k in extra_result.keys():
      1614                        if k in result:
      1615                            print('Updating %s: was %s' % (k,repr(result[k])))
      1616                            result[k].extend(extra_result[k])
      1617            else:
                # Single-component construction (reverse of a filter).
      1618                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
      1619                if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
      1620                    result[target_key].extend(cifdata[source_ids[0]])
      1621                    for k in extra_result.keys():
      1622                        if k in result:
      1623                            print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
      1624                            result[k].extend(extra_result[k])
      1625                        else:
      1626                            result[k]=extra_result[k]
      1627    # Bonus derivation if there is a singleton filter
                    # A one-element filter determines the filtered dataname's
                    # value for every row, so we can derive it wholesale.
      1628                    if self[sc]['_category_construct_local.type'] == 'Filter':
      1629                        int_filter = self[sc].get('_category_construct_local.integer_filter',None)
      1630                        text_filter = self[sc].get('_category_construct_local.text_filter',None)
      1631                        if int_filter is not None:
      1632                            filter_values = int_filter
      1633                        else:
      1634                            filter_values = text_filter
      1635                        if len(filter_values)==1:    #a singleton
      1636                            extra_dataname = self[sc]['_category_construct_local.components'][0]
      1637                            if int_filter is not None:
      1638                                new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
      1639                            else:
      1640                                new_value = filter_values * len(cifdata[source_ids[0]])
      1641                            if extra_dataname not in result:
      1642                                result[extra_dataname] = new_value
      1643                            else:
      1644                                result[extra_dataname].extend(new_value)
      1645                    else:
      1646                        raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type'])
      1647            first_time = False
      1648        # check for sanity - all dataname lengths must be identical
      1649        datalen = len(set([len(a) for a in result.values()]))
      1650        if datalen != 1:
      1651            raise AssertionError('Failed to construct equal-length category items,'+ repr(result))
      1652        if store_value:
      1653            print('Now storing ' + repr(result))
      1654            self.store_new_cat_values(cifdata,result,target_category)
      1655        return result
     1656
      1657    def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
      1658        """Copy across datanames for which the from_category key equals [[key_vals]]"""
        # Datanames whose object id occurs in both categories can be projected.
        # NOTE(review): 'skip_names=[]' is a mutable default argument; safe
        # only because it is never mutated here.
      1659        result = {}
      1660        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
      1661        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
      1662        can_project = s_names_in_cat & t_names_in_cat
      1663        can_project -= set(skip_names)  #already dealt with
      1664        source_key = self[from_category]['_category.key_id']
      1665        print('Source dataname set: ' + repr(s_names_in_cat))
      1666        print('Target dataname set: ' + repr(t_names_in_cat))
      1667        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
      1668        for project_name in can_project:
      1669            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
      1670            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
      1671            if key_vals is None:
                # Unfiltered copy: skip datanames that cannot be derived.
      1672                try:
      1673                    result[full_to_name] = cifdata[full_from_name]
      1674                except StarFile.StarDerivationError:
      1675                    pass
      1676            else:
                # Filtered copy: keep only rows whose key value is in key_vals.
      1677                all_key_vals = cifdata[source_key]
      1678                filter_pos = [all_key_vals.index(a) for a in key_vals]
      1679                try:
      1680                    all_data_vals = cifdata[full_from_name]
      1681                except StarFile.StarDerivationError:
      1682                    pass
                # NOTE(review): if StarDerivationError was raised above,
                # 'all_data_vals' is unbound here and the next line raises
                # NameError (or reuses the previous iteration's value) —
                # the 'pass' should probably be 'continue'. TODO confirm.
      1683                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
      1684        return result
     1685
      1686    def store_new_cat_values(self,cifdata,result,the_category):
      1687        """Store the values in [[result]] into [[cifdata]]"""
        # 'result' maps full datanames to equal-length value lists for a
        # whole category; existing rows are preserved by extension.
      1688        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
      1689        double_names = [a for a in result.keys() if a in cifdata]
      1690        if len(double_names)>0:
            # Some datanames already present: only merge when the full set of
            # present datanames matches what we are about to store.
      1691            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
      1692            if set(already_present) != set(result.keys()):
      1693                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
      1694                return
      1695            #check key values
            # NOTE(review): 'the_key' is a *list* of key datanames but is used
            # directly as an index into cifdata/result below; this only works
            # if indexing with a list is supported, otherwise it assumes
            # exactly one key and should use the_key[0]. TODO confirm.
      1696            old_keys = set(cifdata[the_key])
      1697            common_keys = old_keys & set(result[the_key])
      1698            if len(common_keys)>0:
      1699                print("Category %s not updated, key values in common:" % (common_keys))
      1700                return
      1701            #extend result values with old values
      1702            for one_name,one_value in result.items():
      1703                result[one_name].extend(cifdata[one_name])
      1704        for one_name, one_value in result.items():
      1705            try:
      1706                self.store_new_looped_value(one_name,cifdata,one_value,False)
      1707            except StarFile.StarError:
      1708                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
      1709        #put the key as the first item
      1710        print('Fixing item order for {}'.format(repr(the_key)))
      1711        for one_key in the_key:  #should only be one
      1712            cifdata.ChangeItemOrder(one_key,0)
     1713
     1714
      1715    def generate_default_packet(self,catname,catkey,keyvalue):
      1716        """Return a StarPacket with items from ``catname`` and a key value
      1717        of ``keyvalue``"""
        # Collect every dataname in the category that declares an
        # _enumeration.default, coerce it to the declared type, and attach it
        # both positionally (extend) and as a named attribute on the packet.
      1718        newpack = StarPacket()
      1719        for na in self.names_in_cat(catname):
      1720            def_val = self[na].get("_enumeration.default","")
      1721            if def_val:
      1722                final_val = self.change_type(na,def_val)
      1723                newpack.extend(final_val)
      1724                setattr(newpack,na,final_val)
        # Only attach the key when at least one default was found, so an empty
        # packet signals "no defaults available" to the caller.
      1725        if len(newpack)>0:
      1726            newpack.extend(keyvalue)
      1727            setattr(newpack,catkey,keyvalue)
      1728        return newpack
     1729
     1730
      1731    def switch_numpy(self,to_val):
        # Retained for API compatibility; numpy switching is now a no-op.
      1732        pass
    9411733
     9421734    def change_type(self,itemname,inval):
     9431735        import numpy
     944         # we need to iterate over the structure description.  For now we deal only with
     945         # Single and Array containers, with types that are a simple specification
     946         item_type = self[itemname]["_type.contents"]
     947         item_container = self[itemname]["_type.container"]
     948         isnumeric = (item_type == "Real" or \
     949            item_type == "Float" or \
     950            item_type == "Count" or \
     951            item_type == "Integer" or \
     952            item_type == "Digit")
     953         if not isnumeric: return inval   # we don't attempt any changes
     954         # even for a 'Single' container, it may be looped
     955         # print 'Changing type for %s' % `inval`
     956         if StarFile.get_dim(inval)[0] == 0:
     957             if item_container == 'Single': return float_with_esd(inval)
     958             if item_container == 'Array':
     959                 return self.recursive_numerify(inval)
     960         else:
     961             if item_container == 'Single': return map(float_with_esd,inval)
     962             if item_container == 'Array': return map(self.recursive_numerify,inval)
     963      
     964     # A utility function to recursively make all atomic values numeric
     965     # All embedded values will be either StarTuples or StarLists
     966     def normal_numerify(self,valarray):
     967         # print 'Recursive evaluation of %s' % `valarray`
     968         if isinstance(valarray,StarFile.StarTuple):
     969             return StarFile.StarTuple(map(self.recursive_numerify,valarray))
     970         if isinstance(valarray,StarFile.StarList):
     971             return StarFile.StarList(map(self.recursive_numerify,valarray))
     972         if isinstance(valarray,(StringType,IntType,LongType)):
     973             return float_with_esd(valarray)
     974         else:
     975             return valarray    #assume is OK
     976                
     977     # Identical to the above except that a numpy array is returned.  We
     978     # do the normal_numerify call in order to perform the float conversion.
     979     #
     980     def numpy_numerify(self,valarray):
     981         import numpy
     982         return numpy.array(self.normal_numerify(valarray))
        # New implementation: delegate type coercion to convert_type(), which
        # builds a conversion function from the item's DDLm definition.
        # Unknown values ('?') pass through untouched; plain lists (loops) are
        # converted element-wise, while StarLists are converted as one value.
      1736        if inval == "?": return inval
      1737        change_function = convert_type(self[itemname])
      1738        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
      1739            newval = list([change_function(a) for a in inval])
      1740        else:
      1741            newval = change_function(inval)
      1742        return newval
     1743
      1744    def install_validation_functions(self):
      1745        """Install the DDL-appropriate validation checks"""
        # Older DDL1/DDL2 dictionaries use the full legacy check set; DDLm
        # dictionaries get a smaller, DDLm-specific set.
      1746        if self.diclang != 'DDLm':
      1747          self.item_validation_funs = [
      1748            self.validate_item_type,
      1749            self.validate_item_esd,
      1750            self.validate_item_enum,   # functions which check conformance
      1751            self.validate_enum_range,
      1752            self.validate_looping]
      1753          self.loop_validation_funs = [
      1754            self.validate_loop_membership,
      1755            self.validate_loop_key,
      1756            self.validate_loop_references]    # functions checking loop values
      1757          self.global_validation_funs = [
      1758            self.validate_exclusion,
      1759            self.validate_parent,
      1760            self.validate_child,
      1761            self.validate_dependents,
      1762            self.validate_uniqueness] # where we need to look at other values
      1763          self.block_validation_funs = [  # where only a full block will do
      1764            self.validate_mandatory_category]
      1765          self.global_remove_validation_funs = [
      1766            self.validate_remove_parent_child] # removal is quicker with special checks
      1767        elif self.diclang == 'DDLm':
      1768            self.item_validation_funs = [
      1769                self.validate_item_enum,
      1770                self.validate_item_esd_ddlm,
      1771                ]
      1772            self.loop_validation_funs = [
      1773                self.validate_looping_ddlm,
      1774                self.validate_loop_key_ddlm,
      1775                self.validate_loop_membership
      1776                ]
      1777            self.global_validation_funs = []
      1778            self.block_validation_funs = [
      1779                self.check_mandatory_items,
      1780                self.check_prohibited_items
      1781                ]
      1782            self.global_remove_validation_funs = []
        # Shared bookkeeping used by the validation machinery.
      1783        self.optimize = False        # default value
      1784        self.done_parents = []
      1785        self.done_children = []
      1786        self.done_keys = []
    9831787
     9841788    def validate_item_type(self,item_name,item_value):
        # Check item_value against the regular expression for its declared
        # type; returns {"result":True} or {"result":False,"bad_values":...}.
     985         def mymatch(m,a): 
      1789        def mymatch(m,a):
     9861790            res = m.match(a)
     987             if res != None: return res.group() 
      1791            if res != None: return res.group()
     9881792            else: return ""
     989         target_type = self[item_name].get(self.type_spec) 
      1793        target_type = self[item_name].get(self.type_spec)
     9901794        if target_type == None:          # e.g. a category definition
     9911795            return {"result":True}                  # not restricted in any way
      
     9931797        item_values = listify(item_value)
     9941798        #for item in item_values:
     995             #print "Type match " + item_name + " " + item + ":",
      1799            #print("Type match " + item_name + " " + item + ":",)
     9961800        #skip dots and question marks
        # NOTE(review): 'matchexpr' is built from target_type in lines elided
        # from this diff hunk — presumably a compiled regex; verify upstream.
     997         check_all = filter(lambda a: a !="." and a != "?",item_values)
     998         check_all = filter(lambda a: mymatch(matchexpr,a) != a, check_all)
      1801        check_all = [a for a in item_values if a !="." and a != "?"]
      1802        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
     9991803        if len(check_all)>0: return {"result":False,"bad_values":check_all}
     10001804        else: return {"result":True}
     1805
      1806    def decide(self,result_list):
      1807        """Construct the return list"""
        # Empty result_list means every value passed; otherwise the offending
        # values are reported back to the caller.
      1808        if len(result_list)==0:
      1809               return {"result":True}
      1810        else:
      1811               return {"result":False,"bad_values":result_list}
     1812
      1813    def validate_item_container(self, item_name,item_value):
        # Check that each value's Python type is consistent with the DDLm
        # _type.container declaration.
        # NOTE(review): 'decide' is a method but is called unqualified here
        # (should presumably be self.decide), and 'long' only exists if a
        # py2-compat alias is defined elsewhere in the file — TODO confirm.
      1814        container_type = self[item_name]['_type.container']
      1815        item_values = listify(item_value)
      1816        if container_type == 'Single':
      1817           okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
      1818           return decide(okcheck)
      1819        if container_type in ('Multiple','List'):
      1820           okcheck = [a for a in item_values if not isinstance(a,StarList)]
      1821           return decide(okcheck)
      1822        if container_type == 'Array':    #A list with numerical values
      1823           okcheck = [a for a in item_values if not isinstance(a,StarList)]
      1824           first_check = decide(okcheck)
      1825           if not first_check['result']: return first_check
        # NOTE(review): the numeric-element check below was left unfinished
        # (commented out mid-expression); the Array branch therefore returns
        # None when the first check passes. TODO confirm intent.
      1826           #num_check = [a for a in item_values if len([b for b in a if not isinstance
    10011827
     10021828    def validate_item_esd(self,item_name,item_value):
        # Legacy (non-DDLm) check: an appended esd in parentheses is only
        # permitted when the definition declares esd support.
     10031829        if self[item_name].get(self.primitive_type) != 'numb':
     10041830            return {"result":None}
     1005         can_esd = self[item_name].get(self.esd_spec,"none") == "esd" 
      1831        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
     10061832        if can_esd: return {"result":True}         #must be OK!
     10071833        item_values = listify(item_value)
     1008         check_all = filter(lambda a: get_number_with_esd(a)[1] != None, item_values)
      1834        check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
     10091835        if len(check_all)>0: return {"result":False,"bad_values":check_all}
     10101836        return {"result":True}
    10111837
      1838    def validate_item_esd_ddlm(self,item_name,item_value):
        # DDLm variant: only 'Measurand' items may carry an esd, and then
        # every value must carry one.
        # NOTE(review): the lookup key is the literal string
        # 'self.primitive_type' (quoted), so this .get() almost certainly
        # always returns None and the check never proceeds — looks like it
        # should be self[item_name].get(self.primitive_type) or a DDLm
        # attribute name. TODO confirm.
      1839        if self[item_name].get('self.primitive_type') not in \
      1840        ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
      1841            return {"result":None}
      1842        can_esd = True
      1843        if self[item_name].get('_type.purpose') != 'Measurand':
      1844            can_esd = False
      1845        item_values = listify(item_value)
      1846        check_all = [get_number_with_esd(a)[1] for a in item_values]
      1847        check_all = [v for v in check_all if (can_esd and v == None) or \
      1848                 (not can_esd and v != None)]
      1849        if len(check_all)>0: return {"result":False,"bad_values":check_all}
      1850        return {"result":True}
     1851
     10121852    def validate_enum_range(self,item_name,item_value):
        # Check numeric values against declared _item_range.minimum/maximum
        # pairs; '.' endpoints mean unbounded (handled in elided lines).
     1013         if not self[item_name].has_key("_item_range.minimum") and \
     1014            not self[item_name].has_key("_item_range.maximum"):
      1853        if "_item_range.minimum" not in self[item_name] and \
      1854           "_item_range.maximum" not in self[item_name]:
     10151855            return {"result":None}
     10161856        minvals = self[item_name].get("_item_range.minimum",default = ["."])
      
     10211861        maxvals = map(makefloat, maxvals)
     10221862        minvals = map(makefloat, minvals)
        # list(zip(...)) replaces the removed py2-only map(None,...) pairing;
        # under py3 the map() iterators above are consumed exactly once here.
     1023         rangelist = map(None,minvals,maxvals)
      1863        rangelist = list(zip(minvals,maxvals))
     10241864        item_values = listify(item_value)
     10251865        def map_check(rangelist,item_value):
      
     10341874                if upper == lower and iv == upper: return True
     10351875            # debug
     1036             # print "Value %s fails range check %d < x < %d" % (item_value,lower,upper)
      1876            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
     10371877            return False
     1038         check_all = filter(lambda a,b=rangelist: map_check(b,a) != True, item_values)
      1878        check_all = [a for a in item_values if map_check(rangelist,a) != True]
     10391879        if len(check_all)>0: return {"result":False,"bad_values":check_all}
     10401880        else: return {"result":True}
     1041                
      1881
     10421882    def validate_item_enum(self,item_name,item_value):
        # Check each value against the declared enumeration list; items with
        # no enumeration are unrestricted ('.' and '?' are appended to the
        # allowed list in lines elided from this hunk).
     1043         try: 
      1883        try:
     10441884            enum_list = self[item_name][self.enum_spec][:]
     10451885        except KeyError:
      
     10481888        enum_list.append("?")   #unknown
     10491889        item_values = listify(item_value)
     1050         #print "Enum check: %s in %s" % (`item_values`,`enum_list`)
     1051         check_all = filter(lambda a: a not in enum_list,item_values)
      1890        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
      1891        check_all = [a for a in item_values if a not in enum_list]
     10521892        if len(check_all)>0: return {"result":False,"bad_values":check_all}
     10531893        else: return {"result":True}
     
    10581898        except KeyError:
    10591899            return {"result":None}
    1060         if must_loop == 'yes' and isinstance(item_value,StringType): # not looped
     1900        if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
    10611901            return {"result":False}      #this could be triggered
    1062         if must_loop == 'no' and not isinstance(item_value,StringType):
     1902        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
    10631903            return {"result":False}
    10641904        return {"result":True}
    10651905
      1906    def validate_looping_ddlm(self,loop_names):
      1907        """Check that all names are loopable"""
        # get_final_cats drops names from non-loopable categories, so a
        # shorter result means some names cannot legally appear in a loop.
      1908        truly_loopy = self.get_final_cats(loop_names)
      1909        if len(truly_loopy)<len(loop_names):  #some are bad
      1910            categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
      1911            not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
      1912            return {"result":False,"bad_items":not_looped}
      1913        return {"result":True}
     1914
    10661915
     10671916    def validate_loop_membership(self,loop_names):
        # All datanames in a loop must resolve to the same uppermost parent
        # category; any name resolving elsewhere is reported as bad.
     1068         try:
     1069             categories = map(lambda a:self[a][self.cat_spec],loop_names)
     1070         except KeyError:       #category is mandatory
     1071             raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
     1072         bad_items =  filter(lambda a:a != categories[0],categories)
      1917        final_cat = self.get_final_cats(loop_names)
      1918        bad_items =  [a for a in final_cat if a != final_cat[0]]
     10731919        if len(bad_items)>0:
     10741920            return {"result":False,"bad_items":bad_items}
     10751921        else: return {"result":True}
    10761922
      1923    def get_final_cats(self,loop_names):
      1924        """Return a list of the uppermost parent categories for the loop_names. Names
      1925        that are not from loopable categories are ignored."""
      1926        try:
      1927            categories = [self[a][self.cat_spec].lower() for a in loop_names]
      1928        except KeyError:       #category is mandatory
      1929            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
        # Names outside parent_lookup belong to non-loopable (Set) categories
        # and are silently dropped before resolving to the top-level parent.
      1930        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
      1931        return [self.parent_lookup[a] for a in truly_looped]
     1932
    def validate_loop_key(self,loop_names):
        """Check that the mandatory key datanames of the loop's category are present.

        Returns {"result":False,"bad_items":<key name>} when a required key
        (or one of its alternates/aliases) is missing from the loop, else
        {"result":True}.
        """
        category = self[loop_names[0]][self.cat_spec]
        # find any unique values which must be present
        key_spec = self[category].get(self.key_spec,[])
        for names_to_check in key_spec:
            # match both unicode and byte strings, as done elsewhere in this file
            if isinstance(names_to_check,(unicode,str)):   #only one
                names_to_check = [names_to_check]
            for loop_key in names_to_check:
                if loop_key not in loop_names:
                    #is this one of those dang implicit items?
                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                        continue          #it is virtually there...
                    alternates = self.get_alternates(loop_key)
                    if alternates == []:
                        return {"result":False,"bad_items":loop_key}
                    for alt_names in alternates:
                        alt = [a for a in alt_names if a in loop_names]
                        if len(alt) == 0:
                            return {"result":False,"bad_items":loop_key}  # no alternates
        return {"result":True}
    1098        
     1953
    def validate_loop_key_ddlm(self,loop_names):
        """Make sure at least one of the necessary keys are available"""
        final_cats = self.get_final_cats(loop_names)
        if not final_cats:       # nothing loopable, so nothing to check
            return {"result":True}
        # any one of the category's possible keys satisfies the requirement
        poss_keys = self.cat_key_table[final_cats[0]]
        if any(name in loop_names for name in poss_keys):
            return {"result":True}
        return {"result":False,"bad_items":poss_keys}
     1966
    def validate_loop_references(self,loop_names):
        """Check that names referenced by the looped names are also in the loop.

        Each dataname may declare (via ``self.list_ref_spec``) other names, or
        whole groups of names (ending in ``_``), that must accompany it in a
        loop.  Returns {"result":False,"bad_items":...} for the first
        unsatisfiable reference, else {"result":True}.
        """
        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
        must_haves = [a for a in must_haves if a is not None]
        # build a flat list.  For efficiency we don't remove duplicates,as
        # we expect no more than the order of 10 or 20 looped names.
        flat_mh = []
        for entry in must_haves:
            if isinstance(entry,(unicode,str)):
                flat_mh.append(entry)      #single name
            else:
                flat_mh.extend(entry)      #list of names
        group_mh = [a for a in flat_mh if a[-1]=="_"]    # group references
        single_mh = [a for a in flat_mh if a[-1]!="_"]   # single-name references
        res = [a for a in single_mh if a not in loop_names]
        def check_gr(s_item, name_list):
            # true when some name in name_list starts with the group prefix
            nl = [a[:len(s_item)] for a in name_list]
            return s_item in nl
        # NOTE(review): this keeps groups that ARE matched in the loop and
        # then treats them as a problem below -- verify the intended polarity
        res_g = [a for a in group_mh if check_gr(a,loop_names)]
        if len(res) == 0 and len(res_g) == 0: return {"result":True}
        # construct alternate list
        all_alternates = [(a,self.get_alternates(a)) for a in res]
        # names with no alternates must be collected *before* filtering them
        # out, otherwise the error report below is always empty
        missing_alts = [a[0] for a in all_alternates if a[1] == []]
        alternates = [a for a in all_alternates if a[1] != []]
        if len(alternates) != len(res):
           return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                       #doesn't have an altern
        #loop over alternates
        for orig_name,alt_names in alternates:
             alt = [a for a in alt_names if a in loop_names]
             if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
        return {"result":True}        #found alternates
    1134              
     2002
    def get_alternates(self,main_name,exclusive_only=False):
        """Return alternate and alias datanames that may stand in for main_name.

        When ``exclusive_only`` is True only 'alternate_exclusive'/'replace'
        relations are returned, otherwise 'alternate'/'replace'.  Aliases
        from _item_aliases.alias_name are always appended.
        """
        alternates = self[main_name].get(self.related_func,None)
        alt_names = []
        if alternates is not None:
            alt_names =  self[main_name].get(self.related_item,None)
            # match both unicode and byte strings, as done elsewhere in this file
            if isinstance(alt_names,(unicode,str)):   # single name: wrap both
                alt_names = [alt_names]
                alternates = [alternates]
            together = zip(alt_names,alternates)
            if exclusive_only:
                wanted = ("alternate_exclusive","replace")
            else:
                wanted = ("alternate","replace")
            alt_names = [pair[0] for pair in together if pair[1] in wanted]
        # now do the alias thing
        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
        alt_names.extend(alias_names)
        # print("Alternates for {}: {!r}".format(main_name, alt_names))
        return alt_names
    1155        
     2023
    11562024
    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        """Check that no exclusive alternate of item_name is also present.

        Scans the data block, the provisional items and the global block for
        'alternate_exclusive'/'replace' relations of ``item_name``.
        """
        alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
        item_name_list = [a.lower() for a in whole_block.keys()]
        item_name_list.extend([a.lower() for a in provisional_items.keys()])
        # names in the global block count as present as well; without this
        # the ``globals`` argument would be accepted but silently ignored
        item_name_list.extend([a.lower() for a in globals.keys()])
        bad = [a for a in alternates if a in item_name_list]
        if len(bad)>0:
            print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
            return {"result":False,"bad_items":bad}
        else: return {"result":True}
     
    11702037        parent_item = self[item_name].get(self.parent_spec)
    11712038        if not parent_item: return {"result":None}   #no parent specified
    1172         if isinstance(parent_item,ListType):
     2039        if isinstance(parent_item,list):
    11732040            parent_item = parent_item[0]
    11742041        if self.optimize:
    11752042            if parent_item in self.done_parents:
    11762043                return {"result":None}
    1177             else: 
     2044            else:
    11782045                self.done_parents.append(parent_item)
    1179                 print "Done parents %s" % `self.done_parents`
     2046                print("Done parents %s" % repr(self.done_parents))
    11802047        # initialise parent/child values
    1181         if isinstance(item_value,StringType):
     2048        if isinstance(item_value,unicode):
    11822049            child_values = [item_value]
    11832050        else: child_values = item_value[:]    #copy for safety
    11842051        # track down the parent
    1185         # print "Looking for %s parent item %s in %s" % (item_name,parent_item,`whole_block`)
    1186         # if globals contains the parent values, we are doing a DDL2 dictionary, and so 
     2052        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
     2053        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
    11872054        # we have collected all parent values into the global block - so no need to search
    1188         # for them elsewhere. 
    1189         # print "Looking for %s" % `parent_item`
     2055        # for them elsewhere.
     2056        # print("Looking for {!r}".format(parent_item))
    11902057        parent_values = globals.get(parent_item)
    11912058        if not parent_values:
    11922059            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
    1193         if not parent_values: 
     2060        if not parent_values:
    11942061            # go for alternates
    11952062            namespace = whole_block.keys()
     
    11982065            alt_names = filter_present(self.get_alternates(parent_item),namespace)
    11992066            if len(alt_names) == 0:
    1200                 if len(filter(lambda a:a != "." and a != "?",child_values))>0:
     2067                if len([a for a in child_values if a != "." and a != "?"])>0:
    12012068                    return {"result":False,"parent":parent_item}#no parent available -> error
    12022069                else:
    12032070                    return {"result":None}       #maybe True is more appropriate??
    1204             parent_item = alt_names[0]           #should never be more than one?? 
     2071            parent_item = alt_names[0]           #should never be more than one??
    12052072            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
    12062073            if not parent_values:   # check global block
    12072074                parent_values = globals.get(parent_item)
    1208         if isinstance(parent_values,StringType):
    1209             parent_values = [parent_values]   
    1210         #print "Checking parent %s against %s, values %s/%s" % (parent_item,
    1211         #                                          item_name,`parent_values`,`child_values`)
     2075        if isinstance(parent_values,unicode):
     2076            parent_values = [parent_values]
     2077        #print("Checking parent %s against %s, values %r/%r" % (parent_item,
     2078        #                                          item_name, parent_values, child_values))
    12122079        missing = self.check_parent_child(parent_values,child_values)
    12132080        if len(missing) > 0:
     
    12212088            return {"result":None}    #not relevant
    12222089        # special case for dictionaries  -> we check parents of children only
    1223         if globals.has_key(item_name):  #dictionary so skip
     2090        if item_name in globals:  #dictionary so skip
    12242091            return {"result":None}
    1225         if isinstance(child_items,StringType): # only one child
     2092        if isinstance(child_items,unicode): # only one child
    12262093            child_items = [child_items]
    1227         if isinstance(item_value,StringType): # single value
     2094        if isinstance(item_value,unicode): # single value
    12282095            parent_values = [item_value]
    12292096        else: parent_values = item_value[:]
     
    12362103                if child_item in self.done_children:
    12372104                    return {"result":None}
    1238                 else: 
     2105                else:
    12392106                    self.done_children.append(child_item)
    1240                     print "Done children %s" % `self.done_children`
    1241             if provisional_items.has_key(child_item):
     2107                    print("Done children %s" % repr(self.done_children))
     2108            if child_item in provisional_items:
    12422109                child_values = provisional_items[child_item][:]
    1243             elif whole_block.has_key(child_item):
     2110            elif child_item in whole_block:
    12442111                child_values = whole_block[child_item][:]
    1245             else:  continue 
    1246             if isinstance(child_values,StringType):
     2112            else:  continue
     2113            if isinstance(child_values,unicode):
    12472114                child_values = [child_values]
    1248             #    print "Checking child %s against %s, values %s/%s" % (child_item,
    1249             #                                          item_name,`child_values`,`parent_values`)
     2115                # print("Checking child %s against %s, values %r/%r" % (child_item,
     2116                #       item_name, child_values, parent_values))
    12502117            missing = self.check_parent_child(parent_values,child_values)
    12512118            if len(missing)>0:
    12522119                return {"result":False,"bad_values":missing,"child":child_item}
    12532120        return {"result":True}       #could mean that no child items present
    1254            
     2121
    12552122    #a generic checker: all child vals should appear in parent_vals
    12562123    def check_parent_child(self,parent_vals,child_vals):
     
    12582125        pv = parent_vals[:]
    12592126        pv.extend([".","?"])
    1260         res =  filter(lambda a:a not in pv,child_vals)
    1261         #print "Missing: %s" % res
     2127        res =  [a for a in child_vals if a not in pv]
     2128        #print("Missing: %s" % res)
    12622129        return res
    12632130
     
    12672134        except KeyError:
    12682135            return {"result":None}
    1269         if isinstance(child_items,StringType): # only one child
     2136        if isinstance(child_items,unicode): # only one child
    12702137            child_items = [child_items]
    12712138        for child_item in child_items:
    1272             if whole_block.has_key(child_item):
     2139            if child_item in whole_block:
    12732140                return {"result":False,"child":child_item}
    12742141        return {"result":True}
    1275          
     2142
    12762143    def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
    12772144        try:
     
    12792146        except KeyError:
    12802147            return {"result":None}    #not relevant
    1281         if isinstance(dep_items,StringType):
     2148        if isinstance(dep_items,unicode):
    12822149            dep_items = [dep_items]
    12832150        actual_names = whole_block.keys()
    12842151        actual_names.extend(prov.keys())
    12852152        actual_names.extend(globals.keys())
    1286         missing = filter(lambda a:a not in actual_names,dep_items)
     2153        missing = [a for a in dep_items if a not in actual_names]
    12872154        if len(missing) > 0:
    12882155            alternates = map(lambda a:[self.get_alternates(a),a],missing)
    1289             # compact way to get a list of alternative items which are 
     2156            # compact way to get a list of alternative items which are
    12902157            # present
    1291             have_check = map(lambda b:[filter_present(b[0],actual_names),
    1292                                        b[1]],alternates)
    1293             have_check = filter(lambda a:len(a[0])==0,have_check)
     2158            have_check = [(filter_present(b[0],actual_names),
     2159                                       b[1]) for b in alternates]
     2160            have_check = list([a for a in have_check if len(a[0])==0])
    12942161            if len(have_check) > 0:
    1295                 have_check = map(lambda a:a[1],have_check)
     2162                have_check = [a[1] for a in have_check]
    12962163                return {"result":False,"bad_items":have_check}
    12972164        return {"result":True}
    1298        
     2165
    12992166    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
    13002167                                                                  globals={}):
    13012168        category = self[item_name].get(self.cat_spec)
    13022169        if category == None:
    1303             print "No category found for %s" % item_name
     2170            print("No category found for %s" % item_name)
    13042171            return {"result":None}
    1305         # print "Category %s for item %s" % (`category`,item_name)
    1306         catentry = self.cat_map[category]
     2172        # print("Category {!r} for item {}".format(category, item_name))
    13072173        # we make a copy in the following as we will be removing stuff later!
    1308         unique_i = self[catentry].get("_category_key.name",[])[:]
    1309         if isinstance(unique_i,StringType):
     2174        unique_i = self[category].get("_category_key.name",[])[:]
     2175        if isinstance(unique_i,unicode):
    13102176            unique_i = [unique_i]
    13112177        if item_name not in unique_i:       #no need to verify
    13122178            return {"result":None}
    1313         if isinstance(item_value,StringType):  #not looped
     2179        if isinstance(item_value,unicode):  #not looped
    13142180            return {"result":None}
    1315         # print "Checking %s -> %s -> %s ->Unique: " % (item_name,category,catentry) + `unique_i`
     2181        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
    13162182        # check that we can't optimize by not doing this check
    13172183        if self.optimize:
     
    13292195           # the logic being that anything in the provisional dict overrides the
    13302196           # main block
    1331                if provisional_items.has_key(other_name):
    1332                    other_data.append(provisional_items[other_name]) 
    1333                elif whole_block.has_key(other_name):
     2197               if other_name in provisional_items:
     2198                   other_data.append(provisional_items[other_name])
     2199               elif other_name in whole_block:
    13342200                   other_data.append(whole_block[other_name])
    13352201               elif self[other_name].get(self.must_exist_spec)=="implicit":
     
    13392205        # ok, so we go through all of our values
    13402206        # this works by comparing lists of strings to one other, and
    1341         # so could be fooled if you think that '1.' and '1' are 
     2207        # so could be fooled if you think that '1.' and '1' are
    13422208        # identical
    13432209        for i in range(len(item_value)):
    1344             #print "Value no. %d" % i ,
     2210            #print("Value no. %d" % i, end=" ")
    13452211            this_entry = item_value[i]
    13462212            for j in range(len(other_data)):
    1347                 this_entry = " ".join([this_entry,other_data[j][i]]) 
    1348             #print "Looking for %s in %s: " % (`this_entry`,`val_list`)
    1349             if this_entry in val_list: 
     2213                this_entry = " ".join([this_entry,other_data[j][i]])
     2214            #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
     2215            if this_entry in val_list:
    13502216                return {"result":False,"bad_values":this_entry}
    13512217            val_list.append(this_entry)
     
    13532219
    13542220
    def validate_mandatory_category(self,whole_block):
        """Check that every mandatory category is represented in whole_block."""
        # categories flagged as mandatory by the dictionary
        mand_cats = [self[a]['_category.id'] for a in self.keys()
                     if self[a].get("_category.mandatory_code","no") == "yes"]
        if not mand_cats:
            return {"result":True}
        # print("Mandatory categories - {!r}".format(mand_cats))
        # categories to which the datanames actually present belong
        present_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
        absent = set(mand_cats) - set(present_cats)
        if absent:
            return {"result":False,"bad_items":repr(absent)}
        return {"result":True}
    13732232
    1374     def find_prob_cats(self,whole_block):
    1375         mand_cats = filter(lambda a:self[a].get("_category.mandatory_code","no")=="yes",
    1376                     self.keys())
    1377         # map to actual ids
    1378         catlist = self.cat_map.items()
    1379         # find missing categories
    1380         wbs = whole_block["saves"]
    1381         abs_all_keys = whole_block.keys()
    1382         abs_all_keys.extend(reduce(lambda a,b:a+(wbs[b].keys()),wbs.keys(),[]))
    1383         prob_cats = []
    1384         for mand_cat in mand_cats:
    1385             cat_id = filter(lambda a:a[1]==mand_cat,catlist)[0][0]
    1386            
    1387             if len(filter(lambda a:self[a].get(self.cat_spec)==cat_id,abs_all_keys))==0:
    1388                 prob_cats.append(cat_id)
    1389         if len(prob_cats) > 0:
    1390             return (False,{'whole_block':[('validate_mandatory_category',{"result":False,"bad_items":problem_cats})]})
    1391         else:
    1392             return (True,{})
    def check_mandatory_items(self,whole_block,default_scope='Item'):
        """Return an error if any mandatory items are missing"""
        # nothing declared mandatory -> trivially valid
        if len(self.scopes_mandatory)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        # presence of a dictionary title overrides the declared scope
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        missing = [a for a in self.scopes_mandatory[scope] if a not in whole_block]
        if len(missing)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":missing}
     2246
    def check_prohibited_items(self,whole_block,default_scope='Item'):
        """Return an error if any prohibited items are present"""
        # nothing declared prohibited -> trivially valid
        if len(self.scopes_naughty)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        # presence of a dictionary title overrides the declared scope
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        present = [a for a in self.scopes_naughty[scope] if a in whole_block]
        if len(present)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":present}
    13932260
    13942261
    def run_item_validation(self,item_name,item_value):
        """Run every single-item validation function against one dataname.

        Returns {item_name: [(check_name, result_dict), ...]}.
        """
        return {item_name:[(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs]}
    13972264
    def run_loop_validation(self,loop_names):
        """Run every loop-level validation function against one loop.

        Returns {first_loop_name: [(check_name, result_dict), ...]}.
        """
        return {loop_names[0]:[(f.__name__,f(loop_names)) for f in self.loop_validation_funs]}
    14002267
    def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
        """Run every cross-item (global) validation function for one dataname.

        Returns {item_name: [(check_name, result_dict), ...]}.
        """
        results = [(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs]
        return {item_name:results}
    14042271
    def run_block_validation(self,whole_block,block_scope='Item'):
        """Run every whole-block validation function against whole_block.

        ``block_scope`` is accepted for interface compatibility but not used
        here.  Returns {"whole_block": [(check_name, result_dict), ...]}.
        """
        results = [(f.__name__,f(whole_block)) for f in self.block_validation_funs]
        # fix up the return values
        return {"whole_block":results}
     
    14212288
    14222289
     2290
    14232291class ValidCifBlock(CifBlock):
     2292    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
     2293    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
     2294    `CifBlock`.
     2295
     2296    ## Initialisation
     2297
     2298    * `dic` is a `CifDic` object to be used for validation.
     2299
     2300    """
    14242301    def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
    1425         CifBlock.__init__(self,*args,**kwords)   
     2302        CifBlock.__init__(self,*args,**kwords)
    14262303        if dic and diclist:
    1427             print "Warning: diclist argument ignored when initialising ValidCifBlock"
     2304            print("Warning: diclist argument ignored when initialising ValidCifBlock")
    14282305        if isinstance(dic,CifDic):
    14292306            self.fulldic = dic
     
    14432320            update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
    14442321            update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
    1445         for loop in self.loops:
    1446             update_value(self.v_result,self.fulldic.run_loop_validation(loop.keys()))
     2322        for loop_names in self.loops.values():
     2323            update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
    14472324        # now run block-level checks
    14482325        update_value(self.v_result,self.fulldic.run_block_validation(self))
    14492326        # return false and list of baddies if anything didn't match
    14502327        self.fulldic.optimize_off()
    1451         for test_key in self.v_result.keys():
    1452             #print "%s: %s" % (test_key,`self.v_result[test_key]`)
    1453             self.v_result[test_key] = filter(lambda a:a[1]["result"]==False,self.v_result[test_key])
    1454             if len(self.v_result[test_key]) == 0:
     2328        all_keys = list(self.v_result.keys()) #dictionary will change
     2329        for test_key in all_keys:
     2330            #print("%s: %r" % (test_key, self.v_result[test_key]))
     2331            self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
     2332            if len(self.v_result[test_key]) == 0:
    14552333                del self.v_result[test_key]
    14562334        isvalid = len(self.v_result)==0
    14572335        #if not isvalid:
    1458         #    print "Baddies:" + `self.v_result`
     2336        #    print("Baddies: {!r}".format(self.v_result))
    14592337        return isvalid,self.v_result
    14602338
    def single_item_check(self,item_name,item_value):
        """Validate one dataname/value pair against the dictionary.

        Returns (isvalid, baddies) where baddies lists the failed checks.
        """
        #self.match_single_item(item_name)
        if item_name not in self.fulldic:
            result = {item_name:[]}    # unknown names validate trivially
        else:
            result = self.fulldic.run_item_validation(item_name,item_value)
        # keep explicit failures only ("result" may be True/False/None);
        # `is False` matches the test used in global_item_check
        baddies = [a for a in result[item_name] if a[1]["result"] is False]
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies
    14722350
    def loop_item_check(self,loop_names):
        """Validate a prospective loop of datanames against the dictionary.

        Returns (isvalid, baddies) where baddies lists the failed checks.
        """
        in_dic_names = [a for a in loop_names if a in self.fulldic]
        if len(in_dic_names)==0:
            # no name known to the dictionary: trivially valid.  The previous
            # code keyed the empty result on loop_names[0] and then indexed
            # result[in_dic_names[0]], raising IndexError -- return directly
            return True,[]
        result = self.fulldic.run_loop_validation(in_dic_names)
        baddies = [a for a in result[in_dic_names[0]] if a[1]["result"] is False]
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
        return isvalid,baddies
    14842362
    def global_item_check(self, item_name, item_value, provisional_items={}):
        """Run inter-item ("global") validation for ``item_name``.

        ``provisional_items`` holds datanames that should be assumed present
        although not yet stored in the block (passed through unmodified, so
        the mutable default is safe here).  Returns ``(isvalid, baddies)``."""
        if item_name not in self.fulldic:
            result = {item_name: []}
        else:
            result = self.fulldic.run_global_validation(item_name,
               item_value, self, provisional_items=provisional_items)
        # use == False as in the sibling *_check methods; the previous
        # "is False" was inconsistent with the rest of this class
        baddies = [a for a in result[item_name] if a[1]["result"] == False]
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        return isvalid, baddies
    14962374
    def remove_global_item_check(self, item_name):
        """Check whether removing ``item_name`` would violate any
        inter-item (global) dictionary constraints.

        Returns ``(isvalid, failures)`` where ``failures`` lists the
        validation results that reported a problem."""
        if item_name in self.fulldic:
            outcome = self.fulldic.run_remove_global_validation(item_name, self, False)
        else:
            outcome = {item_name: []}
        failures = [entry for entry in outcome[item_name] if entry[1]["result"] == False]
        # valid exactly when no test reported a failure
        return (len(failures) == 0), failures
    15072385
     
    15102388        paired_data = loopdata.items()
    15112389        for name,value in paired_data:
    1512             valid,problems = self.single_item_check(name,value) 
     2390            valid,problems = self.single_item_check(name,value)
    15132391            self.report_if_invalid(valid,problems)
    15142392        # loop item checks; merge with current loop
    15152393        found = 0
    15162394        for aloop in self.block["loops"]:
    1517             if aloop.has_key(dataname):
     2395            if dataname in aloop:
    15182396                loopnames = aloop.keys()
    15192397                for new_name in loopdata.keys():
     
    15222400                self.report_if_invalid(valid,problems)
    15232401        prov_dict = loopdata.copy()
    1524         for name,value in paired_data: 
     2402        for name,value in paired_data:
    15252403            del prov_dict[name]   # remove temporarily
    15262404            valid,problems = self.global_item_check(name,value,prov_dict)
     
    15282406            self.report_if_invalid(valid,problems)
    15292407        CifBlock.AddToLoop(self,dataname,loopdata)
    1530  
     2408
    15312409    def AddCifItem(self,data):
    1532         if isinstance(data[0],StringType):   # single item
     2410        if isinstance(data[0],(unicode,str)):   # single item
    15332411            valid,problems = self.single_item_check(data[0],data[1])
    15342412            self.report_if_invalid(valid,problems,data[0])
    15352413            valid,problems = self.global_item_check(data[0],data[1])
    15362414            self.report_if_invalid(valid,problems,data[0])
    1537         elif isinstance(data[0],TupleType) or isinstance(data[0],ListType):
    1538             paired_data = map(None,data[0],data[1])
     2415        elif isinstance(data[0],tuple) or isinstance(data[0],list):
     2416            paired_data = list(zip(data[0],data[1]))
    15392417            for name,value in paired_data:
    1540                 valid,problems = self.single_item_check(name,value) 
     2418                valid,problems = self.single_item_check(name,value)
    15412419                self.report_if_invalid(valid,problems,name)
    15422420            valid,problems = self.loop_item_check(data[0])
     
    15442422            prov_dict = {}            # for storing temporary items
    15452423            for name,value in paired_data: prov_dict[name]=value
    1546             for name,value in paired_data: 
     2424            for name,value in paired_data:
    15472425                del prov_dict[name]   # remove temporarily
    15482426                valid,problems = self.global_item_check(name,value,prov_dict)
    15492427                prov_dict[name] = value  # add back in
    15502428                self.report_if_invalid(valid,problems,name)
    1551         CifBlock.AddCifItem(self,data)
     2429        else:
     2430            raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
     2431        super(ValidCifBlock,self).AddCifItem(data)
     2432
    def AddItem(self, key, value, **kwargs):
        """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
        # run the single-item check first, then the inter-item check,
        # raising (via report_if_invalid) on the first failure
        for checker in (self.single_item_check, self.global_item_check):
            ok, problems = checker(key, value)
            self.report_if_invalid(ok, problems, key)
        super(ValidCifBlock, self).AddItem(key, value, **kwargs)
    15522440
    # utility function
    def report_if_invalid(self, valid, bad_list, data_name="(unknown)"):
        """Raise ValidCifError naming the failed tests when ``valid`` is false.

        ``data_name`` now defaults to a placeholder: several callers in this
        class report loop-level problems with no single offending dataname
        and previously raised TypeError (missing argument) instead of the
        intended ValidCifError."""
        if not valid:
            bad_tests = [a[0] for a in bad_list]
            error_string = ",".join(bad_tests)
            error_string = repr(data_name) + " fails following validity checks: " + error_string
            raise ValidCifError(error_string)
    15592448
     
    15612450        # we don't need to run single item checks; we do need to run loop and
    15622451        # global checks.
    1563         if self.has_key(key):
    1564             try: 
     2452        if key in self:
     2453            try:
    15652454                loop_items = self.GetLoop(key)
    15662455            except TypeError:
    15672456                loop_items = []
    15682457            if loop_items:             #need to check loop conformance
    1569                 loop_names = map(lambda a:a[0],loop_items)
    1570                 loop_names = filter(lambda a:a != key,loop_names)
     2458                loop_names = [a[0] for a in loop_items if a[0] != key]
    15712459                valid,problems = self.loop_item_check(loop_names)
    15722460                self.report_if_invalid(valid,problems)
     
    15772465
    15782466    def report(self):
    1579        import cStringIO
    1580        outstr = cStringIO.StringIO()
     2467       outstr = StringIO()
    15812468       outstr.write( "Validation results\n")
    15822469       outstr.write( "------------------\n")
    1583        print "%d invalid items found\n" % len(self.v_result)
     2470       print("%d invalid items found\n" % len(self.v_result))
    15842471       for item_name,val_func_list in self.v_result.items():
    15852472           outstr.write("%s fails following tests:\n" % item_name)
     
    15902477
    15912478class ValidCifFile(CifFile):
     2479    """A CIF file for which all datablocks are valid.  Argument `dic` to
     2480    initialisation specifies a `CifDic` object to use for validation."""
    15922481    def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs):
    15932482        if not diclist and not dic and not hasattr(self,'bigdic'):
     
    15982487            self.bigdic = dic
    15992488        CifFile.__init__(self,*args,**kwargs)
    1600         #for blockname in self.keys():
    1601     #       self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)
     2489        for blockname in self.keys():
     2490            self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)
    16022491
    16032492    def NewBlock(self,blockname,blockcontents,**kwargs):
     
    16102499
    16112500class ValidationResult:
    1612     """Represents validation result"""
     2501    """Represents validation result. It is initialised with """
    16132502    def __init__(self,results):
    16142503        """results is return value of validate function"""
     
    16322521                valid = True
    16332522        return valid
    1634    
     2523
    16352524    def has_no_match_items(self,block_name=None):
    16362525        """Return true if some items are not found in dictionary"""
     
    16382527            block_names = [block_name]
    16392528        else:
    1640             block_names = self.no_matches.iter_keys() 
     2529            block_names = self.no_matches.iter_keys()
    16412530        for block_name in block_names:
    16422531            if self.no_matches[block_name]:
     
    16462535                has_no_match_items = False
    16472536        return has_no_match_items
    1648    
    1649 
    1650        
    1651 def validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False,fake_mand=True):
    1652     check_file = CifFile(ciffile)
     2537
     2538
     2539
     2540def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
     2541    """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing,
     2542    to the results of merging the `CifDic` objects in `diclist` according to `mergemode`.  Flag
     2543    `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be
     2544    accessed for validation and that mandatory_category should be interpreted differently for DDL2."""
     2545    if not isinstance(ciffile,CifFile):
     2546        check_file = CifFile(ciffile)
     2547    else:
     2548        check_file = ciffile
    16532549    if not dic:
    16542550        fulldic = merge_dic(diclist,mergemode)
     
    16582554    valid_result = {}
    16592555    if isdic:          #assume one block only
    1660         blockname = check_file.keys()[0]
    1661         check_bc = check_file[blockname]["saves"]
    1662         check_globals = check_file[blockname]
     2556        check_file.scoping = 'instance' #only data blocks visible
     2557        top_level = check_file.keys()[0]
     2558        check_file.scoping = 'dictionary'   #all blocks visible
    16632559        # collect a list of parents for speed
    1664         poss_parents = fulldic.get_all("_item_linked.parent_name")
    1665         for parent in poss_parents:
    1666             curr_parent = listify(check_globals.get(parent,[]))
    1667             new_vals = check_bc.get_all(parent)
    1668             new_vals.extend(curr_parent)
    1669             if len(new_vals)>0:
    1670                 check_globals[parent] = new_vals
    1671                 # print "Added %s (len %d)" % (parent,len(check_globals[parent]))
    1672         # next dictionary problem: the main DDL2 dictionary has what
    1673         # I would characterise as a mandatory_category problem, but
    1674         # in order to gloss over it, we allow a different
    1675         # interpretation, which requires only a single check for one
    1676         # block.
    1677         if fake_mand:
    1678             valid_result[blockname] = fulldic.find_prob_cats(check_globals)
    1679             no_matches[blockname] = filter(lambda a:not fulldic.has_key(a),check_globals.keys())
    1680     else:
    1681         check_bc = check_file
    1682         check_globals = CifBlock()   #empty
    1683     for block in check_bc.keys():
    1684         #print "Validating block %s" % block
    1685         no_matches[block] = filter(lambda a:not fulldic.has_key(a),check_bc[block].keys())
     2560        if fulldic.diclang == 'DDL2':
     2561            poss_parents = fulldic.get_all("_item_linked.parent_name")
     2562            for parent in poss_parents:
     2563                curr_parent = listify(check_file.get(parent,[]))
     2564                new_vals = check_file.get_all(parent)
     2565                new_vals.extend(curr_parent)
     2566                if len(new_vals)>0:
     2567                    check_file[parent] = new_vals
     2568                print("Added %s (len %d)" % (parent,len(check_file[parent])))
     2569    # now run the validations
     2570    for block in check_file.keys():
     2571        if isdic and block == top_level:
     2572           block_scope = 'Dictionary'
     2573        elif isdic:
     2574           block_scope = 'Item'
     2575        else:
     2576           block_scope = 'Datablock'
     2577        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
    16862578        # remove non-matching items
    1687         # print "Not matched: " + `no_matches[block]`
     2579        print("Not matched: " + repr(no_matches[block]))
    16882580        for nogood in no_matches[block]:
    1689              del check_bc[block][nogood]
    1690         valid_result[block] = run_data_checks(check_bc[block],fulldic,globals=check_globals,fake_mand=fake_mand)
     2581             del check_file[block][nogood]
     2582        print("Validating block %s, scope %s" % (block,block_scope))
     2583        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    16912584    return valid_result,no_matches
    16922585
    16932586def validate_report(val_result,use_html=False):
    1694     import cStringIO
    16952587    valid_result,no_matches = val_result
    1696     outstr = cStringIO.StringIO()
     2588    outstr = StringIO()
    16972589    if use_html:
    16982590        outstr.write("<h2>Validation results</h2>")
     
    17002592        outstr.write( "Validation results\n")
    17012593        outstr.write( "------------------\n")
    1702     if len(valid_result) > 10: 
     2594    if len(valid_result) > 10:
    17032595        suppress_valid = True         #don't clutter with valid messages
    17042596        if use_html:
     
    17222614                outstr.write(" (note that this does not invalidate the data block):</p>")
    17232615                outstr.write("<p><table>\n")
    1724                 map(lambda it:outstr.write("<tr><td>%s</td></tr>" % it),no_matches[block])
     2616                [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]]
    17252617                outstr.write("</table>\n")
    17262618            else:
    17272619                outstr.write( "\n The following items were not found in the dictionary:\n")
    17282620                outstr.write("Note that this does not invalidate the data block\n")
    1729                 map(lambda it:outstr.write("%s\n" % it),no_matches[block])
     2621                [outstr.write("%s\n" % it) for it in no_matches[block]]
    17302622        # now organise our results by type of error, not data item...
    17312623        error_type_dic = {}
     
    17532645        'validate_loop_key':\
    17542646            "A required dataname for this category is missing from the loop\n containing the dataname",
     2647        'validate_loop_key_ddlm':\
     2648            "A loop key is missing for the category containing the dataname",
    17552649        'validate_loop_references':\
    17562650            "A dataname required by the item is missing from the loop",
     
    17662660            "Both dataname and exclusive alternates or aliases are present in data block",
    17672661        'validate_mandatory_category':\
    1768             "A required category is missing from this block"}
     2662            "A required category is missing from this block",
     2663        'check_mandatory_items':\
     2664            "A required data attribute is missing from this block",
     2665        'check_prohibited_items':\
     2666            "A prohibited data attribute is present in this block"}
    17692667
    17702668        for test_name,test_results in error_type_dic.items():
    17712669           if use_html:
    1772                outstr.write(html_error_report(test_name,info_table[test_name],test_results)) 
     2670               outstr.write(html_error_report(test_name,info_table[test_name],test_results))
    17732671           else:
    1774                outstr.write(error_report(test_name,info_table[test_name],test_results)) 
     2672               outstr.write(error_report(test_name,info_table[test_name],test_results))
    17752673               outstr.write("\n\n")
    17762674    return outstr.getvalue()
    1777          
     2675
    17782676# A function to lay out a single error report.  We are passed
    17792677# the name of the error (one of our validation functions), the
    1780 # explanation to print out, and a dictionary with the error 
     2678# explanation to print out, and a dictionary with the error
    17812679# information.  We print no more than 50 characters of the item
    17822680
     
    17852683   headstring = "%-32s" % "Item name"
    17862684   bodystring = ""
    1787    if error_dics[0].has_key("bad_values"):
     2685   if "bad_values" in error_dics[0]:
    17882686      headstring += "%-20s" % "Bad value(s)"
    1789    if error_dics[0].has_key("bad_items"):
     2687   if "bad_items" in error_dics[0]:
    17902688      headstring += "%-20s" % "Bad dataname(s)"
    1791    if error_dics[0].has_key("child"):
     2689   if "child" in error_dics[0]:
    17922690      headstring += "%-20s" % "Child"
    1793    if error_dics[0].has_key("parent"):
    1794       headstring += "%-20s" % "Parent" 
     2691   if "parent" in error_dics[0]:
     2692      headstring += "%-20s" % "Parent"
    17952693   headstring +="\n"
    17962694   for error in error_dics:
    17972695      bodystring += "\n%-32s" % error["item_name"]
    1798       if error.has_key("bad_values"):
    1799           out_vals = map(lambda a:a[:50],error["bad_values"])
    1800           bodystring += "%-20s" % out_vals 
    1801       if error.has_key("bad_items"):
    1802           bodystring += "%-20s" % error["bad_items"]
    1803       if error.has_key("child"):
    1804           bodystring += "%-20s" % error["child"]
    1805       if error.has_key("parent"):
    1806           bodystring += "%-20s" % error["parent"]
    1807    return retstring + headstring + bodystring 
     2696      if "bad_values" in error:
     2697          out_vals = [repr(a)[:50] for a in error["bad_values"]]
     2698          bodystring += "%-20s" % out_vals
     2699      if "bad_items" in error:
     2700          bodystring += "%-20s" % repr(error["bad_items"])
     2701      if "child" in error:
     2702          bodystring += "%-20s" % repr(error["child"])
     2703      if "parent" in error:
     2704          bodystring += "%-20s" % repr(error["parent"])
     2705   return retstring + headstring + bodystring
    18082706
    18092707#  This lays out an HTML error report
     
    18142712   headstring = "<th>Item name</th>"
    18152713   bodystring = ""
    1816    if error_dics[0].has_key("bad_values"):
     2714   if "bad_values" in error_dics[0]:
    18172715      headstring += "<th>Bad value(s)</th>"
    1818    if error_dics[0].has_key("bad_items"):
     2716   if "bad_items" in error_dics[0]:
    18192717      headstring += "<th>Bad dataname(s)</th>"
    1820    if error_dics[0].has_key("child"):
     2718   if "child" in error_dics[0]:
    18212719      headstring += "<th>Child</th>"
    1822    if error_dics[0].has_key("parent"):
    1823       headstring += "<th>Parent</th>" 
     2720   if "parent" in error_dics[0]:
     2721      headstring += "<th>Parent</th>"
    18242722   headstring +="</tr>\n"
    18252723   for error in error_dics:
    18262724      bodystring += "<tr><td><tt>%s</tt></td>" % error["item_name"]
    1827       if error.has_key("bad_values"):
     2725      if "bad_values" in error:
    18282726          bodystring += "<td>%s</td>" % error["bad_values"]
    1829       if error.has_key("bad_items"):
     2727      if "bad_items" in error:
    18302728          bodystring += "<td><tt>%s</tt></td>" % error["bad_items"]
    1831       if error.has_key("child"):
     2729      if "child" in error:
    18322730          bodystring += "<td><tt>%s</tt></td>" % error["child"]
    1833       if error.has_key("parent"):
     2731      if "parent" in error:
    18342732          bodystring += "<td><tt>%s</tt></td>" % error["parent"]
    18352733      bodystring += "</tr>\n"
    18362734   return retstring + headstring + bodystring + "</table>\n"
    18372735
    1838 def run_data_checks(check_block,fulldic,globals={},fake_mand=False):
     2736def run_data_checks(check_block,fulldic,block_scope='Item'):
    18392737    v_result = {}
    18402738    for key in check_block.keys():
    18412739        update_value(v_result, fulldic.run_item_validation(key,check_block[key]))
    1842         update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block,globals=globals))
    1843     for loop in check_block.loops:
    1844         update_value(v_result, fulldic.run_loop_validation(loop.keys()))
    1845     update_value(v_result,fulldic.run_block_validation(check_block,globals=globals,fake_mand=fake_mand))
     2740        update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block))
     2741    for loopnames in check_block.loops.values():
     2742        update_value(v_result, fulldic.run_loop_validation(loopnames))
     2743    update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope))
    18462744    # return false and list of baddies if anything didn't match
    1847     for test_key in v_result.keys():
    1848         v_result[test_key] = filter(lambda a:a[1]["result"]==False,v_result[test_key])
    1849         if len(v_result[test_key]) == 0:
     2745    all_keys = list(v_result.keys())
     2746    for test_key in all_keys:
     2747        v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False]
     2748        if len(v_result[test_key]) == 0:
    18502749            del v_result[test_key]
    18512750    # if even one false one is found, this should trigger
    1852     # print "Baddies:" + `v_result`
     2751    # print("Baddies: {!r}".format(v_result))
    18532752    isvalid = len(v_result)==0
    18542753    return isvalid,v_result
    1855    
     2754
    18562755
    18572756def get_number_with_esd(numstring):
    18582757    import string
    1859     numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' 
     2758    numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    18602759    our_match = re.match(numb_re,numstring)
    18612760    if our_match:
    18622761        a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
    1863     #    print "Debug: %s -> %s" % (numstring, `our_match.groups()`)
     2762        # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
    18642763    else:
    18652764        return None,None
    18662765    if dot or q: return None,None     #a dot or question mark
    1867     if exp:          #has exponent 
    1868        exp = string.replace(exp,"d","e")     # mop up old fashioned numbers
    1869        exp = string.replace(exp,"D","e")
     2766    if exp:          #has exponent
     2767       exp = exp.replace("d","e")     # mop up old fashioned numbers
     2768       exp = exp.replace("D","e")
    18702769       base_num = base_num + exp
    1871     #print "Debug: have %s for base_num from %s" % (base_num,numstring)
     2770    # print("Debug: have %s for base_num from %s" % (base_num,numstring))
    18722771    base_num = float(base_num)
    18732772    # work out esd, if present.
     
    18812780
    18822781def float_with_esd(inval):
    1883     if isinstance(inval,StringType):
     2782    if isinstance(inval,unicode):
    18842783        j = inval.find("(")
    18852784        if j>=0:  return float(inval[:j])
    18862785    return float(inval)
    1887        
    1888    
    1889                
     2786
     2787
     2788
     2789def convert_type(definition):
     2790    """Convert value to have the type given by definition"""
     2791    #extract the actual required type information
     2792    container = definition['_type.container']
     2793    dimension = definition.get('_type.dimension',StarFile.StarList([]))
     2794    structure = interpret_structure(definition['_type.contents'])
     2795    if container == 'Single':   #a single value to convert
     2796        return convert_single_value(structure)
     2797    elif container == 'List':   #lots of the same value
     2798        return convert_list_values(structure,dimension)
     2799    elif container == 'Multiple': #no idea
     2800        return None
     2801    elif container in ('Array','Matrix'): #numpy array
     2802        return convert_matrix_values(structure)
     2803    return lambda a:a    #unable to convert
     2804
     2805def convert_single_value(type_spec):
     2806    """Convert a single item according to type_spec"""
     2807    if type_spec == 'Real':
     2808        return float_with_esd
     2809    if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
     2810        return int
     2811    if type_spec == 'Complex':
     2812        return complex
     2813    if type_spec == 'Imag':
     2814        return lambda a:complex(0,a)
     2815    if type_spec in ('Code','Name','Tag'):  #case-insensitive -> lowercase
     2816        return lambda a:a.lower()
     2817    return lambda a:a   #can't do anything numeric
     2818
     2819def convert_list_values(structure,dimension):
     2820    """Convert the values according to the element
     2821       structure given in [[structure]]"""
     2822    if isinstance(structure,(unicode,str)):   #simple repetition
     2823        func_def =  "element_convert = convert_single_value('%s')" % structure
     2824    else:
     2825        func_def =       "def element_convert(element):\n"
     2826        func_def +=      "   final_val = []\n"
     2827        for pos_no in range(len(structure)):
     2828            func_def +=  "   final_val.append("
     2829            type_spec = structure[pos_no]
     2830            if type_spec == 'Real':
     2831                cf = "float_with_esd("
     2832            elif type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
     2833                cf = 'int('
     2834            elif type_spec == 'Complex':
     2835                cf = 'complex('
     2836            elif type_spec == 'Imag':
     2837                cf = 'complex(0,'
     2838            elif type_spec in ('Code','Name','Tag'):
     2839                cf = '('
     2840            else: cf = ''
     2841            func_def += cf
     2842            func_def += "element[%d]" % pos_no
     2843            if "(" in cf: func_def +=")"
     2844            if type_spec in ('Code','Name','Tag'):
     2845                func_def +=".lower()"
     2846            func_def +=")\n"  # close append
     2847        func_def +=      "   return final_val\n"
     2848    print(func_def)
     2849    exec(func_def, globals()) #(re)defines element_convert in global namespace
     2850    if len(dimension)> 0 and int(dimension[0]) != 1:
     2851        return lambda a: list(map(element_convert,a))
     2852    else: return element_convert
     2853
     2854def convert_matrix_values(valtype):
     2855    """Convert a dREL String or Float valued List structure to a numpy matrix structure"""
     2856    # first convert to numpy array, then let numpy do the work
     2857    try: import numpy
     2858    except:
     2859        return lambda a:a   #cannot do it
     2860    func_def =     "def matrix_convert(a):\n"
     2861    func_def +=    "    import numpy\n"
     2862    func_def +=    "    p = numpy.array(a)\n"
     2863    if valtype == 'Real':
     2864        func_def+= "    return p.astype('float')\n"
     2865    elif valtype == 'Integer':
     2866        func_def +="    return p.astype('int')\n"
     2867    elif valtype == 'Complex':
     2868        func_def +="    return p.astype('complex')\n"
     2869    else:
     2870        raise ValueError('Unknown matrix value type')
     2871    exec(func_def,globals())  #matrix convert is defined
     2872    return matrix_convert
     2873
     2874def interpret_structure(struc_spec):
     2875    """Interpret a DDLm structure specification"""
     2876    from . import TypeContentsParser as t
     2877    p = t.TypeParser(t.TypeParserScanner(struc_spec))
     2878    return getattr(p,"input")()
     2879
     2880
    18902881# A utility function to append to item values rather than replace them
    18912882def update_value(base_dict,new_items):
    18922883    for new_key in new_items.keys():
    1893         if base_dict.has_key(new_key):
     2884        if new_key in base_dict:
    18942885            base_dict[new_key].extend(new_items[new_key])
    18952886        else:
     
    19022893    opt_range = range(full_length)
    19032894    for i in range(len(base_list[0])):
    1904        new_packet = [] 
     2895       new_packet = []
    19052896       for j in opt_range:
    19062897          new_packet.append(base_list[j][i])
     
    19102901# listify strings - used surprisingly often
    19112902def listify(item):
    1912     if isinstance(item,StringType): return [item]
     2903    if isinstance(item,(unicode,str)): return [item]
    19132904    else: return item
    19142905
    1915 # given a list of search items, return a list of items 
     2906# given a list of search items, return a list of items
    19162907# actually contained in the given data block
    19172908def filter_present(namelist,datablocknames):
    1918     return filter(lambda a:a in datablocknames,namelist)
     2909    return [a for a in namelist if a in datablocknames]
     2910
     2911# Make an item immutable, used if we want a list to be a key
     2912def make_immutable(values):
     2913    """Turn list of StarList values into a list of immutable items"""
     2914    if not isinstance(values[0],StarList):
     2915        return values
     2916    else:
     2917        return [tuple(a) for a in values]
    19192918
    19202919# merge ddl dictionaries.  We should be passed filenames or CifFile
     
    19242923    for dic in diclist:
    19252924        if not isinstance(dic,CifFile) and \
    1926            not isinstance(dic,StringType):
    1927                raise TypeError, "Require list of CifFile names/objects for dictionary merging"
     2925           not isinstance(dic,(unicode,str)):
     2926               raise TypeError("Require list of CifFile names/objects for dictionary merging")
    19282927        if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic))
    19292928        else: dic_as_cif_list.append(dic)
    19302929    # we now merge left to right
    19312930    basedic = dic_as_cif_list[0]
    1932     if basedic.has_key("on_this_dictionary"):   #DDL1 style only
     2931    if "on_this_dictionary" in basedic:   #DDL1 style only
    19332932        for dic in dic_as_cif_list[1:]:
    19342933           basedic.merge(dic,mode=mergemode,match_att=["_name"])
    1935     elif len(basedic.keys()) == 1:                     #One block: DDL2 style
     2934    elif len(basedic.keys()) == 1:                     #One block: DDL2/m style
    19362935        old_block = basedic[basedic.keys()[0]]
    19372936        for dic in dic_as_cif_list[1:]:
     
    19432942
    19442943def find_parent(ddl2_def):
    1945     if not ddl2_def.has_key("_item.name"):
    1946        return None 
    1947     if isinstance(ddl2_def["_item.name"],StringType):
     2944    if "_item.name" not in ddl2_def:
     2945       return None
     2946    if isinstance(ddl2_def["_item.name"],unicode):
    19482947        return ddl2_def["_item.name"]
    1949     if not ddl2_def.has_key("_item_linked.child_name"):
     2948    if "_item_linked.child_name" not in ddl2_def:
    19502949        raise CifError("Asked to find parent in block with no child_names")
    1951     if not ddl2_def.has_key("_item_linked.parent_name"):
     2950    if "_item_linked.parent_name" not in ddl2_def:
    19522951        raise CifError("Asked to find parent in block with no parent_names")
    1953     result = filter(lambda a:a not in ddl2_def["_item_linked.child_name"],ddl2_def["_item.name"])
     2952    result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]])
    19542953    if len(result)>1 or len(result)==0:
    19552954        raise CifError("Unable to find single unique parent data item")
     
    19572956
    19582957
    1959 def ReadCif(filename,strict=1,maxlength=2048,scantype="standard",grammar="1.1"):
    1960     proto_cif = StarFile.ReadStar(filename,maxlength,scantype=scantype,grammar=grammar)
    1961     # convert to CifFile
    1962     proto_cif = CifFile(proto_cif)
    1963     # check for nested loops
    1964     for bname,bvalue in proto_cif.items():
    1965         nests = filter(lambda a:len(a.loops)>0,bvalue.loops)
    1966         if len(nests) > 0:
    1967             raise CifError( "Block %s contains nested loops")
    1968         # check for save frame references (not yet implemented in PySTARRW)
    1969         # check for global blocks (not yet implemented in PySTARRW)
    1970     return proto_cif
    1971 
    1972 
     2958def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF'):
     2959    """ Read in a CIF file, returning a `CifFile` object.
     2960
     2961    * `filename` may be a URL, a file
     2962    path on the local system, or any object with a `read` method.
     2963
     2964    * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1`
     2965    is identical except for the exclusion of square brackets as the first characters in
     2966    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
     2967    read files according to the STAR2 publication.  If grammar is `None`, autodetection
     2968    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for
     2969    properly-formed CIF2.0 files.  Note that only Unicode characters in the basic multilingual
     2970    plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).
     2971
     2972    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
     2973    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
     2974    fast C routines, but is not available for CIF2/STAR2 files.  Note that running PyCIFRW in
     2975    Jython uses native Java regular expressions
     2976    to provide a speedup regardless of this argument (and does not yet support CIF2).
     2977
     2978    * `scoping` is only relevant where nested save frames are expected (STAR2 only).
     2979    `instance` scoping makes nested save frames
     2980    invisible outside their hierarchy, allowing duplicate save frame names in separate
     2981    hierarchies. `dictionary` scoping makes all save frames within a data block visible to each
     2982    other, thereby restricting all save frames to have unique names.
     2983    Currently the only recognised value for `standard` is `CIF`, which when set enforces a
     2984    maximum length of 75 characters for datanames and has no other effect. """
     2985
     2986    finalcif = CifFile(scoping=scoping,standard=standard)
     2987    return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype)
     2988    #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs)
     2989
     2990class CifLoopBlock(StarFile.LoopBlock):
     2991    def __init__(self,data=(),**kwargs):
     2992        super(CifLoopBlock,self).__init__(data,**kwargs)
     2993
     2994#No documentation flags
     2995
  • trunk/CifFile/StarFile.py

    r469 r3137  
     1# To maximize python3/python2 compatibility
     2from __future__ import print_function
     3from __future__ import unicode_literals
     4from __future__ import division
     5from __future__ import absolute_import
     6
     7__copyright = """
     8PYCIFRW License Agreement (Python License, Version 2)
     9-----------------------------------------------------
     10
     111. This LICENSE AGREEMENT is between the Australian Nuclear Science
     12and Technology Organisation ("ANSTO"), and the Individual or
     13Organization ("Licensee") accessing and otherwise using this software
     14("PyCIFRW") in source or binary form and its associated documentation.
     15
     162. Subject to the terms and conditions of this License Agreement,
     17ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
     18license to reproduce, analyze, test, perform and/or display publicly,
     19prepare derivative works, distribute, and otherwise use PyCIFRW alone
     20or in any derivative version, provided, however, that this License
     21Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
     222001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
     23in any derivative version prepared by Licensee.
     24
     253. In the event Licensee prepares a derivative work that is based on
     26or incorporates PyCIFRW or any part thereof, and wants to make the
     27derivative work available to others as provided herein, then Licensee
     28hereby agrees to include in any such work a brief summary of the
     29changes made to PyCIFRW.
     30
     314. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
     32basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
     33IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
     34DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
     35FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
     36INFRINGE ANY THIRD PARTY RIGHTS.
     37
     385. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
     39FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
     40RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
     41DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
     42
     436. This License Agreement will automatically terminate upon a material
     44breach of its terms and conditions.
     45
     467. Nothing in this License Agreement shall be deemed to create any
     47relationship of agency, partnership, or joint venture between ANSTO
     48and Licensee. This License Agreement does not grant permission to use
     49ANSTO trademarks or trade name in a trademark sense to endorse or
     50promote products or services of Licensee, or any third party.
     51
     528. By copying, installing or otherwise using PyCIFRW, Licensee agrees
     53to be bound by the terms and conditions of this License Agreement.
     54
    155"""
    2 1.This Software copyright \u00A9 Australian Synchrotron Research Program Inc, ("ASRP").
    3 
    4 2.Subject to ensuring that this copyright notice and licence terms
    5 appear on all copies and all modified versions, of PyCIFRW computer
    6 code ("this Software"), a royalty-free non-exclusive licence is hereby
    7 given (i) to use, copy and modify this Software including the use of
    8 reasonable portions of it in other software and (ii) to publish,
    9 bundle and otherwise re-distribute this Software or modified versions
    10 of this Software to third parties, provided that this copyright notice
    11 and terms are clearly shown as applying to all parts of software
    12 derived from this Software on each occasion it is published, bundled
    13 or re-distributed.  You are encouraged to communicate useful
    14 modifications to ASRP for inclusion for future versions.
    15 
    16 3.No part of this Software may be sold as a standalone package.
    17 
    18 4.If any part of this Software is bundled with Software that is sold,
    19 a free copy of the relevant version of this Software must be made
    20 available through the same distribution channel (be that web server,
    21 tape, CD or otherwise).
    22 
    23 5.It is a term of exercise of any of the above royalty free licence
    24 rights that ASRP gives no warranty, undertaking or representation
    25 whatsoever whether express or implied by statute, common law, custom
    26 or otherwise, in respect of this Software or any part of it.  Without
    27 limiting the generality of the preceding sentence, ASRP will not be
    28 liable for any injury, loss or damage (including consequential loss or
    29 damage) or other loss, loss of profits, costs, charges or expenses
    30 however caused which may be suffered, incurred or arise directly or
    31 indirectly in respect of this Software.
    32 
    33 6. This Software is not licenced for use in medical applications.
    34 """
    35 
    36 from types import *
    37 from urllib import *         # for arbitrary opening
    38 import re
     56
     57
     58# Python 2,3 compatibility
     59try:
     60    from urllib import urlopen         # for arbitrary opening
     61    from urlparse import urlparse, urlunparse
     62except:
     63    from urllib.request import urlopen
     64    from urllib.parse import urlparse,urlunparse
     65import re,os
    3966import copy
     67import textwrap
     68
     69try:
     70    from StringIO import StringIO #not cStringIO as we cannot subclass
     71except ImportError:
     72    from io import StringIO
     73
     74if isinstance(u"abc",str):   #Python 3
     75    unicode = str
     76   
     77try:
     78    import numpy
     79    have_numpy = True
     80except ImportError:
     81    have_numpy = False
     82
    4083class StarList(list):
    41     pass
    42 
    43 # Because DDLm makes a tuple from a tuple...
    44 class StarTuple(tuple):
    45     def __new__(cls,*arglist):
    46         return tuple.__new__(cls,arglist)
     84    def __getitem__(self,args):
     85        if isinstance(args,(int,slice)):
     86            return super(StarList,self).__getitem__(args)
     87        elif isinstance(args,tuple) and len(args)>1:   #extended comma notation
     88            return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:])
     89        else:
     90            return super(StarList,self).__getitem__(args[0])
     91
     92    def __str__(self):
     93        return "SL("+super(StarList,self).__str__() + ")"
    4794
    4895class StarDict(dict):
    4996    pass
    5097
    51 class LoopBlock:
    52     def __init__(self,data = (), dimension = 0, maxoutlength=2048, wraplength=80, overwrite=True):
    53         # print 'Creating new loop block, dimension %d' % dimension
    54         self.block = {}
    55         self.loops = []
    56         self.no_packets = 0
    57         self.item_order = []
    58         self.lower_keys = []    #for efficiency
    59         self.comment_list = {}
    60         self.dimension = dimension
    61         self.popout = False         #used during load iteration
    62         self.curitem = -1           #used during iteration
    63         self.maxoutlength = maxoutlength
    64         self.wraplength = wraplength
    65         self.overwrite = overwrite
    66         if not hasattr(self,'loopclass'):  #in case are derived class
    67             self.loopclass = LoopBlock  #when making new loops
    68         self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
    69         if isinstance(data,(TupleType,ListType)):
    70             for item in data:
    71                 self.AddLoopItem(item)
    72         elif isinstance(data,LoopBlock):
    73             self.block = data.block.copy()
    74             self.item_order = data.item_order[:]
    75             self.lower_keys = data.lower_keys[:]
    76             self.comment_list = data.comment_list.copy()
    77             self.dimension = data.dimension
    78             # loops as well; change loop class
    79             for loopno in range(len(data.loops)):
    80                 try:
    81                     placeholder = self.item_order.index(data.loops[loopno])
    82                 except ValueError:
    83                     print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`data.loops[loopno]`,str(data.loops[loopno]),`self.item_order`)
    84                     placeholder = -1
    85                 self.item_order.remove(data.loops[loopno])   #gone
    86                 newobject = self.loopclass(data.loops[loopno])
    87                 # print "Recasting and adding loop %s -> %s" % (`data.loops[loopno]`,`newobject`)
    88                 self.insert_loop(newobject,position=placeholder)
    89 
    90     def __str__(self):
    91         return self.printsection()
    92 
    93     def __setitem__(self,key,value):
    94         # catch a one member loop, for convenience
    95         # we assume the key is a string value only
    96         self.AddLoopItem((key,value))
    97 
    98     def __getitem__(self,key):
    99         if isinstance(key,IntType):   #return a packet!!
    100             return self.GetPacket(key)       
    101         return self.GetLoopItem(key)
    102 
    103     def __delitem__(self,key):
    104         self.RemoveLoopItem(key)
    105 
    106     def __len__(self):
    107         blen = len(self.block)
    108         for aloop in self.loops:
    109             # print 'Aloop is %s' % `aloop`
    110             blen = blen + len(aloop)  # also a LoopBlock
    111         return blen   
    112 
    113     def __nonzero__(self):
    114         if self.__len__() > 0: return 1
    115         return 0
    116 
    117     # keys returns all internal keys
     98
     99class LoopBlock(object):
     100    def __init__(self,parent_block,dataname):
     101        self.loop_no = parent_block.FindLoop(dataname)
     102        if self.loop_no < 0:
     103            raise KeyError('%s is not in a loop structure' % dataname)
     104        self.parent_block = parent_block
     105
    118106    def keys(self):
    119         thesekeys = self.block.keys()
    120         for aloop in self.loops:
    121             thesekeys.extend(aloop.keys())
    122         return thesekeys
     107        return self.parent_block.loops[self.loop_no]
    123108
    124109    def values(self):
    125         ourkeys = self.keys()
    126         return map(lambda a:self[a],ourkeys)
    127 
     110        return [self.parent_block[a] for a in self.keys()]
     111
     112    #Avoid iterator even though that is Python3-esque
    128113    def items(self):
    129         ourkeys = self.keys()
    130         return map(lambda a,b:(a,b),self.keys(),self.values())
     114        return list(zip(self.keys(),self.values()))
     115
     116    def __getitem__(self,dataname):
     117        if isinstance(dataname,int):   #a packet request
     118            return self.GetPacket(dataname)
     119        if dataname in self.keys():
     120            return self.parent_block[dataname]
     121        else:
     122            raise KeyError('%s not in loop block' % dataname)
     123
     124    def __setitem__(self,dataname,value):
     125        self.parent_block[dataname] = value
     126        self.parent_block.AddLoopName(self.keys()[0],dataname)
     127
     128    def __contains__(self,key):
     129        return key in self.parent_block.loops[self.loop_no]
    131130
    132131    def has_key(self,key):
    133         if key.lower() in self.lower_keys:
    134             return 1
    135         for aloop in self.loops:
    136             if aloop.has_key(key): return 1
    137         return 0
    138 
    139     def get(self,key,default=None):
    140         if self.has_key(key):
    141             retval = self.GetLoopItem(key)
    142         else:
    143             retval = default
    144         return retval
    145 
    146     def clear(self):
    147         self.block = {}
    148         self.loops = []
    149         self.item_order = []
    150         self.lower_keys = []
    151         self.no_packets = 0
    152 
    153     # doesn't appear to work
    154     def copy(self):
    155         newcopy = self.copy.im_class(dimension = self.dimension)
    156         newcopy.block = self.block.copy()
    157         newcopy.loops = []
    158         newcopy.no_packets = self.no_packets
    159         newcopy.item_order = self.item_order[:]
    160         newcopy.lower_keys = self.lower_keys[:]
    161         for loop in self.loops:
    162             try:
    163                 placeholder = self.item_order.index(loop)
    164             except ValueError:
    165                 print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`loop`,str(loop),`self.item_order`)
    166                 placeholder = -1
    167             newcopy.item_order.remove(loop)   #gone
    168             newobject = loop.copy()
    169             # print "Adding loop %s -> %s" % (`loop`,`newobject`)
    170             newcopy.insert_loop(newobject,position=placeholder)
    171         return newcopy
    172 
    173     # this is not appropriate for subloops.  Instead, the loop block
    174     # should be accessed directly for update
    175      
    176     def update(self,adict):
    177         for key in adict.keys():
    178             self.AddLoopItem((key,adict[key]))
     132        return key in self
     133
     134    def __iter__(self):
     135        packet_list = zip(*self.values())
     136        names = self.keys()
     137        for p in packet_list:
     138            r = StarPacket(p)
     139            for n in range(len(names)):
     140                setattr(r,names[n].lower(),r[n])
     141            yield r
     142
     143    # for compatibility
     144    def __getattr__(self,attname):
     145        return getattr(self.parent_block,attname)
    179146
    180147    def load_iter(self,coords=[]):
    181         count = 0        #to create packet index 
     148        count = 0        #to create packet index
    182149        while not self.popout:
    183150            # ok, we have a new packet:  append a list to our subloops
     
    217184            for iname in self.keys():  #includes lower levels
    218185                target_list = self[iname]
    219                 for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 
     186                for i in range(3,self.dimension): #dim 2 upwards are lists of lists of...
    220187                    target_list = target_list[-1]
    221188                target_list.append([])
     
    232199                drill_values=drill_values[0]   #drill in
    233200            else:
    234                 raise StarError("Malformed loop packet %s" % `top_items[0]`)
    235         my_length = len(drill_values)
     201                raise StarError("Malformed loop packet %s" % repr( top_items[0] ))
     202        my_length = len(drill_values[0])       #length of 'string' entry
    236203        if self.dimension == 0:                #top level
    237204            for aloop in self.loops:
    238205                for apacket in aloop.recursive_iter():
    239                     # print "Recursive yielding %s" % `dict(top_items + apacket.items())`
     206                    # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) )
    240207                    prep_yield = StarPacket(top_values+apacket.values())  #straight list
    241208                    for name,value in top_items + apacket.items():
     
    246213                kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys())
    247214                kvvals = map(lambda a:a[1],kvpairs)   #just values
    248                 # print "Recursive kvpairs at %d: %s" % (i,`kvpairs`)
     215                # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs ))
    249216                if self.loops:
    250217                  for aloop in self.loops:
    251218                    for apacket in aloop.recursive_iter(coord=coord+[i]):
    252                         # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`
     219                        # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) )
    253220                        prep_yield = StarPacket(kvvals+apacket.values())
    254221                        for name,value in kvpairs + apacket.items():
     
    256223                        yield prep_yield
    257224                else:           # we're at the bottom of the tree
    258                     # print "Recursive yielding %s" % `dict(kvpairs)`
     225                    # print "Recursive yielding %s" % repr( dict(kvpairs) )
    259226                    prep_yield = StarPacket(kvvals)
    260227                    for name,value in kvpairs:
     
    262229                    yield prep_yield
    263230
    264     # small function to use the coordinates. 
     231    # small function to use the coordinates.
    265232    def coord_to_group(self,dataname,coords):
    266           if not isinstance(dataname,StringType):
     233          if not isinstance(dataname,unicode):
    267234             return dataname     # flag inner loop processing
    268235          newm = self[dataname]          # newm must be a list or tuple
    269236          for c in coords:
    270               # print "Coord_to_group: %s ->" % (`newm`),
     237              # print "Coord_to_group: %s ->" % (repr( newm )),
    271238              newm = newm[c]
    272               # print `newm`
    273           return newm 
     239              # print repr( newm )
     240          return newm
    274241
    275242    def flat_iterator(self):
    276         if self.dimension == 0:   
    277             yield copy.copy(self)
    278         else:
    279243            my_length = 0
    280244            top_keys = self.block.keys()
     
    283247            for pack_no in range(my_length):
    284248                yield(self.collapse(pack_no))
    285            
    286 
    287     def insert_loop(self,newloop,position=-1,audit=True):
    288         # check that new loop is kosher
    289         if newloop.dimension != self.dimension + 1:
    290             raise StarError( 'Insertion of loop of wrong nesting level %d, should be %d' % (newloop.dimension, self.dimension+1))
    291         self.loops.append(newloop)
    292         if audit:
    293             dupes = self.audit()
    294             if dupes:
    295                 dupenames = map(lambda a:a[0],dupes)
    296                 raise StarError( 'Duplicate names: %s' % `dupenames`)
    297         if position >= 0:
    298             self.item_order.insert(position,newloop)
    299         else:
    300             self.item_order.append(newloop)
    301         # print "Insert loop: item_order now" + `self.item_order`
    302 
    303     def remove_loop(self,oldloop):
    304         # print "Removing %s: item_order %s" % (`oldloop`,self.item_order)
    305         # print "Length %d" % len(oldloop)
    306         self.item_order.remove(oldloop)
    307         self.loops.remove(oldloop)
    308      
    309     def AddComment(self,itemname,comment):
    310         self.comment_list[itemname.lower()] = comment
    311 
    312     def RemoveComment(self,itemname):
    313         del self.comment_list[itemname.lower()]
    314 
    315     def GetLoopItem(self,itemname):
    316         # assume case is correct first
    317         try:
    318             return self.block[itemname]
    319         except KeyError:
    320             for loop in self.loops:
    321                 try:
    322                     return loop[itemname]
    323                 except KeyError:
    324                     pass
    325         if itemname.lower() not in self.lower_keys:
    326             raise KeyError, 'Item %s not in block' % itemname
    327         # it is there somewhere, now we need to find it
    328         real_keys = self.block.keys()
    329         lower_keys = map(lambda a:a.lower(),self.block.keys())
    330         try:
    331             k_index = lower_keys.index(itemname.lower())
    332         except ValueError:
    333             raise KeyError, 'Item %s not in block' % itemname
    334         return self.block[real_keys[k_index]]
     249
     250
     251    def RemoveItem(self,itemname):
     252        """Remove `itemname` from the block."""
     253        # first check any loops
     254        loop_no = self.FindLoop(itemname)
     255        testkey = itemname.lower()
     256        if testkey in self:
     257            del self.block[testkey]
     258            del self.true_case[testkey]
     259            # now remove from loop
     260            if loop_no >= 0:
     261                self.loops[loop_no].remove(testkey)
     262                if len(self.loops[loop_no])==0:
     263                    del self.loops[loop_no]
     264                    self.item_order.remove(loop_no)
     265            else:  #will appear in order list
     266                self.item_order.remove(testkey)
    335267
    336268    def RemoveLoopItem(self,itemname):
    337         if self.has_key(itemname):
    338             testkey = itemname.lower()
    339             real_keys = self.block.keys()
    340             lower_keys = map(lambda a:a.lower(),real_keys)
    341             try:
    342                 k_index = lower_keys.index(testkey)
    343             except ValueError:    #must be in a lower loop
    344                 for aloop in self.loops:
    345                     if aloop.has_key(itemname):
    346                         # print "Deleting %s (%s)" % (itemname,aloop[itemname])
    347                         del aloop[itemname]
    348                         if len(aloop)==0:  # all gone
    349                            self.remove_loop(aloop)
    350                         break
    351             else:
    352               del self.block[real_keys[k_index]]
    353               self.lower_keys.remove(testkey)
    354               # now remove the key in the order list
    355               for i in range(len(self.item_order)):
    356                 if isinstance(self.item_order[i],StringType): #may be loop
    357                     if self.item_order[i].lower()==testkey:
    358                         del self.item_order[i]
    359                         break
    360             if len(self.block)==0:    #no items in loop, length -> 0
    361                 self.no_packets = 0
    362             return        #no duplicates, no more checking needed
    363 
    364     def AddLoopItem(self,data,precheck=False,maxlength=-1):
    365         # print "Received data %s" % `data`
    366         # we accept only tuples, strings and lists!!
    367         if isinstance(data[0],(TupleType,ListType)):
    368            # internal loop
    369            # first we remove any occurences of these datanames in
    370            # other loops
    371            for one_item in data[0]:
    372                if self.has_key(one_item):
    373                    if not self.overwrite:
    374                        raise StarError( 'Attempt to insert duplicate item name %s' % data[0])
    375                    else:
    376                        del self[one_item]
    377            newloop = self.loopclass(dimension = self.dimension+1)
    378            keyvals = zip(data[0],data[1])
    379            for key,val in keyvals:
    380                newloop.AddLoopItem((key,val))
    381            self.insert_loop(newloop)
    382         elif not isinstance(data[0],StringType):
    383                   raise TypeError, 'Star datanames are strings only (got %s)' % `data[0]`
    384         else:
    385            if data[1] == [] or get_dim(data[1])[0] == self.dimension:
    386                if not precheck:
    387                    self.check_data_name(data[0],maxlength)    # make sure no nasty characters   
    388                # check that we can replace data
    389                if not self.overwrite:
    390                    if self.has_key(data[0]):
    391                        raise StarError( 'Attempt to insert duplicate item name %s' % data[0])
    392                # now make sure the data is OK type
    393                regval = self.regularise_data(data[1])
    394                if not precheck:
    395                    try:
    396                        self.check_item_value(regval)
    397                    except StarError, errmes:
    398                        raise StarError( "Item name " + data[0] + " " + `errmes`)
    399                if self.dimension > 0:
    400                    if self.no_packets <= 0:
    401                        self.no_packets = len(data[1])  #first item in this loop
    402                    if len(data[1]) != self.no_packets:
    403                        raise StarLengthError, 'Not enough values supplied for %s' % (data[0])
    404                try:
    405                    oldpos = self.GetItemPosition(data[0])
    406                except ValueError:
    407                    oldpos = len(self.item_order)#end of list
    408                self.RemoveLoopItem(data[0])     # may be different case, so have to do this
    409                self.block.update({data[0]:regval})  # trust the data is OK
    410                self.lower_keys.insert(oldpos,data[0].lower())
    411                self.item_order.insert(oldpos,data[0])
    412                #    self.lower_keys.append(data[0].lower())
    413                #    self.item_order.append(data[0])
    414                
    415            else:            #dimension mismatch
    416                raise StarLengthError, "input data dim %d != required dim %d: %s %s" % (get_dim(data[1])[0],self.dimension,data[0],`data[1]`)
    417 
    def check_data_name(self,dataname,maxlength=-1):
        """Validate `dataname` as a STAR dataname.

        Raises StarError if the name exceeds `maxlength` (when maxlength > 0),
        does not begin with an underscore, or contains characters outside the
        printable non-blank ASCII range (code points 33-126)."""
        if maxlength > 0:
            if len(dataname)>maxlength:
                raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        # build a real list: bare filter() is a lazy iterator on Python 3
        # and len() would raise TypeError there
        if len([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
    426 
    def check_item_value(self,item):
        """Check that `item` (a single value or a list/tuple of values)
        contains only characters allowed by self.char_check.

        Raises StarError on the first offending value."""
        test_item = item
        if type(item) != TupleType and type(item) != ListType:
           test_item = [item]         #single item list
        def check_one (it):
            if type(it) == StringType:
                if it=='': return
                me = self.char_check.match(it)
                if not me:
                    raise StarError( 'Bad character in %s' % it)
                else:
                    if me.span() != (0,len(it)):
                        raise StarError('Data item "' + it + '"... contains forbidden characters')
        # iterate explicitly: map() is lazy on Python 3, so the original
        # map(check_one, test_item) would silently skip every check there
        for single_value in test_item:
            check_one(single_value)
    441 
    def regularise_data(self,dataitem):
        """Return `dataitem` coerced to a storable form: scalars, strings,
        tuples/lists and the Star* wrapper types pass through unchanged;
        any other iterable is converted to a plain list.

        Raises StarError for non-iterable types.

        NOTE(review): uses Python-2-only `except E, v` syntax and the
        `types`-module constants removed in Python 3; this revision cannot
        run under Python 3."""
        alrighttypes = [IntType, LongType,
                        FloatType, StringType]
        okmappingtypes = [TupleType, ListType]
        thistype = type(dataitem)
        if thistype in alrighttypes or thistype in okmappingtypes:
            return dataitem
        if isinstance(dataitem,StarTuple) or \
           isinstance(dataitem,StarList) or \
           isinstance(dataitem,StarDict):
            return dataitem
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError, value:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        return regval
    459        
     269        """*Deprecated*. Use `RemoveItem` instead"""
     270        self.RemoveItem(itemname)
     271
    # Changeset note: two revisions of GetLoop are interleaved below.
    def GetLoop(self,keyname):
        # -- r469 (removed): recursive search through nested loop blocks --
        if keyname in self.block:        #python 2.2 or above
            return self
        for aloop in self.loops:
            try:
                return aloop.GetLoop(keyname)
            except KeyError:
                pass
        raise KeyError, 'Item %s does not exist' % keyname
        # -- r3137 (added): loops are now flat; delegate to LoopBlock --
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        return LoopBlock(self,keyname)
    469276
    def GetPacket(self,index):
        """Return a StarPacket holding the values of every looped dataname
        at row `index`.  (Two changeset revisions interleaved below.)"""
        thispack = StarPacket([])
        # -- r469 (removed): walk item_order, recursing into nested loops --
        for myitem in self.item_order:
            if isinstance(myitem,LoopBlock):
                pack_list = map(lambda b:myitem[b][index],myitem.item_order)
                # print 'Pack_list -> %s' % `pack_list`
                thispack.append(pack_list)
            elif self.dimension==0:
                thispack.append(self[myitem])
            else:
                thispack.append(self[myitem][index])
                setattr(thispack,myitem,thispack[-1])
        return thispack
        # -- r3137 (added): read datanames from the parent block's flat loop --
        for myitem in self.parent_block.loops[self.loop_no]:
            thispack.append(self[myitem][index])
            setattr(thispack,myitem,thispack[-1])
        return thispack
    483283
    def AddPacket(self,packet):
        """Append the values carried by `packet` (one attribute per dataname)
        to every column of this loop (r469 revision; py2 raise syntax)."""
        if self.dimension==0:
            raise StarError,"Attempt to add packet to top level block"
        for myitem in self.item_order:
            self[myitem] = list(self[myitem])   #in case we have stored a tuple
            self[myitem].append(packet.__getattribute__(myitem))
        self.no_packets +=1
            # print "%s now %s" % (myitem,`self[myitem]`)
    492        
    def RemoveKeyedPacket(self,keyname,keyvalue):
        """Delete the packet in which dataname `keyname` takes the value
        `keyvalue`, dropping one entry from every column of that loop."""
        target_row = list(self[keyname]).index(keyvalue)
        target_loop = self.GetLoop(keyname)
        for dataname in target_loop.item_order:
            # re-store as a list first (the column may be a tuple), then
            # remove the row in place
            target_loop[dataname] = list(target_loop[dataname])
            del target_loop[dataname][target_row]
        self.no_packets -= 1
    500        
    def GetKeyedPacket(self,keyname,keyvalue):
        """Return the single packet for which dataname `keyname` has value
        `keyvalue`.  Raises KeyError unless exactly one packet matches."""
        # list comprehension instead of filter(): filter() is lazy on
        # Python 3 and len() fails; py2-only raise syntax also updated
        one_pack = [a for a in self if getattr(a,keyname) == keyvalue]
        if len(one_pack) != 1:
            raise KeyError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
        return one_pack[0]
        # -- r3137 (added): replacement AddPacket body, appending the
        # packet's attribute values to each looped dataname stored in the
        # parent block (def line is fused with the old revision above) --
        for myitem in self.parent_block.loops[self.loop_no]:
            old_values = self.parent_block[myitem]
            old_values.append(packet.__getattribute__(myitem))
            self.parent_block[myitem] = old_values
    508289
    def GetItemOrder(self):
        # -- r469 (removed): copy of this block's own ordering --
        return self.item_order[:]
        # -- r3137 (added): order comes from the parent block's loop --
        """Return a list of datanames in this `LoopBlock` in the order that they will be
        printed"""
        return self.parent_block.loops[self.loop_no][:]


    # NOTE(review): r3137 adds this identical definition a second time;
    # the duplicate silently rebinds the method and should be removed.
    def GetItemOrder(self):
        """Return a list of datanames in this `LoopBlock` in the order that they will be
        printed"""
        return self.parent_block.loops[self.loop_no][:]
    511300
    def ChangeItemOrder(self,itemname,newpos):
        # -- r469 (removed): operate on this block's own item_order --
        testpos = self.GetItemPosition(itemname)
        del self.item_order[testpos]
        # so we have an object ready for action
        self.item_order.insert(newpos,itemname)
        # -- r3137 (added): operate on the owning loop in the parent block --
        """Change the position at which `itemname` appears when printing out to `newpos`."""
        self.parent_block.loops[self.loop_no].remove(itemname.lower())
        self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())
    517305
    def GetItemPosition(self,itemname):
        # -- r3137 docstring --
        """A utility function to get the numerical order in the printout
        of `itemname`.  An item has coordinate `(loop_no,pos)` with
        the top level having a `loop_no` of -1.  If an integer is passed to
        the routine then it will return the position of the loop
        referenced by that number."""
        import string
        # -- r469 (removed) body: case-insensitive search of item_order.
        # NOTE(review): string.lower() was removed in Python 3 --
        def low_case(item):
            try:
                return string.lower(item)
            except AttributeError:
                return item
        try:
            testname = string.lower(itemname)
        except AttributeError:
            testname = itemname
        lowcase_order = map(low_case,self.item_order)
        return lowcase_order.index(testname)
    531 
    def collapse(self,packet_no):
        """Return a new LoopBlock, one dimension lower, holding only the
        values belonging to packet number `packet_no`."""
        if self.dimension == 0:
            raise StarError( "Attempt to select non-existent packet")
        reduced = LoopBlock(dimension=self.dimension-1)
        for entry in self.item_order:
            if not isinstance(entry,LoopBlock):
                reduced[entry] = self[entry][packet_no]
            else:
                # nested loops collapse recursively
                reduced.insert_loop(entry.collapse(packet_no))
        return reduced
    543        
    def audit(self):
        """Return a list of (dataname, count) pairs for datanames appearing
        more than once in this block; an empty list if all are unique."""
        # the 'sets' module was removed in Python 3; the builtin set
        # (available since 2.4) is a drop-in replacement here
        allkeys = self.keys()
        uniquenames = set(allkeys)
        if len(uniquenames) == len(allkeys): return []
        # build real lists: map()/filter() are lazy iterators on Python 3
        keycount = [(a,allkeys.count(a)) for a in uniquenames]
        return [a for a in keycount if a[1]>1]
    552        
        # -- r3137 (added): replacement GetItemPosition body; returns a
        # (loop_no, position) coordinate instead of a flat index --
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not itemname in self:
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos
     324
    def GetLoopNames(self,keyname):
        # -- r469 (removed): recursive search through nested loops; the
        # final raise was updated to call syntax in r3137 --
        if keyname in self:
            return self.keys()
        for aloop in self.loops:
            try:
                return aloop.GetLoopNames(keyname)
            except KeyError:
                pass
        raise KeyError('Item does not exist')

    # NOTE(review): r3137 defines GetLoopNames a second time; this later
    # definition rebinds the name and is the one that takes effect.
    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)
    562342
    563343    def AddToLoop(self,dataname,loopdata):
     
    566346            thisloop[itemname] = itemvalue
    567347
    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname"""
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        # store lower-case names: loop lists hold lower-case datanames
        # everywhere else (see CreateLoop / RemoveItem), and update()
        # also lower-cases the keys it stores
        self.loops[thisloop] += [k.lower() for k in loopdata.keys()]
     363
     364
class StarBlock(object):
    """A single STAR data block: a mapping from datanames to values plus
    loop structure, printout ordering and formatting state."""
    def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
                 characterset='ascii',maxnamelength=-1):
        """Create a block.  `data` may be a sequence of (dataname, value)
        items or another StarBlock to (shallow-)copy."""
        self.block = {}    #the actual data storage (lower case keys)
        self.loops = {}    #each loop is indexed by a number and contains a list of datanames
        self.item_order = []  #lower case, loops referenced by integer
        self.formatting_hints = {}  #per-dataname output formatting overrides
        self.true_case = {} #transform lower case to supplied case
        self.provide_value = False  #prefer string version always
        self.dictionary = None      #DDLm dictionary
        self.popout = False         #used during load iteration
        self.curitem = -1           #used during iteration
        self.cache_vals = True      #store all calculated values
        self.maxoutlength = maxoutlength
        self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
        self.set_characterset(characterset)   #to check input names
        self.wraplength = wraplength
        self.overwrite = overwrite
        self.string_delimiters = ["'",'"',"\n;"]   #universal CIF set
        self.list_delimiter = "  "                 #CIF2 default
        self.wrapper = textwrap.TextWrapper()
        if isinstance(data,(tuple,list)):
            # sequence of (dataname, value) items
            for item in data:
                self.AddLoopItem(item)
        elif isinstance(data,StarBlock):
            # shallow copy of another block's storage and ordering
            self.block = data.block.copy()
            self.item_order = data.item_order[:]
            self.true_case = data.true_case.copy()
            # loops as well
            self.loops = data.loops.copy()
    def setmaxnamelength(self,maxlength):
        """Set the maximum allowable dataname length (-1 for no check)"""
        self.maxnamelength = maxlength
        if maxlength <= 0:
            return
        # enforce the new limit against names already stored
        too_long = [name for name in self.keys() if len(name) > maxlength]
        if too_long:
            raise StarError('Datanames too long: ' + repr( too_long ))
     403
    def set_characterset(self,characterset):
        """Set the characterset for checking datanames: may be `ascii` or `unicode`.

        Compiles self.char_check, the regular expression used to validate
        data values.  Raises ValueError for any other characterset (the
        previous behaviour silently left char_check stale or unset)."""
        import sys
        self.characterset = characterset
        if characterset == 'ascii':
            self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
        elif characterset == 'unicode':
            # narrow builds (sys.maxunicode == 0xFFFF) cannot express
            # supplementary-plane ranges inside a character class
            if sys.maxunicode < 1114111:
               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
            else:
               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)
        else:
            raise ValueError('characterset must be "ascii" or "unicode", got %s' % repr(characterset))
     415
    def __str__(self):
        """Render the block in STAR/CIF format via printsection()."""
        return self.printsection()

    def __setitem__(self,key,value):
        """Set dataname `key` to `value` (delegates to AddItem).  Assigning
        to the reserved key "saves" is an error."""
        if key == "saves":
            raise StarError("""Setting the saves key is deprecated. Add the save block to
    an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
        self.AddItem(key,value)
     424
    def __getitem__(self,key):
        """Return the value of dataname `key`.

        If a DDLm dictionary is attached, a missing or all-'?' value may be
        derived (and optionally cached), and stored strings may be converted
        to typed values depending on self.provide_value."""
        if key == "saves":
            raise StarError("""The saves key is deprecated. Access the save block from
    the enclosing block collection (e.g. CIF or STAR file object)""")
        try:
           rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
           if self.dictionary:
               # send the dictionary the required key and a pointer to us
               try:
                   new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False)
               except StarDerivationFailure:   #try now with defaults included
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True)
                   except StarDerivationFailure as s:
                       print("In StarBlock.__getitem__, " + repr(s))
                       raise KeyError('No such item: %s' % key)
               print('Set %s to derived value %s' % (key, repr(new_value)))
               return new_value
           else:
               raise KeyError('No such item: %s' % key)
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not key in self.dictionary: return rawitem
        print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem )))
        if is_value:
            if self.provide_value: return rawitem
            else:
               print('Turning %s into string' % repr( rawitem ))
               return self.convert_to_string(key)
        else:    # a string
            if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \
                                      (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem)):
                return self.dictionary.change_type(key,rawitem)
            elif self.provide_value: # catch the question marks
                do_calculate = False
                if isinstance(rawitem,(list,tuple)):
                    known = [a for a in rawitem if a != '?']
                    if len(known) == 0:   #all questions
                        do_calculate = True
                elif rawitem == '?':
                        do_calculate = True
                if do_calculate:
                   # remove old value
                   del self[key]
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False)
                   except StarDerivationFailure as s:
                       try:
                           new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True)
                       except StarDerivationFailure as s:
                           # bug fix: the original concatenated repr(s) onto a
                           # literal '%s' instead of formatting the message
                           print("Could not turn %s into a value: %s" % (key, repr(s)))
                           return rawitem
                   else:
                       print('Set %s to derived value %s' % (key, repr( new_value )))
                       return new_value
            return rawitem   #can't do anything
     483
    def __delitem__(self,key):
        """Delete dataname `key` from the block (delegates to RemoveItem)."""
        self.RemoveItem(key)

    def __len__(self):
        """Return the number of datanames stored in the block."""
        blen = len(self.block)
        return blen
     490
    def __nonzero__(self):
        """Truth value: the block is truthy when it holds at least one
        dataname (Python 2 protocol name)."""
        if self.__len__() > 0: return 1
        return 0
    # Python 3 looks up __bool__, not __nonzero__; without this alias an
    # empty StarBlock would always be truthy on Python 3
    __bool__ = __nonzero__
     494
    # keys returns all internal keys
    def keys(self):
        """Return every stored dataname (always lower case)."""
        return [stored_name for stored_name in self.block.keys()]
     498
    def values(self):
        """Return the value of every dataname, in keys() order."""
        return list(map(self.__getitem__, self.keys()))
     501
    def items(self):
        """Return (dataname, value) pairs, in keys() order."""
        return [(name, self[name]) for name in self.keys()]
     504
    def __contains__(self,key):
        """True if `key` (a string, compared case-insensitively) is a
        stored dataname.  Non-string keys are never contained."""
        if isinstance(key,(unicode,str)) and key.lower() in self.keys():
            return True
        return False

    def has_key(self,key):
        """Python-2-style membership test; equivalent to `key in self`."""
        return key in self
     512
    def has_key_or_alias(self,key):
        """Check if a dataname or alias is available in the block"""
        if key in self:
            return True
        if self.dictionary:
            # fall back to any dictionary-registered alias of the name
            candidate_names = self.dictionary.alias_table.get(key,[])
            return len([c for c in candidate_names if self.has_key(c)]) > 0
        return False
     522       
    def get(self,key,default=None):
        """Return self[key], or `default` when `key` is absent."""
        if key not in self:
            return default
        return self.__getitem__(key)
     529
    def clear(self):
        """Empty the block: values, loops, ordering and case records."""
        self.block = {}
        self.true_case = {}
        self.item_order = []
        self.loops = {}
     535
    def copy(self):
        """Return a shallow copy of this block: the container dicts and
        lists are new objects, but the stored values themselves are
        shared with the original."""
        newcopy = StarBlock()
        newcopy.block = self.block.copy()
        newcopy.item_order = self.item_order[:]
        newcopy.true_case = self.true_case.copy()
        # (the dead `newcopy.loops = []` assignment and commented-out
        # im_class call from the original have been removed)
        newcopy.loops = self.loops.copy()
        return newcopy
     546
    def update(self,adict):
        """Add every (key, value) pair from `adict` via AddItem."""
        for name in list(adict.keys()):
            self.AddItem(name, adict[name])
     550
    def GetItemPosition(self,itemname):
        """A utility function to get the numerical order in the printout
        of `itemname`.  An item has coordinate `(loop_no,pos)` with
        the top level having a `loop_no` of -1.  If an integer is passed to
        the routine then it will return the position of the loop
        referenced by that number."""
        # (the unused function-scope `import string` left over from the
        # Python 2 implementation has been removed)
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not itemname in self:
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos
     569
    def ChangeItemOrder(self,itemname,newpos):
        """Move the printout order of `itemname` to `newpos`. If `itemname` is
        in a loop, `newpos` refers to the order within the loop."""
        if isinstance(itemname,(unicode,str)):
            target = itemname.lower()
        else:
            target = itemname
        owning_loop = self.FindLoop(target)
        # select the ordering list the item currently lives in: the top
        # level (loop_no < 0) or its owning loop
        order_list = self.item_order if owning_loop < 0 else self.loops[owning_loop]
        order_list.remove(target)
        order_list.insert(newpos,target)
     584
    def GetItemOrder(self):
        """Return a list of datanames in the order in which they will be printed.  Loops are
        referred to by numerical index"""
        return list(self.item_order)
     589
    def AddItem(self,key,value,precheck=False):
        """Add dataname `key` to block with value `value`.  `value` may be
        a single value, a list or a tuple. If `precheck` is False (the default),
        all values will be checked and converted to unicode strings as necessary. If
        `precheck` is True, this checking is bypassed.  No checking is necessary
        when values are read from a CIF file as they are already in correct form."""
        if not isinstance(key,(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
        key = unicode(key)    #everything is unicode internally
        if not precheck:
             self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
        # check for overwriting
        if key in self:
             if not self.overwrite:
                 raise StarError( 'Attempt to insert duplicate item name %s' % key)
        if not precheck:   #need to sanitise
            regval,empty_val = self.regularise_data(value)
            pure_string = check_stringiness(regval)
            self.check_item_value(regval)
        else:
            regval,empty_val = value,None
            pure_string = True
        # update ancillary information first
        lower_key = key.lower()
        # a name already in a loop keeps its position; only genuinely new
        # names are appended to the top-level printout order
        if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
            self.item_order.append(lower_key)
        # always remove from our case table in case the case is different
        try:
            del self.true_case[lower_key]
        except KeyError:
            pass
        self.true_case[lower_key] = key
        # storage is a [string_form, value_form] pair; the unused slot is
        # held as empty_val (None or a parallel list of Nones)
        if pure_string:
            self.block.update({lower_key:[regval,empty_val]})
        else:
            self.block.update({lower_key:[empty_val,regval]})
     626
    def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
        """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
        necessary."""
        # Accepts either a (name, value) pair or a whole loop given as
        # ((name, ...), (values, ...)).
        first_part = incomingdata[0]
        if isinstance(first_part,(tuple,list)):
            # an entire loop at once: add each column, then declare the loop
            for one_name,one_value in zip(first_part,incomingdata[1]):
                self.AddItem(one_name,one_value)
            self.CreateLoop(first_part)
        elif isinstance(first_part,(unicode,str)):
            self.AddItem(first_part,incomingdata[1])
        else:
            raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
     644
    def check_data_name(self,dataname,maxlength=-1):
        """Check that `dataname` is a legal dataname for the block's
        characterset, raising StarError if not."""
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # Unicode: exclude controls, surrogates and noncharacter ranges
            if len ([a for a in dataname if ord(a) < 33]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
            if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            # bug fix: the original masked with 0xE, which flags any code
            # point whose low bits match 0b1110 (e.g. U+1000E).  The Unicode
            # noncharacters U+nFFFE/U+nFFFF need the full 0xFFFE mask.
            if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xFFFE == 0xFFFE)]) > 0:
                print('%s fails' % dataname)
                for a in dataname: print('%x' % ord(a),end="")
                print()
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')
     670
    def check_name_length(self,dataname,maxlength):
        """Raise StarError when `dataname` is longer than `maxlength`."""
        if len(dataname) <= maxlength:
            return
        raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
     675
    def check_item_value(self,item):
        """Check each value in `item` (a scalar or a list/tuple/dict of
        values) against the block's permitted character set, raising
        StarError on the first bad value."""
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
           test_item = [item]         #single item list
        def check_one (it):
            if isinstance(it,unicode):
                if it=='': return
                me = self.char_check.match(it)
                if not me:
                    print("Fail value check: %s" % it)
                    raise StarError('Bad character in %s' % it)
                else:
                    if me.span() != (0,len(it)):
                        print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
                        raise StarError('Data item "' + repr( it ) +  u'"... contains forbidden characters')
        # a plain loop: the original built a throwaway list comprehension
        # purely for check_one's side effects
        for single_value in test_item:
            check_one(single_value)
     692
    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary.

        Returns a (value_form, string_form) pair: plain strings are
        promoted to unicode with a None companion; sequences are
        regularised element-wise into parallel lists.  Raises StarError
        for non-iterable types."""
        from numbers import Number
        if isinstance(dataitem,str):
            return unicode(dataitem),None
        if isinstance(dataitem,(Number,unicode,StarList,StarDict)):
            return dataitem,None  #assume StarList/StarDict contain unicode if necessary
        if isinstance(dataitem,(tuple,list)):
            if len(dataitem) == 0:
                # bug fix: `v,s = zip(*[])` raises ValueError; an empty
                # sequence regularises to empty parallel lists
                return [],[]
            v,s = zip(*[self.regularise_data(a) for a in dataitem])
            return list(v),list(s)
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        if len(regval) == 0:
            return [],[]
        v,s = zip(*[self.regularise_data(a) for a in regval])
        return list(v),list(s)
     711
    def RemoveItem(self,itemname):
        """Remove `itemname` from the block."""
        # locate the owning loop before deleting anything
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if testkey not in self:
            return
        del self.block[testkey]
        del self.true_case[testkey]
        if loop_no < 0:
            # top-level item: drop it from the printout order
            self.item_order.remove(testkey)
        else:
            # looped item: drop from its loop, removing the loop if empty
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no]) == 0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
     728
    def RemoveLoopItem(self,itemname):
        """*Deprecated*. Use `RemoveItem` instead"""
        # retained for backward compatibility; simple delegation
        self.RemoveItem(itemname)

    def GetItemValue(self,itemname):
        """Return value of `itemname`.  If `itemname` is looped, a list
        of all values will be returned."""
        # discard the is-calculable flag from GetFullItemValue
        return self.GetFullItemValue(itemname)[0]
     737
    def GetFullItemValue(self,itemname):
        """Return the value associated with `itemname`, and a boolean flagging whether
        (True) or not (False) it is in a form suitable for calculation.  False is
        always returned for strings and `StarList` objects."""
        try:
            # storage holds a [string_form, value_form] pair per dataname
            s,v = self.block[itemname.lower()]
        except KeyError:
            raise KeyError('Itemname %s not in datablock' % itemname)
        # prefer string value unless all are None
        # are we a looped value?
        if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
            if not_none(s):
                return s,False    #a string value
            else:
                return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
        elif not_none(s):
            return s,False         #a list of string values
        else:
            if len(v)>0:
                # calculability is judged from the first element's type
                return v,not isinstance(v[0],StarList)
            return v,True
     759
    def CreateLoop(self,datanames,order=-1,length_check=True):
        """Create a loop in the datablock. `datanames` is a list of datanames that
        together form a loop.  If length_check is True, they should have been initialised in the block
        to have the same number of elements (possibly 0). If `order` is given,
        the loop will appear at this position in the block when printing
        out. A loop counts as a single position."""
        if length_check:
            # The datanames must already exist in the block; they must either
            # all be list-valued (with equal lengths) or all be single values.
            list_like = [d for d in datanames
                         if isinstance(self[d],list) and not isinstance(self[d],StarList)]
            if len(list_like) == len(datanames):
                distinct_lengths = set(len(self[d]) for d in datanames)
                if len(distinct_lengths) > 1:
                    raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( distinct_lengths )))
            elif list_like:
                raise ValueError('Request to loop datanames where some are single values and some are not')
        # Loop contents are always stored in lower case
        lc_datanames = [d.lower() for d in datanames]
        # Pull these datanames out of any loop they currently belong to
        for loop_id in self.loops:
            for name in lc_datanames:
                if name in self.loops[loop_id]:
                    self.loops[loop_id].remove(name)
        # Drop any loops emptied by the removal above
        for loop_id in [a for a in self.loops.keys() if len(self.loops[a])==0]:
            self.item_order.remove(loop_id)
            del self.loops[loop_id]
        # Allocate a fresh loop number
        if self.loops:
            new_loop_id = max(self.loops.keys()) + 1
        else:
            new_loop_id = 1
        self.loops[new_loop_id] = list(lc_datanames)
        # The loop occupies a single position in the print ordering
        if order >= 0:
            self.item_order.insert(order,new_loop_id)
        else:
            self.item_order.append(new_loop_id)
        # The datanames are now represented by the loop in the ordering
        self.item_order = [a for a in self.item_order if a not in lc_datanames]
     796
    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised.  If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of values associated
        with all other columns of the new loop or a `ValueError` will be raised."""
        # NOTE(review): this definition is an exact duplicate of a later
        # `AddLoopName` in this class, so it is shadowed (dead code);
        # consider deleting one of the two copies.
        lower_newname = newname.lower()
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError('%s not in loop' % oldname)
        if lower_newname in self.loops[loop_no]:
            return
        # check length: compare raw (unconverted) column lengths, so value
        # conversion is temporarily disabled while measuring the old column
        old_provides = self.provide_value
        self.provide_value = False
        loop_len = len(self[oldname])
        self.provide_value = old_provides
        if len(self[newname]) != loop_len:
            raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
        # remove from any other loops
        [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(lower_newname)
        # remove from item_order if present (a looped name no longer appears
        # as a free-standing item in the ordering)
        try:
            self.item_order.remove(lower_newname)
        except ValueError:
            pass
     824
    def FindLoop(self,keyname):
        """Find the loop that contains `keyname` and return its numerical index or
        -1 if not present. The numerical index can be used to refer to the loop in
        other routines."""
        target = keyname.lower()
        # Loop contents are stored in lower case, so a caseless match suffices.
        for loop_id in self.loops:
            if target in self.loops[loop_id]:
                return loop_id
        return -1
     834
    def GetLoop(self,keyname):
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        # Any dataname belonging to the loop identifies it equally well.
        loop_view = LoopBlock(self,keyname)
        return loop_view
     839
    def GetLoopNames(self,keyname):
        # NOTE(review): shadowed by the later definition of `GetLoopNames`
        # below, so this version is dead code.  It also appears stale:
        # `self.loops` maps integer loop ids to lists of names, so `aloop`
        # here is an integer and would have no `GetLoopNames` attribute --
        # presumably left over from an older data layout.  Consider removing.
        if keyname in self:
            return self.keys()
        for aloop in self.loops:
            try:
                return aloop.GetLoopNames(keyname)
            except KeyError:
                pass
        raise KeyError('Item does not exist')
     849
    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        idx = self.FindLoop(keyname)
        if idx < 0:
            raise KeyError('%s is not in any loop' % keyname)
        return self.loops[idx]
     857
    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised.  If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of values associated
        with all other columns of the new loop or a `ValueError` will be raised."""
        new_lc = newname.lower()
        target_loop = self.FindLoop(oldname)
        if target_loop < 0:
            raise KeyError('%s not in loop' % oldname)
        if new_lc in self.loops[target_loop]:
            return              # already a column of the target loop
        # Compare raw (unconverted) column lengths: temporarily disable
        # value conversion while measuring the existing loop's length.
        saved_provide = self.provide_value
        self.provide_value = False
        expected_len = len(self[oldname])
        self.provide_value = saved_provide
        if len(self[newname]) != expected_len:
            raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,expected_len))
        # Detach the dataname from whichever loop currently holds it...
        for loop_id in self.loops:
            if new_lc in self.loops[loop_id]:
                self.loops[loop_id].remove(new_lc)
        # ...then attach it to the target loop
        self.loops[target_loop].append(new_lc)
        # A looped dataname no longer appears separately in the item ordering
        if new_lc in self.item_order:
            self.item_order.remove(new_lc)
     885
    def AddToLoop(self,dataname,loopdata):
        """Set each (itemname, value) pair in `loopdata` on the loop containing `dataname`."""
        # NOTE(review): shadowed by the later definition of `AddToLoop` below;
        # this copy is dead code and could be removed.
        thisloop = self.GetLoop(dataname)
        for itemname,itemvalue in loopdata.items():
            thisloop[itemname] = itemvalue
     890
    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname"""
        # every new column must match the length of the existing loop
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        # Loop contents are stored in lower case elsewhere (CreateLoop,
        # AddLoopName, FindLoop all lower-case names), so lower-case the new
        # datanames here as well; previously they were appended in the
        # caller's case, making mixed-case columns unfindable by FindLoop.
        self.loops[thisloop] += [key.lower() for key in loopdata]
     906
    def RemoveKeyedPacket(self,keyname,keyvalue):
        """Remove the packet for which dataname `keyname` takes
        value `keyvalue`.  Only the first such occurrence is
        removed."""
        target_row = list(self[keyname]).index(keyvalue)
        # Excise the row from every column of the loop, in both the
        # string-valued (index 0) and converted (index 1) storage slots.
        for column in self.GetLoopNames(keyname):
            for slot in (0, 1):
                trimmed = list(self.block[column][slot])
                del trimmed[target_row]
                self.block[column][slot] = trimmed
     918
    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Return the loop packet (a `StarPacket` object) where `keyname` has value
        `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
        is raised if no packet is found or more than one packet is found."""
        my_loop = self.GetLoop(keyname)
        if no_case:
            one_pack = [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
        else:
            one_pack = [a for a in my_loop if getattr(a,keyname)==keyvalue]
        # exactly one match required: zero or several is a bad key
        if len(one_pack)!=1:
            raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
        # (leftover debug print of the matched packet removed: routine
        # lookups should not write to stdout)
        return one_pack[0]
     935
     936    def GetCompoundKeyedPacket(self,keydict):
     937        """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
     938        in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
     939        True.  `ValueError` is raised if no packet is found or more than one packet is found."""
     940        #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
     941        keynames = list(keydict.keys())
     942        my_loop = self.GetLoop(keynames[0])
     943        for one