Changeset 3137
- Timestamp: Oct 24, 2017 11:53:41 AM (6 years ago)
- Location: trunk
- Files: 4 added, 3 deleted, 11 edited
Legend:
- Unmodified
- Added
- Removed
trunk/CifFile/CifFile.py
r469 r3137 1 # To maximize python3/python2 compatibility 2 from __future__ import print_function 3 from __future__ import unicode_literals 4 from __future__ import division 5 from __future__ import absolute_import 6 7 try: 8 from cStringIO import StringIO 9 except ImportError: 10 from io import StringIO 11 12 # Python 2,3 compatibility 13 try: 14 from urllib import urlopen # for arbitrary opening 15 from urlparse import urlparse, urlunparse,urljoin 16 except: 17 from urllib.request import urlopen 18 from urllib.parse import urlparse,urlunparse,urljoin 19 20 # The unicode type does not exist in Python3 as the str type 21 # encompasses unicode. PyCIFRW tests for 'unicode' would fail 22 # Suggestions for a better approach welcome. 23 24 if isinstance(u"abc",str): #Python3 25 unicode = str 26 27 __copyright = """ 28 PYCIFRW License Agreement (Python License, Version 2) 29 ----------------------------------------------------- 30 31 1. This LICENSE AGREEMENT is between the Australian Nuclear Science 32 and Technology Organisation ("ANSTO"), and the Individual or 33 Organization ("Licensee") accessing and otherwise using this software 34 ("PyCIFRW") in source or binary form and its associated documentation. 35 36 2. Subject to the terms and conditions of this License Agreement, 37 ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide 38 license to reproduce, analyze, test, perform and/or display publicly, 39 prepare derivative works, distribute, and otherwise use PyCIFRW alone 40 or in any derivative version, provided, however, that this License 41 Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 42 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or 43 in any derivative version prepared by Licensee. 44 45 3. 
In the event Licensee prepares a derivative work that is based on 46 or incorporates PyCIFRW or any part thereof, and wants to make the 47 derivative work available to others as provided herein, then Licensee 48 hereby agrees to include in any such work a brief summary of the 49 changes made to PyCIFRW. 50 51 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" 52 basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 53 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND 54 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 55 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT 56 INFRINGE ANY THIRD PARTY RIGHTS. 57 58 5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW 59 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A 60 RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY 61 DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 62 63 6. This License Agreement will automatically terminate upon a material 64 breach of its terms and conditions. 65 66 7. Nothing in this License Agreement shall be deemed to create any 67 relationship of agency, partnership, or joint venture between ANSTO 68 and Licensee. This License Agreement does not grant permission to use 69 ANSTO trademarks or trade name in a trademark sense to endorse or 70 promote products or services of Licensee, or any third party. 71 72 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees 73 to be bound by the terms and conditions of this License Agreement. 74 1 75 """ 2 1.This Software copyright \u00A9 Australian Synchrotron Research Program Inc, ("ASRP"). 
3 4 2.Subject to ensuring that this copyright notice and licence terms 5 appear on all copies and all modified versions, of PyCIFRW computer 6 code ("this Software"), a royalty-free non-exclusive licence is hereby 7 given (i) to use, copy and modify this Software including the use of 8 reasonable portions of it in other software and (ii) to publish, 9 bundle and otherwise re-distribute this Software or modified versions 10 of this Software to third parties, provided that this copyright notice 11 and terms are clearly shown as applying to all parts of software 12 derived from this Software on each occasion it is published, bundled 13 or re-distributed. You are encouraged to communicate useful 14 modifications to ASRP for inclusion for future versions. 15 16 3.No part of this Software may be sold as a standalone package. 17 18 4.If any part of this Software is bundled with Software that is sold, 19 a free copy of the relevant version of this Software must be made 20 available through the same distribution channel (be that web server, 21 tape, CD or otherwise). 22 23 5.It is a term of exercise of any of the above royalty free licence 24 rights that ASRP gives no warranty, undertaking or representation 25 whatsoever whether express or implied by statute, common law, custom 26 or otherwise, in respect of this Software or any part of it. Without 27 limiting the generality of the preceding sentence, ASRP will not be 28 liable for any injury, loss or damage (including consequential loss or 29 damage) or other loss, loss of profits, costs, charges or expenses 30 however caused which may be suffered, incurred or arise directly or 31 indirectly in respect of this Software. 32 33 6. This Software is not licenced for use in medical applications. 
34 """ 35 36 from types import * 37 import re 38 import StarFile 39 import sys 40 class CifLoopBlock(StarFile.LoopBlock): 41 def __init__(self,data=(),dimension=0,**kwargs): 42 self.loopclass = CifLoopBlock 43 if dimension > 1: 44 raise CifError( 'Attempt to nest loops, loop level %d' % dimension) 45 StarFile.LoopBlock.__init__(self,data,dimension=dimension,**kwargs) 46 # self.__iter__ = self.recursive_iter 47 48 def __iter__(self): 49 return self.recursive_iter() 50 51 def AddLoopItem(self,data,precheck=False): 52 StarFile.LoopBlock.AddLoopItem(self,data,precheck,maxlength=75) 53 54 def insert_loop(self,newloop,**kwargs): 55 if newloop.dimension > 1: 56 raise CifError( 'Attempt to insert inner loop, loop level %d' % dimension) 57 StarFile.LoopBlock.insert_loop(self,newloop,**kwargs) 58 59 class CifBlock(CifLoopBlock): 60 def __init__(self,data = (), strict = 1, maxoutlength=2048,wraplength=80,overwrite=True,dimension=0): 61 self.strict = strict 62 CifLoopBlock.__init__(self,data=data,dimension=0,maxoutlength=maxoutlength,wraplength=wraplength,overwrite=overwrite) 63 if isinstance(data,(StarFile.StarBlock,CifBlock)): 64 self.saves = StarFile.BlockCollection(datasource=data["saves"],element_class=CifBlock,type_tag="save") 65 else: 66 self.saves = StarFile.BlockCollection(element_class=CifLoopBlock,type_tag="save") 67 if self.strict: 68 self.checklengths() 69 self.dictionary = None 70 71 def RemoveCifItem(self,itemname): 72 CifLoopBlock.RemoveLoopItem(self,itemname) 73 74 def __getitem__(self,key): 75 if key == "saves": 76 return self.saves 77 try: 78 rawitem = CifLoopBlock.__getitem__(self,key) 79 except KeyError: 80 if self.dictionary: 81 # send the dictionary the required key and a pointer to us 82 rawitem = self.dictionary.derive_item(key,self) 83 else: 84 raise KeyError, 'No such item: %s' % key 85 # we now have an item, we can try to convert it to a number if that is appropriate 86 if not self.dictionary or not self.dictionary.has_key(key): return rawitem 87 
return self.dictionary.change_type(key,rawitem) 76 77 78 import re,sys 79 from . import StarFile 80 from .StarFile import StarList #put in global scope for exec statement 81 try: 82 import numpy #put in global scope for exec statement 83 from .drel import drel_runtime #put in global scope for exec statement 84 except ImportError: 85 pass #will fail when using dictionaries for calcs 86 from copy import copy #must be in global scope for exec statement 87 88 def track_recursion(in_this_func): 89 """Keep an eye on a function call to make sure that the key argument hasn't been 90 seen before""" 91 def wrapper(*args,**kwargs): 92 key_arg = args[1] 93 if key_arg in wrapper.called_list: 94 print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg))) 95 raise CifRecursionError( key_arg,wrapper.called_list[:]) #failure 96 if len(wrapper.called_list) == 0: #first time 97 wrapper.stored_use_defaults = kwargs.get("allow_defaults",False) 98 print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults)) 99 else: 100 kwargs["allow_defaults"] = wrapper.stored_use_defaults 101 wrapper.called_list.append(key_arg) 102 print('Recursion watch: call stack: ' + repr(wrapper.called_list)) 103 try: 104 result = in_this_func(*args,**kwargs) 105 except StarFile.StarDerivationError as s: 106 if len(wrapper.called_list) == 1: #no more 107 raise StarFile.StarDerivationFailure(wrapper.called_list[0]) 108 else: 109 raise 110 finally: 111 wrapper.called_list.pop() 112 if len(wrapper.called_list) == 0: 113 wrapper.stored_used_defaults = 'error' 114 return result 115 wrapper.called_list = [] 116 return wrapper 117 118 class CifBlock(StarFile.StarBlock): 119 """ 120 A class to hold a single block of a CIF file. A `CifBlock` object can be treated as 121 a Python dictionary, in particular, individual items can be accessed using square 122 brackets e.g. `b['_a_dataname']`. All other Python dictionary methods are also 123 available (e.g. 
`keys()`, `values()`). Looped datanames will return a list of values. 124 125 ## Initialisation 126 127 When provided, `data` should be another `CifBlock` whose contents will be copied to 128 this block. 129 130 * if `strict` is set, maximum name lengths will be enforced 131 132 * `maxoutlength` is the maximum length for output lines 133 134 * `wraplength` is the ideal length to make output lines 135 136 * When set, `overwrite` allows the values of datanames to be changed (otherwise an error 137 is raised). 138 139 * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using 140 the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` 141 after setting the dataitem value. 142 """ 143 def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs): 144 """When provided, `data` should be another CifBlock whose contents will be copied to 145 this block. 146 147 * if `strict` is set, maximum name lengths will be enforced 148 149 * `maxoutlength` is the maximum length for output lines 150 151 * `wraplength` is the ideal length to make output lines 152 153 * When set, `overwrite` allows the values of datanames to be changed (otherwise an error 154 is raised). 155 156 * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using 157 the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` 158 after setting the dataitem value. 
159 """ 160 if strict: maxnamelength=75 161 else: 162 maxnamelength=-1 163 super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs) 164 self.dictionary = None #DDL dictionary referring to this block 165 self.compat_mode = compat_mode #old-style behaviour of setitem 166 167 def RemoveCifItem(self,itemname): 168 """Remove `itemname` from the CifBlock""" 169 self.RemoveItem(itemname) 88 170 89 171 def __setitem__(self,key,value): 90 if key == "saves": 91 self.saves[key] = value 92 else: 93 self.AddCifItem((key,value)) 94 95 def clear(self): 96 CifLoopBlock.clear(self) 97 self.saves = StarFile.BlockCollection(element_class=CifLoopBlock,type_tag="save_") 172 self.AddItem(key,value) 173 # for backwards compatibility make a single-element loop 174 if self.compat_mode: 175 if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList): 176 # single element loop 177 self.CreateLoop([key]) 98 178 99 179 def copy(self): 100 newblock = CifLoopBlock.copy(self) 101 newblock.saves = self.saves.copy() 180 newblock = super(CifBlock,self).copy() 102 181 return self.copy.im_class(newblock) #catch inheritance 103 182 104 def has_key(self,key):105 if key == "saves": return 1106 else: return CifLoopBlock.has_key(self,key)107 108 def __str__(self):109 retstr = ''110 for sb in self.saves.keys():111 retstr = retstr + '\nsave_%s\n\n' % sb112 self.saves[sb].SetOutputLength(self.wraplength,self.maxoutlength)113 retstr = retstr + str(self.saves[sb])114 retstr = retstr + '\nsave_\n\n'115 return retstr + CifLoopBlock.__str__(self)116 117 # this is not appropriate for save blocks. Instead, the save block118 # should be accessed directly for update119 120 def update(self,adict):121 loopdone = []122 if not isinstance(adict,CifBlock):123 raise TypeError124 for key in adict.block.keys():125 self.AddCifItem((key,adict[key]))126 for aloop in adict.loops:127 self.insert_loop(aloop,audit=True)128 129 183 def AddCifItem(self,data): 184 """ *DEPRECATED*. 
Use `AddItem` instead.""" 130 185 # we accept only tuples, strings and lists!! 131 if not (isinstance(data[0],(StringType,TupleType,ListType))): 132 raise TypeError, 'Cif datanames are either a string, tuple or list' 133 # single items passed straight through to underlying routine 186 if not (isinstance(data[0],(unicode,tuple,list,str))): 187 raise TypeError('Cif datanames are either a string, tuple or list') 134 188 # we catch single item loops as well... 135 if isinstance(data[0],StringType): 136 if isinstance(data[1],(TupleType,ListType)) and not isinstance(data[1],(StarFile.StarList,StarFile.StarTuple)): 137 CifLoopBlock.AddLoopItem(self,((data[0],),((data[1],)))) 138 else: 139 CifLoopBlock.AddLoopItem(self,data) 189 if isinstance(data[0],(unicode,str)): 190 self.AddSingleCifItem(data[0],list(data[1])) 191 if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList): # a single element loop 192 self.CreateLoop([data[0]]) 140 193 return 141 # otherwise, we unpack one level and send along. This is different 142 # to the StarBlock behaviour, which assumes that any tuples imply an 143 # inner loop. 144 keyvals = zip(data[0],data[1]) 145 map(lambda a:CifLoopBlock.AddLoopItem(self,a),keyvals) 146 147 def checklengths(self): 148 toolong = filter(lambda a:len(a)>75, self.keys()) 149 outstring = "" 150 for it in toolong: outstring += "\n" + it 151 if toolong: 152 raise CifError( 'Following data names too long:' + outstring) 194 # otherwise, we loop over the datanames 195 keyvals = zip(data[0][0],[list(a) for a in data[1][0]]) 196 [self.AddSingleCifItem(a,b) for a,b in keyvals] 197 # and create the loop 198 self.CreateLoop(data[0][0]) 199 200 def AddSingleCifItem(self,key,value): 201 """*Deprecated*. Use `AddItem` instead""" 202 """Add a single data item. 
If it is part of a loop, a separate call should be made""" 203 self.AddItem(key,value) 153 204 154 205 def loopnames(self): 155 return map(lambda a:a.keys(),self.loops) 156 157 def assign_dictionary(self,dic): 158 if not dic.diclang=="DDLm": 159 print "Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri 160 return 161 self.dictionary = dic 162 163 def merge(self,new_block,mode="strict",match_att=[],match_function=None,nosaves=False, 164 rel_keys = []): 165 # deal with save frames 166 if not nosaves: 167 self["saves"].merge(new_block["saves"],mode,match_att=match_att, 168 match_function=match_function) 169 if mode == 'strict': 170 for key in new_block.item_order: 171 if self.has_key(key) and key not in match_att: 172 raise CifError( "Identical keys %s in strict merge mode" % key) 173 elif key not in match_att: #no change otherwise 174 if isinstance(key,StringType): 175 self[key] = new_block[key] 176 else: 177 self.insert_loop(key) 178 elif mode == 'replace': 179 newkeys = new_block.keys() 180 for ma in match_att: 181 try: 182 newkeys.remove(ma) #don't touch the special ones 183 except ValueError: 184 pass 185 for key in new_block.item_order: 186 if isinstance(key,StringType): 187 self[key] = new_block[key] 188 else: 189 self.insert_loop(key) #assume is a loop 190 elif mode == 'overlay': 191 for attribute in new_block.keys(): 192 if attribute in match_att: continue #ignore this one 193 new_value = new_block[attribute] 194 #non-looped items 195 if isinstance(new_value,StringType): 196 self[attribute] = new_value 197 these_atts = self.keys() 198 for newloop in new_block.loops: 199 newkeys = newloop.keys() 200 # note that the following line determines packet item order 201 overlaps = filter(lambda a: a in these_atts,newkeys) 202 if len(overlaps)< len(newloop):#completely new loop 203 self.insert_loop(newloop) 204 elif len(overlaps)==len(newloop): 205 # appending packets 206 # print "In overlay merge mode, found extra packet items:" 207 # print `overlaps` 208 # get 
key position 209 loop_keys = filter(lambda a:a in rel_keys,overlaps) 210 try: 211 newkeypos = map(lambda a:newkeys.index(a),loop_keys) 212 newkeypos = newkeypos[0] #one key per loop for now 213 loop_keys = loop_keys[0] 214 except (ValueError,IndexError): 215 newkeypos = [] 216 overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data 217 new_data = map(lambda a:new_block[a],overlaps) #new packet data 218 packet_data = transpose(overlap_data) 219 new_p_data = transpose(new_data) 220 # remove any packets for which the keys match between old and new; we 221 # make the arbitrary choice that the old data stays 222 if newkeypos: 223 # get matching values in new list 224 print "Old, new data:\n%s\n%s" % (`overlap_data[newkeypos]`,`new_data[newkeypos]`) 225 key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos]) 226 # filter out any new data with these key values 227 new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data) 228 if new_p_data: 229 new_data = transpose(new_p_data) 230 else: new_data = [] 231 # wipe out the old data and enter the new stuff 232 byebyeloop = self.GetLoop(overlaps[0]) 233 # print "Removing '%s' with overlaps '%s'" % (`byebyeloop`,`overlaps`) 234 # Note that if, in the original dictionary, overlaps are not 235 # looped, GetLoop will return the block itself. So we check 236 # for this case... 
237 if byebyeloop != self: 238 self.remove_loop(byebyeloop) 239 self.AddCifItem(((overlaps,),(overlap_data,))) #adding old packets 240 for pd in new_p_data: #adding new packets 241 if pd not in packet_data: 242 for i in range(len(overlaps)): 243 #don't do this at home; we are appending 244 #to something in place 245 self[overlaps[i]].append(pd[i]) 246 206 return [self.loops[a] for a in self.loops] 207 247 208 248 209 class CifFile(StarFile.StarFile): 249 def __init__(self,datasource=None,strict=1, maxinlength=2048,maxoutlength=0,**kwargs):250 StarFile.StarFile.__init__(self,datasource=datasource,maxinlength=maxinlength,maxoutlength=maxoutlength,blocktype=CifBlock,**kwargs)210 def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs): 211 super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs) 251 212 self.strict = strict 252 213 self.header_comment = \ 253 """ #\\#CIF1.1214 """ 254 215 ########################################################################## 255 # Crystallographic Information Format file 216 # Crystallographic Information Format file 256 217 # Produced by PyCifRW module 257 # 218 # 258 219 # This is a CIF file. CIF has been adopted by the International 259 # Union of Crystallography as the standard for data archiving and 220 # Union of Crystallography as the standard for data archiving and 260 221 # transmission. 
261 222 # … … 264 225 ########################################################################## 265 226 """ 266 def NewBlock(self,blockname,*nkwargs,**kwargs):267 if len(blockname)>75:268 raise CifError , 'Blockname %s is longer than 75 characters' % blockname269 else:270 StarFile.StarFile.NewBlock(self,blockname,*nkwargs,**kwargs)271 227 272 228 … … 275 231 self.value = value 276 232 def __str__(self): 277 return '\nCif Format error: '+ self.value 233 return '\nCif Format error: '+ self.value 278 234 279 235 class ValidCifError(Exception): … … 283 239 return '\nCif Validity error: ' + self.value 284 240 285 class CifDic(StarFile.BlockCollection): 286 def __init__(self,dic,do_minimum=False,grammar='1.1'): 241 class CifRecursionError(Exception): 242 def __init__(self,key_value,call_stack): 243 self.key_value = key_value 244 self.call_stack = call_stack 245 def __str__(self): 246 return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack)) 247 248 249 class CifDic(StarFile.StarFile): 250 """Create a Cif Dictionary object from the provided source, which can 251 be a filename/URL or a CifFile. Optional arguments (relevant to DDLm 252 only): 253 254 * do_minimum (Boolean): 255 Do not set up the dREL system for auto-calculation or perform 256 imports. This implies do_imports=False and do_dREL=False 257 258 * do_imports = No/Full/Contents/All: 259 If not 'No', replace _import.get statements with the imported contents for 260 Full mode/Contents mode/Both respectively. 261 262 * do_dREL = True/False: 263 Parse and convert all dREL methods to Python. 
Implies do_imports=All 264 265 """ 266 def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True, 267 grammar='auto',**kwargs): 287 268 self.do_minimum = do_minimum 288 self.dic_as_cif = dic 269 if do_minimum: 270 do_imports = 'No' 271 do_dREL = False 272 if do_dREL: do_imports = 'All' 289 273 self.template_cache = {} #for DDLm imports 290 274 self.ddlm_functions = {} #for DDLm functions 291 self.switch_numpy(False) #no Numpy arrays returned 292 if isinstance(dic,StringType): 293 self.dic_as_cif = CifFile(dic,grammar=grammar) 294 (self.dicname,self.diclang,self.defdata) = self.dic_determine(self.dic_as_cif) 295 StarFile.BlockCollection.__init__(self,element_class=CifBlock,datasource=self.defdata) 296 self.scopes_mandatory = {"dictionary":[],"category":[],"item":[]} 297 self.scopes_naughty = {"dictionary":[],"category":[],"item":[]} 275 self.switch_numpy(False) #no Numpy arrays returned 276 super(CifDic,self).__init__(datasource=dic,grammar=grammar,**kwargs) 277 self.standard = 'Dic' #for correct output order 278 self.scoping = 'dictionary' 279 (self.dicname,self.diclang) = self.dic_determine() 280 print('%s is a %s dictionary' % (self.dicname,self.diclang)) 281 self.scopes_mandatory = {} 282 self.scopes_naughty = {} 298 283 # rename and expand out definitions using "_name" in DDL dictionaries 299 284 if self.diclang == "DDL1": 300 285 self.DDL1_normalise() #this removes any non-definition entries 286 self.create_def_block_table() #From now on, [] uses definition_id 287 if self.diclang == "DDL1": 301 288 self.ddl1_cat_load() 302 289 elif self.diclang == "DDL2": 303 290 self.DDL2_normalise() #iron out some DDL2 tricky bits 304 291 elif self.diclang == "DDLm": 305 self.ddlm_normalise() 306 self.ddlm_import() #recursively calls this routine 307 if not self.do_minimum: 308 print "Doing full dictionary initialisation" 309 self.ddlm_parse_valid() #extract validity information from data block 310 self.transform_drel() #parse the drel functions 311 
self.add_drel_funcs() #put the drel functions into the namespace 312 self.add_category_info() 292 self.scoping = 'dictionary' #expose all save frames 293 if do_imports is not 'No': 294 self.ddlm_import(import_mode=do_imports)#recursively calls this routine 295 self.create_alias_table() 296 self.create_cat_obj_table() 297 self.create_cat_key_table() 298 if do_dREL: 299 print('Doing full dictionary initialisation') 300 self.initialise_drel() 301 self.add_category_info(full=do_dREL) 313 302 # initialise type information 314 303 self.typedic={} 315 304 self.primdic = {} #typecode<->primitive type translation 316 305 self.add_type_info() 317 self.item_validation_funs = [ 318 self.validate_item_type, 319 self.validate_item_esd, 320 self.validate_item_enum, # functions which check conformance 321 self.validate_enum_range, 322 self.validate_looping] 323 self.loop_validation_funs = [ 324 self.validate_loop_membership, 325 self.validate_loop_key, 326 self.validate_loop_references] # functions checking loop values 327 self.global_validation_funs = [ 328 self.validate_exclusion, 329 self.validate_parent, 330 self.validate_child, 331 self.validate_dependents, 332 self.validate_uniqueness] # where we need to look at other values 333 self.block_validation_funs = [ # where only a full block will do 334 self.validate_mandatory_category] 335 self.global_remove_validation_funs = [ 336 self.validate_remove_parent_child] # removal is quicker with special checks 337 self.optimize = False # default value 338 self.done_parents = [] 339 self.done_children = [] 340 self.done_keys = [] 341 # debug 342 # j = open("dic_debug","w") 343 # j.write(self.__str__()) 344 # j.close() 345 346 def dic_determine(self,cifdic): 347 if cifdic.has_key("on_this_dictionary"): 348 self.master_key = "on_this_dictionary" 306 self.install_validation_functions() 307 308 def dic_determine(self): 309 if "on_this_dictionary" in self: 310 self.master_block = super(CifDic,self).__getitem__("on_this_dictionary") 311 
self.def_id_spec = "_name" 312 self.cat_id_spec = "_category.id" #we add this ourselves 349 313 self.type_spec = "_type" 350 314 self.enum_spec = "_enumeration" … … 354 318 self.must_exist_spec = "_list_mandatory" 355 319 self.list_ref_spec = "_list_reference" 320 self.key_spec = "_list_mandatory" 356 321 self.unique_spec = "_list_uniqueness" 357 322 self.child_spec = "_list_link_child" … … 362 327 self.dep_spec = "xxx" 363 328 self.cat_list = [] #to save searching all the time 364 name = cifdic["on_this_dictionary"]["_dictionary_name"] 365 version = cifdic["on_this_dictionary"]["_dictionary_version"] 366 return (name+version,"DDL1",cifdic) 367 elif len(cifdic.keys()) == 1: # DDL2/DDLm 368 self.master_key = cifdic.keys()[0] 369 name = cifdic[self.master_key]["_dictionary.title"] 370 version = cifdic[self.master_key]["_dictionary.version"] 371 if name != self.master_key: 372 print "Warning: DDL2 blockname %s not equal to dictionary name %s" % (self.master_key,name) 373 if cifdic[self.master_key].has_key("_dictionary.class"): #DDLm 374 self.unique_spec = "_category_key.generic" 375 return(name+version,"DDLm",cifdic[self.master_key]["saves"]) 376 #otherwise DDL2 377 self.type_spec = "_item_type.code" 378 self.enum_spec = "_item_enumeration.value" 379 self.esd_spec = "_item_type_conditions.code" 380 self.cat_spec = "_item.category_id" 381 self.loop_spec = "there_is_no_loop_spec!" 
382 self.must_loop_spec = "xxx" 383 self.must_exist_spec = "_item.mandatory_code" 384 self.child_spec = "_item_linked.child_name" 385 self.parent_spec = "_item_linked.parent_name" 386 self.related_func = "_item_related.function_code" 387 self.related_item = "_item_related.related_name" 388 self.unique_spec = "_category_key.name" 389 self.list_ref_spec = "xxx" 390 self.primitive_type = "_type" 391 self.dep_spec = "_item_dependent.dependent_name" 392 return (name+version,"DDL2",cifdic[self.master_key]["saves"]) 393 else: 394 raise CifError, "Unable to determine dictionary DDL version" 395 329 name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"] 330 version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"] 331 return (name+version,"DDL1") 332 elif len(self.get_roots()) == 1: # DDL2/DDLm 333 self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0]) 334 # now change to dictionary scoping 335 self.scoping = 'dictionary' 336 name = self.master_block["_dictionary.title"] 337 version = self.master_block["_dictionary.version"] 338 if self.master_block.has_key("_dictionary.class"): #DDLm 339 self.enum_spec = '_enumeration_set.state' 340 self.key_spec = '_category.key_id' 341 self.must_exist_spec = None 342 self.cat_spec = '_name.category_id' 343 self.primitive_type = '_type.contents' 344 self.cat_id_spec = "_definition.id" 345 self.def_id_spec = "_definition.id" 346 return(name+version,"DDLm") 347 else: #DDL2 348 self.cat_id_spec = "_category.id" 349 self.def_id_spec = "_item.name" 350 self.key_spec = "_category_mandatory.name" 351 self.type_spec = "_item_type.code" 352 self.enum_spec = "_item_enumeration.value" 353 self.esd_spec = "_item_type_conditions.code" 354 self.cat_spec = "_item.category_id" 355 self.loop_spec = "there_is_no_loop_spec!" 
356 self.must_loop_spec = "xxx" 357 self.must_exist_spec = "_item.mandatory_code" 358 self.child_spec = "_item_linked.child_name" 359 self.parent_spec = "_item_linked.parent_name" 360 self.related_func = "_item_related.function_code" 361 self.related_item = "_item_related.related_name" 362 self.unique_spec = "_category_key.name" 363 self.list_ref_spec = "xxx" 364 self.primitive_type = "_type" 365 self.dep_spec = "_item_dependent.dependent_name" 366 return (name+version,"DDL2") 367 else: 368 raise CifError("Unable to determine dictionary DDL version") 369 396 370 def DDL1_normalise(self): 371 # switch off block name collision checks 372 self.standard = None 397 373 # add default type information in DDL2 style 398 374 # initial types and constructs 399 375 base_types = ["char","numb","null"] 400 prim_types = base_types[:] 376 prim_types = base_types[:] 401 377 base_constructs = [".*", 402 378 '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.', 403 379 "\"\" "] 404 for key,value in self.dictionary.items(): 405 if value.has_key("_name"): 380 for key,value in self.items(): 381 newnames = [key] #keep by default 382 if "_name" in value: 406 383 real_name = value["_name"] 407 if type(real_name) is ListType: #looped values384 if isinstance(real_name,list): #looped values 408 385 for looped_name in real_name: 409 new_value = value.copy() 410 new_value["_name"] = looped_name #only looped name 411 self.dictionary[looped_name] = new_value 412 else: self.dictionary[real_name] = value 386 new_value = value.copy() 387 new_value["_name"] = looped_name #only looped name 388 self[looped_name] = new_value 389 newnames = real_name 390 else: 391 self[real_name] = value 392 newnames = [real_name] 413 393 # delete the old one 414 del self.dictionary[key] 394 if key not in newnames: 395 del self[key] 415 396 # loop again to normalise the contents of each definition 416 for key,value in self.dictionary.items(): 397 for key,value in self.items(): 398 #unlock the 
block 399 save_overwrite = value.overwrite 400 value.overwrite = True 417 401 # deal with a missing _list, _type_conditions 418 if not value.has_key("_list"): value["_list"] = 'no'419 if not value.has_key("_type_conditions"): value["_type_conditions"] = 'none'402 if "_list" not in value: value["_list"] = 'no' 403 if "_type_conditions" not in value: value["_type_conditions"] = 'none' 420 404 # deal with enumeration ranges 421 if value.has_key("_enumeration_range"):405 if "_enumeration_range" in value: 422 406 max,min = self.getmaxmin(value["_enumeration_range"]) 423 407 if min == ".": 424 self .dictionary[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))408 self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min)))) 425 409 elif max == ".": 426 self .dictionary[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))410 self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min)))) 427 411 else: 428 self .dictionary[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))412 self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min)))) 429 413 #add any type construct information 430 if value.has_key("_type_construct"):414 if "_type_construct" in value: 431 415 base_types.append(value["_name"]+"_type") #ie dataname_type 432 416 base_constructs.append(value["_type_construct"]+"$") 433 417 prim_types.append(value["_type"]) #keep a record 434 418 value["_type"] = base_types[-1] #the new type name 435 436 419 437 420 #make categories conform with ddl2 438 421 #note that we must remove everything from the last underscore 439 if value ["_category"]== "category_overview":422 if value.get("_category",None) == "category_overview": 440 423 last_under = value["_name"].rindex("_") 441 424 catid = value["_name"][1:last_under] 442 425 value["_category.id"] = catid #remove 
square bracks 443 if catid not in self.cat_list: self.cat_list.append(catid) 426 if catid not in self.cat_list: self.cat_list.append(catid) 427 value.overwrite = save_overwrite 444 428 # we now add any missing categories before filling in the rest of the 445 429 # information 446 for key,value in self.dictionary.items(): 447 if self[key].has_key("_category"): 430 for key,value in self.items(): 431 #print('processing ddl1 definition %s' % key) 432 if "_category" in self[key]: 448 433 if self[key]["_category"] not in self.cat_list: 449 434 # rogue category, add it in 450 435 newcat = self[key]["_category"] 451 fake_name = "_" + newcat + "_[]" 436 fake_name = "_" + newcat + "_[]" 452 437 newcatdata = CifBlock() 453 438 newcatdata["_category"] = "category_overview" … … 457 442 self.cat_list.append(newcat) 458 443 # write out the type information in DDL2 style 459 self. dic_as_cif[self.master_key].AddLoopItem((444 self.master_block.AddLoopItem(( 460 445 ("_item_type_list.code","_item_type_list.construct", 461 446 "_item_type_list.primitive_code"), 462 447 (base_types,base_constructs,prim_types) 463 448 )) 464 465 def DDL2_normalise(self):466 listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),ListType),self.keys())467 # now filter out all the single element lists!468 dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs)469 for item_def in dodgy_defs:470 # print "DDL2 norm: processing %s" % item_def471 thisdef = self[item_def]472 packet_no = thisdef['_item.name'].index(item_def)473 realcat = thisdef['_item.category_id'][packet_no]474 realmand = thisdef['_item.mandatory_code'][packet_no]475 # first add in all the missing categories476 # we don't replace the entry in the list corresponding to the477 # current item, as that would wipe out the information we want478 for child_no in range(len(thisdef['_item.name'])):479 if child_no == packet_no: continue480 child_name = thisdef['_item.name'][child_no]481 child_cat = 
thisdef['_item.category_id'][child_no]482 child_mand = thisdef['_item.mandatory_code'][child_no]483 if not self.has_key(child_name):484 self[child_name] = CifBlock()485 self[child_name]['_item.name'] = child_name486 self[child_name]['_item.category_id'] = child_cat487 self[child_name]['_item.mandatory_code'] = child_mand488 self[item_def]['_item.name'] = item_def489 self[item_def]['_item.category_id'] = realcat490 self[item_def]['_item.mandatory_code'] = realmand491 # go through any _item_linked tables492 dodgy_defs = filter(lambda a:isinstance(self[a].get('_item_linked.child_name'),ListType),self.keys())493 dodgy_defs = filter(lambda a:len(self[a]['_item_linked.child_name']) > 1, dodgy_defs)494 for item_def in dodgy_defs:495 thisdef = self[item_def]496 child_list = thisdef.get('_item_linked.child_name',[])497 parents = thisdef.get('_item_linked.parent_name',[])498 # zap the parents, they will confuse us!!499 del thisdef['_item_linked.parent_name']500 if isinstance(child_list,StringType):501 self[child_list]['_item_linked.parent_name'] = parents502 self[parents]['_item_linked.child_name'] = child_list503 else:504 # for each parent, find the list of children.505 family = map(None,parents,child_list)506 notmychildren = family507 while len(notmychildren):508 # get all children of first entry509 mychildren = filter(lambda a:a[0]==notmychildren[0][0],family)510 # print "Parent %s: %d children" % (notmychildren[0][0],len(mychildren))511 for parent,child in mychildren: #parent is the same for all512 self[child]['_item_linked.parent_name'] = parent513 # put all the children into the parent514 try:515 del self[mychildren[0][0]]['_item_linked.child_name']516 except ValueError: pass517 self[mychildren[0][0]]['_item_linked.child_name'] = map(lambda a:a[1],mychildren)518 # now make a new,smaller list519 notmychildren = filter(lambda a:a[0]!=mychildren[0][0],notmychildren)520 # now flatten any single element lists521 single_defs = filter(lambda 
a:len(self[a]['_item.name'])==1,listed_defs)522 for flat_def in single_defs:523 flat_keys = self[flat_def].GetLoop('_item.name').keys()524 for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]525 # now deal with the multiple lists526 # next we do aliases527 all_aliases = filter(lambda a:self[a].has_key('_item_aliases.alias_name'),self.keys())528 for aliased in all_aliases:529 my_aliases = listify(self[aliased]['_item_aliases.alias_name'])530 for alias in my_aliases:531 self[alias] = self[aliased].copy() #we are going to delete stuff...532 del self[alias]["_item_aliases.alias_name"]533 534 def ddlm_normalise(self):535 for key,value in self.dictionary.items():536 if value.has_key("_name.category_id"):537 real_name = "_" + value["_name.category_id"] + "." + value["_name.object_id"]538 self[real_name] = value539 # delete the old one540 del self[key]541 542 def ddlm_parse_valid(self):543 if not self.dic_as_cif[self.master_key].has_key("_dictionary_valid.scope"):544 return545 for scope_pack in self.dic_as_cif[self.master_key].GetLoop("_dictionary_valid.scope"):546 scope = getattr(scope_pack,"_dictionary_valid.scope")547 valid_info = getattr(scope_pack,"_dictionary_valid.attributes")548 valid_info = valid_info.split()549 for i in range(0,len(valid_info),2):550 if valid_info[i]=="+":551 self.scopes_mandatory[scope.lower()].append(valid_info[i+1].lower())552 elif valid_info[i]=="!":553 self.scopes_naughty[scope.lower()].append(valid_info[i+1].lower())554 555 def ddlm_import(self):556 import urllib557 #first check the outermost datablocks. 
Note we expect our dREL558 #machinery to create _import_list.id only if the individual values are available559 #For this to happen, we need the ddl.dic to have been assigned560 try:561 to_be_imported = self.dic_as_cif[self.master_key]["_import_list.id"]562 except KeyError:563 pass564 else:565 # deal with foreshortened import blocks566 for import_target in to_be_imported:567 if len(import_target)==3: #default values have been left off568 import_target.append('Exit')569 import_target.append('Exit')570 for scope,dict_block,file_loc,on_dupl,on_miss in to_be_imported:571 scope = scope.lower() #work around capitalisation in draft dics572 if scope == 'att' or scope == 'sta' or scope == 'val':573 print 'Improper import directive at top level in %s: ignored' % self.master.key574 continue575 # resolve URI576 full_uri = self.resolve_path(file_loc)577 dic_as_cif = CifFile(urllib.urlopen(full_uri),grammar="DDLm")578 import_from = CifDic(dic_as_cif,do_minimum=True) #this will recurse internal imports579 # and now merge these definitions580 if scope == "dic":581 self.get_whole_dict(import_from,on_dupl,on_miss)582 elif scope=="cat":583 self.get_one_cat(import_from,dict_block,on_dupl,on_miss)584 elif scope=="grp":585 self.get_one_cat_with_children(import_from,dict_block,on_dupl,on_miss)586 elif scope=="itm": #not clear what to do if category is missing587 self.add_one_defn(import_from,dict_block,on_dupl,on_miss)588 # it will never happen again...589 del self.dic_as_cif[self.master_key]["_import_list.id"]590 # next we resolve per-definition imports591 for one_def in self.keys():592 try:593 to_be_imported = self[one_def]["_import_list.id"]594 except KeyError:595 pass596 else:597 if len(to_be_imported) == 5 and len(to_be_imported[0])!=5:598 #catch an error in earlier versions of the dictionaries where599 #the outer brackets were missing600 to_be_imported = [to_be_imported]601 # deal with foreshortened import blocks602 for import_target in to_be_imported:603 if len(import_target)==3: 
#default values have been left off604 import_target.append('Exit')605 import_target.append('Exit')606 for scope,block,file_loc,on_dupl,on_miss in to_be_imported:607 scope = scope.lower() #work around capitalisation in draft dics608 if scope == 'dic' or scope == 'cat' or scope == 'grp' or scope == "itm":609 print 'Improper import directive at definition level in %s: ignored' % self.master.key610 continue611 full_uri = self.resolve_path(file_loc)612 if full_uri not in self.template_cache:613 dic_as_cif = CifFile(urllib.urlopen(full_uri),grammar="DDLm")614 self.template_cache[full_uri] = CifDic(dic_as_cif,do_minimum=True) #this will recurse internal imports615 print 'Added %s to cached dictionaries' % full_uri616 import_from = self.template_cache[full_uri]617 if scope == 'att':618 self.import_attributes(one_def,import_from,block,on_dupl,on_miss)619 elif scope == 'sta':620 self.import_loop(one_def,import_from,block,'_enumeration_set.state',on_miss)621 elif scope == 'val':622 self.import_loop(one_def,import_from,block,'_enumeration_default.value',on_miss)623 else:624 raise CifError, "Unrecognised import scope %s" % scope625 # remove the import attribute626 del self[one_def]["_import_list.id"]627 628 def resolve_path(self,file_loc):629 import urlparse630 url_comps = urlparse.urlparse(file_loc)631 if url_comps[0]: return file_loc #already full URI632 new_url = urlparse.urljoin(self.dic_as_cif.my_uri,file_loc)633 print "Transformed %s to %s for import " % (file_loc,new_url)634 return new_url635 636 def get_whole_dict(self,source_dict,on_dupl,on_miss):637 print "Cat_map: `%s`" % source_dict.cat_map.values()638 for source_cat in source_dict.cat_map.values():639 self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)640 641 def get_one_cat(self,source_dict,source_cat,on_dupl,on_miss):642 ext_cat = source_dict.get(source_cat,"")643 this_cat = self.get(source_cat,"")644 print "Adding category %s" % source_cat645 if not ext_cat:646 if on_miss == "Ignore":647 pass648 else:649 
raise CifError, "Missing category %s" % source_cat650 else:651 all_ext_defns = source_dict.keys()652 cat_list = filter(lambda a:source_dict[a].get("_name.category_id","").lower()==source_cat.lower(),653 all_ext_defns)654 print "Items: %s" % `cat_list`655 if this_cat: # The category block itself is duplicated656 if on_dupl=="Ignore":657 pass658 elif on_dupl == "Exit":659 raise CifError, "Duplicate category %s" % source_cat660 else:661 self[source_cat] = ext_cat662 else:663 self[source_cat] = ext_cat664 # now do all member definitions665 for cat_defn in cat_list:666 self.add_one_defn(source_dict,cat_defn,on_dupl)667 668 def add_one_defn(self,source_dict,cat_defn,on_dupl):669 if self.has_key(cat_defn):670 if on_dupl == "Ignore": pass671 elif on_dupl == "Exit":672 raise CifError, "Duplicate definition %s" % cat_defn673 else: self[cat_defn] = source_dict[cat_defn]674 else: self[cat_defn] = source_dict[cat_defn]675 print " "+cat_defn676 677 def get_one_cat_with_children(self,source_dict,source_cat,on_dupl,on_miss):678 self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)679 child_cats = filter(lambda a:source_dict[a]["_category.parent_id"]==source_dict[source_cat]["_definition.id"],source_dict.cat_map.values())680 for child_cat in child_cats: self.get_one_cat(source_dict,child_cat,on_dupl,on_miss)681 682 def import_attributes(self,mykey,source_dict,source_def,on_dupl,on_miss):683 # process missing684 if not source_dict.has_key(source_def):685 if on_miss == 'Exit':686 raise CifError, 'Missing definition for import %s' % source_def687 else: return #nothing else to do688 # now do the import689 print 'Adding attributes from %s to %s' % (source_def,mykey)690 self[mykey].merge(source_dict[source_def],mode='replace',match_att= \691 ['_definition.id','_name.category_id','_name.object_id'])692 693 def import_loop(self,mykey,source_dict,source_def,loop_name,on_miss):694 # process imssing695 if not source_dict.has_key(source_def):696 if on_miss == 'Exit':697 raise CifError, 
'Missing definition for import %s' % source_def698 else: return #nothing else to do699 print 'Adding %s attributes from %s to %s' % (loop_name,source_def,mykey)700 state_loop = source_dict[source_def].GetLoop(loop_name)701 self[mykey].insert_loop(state_loop)702 703 449 704 450 def ddl1_cat_load(self): … … 725 471 if new_unique not in uis: uis.append(new_unique) 726 472 cat_unique_dic[thiscat] = uis 473 474 [get_cat_info(a) for a in deflist] # apply the above function 475 for cat in cat_mand_dic.keys(): 476 self[cat]["_category_mandatory.name"] = cat_mand_dic[cat] 477 for cat in cat_unique_dic.keys(): 478 self[cat]["_category_key.name"] = cat_unique_dic[cat] 479 480 def create_pcloop(self,definition): 481 old_children = self[definition].get('_item_linked.child_name',[]) 482 old_parents = self[definition].get('_item_linked.parent_name',[]) 483 if isinstance(old_children,unicode): 484 old_children = [old_children] 485 if isinstance(old_parents,unicode): 486 old_parents = [old_parents] 487 if (len(old_children)==0 and len(old_parents)==0) or \ 488 (len(old_children) > 1 and len(old_parents)>1): 489 return 490 if len(old_children)==0: 491 old_children = [definition]*len(old_parents) 492 if len(old_parents)==0: 493 old_parents = [definition]*len(old_children) 494 newloop = CifLoopBlock(dimension=1) 495 newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) 496 newloop.AddLoopItem(('_item_linked.child_name',old_children)) 497 try: 498 del self[definition]['_item_linked.parent_name'] 499 del self[definition]['_item_linked.child_name'] 500 except KeyError: 501 pass 502 self[definition].insert_loop(newloop) 503 504 505 506 def DDL2_normalise(self): 507 listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()) 508 # now filter out all the single element lists! 
509 dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs) 510 for item_def in dodgy_defs: 511 # print("DDL2 norm: processing %s" % item_def) 512 thisdef = self[item_def] 513 packet_no = thisdef['_item.name'].index(item_def) 514 realcat = thisdef['_item.category_id'][packet_no] 515 realmand = thisdef['_item.mandatory_code'][packet_no] 516 # first add in all the missing categories 517 # we don't replace the entry in the list corresponding to the 518 # current item, as that would wipe out the information we want 519 for child_no in range(len(thisdef['_item.name'])): 520 if child_no == packet_no: continue 521 child_name = thisdef['_item.name'][child_no] 522 child_cat = thisdef['_item.category_id'][child_no] 523 child_mand = thisdef['_item.mandatory_code'][child_no] 524 if child_name not in self: 525 self[child_name] = CifBlock() 526 self[child_name]['_item.name'] = child_name 527 self[child_name]['_item.category_id'] = child_cat 528 self[child_name]['_item.mandatory_code'] = child_mand 529 self[item_def]['_item.name'] = item_def 530 self[item_def]['_item.category_id'] = realcat 531 self[item_def]['_item.mandatory_code'] = realmand 532 533 target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \ 534 '_item_linked.parent_name' in self[a]] 535 # now dodgy_defs contains all definition blocks with more than one child/parent link 536 for item_def in dodgy_defs: self.create_pcloop(item_def) #regularise appearance 537 for item_def in dodgy_defs: 538 print('Processing %s' % item_def) 539 thisdef = self[item_def] 540 child_list = thisdef['_item_linked.child_name'] 541 parents = thisdef['_item_linked.parent_name'] 542 # for each parent, find the list of children. 
543 family = list(zip(parents,child_list)) 544 notmychildren = family #We aim to remove non-children 545 # Loop over the parents, relocating as necessary 546 while len(notmychildren): 547 # get all children of first entry 548 mychildren = [a for a in family if a[0]==notmychildren[0][0]] 549 print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren))) 550 for parent,child in mychildren: #parent is the same for all 551 # Make sure that we simply add in the new entry for the child, not replace it, 552 # otherwise we might spoil the child entry loop structure 553 try: 554 childloop = self[child].GetLoop('_item_linked.parent_name') 555 except KeyError: 556 print('Creating new parent entry %s for definition %s' % (parent,child)) 557 self[child]['_item_linked.parent_name'] = [parent] 558 childloop = self[child].GetLoop('_item_linked.parent_name') 559 childloop.AddLoopItem(('_item_linked.child_name',[child])) 560 continue 561 else: 562 # A parent loop already exists and so will a child loop due to the 563 # call to create_pcloop above 564 pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child] 565 goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent] 566 if len(goodpars)>0: #no need to add it 567 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child)) 568 continue 569 print('Adding %s to %s entry' % (parent,child)) 570 newpacket = childloop.GetPacket(0) #essentially a copy, I hope 571 setattr(newpacket,'_item_linked.child_name',child) 572 setattr(newpacket,'_item_linked.parent_name',parent) 573 childloop.AddPacket(newpacket) 574 # 575 # Make sure the parent also points to the children. 
We get 576 # the current entry, then add our 577 # new values if they are not there already 578 # 579 parent_name = mychildren[0][0] 580 old_children = self[parent_name].get('_item_linked.child_name',[]) 581 old_parents = self[parent_name].get('_item_linked.parent_name',[]) 582 oldfamily = zip(old_parents,old_children) 583 newfamily = [] 584 print('Old parents -> %s' % repr(old_parents)) 585 for jj, childname in mychildren: 586 alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname] 587 if len(alreadythere)>0: continue 588 'Adding new child %s to parent definition at %s' % (childname,parent_name) 589 old_children.append(childname) 590 old_parents.append(parent_name) 591 # Now output the loop, blowing away previous definitions. If there is something 592 # else in this category, we are destroying it. 593 newloop = CifLoopBlock(dimension=1) 594 newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) 595 newloop.AddLoopItem(('_item_linked.child_name',old_children)) 596 del self[parent_name]['_item_linked.parent_name'] 597 del self[parent_name]['_item_linked.child_name'] 598 self[parent_name].insert_loop(newloop) 599 print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name'])) 600 # now make a new,smaller list 601 notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]] 602 603 # now flatten any single element lists 604 single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs) 605 for flat_def in single_defs: 606 flat_keys = self[flat_def].GetLoop('_item.name').keys() 607 for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0] 608 # now deal with the multiple lists 609 # next we do aliases 610 all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')] 611 for aliased in all_aliases: 612 my_aliases = listify(self[aliased]['_item_aliases.alias_name']) 613 for alias in my_aliases: 614 self[alias] = self[aliased].copy() #we are going to delete 
stuff... 615 del self[alias]["_item_aliases.alias_name"] 616 617 def ddlm_parse_valid(self): 618 if "_dictionary_valid.application" not in self.master_block: 619 return 620 for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"): 621 scope = getattr(scope_pack,"_dictionary_valid.application") 622 valid_info = getattr(scope_pack,"_dictionary_valid.attributes") 623 if scope[1] == "Mandatory": 624 self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info) 625 elif scope[1] == "Prohibited": 626 self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info) 627 628 def ddlm_import(self,import_mode='All'): 629 import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]]) 630 print ('Import mode %s applied to following frames' % import_mode) 631 print (str([a[0] for a in import_frames])) 632 if import_mode != 'All': 633 for i in range(len(import_frames)): 634 import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents') == import_mode]) 635 print('Importing following frames in mode %s' % import_mode) 636 print(str(import_frames)) 637 #resolve all references 638 for parent_block,import_list in import_frames: 639 for import_ref in import_list: 640 file_loc = import_ref["file"] 641 full_uri = self.resolve_path(file_loc) 642 if full_uri not in self.template_cache: 643 dic_as_cif = CifFile(urlopen(full_uri),grammar=self.grammar) 644 self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,do_dREL=False) #this will recurse internal imports 645 print('Added %s to cached dictionaries' % full_uri) 646 import_from = self.template_cache[full_uri] 647 dupl = import_ref.get('dupl','Exit') 648 miss = import_ref.get('miss','Exit') 649 target_key = import_ref["save"] 650 try: 651 import_target = import_from[target_key] 652 except KeyError: 653 if miss == 'Exit': 654 raise CifError('Import frame %s not found in %s' % (target_key,full_uri)) 655 else: continue 
656 # now import appropriately 657 mode = import_ref.get("mode",'Contents').lower() 658 if target_key in self and mode=='full': #so blockname will be duplicated 659 if dupl == 'Exit': 660 raise CifError('Import frame %s already in dictionary' % target_key) 661 elif dupl == 'Ignore': 662 continue 663 if mode == 'contents': #merge attributes only 664 self[parent_block].merge(import_target) 665 elif mode =="full": 666 # Do the syntactic merge 667 syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting 668 from_cat_head = import_target['_name.object_id'] 669 child_frames = import_from.ddlm_all_children(from_cat_head) 670 # Check for Head merging Head 671 if self[parent_block].get('_definition.class','Datum')=='Head' and \ 672 import_target.get('_definition.class','Datum')=='Head': 673 head_to_head = True 674 else: 675 head_to_head = False 676 child_frames.remove(from_cat_head) 677 # As we are in syntax land, we call the CifFile methods 678 child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames]) 679 child_blocks = super(CifDic,import_from).makebc(child_blocks) 680 # Prune out any datablocks that have identical definitions 681 from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()]) 682 double_defs = list([b for b in from_defs.items() if self.has_key(b[1])]) 683 print ('Definitions for %s superseded' % repr(double_defs)) 684 for b in double_defs: 685 del child_blocks[b[0]] 686 super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head) # 687 print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames), 688 mode,len(self))) 689 # Now the semantic merge 690 # First expand our definition <-> blockname tree 691 self.create_def_block_table() 692 merging_cat = self[parent_block]['_name.object_id'] #new parent 693 if head_to_head: 694 child_frames = self.ddlm_immediate_children(from_cat_head) #old children 695 #the new parent is the 
importing category for all old children 696 for f in child_frames: 697 self[f].overwrite = True 698 self[f]['_name.category_id'] = merging_cat 699 self[f].overwrite = False 700 # remove the old head 701 del self[from_cat_head] 702 print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat)) 703 else: #imported category is only child 704 from_frame = import_from[target_key]['_definition.id'] #so we can find it 705 child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0] 706 self[child_frame]['_name.category_id'] = merging_cat 707 print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat)) 708 # it will never happen again... 709 del self[parent_block]["_import.get"] 710 711 def resolve_path(self,file_loc): 712 url_comps = urlparse(file_loc) 713 if url_comps[0]: return file_loc #already full URI 714 new_url = urljoin(self.my_uri,file_loc) 715 #print("Transformed %s to %s for import " % (file_loc,new_url)) 716 return new_url 717 718 719 720 def create_def_block_table(self): 721 """ Create an internal table matching definition to block id """ 722 proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()] 723 # now get the actual ids instead of blocks 724 proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table]) 725 # remove non-definitions 726 if self.diclang != "DDL1": 727 top_blocks = list([a[0].lower() for a in self.get_roots()]) 728 else: 729 top_blocks = ["on_this_dictionary"] 730 # catch dodgy duplicates 731 uniques = set([a[0] for a in proto_table]) 732 if len(uniques)<len(proto_table): 733 def_names = list([a[0] for a in proto_table]) 734 dodgy = [a for a in def_names if def_names.count(a)>1] 735 raise CifError('Duplicate definitions in dictionary:' + repr(dodgy)) 736 self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks]) 737 738 def 
__getitem__(self,key): 739 """Access a datablock by definition id, after the lookup has been created""" 740 try: 741 return super(CifDic,self).__getitem__(self.block_id_table[key.lower()]) 742 except AttributeError: #block_id_table not present yet 743 return super(CifDic,self).__getitem__(key) 744 except KeyError: # key is missing 745 # print(Definition for %s not found, reverting to CifFile' % key) 746 return super(CifDic,self).__getitem__(key) 747 748 def __setitem__(self,key,value): 749 """Add a new definition block""" 750 super(CifDic,self).__setitem__(key,value) 751 try: 752 self.block_id_table[value['_definition.id']]=key 753 except AttributeError: #does not exist yet 754 pass 755 756 def __delitem__(self,key): 757 """Remove a definition""" 758 try: 759 super(CifDic,self).__delitem__(self.block_id_table[key.lower()]) 760 del self.block_id_table[key.lower()] 761 except (AttributeError,KeyError): #block_id_table not present yet 762 super(CifDic,self).__delitem__(key) 763 return 764 # fix other datastructures 765 # cat_obj table 766 767 def keys(self): 768 """Return all definitions""" 769 try: 770 return self.block_id_table.keys() 771 except AttributeError: 772 return super(CifDic,self).keys() 773 774 def has_key(self,key): 775 return key in self 776 777 def __contains__(self,key): 778 try: 779 return key.lower() in self.block_id_table 780 except AttributeError: 781 return super(CifDic,self).__contains__(key) 727 782 728 map(get_cat_info,deflist) # apply the above function 729 for cat in cat_mand_dic.keys(): 730 cat_entry = self.get_ddl1_entry(cat) 731 self[cat_entry]["_category_mandatory.name"] = cat_mand_dic[cat] 732 for cat in cat_unique_dic.keys(): 733 cat_entry = self.get_ddl1_entry(cat) 734 self[cat_entry]["_category_key.name"] = cat_unique_dic[cat] 735 736 # A helper function get find the entry corresponding to a given category name: 737 # yes, in DDL1 the actual name is different in the category block due to the 738 # addition of square brackets which 
may or may not contain stuff. 739 740 def get_ddl1_entry(self,cat_name): 741 chop_len = len(cat_name) 742 possibles = filter(lambda a:a[1:chop_len+3]==cat_name+"_[",self.keys()) 743 if len(possibles) > 1 or possibles == []: 744 raise ValidCifError, "Category name %s can't be matched to category entry" % cat_name 745 else: 746 return possibles[0] 783 def items(self): 784 """Return (key,value) pairs""" 785 return list([(a,self[a]) for a in self.keys()]) 786 787 def unlock(self): 788 """Allow overwriting of all definitions in this collection""" 789 for a in self.keys(): 790 self[a].overwrite=True 791 792 def lock(self): 793 """Disallow changes in definitions""" 794 for a in self.keys(): 795 self[a].overwrite=False 796 797 def rename(self,oldname,newname,blockname_as_well=True): 798 """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True, 799 change the underlying blockname too.""" 800 if blockname_as_well: 801 super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname) 802 self.block_id_table[newname.lower()]=newname 803 if oldname.lower() in self.block_id_table: #not removed 804 del self.block_id_table[oldname.lower()] 805 else: 806 self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()] 807 del self.block_id_table[oldname.lower()] 808 return 809 810 def get_root_category(self): 811 """Get the single 'Head' category of this dictionary""" 812 root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head'] 813 if len(root_cats)>1 or len(root_cats)==0: 814 raise CifError("Cannot determine a unique Head category, got" % repr(root_cats)) 815 return root_cats[0] 816 817 def ddlm_immediate_children(self,catname): 818 """Return a list of datanames for the immediate children of catname. These are 819 semantic children (i.e. 
based on _name.category_id), not structural children as 820 in the case of StarFile.get_immediate_children""" 821 822 straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()] 823 return list(straight_children) 824 825 def ddlm_all_children(self,catname): 826 """Return a list of all children, including the `catname`""" 827 all_children = self.ddlm_immediate_children(catname) 828 cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category'] 829 for c in cat_children: 830 all_children.remove(c) 831 all_children += self.ddlm_all_children(c) 832 return all_children + [catname] 833 834 def is_semantic_child(self,parent,maybe_child): 835 """Return true if `maybe_child` is a child of `parent`""" 836 all_children = self.ddlm_all_children(parent) 837 return maybe_child in all_children 838 839 def ddlm_danglers(self): 840 """Return a list of definitions that do not have a category defined 841 for them, or are children of an unattached category""" 842 top_block = self.get_root_category() 843 connected = set(self.ddlm_all_children(top_block)) 844 all_keys = set(self.keys()) 845 unconnected = all_keys - connected 846 return list(unconnected) 847 848 def get_ddlm_parent(self,itemname): 849 """Get the parent category of itemname""" 850 parent = self[itemname].get('_name.category_id','') 851 if parent == '': # use the top block by default 852 raise CifError("%s has no parent" % itemname) 853 return parent 854 855 def expand_category_opt(self,name_list): 856 """Return a list of all non-category items in a category or return the name 857 if the name is not a category""" 858 new_list = [] 859 for name in name_list: 860 if self.get(name,{}).get('_definition.scope','Item') == 'Category': 861 new_list += self.expand_category_opt([a for a in self.keys() if \ 862 self[a].get('_name.category_id','').lower() == name.lower()]) 863 else: 864 new_list.append(name) 865 return new_list 866 867 def 
get_categories(self): 868 """Return a list of category names""" 869 return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category']) 870 871 def names_in_cat(self,cat,names_only=False): 872 names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()] 873 if not names_only: 874 return list([a for a in names if self[a].get('_definition.scope','Item')=='Item']) 875 else: 876 return list([self[a]["_name.object_id"] for a in names]) 877 878 879 880 def create_alias_table(self): 881 """Populate an alias table that we can look up when searching for a dataname""" 882 all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]] 883 self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases]) 884 885 def create_cat_obj_table(self): 886 """Populate a table indexed by (cat,obj) and returning the correct dataname""" 887 base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \ 888 for a in self.keys() if self[a].get('_definition.scope','Item')=='Item']) 889 loopable = self.get_loopable_cats() 890 loopers = [self.ddlm_immediate_children(a) for a in loopable] 891 print('Loopable cats:' + repr(loopable)) 892 loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers] 893 expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0]) 894 print("Expansion list:" + repr(expand_list)) 895 extra_table = {} #for debugging we keep it separate from base_table until the end 896 def expand_base_table(parent_cat,child_cats): 897 extra_names = [] 898 # first deal with all the child categories 899 for child_cat in child_cats: 900 nn = [] 901 if child_cat in expand_list: # a nested category: grab its names 902 nn = expand_base_table(child_cat,expand_list[child_cat]) 903 # store child names 904 extra_names += nn 905 # add all child names to the table 906 child_names = 
def create_cat_obj_table(self):
    """Populate self.cat_obj_lookup_table, indexed by (category, object)
    and returning a list of matching datanames.  Names defined in child
    loop categories are additionally registered against their parent
    looped category.  Also sets self.loop_expand_list, mapping each
    looped category to its loopable child categories."""
    base_table = dict([((self[a].get('_name.category_id','').lower(),
                         self[a].get('_name.object_id','').lower()),
                        [self[a].get('_definition.id','')])
                       for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
    loopable = self.get_loopable_cats()
    loopers = [self.ddlm_immediate_children(a) for a in loopable]
    print('Loopable cats:' + repr(loopable))
    loop_children = [[b for b in a if b.lower() in loopable] for a in loopers]
    expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
    print("Expansion list:" + repr(expand_list))
    extra_table = {}   #for debugging we keep it separate from base_table until the end
    def expand_base_table(parent_cat,child_cats):
        # Collect (object_id, dataname) pairs contributed by child_cats
        # (recursing into nested loopable categories) and record them in
        # extra_table against parent_cat.
        extra_names = []
        # first deal with all the child categories
        for child_cat in child_cats:
            nn = []
            if child_cat in expand_list:   # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
            # store child names
            extra_names += nn
            # add all child names to the table
            child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id'])
                           for n in self.names_in_cat(child_cat)
                           if self[n].get('_type.purpose','') != 'Key']
            child_names += extra_names
            # BUG FIX: extra_table is keyed by (category, object) pairs, so
            # membership must be tested with (parent_cat, obj).  The original
            # tested (parent_cat, name) for insertion and the raw (obj, name)
            # tuple for repeats, so repeats were never found and later names
            # silently replaced earlier ones.
            repeats = [(obj,name) for obj,name in child_names if (parent_cat,obj) in extra_table]
            extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names
                                     if (parent_cat,obj) not in extra_table]))
            # and the repeated ones get appended instead
            for obj,name in repeats:
                extra_table[(parent_cat,obj)] += [name]
        # and finally, add our own names to the return list
        child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id'])
                        for n in self.names_in_cat(parent_cat)
                        if self[n].get('_type.purpose','')!='Key']
        return child_names
    [expand_base_table(parent,child) for parent,child in expand_list.items()]
    print('Expansion cat/obj values: ' + repr(extra_table))
    # merge: new keys are added outright, keys already present in
    # base_table get the extra names appended
    non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
    repeats = [a for a in extra_table.keys() if a in base_table]
    base_table.update(non_repeats)
    for k in repeats:
        base_table[k] += extra_table[k]
    self.cat_obj_lookup_table = base_table
    self.loop_expand_list = expand_list

def get_loopable_cats(self):
    """Return lower-case names of all categories of _definition.class
    'Loop'.  Computed on demand so edits are always reflected."""
    return [k.lower() for k in self.keys() if self[k].get('_definition.class','')=='Loop']
def create_cat_key_table(self):
    """Build self.cat_key_table: for each loopable category, a list of
    compound keys, each of which is itself a list of datanames.  Keys of
    nested loopable child categories are appended to their parents'
    entries."""
    self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
                               [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
    def collect_keys(parent_cat,child_cats):
        # Gather the compound keys of child_cats (recursively) and append
        # them to parent_cat's entry; return them for the caller's benefit.
        kk = []
        for child_cat in child_cats:
            if child_cat in self.loop_expand_list:
                # BUG FIX: the recursive call previously passed only one
                # argument (collect_keys(child_cat)), raising TypeError as
                # soon as a nested loop category was encountered.
                kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
            # add these keys to our list
            kk += [listify(self[child_cat].get('_category_key.name',
                                               [self[child_cat].get('_category.key_id')]))]
        self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
        return kk
    for k,v in self.loop_expand_list.items():
        collect_keys(k,v)
    print('Keys for categories' + repr(self.cat_key_table))
def add_type_info(self):
    """Compile the regular expressions found in _item_type_list.construct
    of the master block into self.typedic (typecode -> compiled regex)
    and record each typecode's primitive type in self.primdic."""
    if "_item_type_list.construct" not in self.master_block:
        return
    types = self.master_block["_item_type_list.code"]
    prim_types = self.master_block["_item_type_list.primitive_code"]
    constructs = [one + "$" for one in self.master_block["_item_type_list.construct"]]
    # add in \r wherever we see \n, and change \{ to \\{
    def regex_fiddle(mm_regex):
        brack_match = r"((.*\[.+)(\\{)(.*\].*))"
        ret_match = r"((.*\[.+)(\\n)(.*\].*))"
        fixed_regexp = mm_regex[:]   #copy
        # fix the brackets
        bm = re.match(brack_match,mm_regex)
        if bm is not None:
            fixed_regexp = bm.expand(r"\2\\\\{\4")
        # fix missing \r
        rm = re.match(ret_match,fixed_regexp)
        if rm is not None:
            fixed_regexp = rm.expand(r"\2\3\\r\4")
        return fixed_regexp
    fiddled = [regex_fiddle(c) for c in constructs]
    for typecode,construct in zip(types,fiddled):
        self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
    # now make a primitive <-> type construct mapping
    for typecode,primtype in zip(types,prim_types):
        self.primdic[typecode] = primtype
def add_category_info(self,full=True):
    """For DDLm dictionaries, build self.parent_lookup (each looped
    category -> its topmost looped ancestor) and, when `full` is True,
    self.key_equivs (child key dataname -> list of equivalent parent key
    datanames).  Non-DDLm dictionaries get empty tables."""
    if self.diclang != "DDLm":
        self.parent_lookup = {}
        self.key_equivs = {}
        return
    catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
    looped_cats = [c for c in catblocks if self[c].get('_definition.class','Set') == 'Loop']
    self.parent_lookup = {}
    for one_cat in looped_cats:
        # climb the category tree while the ancestor is itself looped
        parent_cat = one_cat
        parent_def = self[parent_cat]
        next_up = parent_def['_name.category_id'].lower()
        while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
            parent_def = self[next_up]
            parent_cat = next_up
            next_up = parent_def['_name.category_id'].lower()
        self.parent_lookup[one_cat] = parent_cat
    if not full:
        return
    self.key_equivs = {}
    for one_cat in looped_cats:   #follow the key links upwards
        lower_keys = listify(self[one_cat]['_category_key.name'])
        start_keys = lower_keys[:]
        while len(lower_keys)>0:
            this_cat = self[lower_keys[0]]['_name.category_id']
            parent = [a for a in looped_cats
                      if self[this_cat]['_name.category_id'].lower()==a]
            if len(parent)>1:
                raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
            if len(parent)==0: break
            parent = parent[0]
            parent_keys = listify(self[parent]['_category_key.name'])
            linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
            # sanity check: the parent's keys must be exactly the targets
            # of our keys' _name.linked_item_id links
            if set(parent_keys) != set(linked_keys):
                raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
            # record the equivalences for our original (bottom-level) keys
            for parent_key,child_key in zip(linked_keys,start_keys):
                self.key_equivs[child_key] = self.key_equivs.get(child_key,[])+[parent_key]
            lower_keys = linked_keys   #preserves order of start keys
def change_category_name(self,oldname,newname):
    """Rename category `oldname` to `newname`, updating its
    _name.object_id/_definition.id and the ids of all immediate
    children.  Raises KeyError if `oldname` is absent or `newname`
    already exists."""
    # BUG FIX: validate before unlocking, so a failed rename does not
    # leave the dictionary unlocked.
    if oldname not in self:
        raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
    if newname in self:
        # BUG FIX: the original message interpolated `oldname` where the
        # pre-existing block `newname` was meant.
        raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
    self.unlock()
    child_defs = self.ddlm_immediate_children(oldname)
    self.rename(oldname,newname)   #NB no name integrity checks
    self[newname]['_name.object_id']=newname
    self[newname]['_definition.id']=newname
    for child_def in child_defs:
        self[child_def]['_name.category_id'] = newname
        if self[child_def].get('_definition.scope','Item')=='Item':
            newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
            self[child_def]['_definition.id']=newid
            self.rename(child_def,newid[1:])   #block names carry no leading underscore
    self.lock()

def create_catobj_name(self,cat,obj):
    """Combine category and object in the approved fashion ('_cat.obj')."""
    return ('_'+cat+'.'+obj)

def change_category(self,itemname,catname):
    """Move `itemname` into category `catname` and return the new handle
    (unchanged for categories, '_catname.obj' for items)."""
    defid = self[itemname]
    if defid['_name.category_id'].lower()==catname.lower():
        print('Already in category, no change')
        return itemname
    if catname not in self:   #don't have it
        print('No such category %s' % catname)
        return itemname
    self.unlock()
    objid = defid['_name.object_id']
    defid['_name.category_id'] = catname
    newid = itemname   # stays the same for categories
    if defid.get('_definition.scope','Item') == 'Item':
        newid = self.create_catobj_name(catname,objid)
        defid['_definition.id']= newid
        self.rename(itemname,newid)
    self.set_parent(catname,newid)
    self.lock()
    return newid

def change_name(self,one_def,newobj):
    """Change the object_id of `one_def` to `newobj`.  Not used for
    categories, but handles dictionary blocks (identified by a
    _dictionary.title entry), whose title is renamed instead."""
    if '_dictionary.title' not in self[one_def]:   #an ordinary definition
        newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
        self.unlock()
        self.rename(one_def,newid)
        self[newid]['_definition.id']=newid
        self[newid]['_name.object_id']=newobj
    else:   #a dictionary block
        self.unlock()
        newid = newobj
        self.rename(one_def,newobj)
        self[newid]['_dictionary.title'] = newid
    self.lock()
    return newid
# Note that our semantic parent is given by catparent, but our syntactic parent is
# always just the root block
def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
    """Add a new category `catname` to the dictionary.  With no
    `catparent` the category becomes a child of the topmost 'Head'
    category, or of the top data block when no Head exists.  `is_loop`
    False creates a Set category.  `allow_dangler` True permits a parent
    category that does not (yet) exist."""
    if catname in self:
        raise CifError('Attempt to add existing category %s' % catname)
    self.unlock()
    syntactic_root = self.get_roots()[0][0]
    if catparent is not None:
        semantic_root = catparent
    else:
        head_cats = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
        semantic_root = head_cats[0] if len(head_cats)>0 else syntactic_root
    realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
    self.block_id_table[catname.lower()]=realname
    new_def = self[catname]
    new_def['_name.object_id'] = catname
    if not allow_dangler or catparent is None:
        new_def['_name.category_id'] = self[semantic_root]['_name.object_id']
    else:
        new_def['_name.category_id'] = catparent
    new_def['_definition.id'] = catname
    new_def['_definition.scope'] = 'Category'
    new_def['_definition.class'] = 'Loop' if is_loop else 'Set'
    new_def['_description.text'] = 'No definition provided'
    self.lock()
    return catname
def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
    """Add a definition for `itemname` within category `catparent`.  Any
    text before the final period in `itemname` is ignored.  When
    `allow_dangler` is True, no check for the parent category is made."""
    self.unlock()
    if '.' in itemname:
        objname = itemname.split('.')[-1]
    else:
        objname = itemname
    objname = objname.strip('_')
    if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
        raise CifError('No category %s in dictionary' % catparent)
    fullname = '_'+catparent.lower()+'.'+objname
    print('New name: %s' % fullname)
    syntactic_root = self.get_roots()[0][0]
    realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
    # update our dictionary structures
    self.block_id_table[fullname]=realname
    self[fullname]['_definition.id']=fullname
    self[fullname]['_name.object_id']=objname
    self[fullname]['_name.category_id']=catparent
    self[fullname]['_definition.class']='Datum'
    self[fullname]['_description.text']=def_text
    # NOTE(review): the dictionary is left unlocked here (no matching
    # self.lock()) -- confirm whether this is intentional.

def remove_definition(self,defname):
    """Remove `defname` from the dictionary; for a category, all its
    immediate children are removed first (recursively)."""
    if defname not in self:
        return
    if self[defname].get('_definition.scope')=='Category':
        for child in self.ddlm_immediate_children(defname):
            self.remove_definition(child)
    # (dead local `cat_id` removed: its value was never used)
    del self[defname]

def get_cat_obj(self,name):
    """Return a (category, object) tuple for `name`, which must contain
    exactly one period; a leading underscore on the category is stripped."""
    cat,obj = name.split('.')
    return (cat.strip('_'),obj)
def get_name_by_cat_obj(self,category,object,give_default=False):
    """Return the dataname for (category, object).  With `give_default`
    True, fall back to the canonical '_cat.obj' construction instead of
    raising KeyError."""
    if category[0] == '_':   #accidentally left in
        true_cat = category[1:].lower()
    else:
        true_cat = category.lower()
    try:
        return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
    except KeyError:
        if give_default:
            return '_'+true_cat+'.'+object
        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))

def WriteOut(self,**kwargs):
    """Write out the dictionary with blocks ordered parent-child."""
    myblockorder = self.get_full_child_list()
    self.set_grammar(self.grammar)
    self.standard = 'Dic'
    return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

def get_full_child_list(self):
    """Return all definition blocks ordered
    parent-child-child-child-parent-child..., starting from the single
    'Head' category; unattached ('dangling') definitions are appended,
    grouped by category.  Raises ValueError without exactly one Head."""
    top_block = self.get_roots()[0][0]
    root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
    if len(root_cat) != 1:
        raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')
    all_names = [top_block] + self.recurse_child_list(root_cat[0])
    unrooted = self.ddlm_danglers()
    double_names = set(unrooted).intersection(set(all_names))
    if len(double_names)>0:
        raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
    remaining = unrooted[:]
    for orphan in unrooted:
        # dangling categories come out with their children immediately after
        if self[orphan].get('_definition.scope','Item')=='Category':
            all_names += [orphan]
            remaining.remove(orphan)
            orphan_children = [n for n in unrooted
                               if self[n]['_name.category_id'].lower()==orphan.lower()]
            all_names += orphan_children
            for n in orphan_children:
                remaining.remove(n)
    # now group whatever is left by (possibly external) category
    ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
    for ext_cat in ext_cats:
        cat_items = [r for r in remaining
                     if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == ext_cat]
        for n in cat_items:
            remaining.remove(n)
        all_names += cat_items
    if len(remaining)>0:
        print('WARNING: following items do not seem to belong to a category??')
        print(repr(remaining))
        all_names += remaining
    print('Final block order: ' + repr(all_names))
    return all_names

def cat_from_name(self,one_name):
    """Guess the category from a dataname.  Use only where the result is
    not semantically important, e.g. when printing out."""
    (cat,obj) = one_name.split(".")
    if cat[0] == "_": cat = cat[1:]
    return cat

def recurse_child_list(self,parentname):
    """Recursively expand the logical child list of `parentname`: items
    first (alphabetical), then each child category's subtree."""
    final_list = [parentname]
    child_blocks = [a for a in self.child_table.keys()
                    if self[a].get('_name.category_id','').lower() == parentname.lower()]
    child_blocks.sort()   #we love alphabetical order
    final_list += [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
    for child_cat in [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']:
        final_list += self.recurse_child_list(child_cat)
    return final_list

def get_key_pack(self,category,value,data):
    """Return the packet of `data` whose key dataname (as given by
    self.unique_spec for `category`) equals `value`."""
    keyname = self[category][self.unique_spec]
    return data.GetPackKey(keyname,value)
re.match(numb_re,numstring) 807 1241 if our_match: 808 1242 a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups() 809 # print "Debug: %s -> %s" % (numstring, `our_match.groups()`)1243 # print("Debug: {} -> {!r}".format(numstring, our_match.groups())) 810 1244 else: 811 1245 return None,None 812 1246 if dot or q: return None,None #a dot or question mark 813 if exp: #has exponent 814 exp = string.replace(exp,"d","e") # mop up old fashioned numbers815 exp = string.replace(exp,"D","e")1247 if exp: #has exponent 1248 exp = exp.replace("d","e") # mop up old fashioned numbers 1249 exp = exp.replace("D","e") 816 1250 base_num = base_num + exp 817 # print "Debug: have %s for base_num from %s" % (base_num,numstring)1251 # print("Debug: have %s for base_num from %s" % (base_num,numstring)) 818 1252 base_num = float(base_num) 819 1253 # work out esd, if present. … … 827 1261 828 1262 def getmaxmin(self,rangeexp): 829 regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*' 1263 regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*' 830 1264 regexp = regexp + ":" + regexp 831 1265 regexp = re.match(regexp,rangeexp) … … 834 1268 maximum = regexp.group(7) 835 1269 except AttributeError: 836 print "Can't match %s" % rangeexp837 if minimum == None: minimum = "." 1270 print("Can't match %s" % rangeexp) 1271 if minimum == None: minimum = "." 838 1272 else: minimum = float(minimum) 839 if maximum == None: maximum = "." 1273 if maximum == None: maximum = "." 
def initialise_drel(self):
    """Parse dREL functions and prepare data structures in dictionary."""
    self.ddlm_parse_valid()   #extract validity information from data block
    self.transform_drel()     #parse the drel functions
    self.add_drel_funcs()     #put the drel functions into the namespace

def transform_drel(self):
    """Translate each definition's dREL 'Evaluation' methods into Python
    source, storing the results as a list in _method.py_expression.
    Methods that fail to parse are reported and left untouched."""
    from .drel import drel_ast_yacc
    from .drel import py_from_ast
    import traceback
    parser = drel_ast_yacc.parser
    lexer = drel_ast_yacc.lexer
    my_namespace = self.keys()
    my_namespace = dict(zip(my_namespace,my_namespace))
    # a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...}
    loopable_cats = self.get_loopable_cats()
    loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
    loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
    cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
    loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
    derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                      and self[a].get("_name.category_id","")!= "function"]
    for derivable in derivable_list:
        target_id = derivable
        # reset the list of visible names for parser
        special_ids = [dict(zip(self.keys(),self.keys()))]
        print("Target id: %s" % derivable)
        drel_exprs = self[derivable]["_method.expression"]
        drel_purposes = self[derivable]["_method.purpose"]
        all_methods = []
        if not isinstance(drel_exprs,list):
            drel_exprs = [drel_exprs]
            drel_purposes = [drel_purposes]
        for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
            if drel_purpose != 'Evaluation':
                continue
            drel_expr = "\n".join(drel_expr.splitlines())
            try:
                meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
            except:
                print('Syntax error in method for %s; leaving as is' % derivable)
                a,b = sys.exc_info()[:2]
                print((repr(a),repr(b)))
                print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
                lexer.begin('INITIAL')   # reset the lexer
                continue
            # Construct the python method; category methods are flagged
            cat_meth = False
            if self[derivable].get('_definition.scope','Item') == 'Category':
                cat_meth = True
            pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                         loopable=loop_info,
                                                         cif_dic = self,cat_meth=cat_meth)
            all_methods.append(pyth_meth)
        if len(all_methods)>0:
            # temporarily allow overwriting to store the compiled methods
            save_overwrite = self[derivable].overwrite
            self[derivable].overwrite = True
            self[derivable]["_method.py_expression"] = all_methods
            self[derivable].overwrite = save_overwrite

def add_drel_funcs(self):
    """Compile each definition in the 'function' category into Python and
    execute it into the module namespace; record the namespace in
    self.ddlm_functions for outside access."""
    from .drel import drel_ast_yacc
    from .drel import py_from_ast
    funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
    funcnames = [(self[a]["_name.object_id"],
                  getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),
                          "_method.expression")) for a in funclist]
    # create executable python code...
    parser = drel_ast_yacc.parser
    # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...}
    loopable_cats = self.get_loopable_cats()
    loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
    loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
    cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
    loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
    for funcname,funcbody in funcnames:
        newline_body = "\n".join(funcbody.splitlines())
        parser.target_id = funcname
        res_ast = parser.parse(newline_body)
        py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,
                                                       func_def=True,loopable=loop_info,cif_dic = self)
        global_table = globals()
        exec(py_function, global_table)   #add to namespace
    self.ddlm_functions = globals()   #for outside access
@track_recursion
def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
    """Derive a value for dataname `start_key` from `cifdata`, trying in
    order: aliases already present in the data, category construction
    (DDLm pull-backs / category methods), the definition's compiled dREL
    methods, and finally enumeration defaults (when `allow_defaults`).
    Raises StarFile.StarDerivationError when nothing works.  With
    `store_value` True the result is also written back into `cifdata`."""
    key = start_key        #starting value
    result = None          #success is a non-None value
    default_result = False #we have not used a default value
    # check for aliases: an older form of a new value already in the data
    found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
    if len(found_it)>0:
        corrected_type = self.change_type(key,cifdata[found_it[0]])
        return corrected_type
    # now do the reverse check - any alternative form
    alias_name = [a for a in self.alias_table.items() if key in a[1]]
    print('Aliases for %s: %s' % (key,repr(alias_name)))
    if len(alias_name)==1:
        key = alias_name[0][0]   #actual definition name
        if key in cifdata: return self.change_type(key,cifdata[key])
        found_it = [k for k in alias_name[0][1] if k in cifdata]
        if len(found_it)>0:
            return self.change_type(key,cifdata[found_it[0]])
    elif len(alias_name)>1:
        raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))

    the_category = self[key]["_name.category_id"]
    cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
    has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
    # store any default value in case we have a problem
    def_val = self[key].get("_enumeration.default","")
    def_index_val = self[key].get("_enumeration.def_index_id","")
    if len(has_cat_names)==0:   # try category construction
        cat_result = {}
        pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
        pulled_from_cats = [(k,[
            self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
            ) for k in pulled_from_cats]
        pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
        if '_category_construct_local.type' in self[the_category]:
            print("**Now constructing category %s using DDLm attributes**" % the_category)
            try:
                cat_result = self.construct_category(the_category,cifdata,store_value=True)
            except (CifRecursionError,StarFile.StarDerivationError):
                print('** Failed to construct category %s (error)' % the_category)
        # Trying a pull-back when the category is partially populated
        # will not work, hence we test that cat_result has no keys
        if len(pulled_to_cats)>0 and len(cat_result)==0:
            print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
            try:
                cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
            except (CifRecursionError,StarFile.StarDerivationError):
                print('** Failed to construct category %s from pullback information (error)' % the_category)
        if '_method.py_expression' in self[the_category] and key not in cat_result:
            print("**Now applying category method for %s in search of %s**" % (the_category,key))
            cat_result = self.derive_item(the_category,cifdata,store_value=True)
        print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
        # do we now have our value?
        if key in cat_result:
            return cat_result[key]

    # Recalculate in case it actually worked
    has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
    the_funcs = self[key].get('_method.py_expression',"")
    if the_funcs:   #attempt to calculate it
        for one_func in the_funcs:
            print('Executing function for %s:' % key)
            exec(one_func, globals())   #will access dREL functions, puts "pyfunc" in scope
            stored_setting = cifdata.provide_value
            cifdata.provide_value = True
            try:
                result = pyfunc(cifdata)
            except CifRecursionError as s:
                print(s)
                result = None
            except StarFile.StarDerivationError as s:
                print(s)
                result = None
            finally:
                cifdata.provide_value = stored_setting
            if result is not None:
                break

    if result is None and allow_defaults:   # try defaults
        if def_val:
            result = self.change_type(key,def_val)
            default_result = True
        elif def_index_val:   #derive a default value via an index dataname
            index_vals = self[key]["_enumeration_default.index"]
            val_to_index = cifdata[def_index_val]   #what we are keying on
            # BUG FIX: lcase_comp was previously only assigned inside the
            # following branch but read unconditionally below, raising
            # NameError for non-Code/Name/Tag content types.
            lcase_comp = False
            if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
                lcase_comp = True
                index_vals = [a.lower() for a in index_vals]
            # Handle loops
            if isinstance(val_to_index,list):
                if lcase_comp:
                    val_to_index = [a.lower() for a in val_to_index]
                keypos = [index_vals.index(a) for a in val_to_index]
                result = [self[key]["_enumeration_default.value"][a] for a in keypos]
            else:
                if lcase_comp:
                    val_to_index = val_to_index.lower()
                keypos = index_vals.index(val_to_index)   #value error if no such value available
                result = self[key]["_enumeration_default.value"][keypos]
            default_result = True   #flag that it must be extended
            result = self.change_type(key,result)
            print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))

    if result is None:   #can't do anything else
        print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
        raise StarFile.StarDerivationError(start_key)
    is_looped = False
    if self[the_category].get('_definition.class','Set')=='Loop':
        is_looped = True
        if len(has_cat_names)>0:   #this category already exists
            if result is None or default_result:   #need to create a list of values
                loop_len = len(cifdata[has_cat_names[0]])
                out_result = [result]*loop_len
                result = out_result
        else:   #nothing exists in this category, we can't store this at all
            print('Resetting result %s for %s to null list as category is empty' % (key,result))
            result = []

    # now try to insert the new information into the right place;
    # never cache empty values
    if not (isinstance(result,list) and len(result)==0) and\
      store_value:
        if self[key].get("_definition.scope","Item")=='Item':
            if is_looped:
                result = self.store_new_looped_value(key,cifdata,result,default_result)
            else:
                result = self.store_new_unlooped_value(key,cifdata,result)
        else:
            self.store_new_cat_values(cifdata,result,the_category)
    return result
to lists 1510 the_category = self[key]["_name.category_id"] 1511 out_result = result 1512 if result is not None and not default_result: 1513 # find any numpy arrays 1514 def conv_from_numpy(one_elem): 1515 if not hasattr(one_elem,'dtype'): 1516 if isinstance(one_elem,(list,tuple)): 1517 return StarFile.StarList([conv_from_numpy(a) for a in one_elem]) 1518 return one_elem 1519 if one_elem.size > 1: #so is not a float 1520 return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()]) 1521 else: 1522 try: 1523 return one_elem.item(0) 1524 except: 1525 return one_elem 1526 out_result = [conv_from_numpy(a) for a in result] 1527 # so out_result now contains a value suitable for storage 1528 cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] 1529 has_cat_names = [a for a in cat_names if a in cifdata] 1530 print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names)) 1531 if len(has_cat_names)>0: #this category already exists 1532 cifdata[key] = out_result #lengths must match or else!! 
1533 cifdata.AddLoopName(has_cat_names[0],key) 1534 else: 1535 cifdata[key] = out_result 1536 cifdata.CreateLoop([key]) 1537 print('Loop info:' + repr(cifdata.loops)) 1538 return out_result 1539 1540 def store_new_unlooped_value(self,key,cifdata,result): 1541 """Store a single value from the dREL system""" 1542 if result is not None and hasattr(result,'dtype'): 1543 if result.size > 1: 1544 out_result = StarFile.StarList(result.tolist()) 1545 cifdata[key] = out_result 1546 else: 1547 cifdata[key] = result.item(0) 1548 else: 1549 cifdata[key] = result 1550 return result 1551 1552 def construct_category(self,category,cifdata,store_value=True): 1553 """Construct a category using DDLm attributes""" 1554 con_type = self[category].get('_category_construct_local.type',None) 1555 if con_type == None: 1556 return {} 1557 if con_type == 'Pullback' or con_type == 'Filter': 1558 morphisms = self[category]['_category_construct_local.components'] 1559 morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat 1560 cats = [self[a]['_name.category_id'] for a in morphisms] 1561 cat_keys = [self[a]['_category.key_id'] for a in cats] 1562 cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat 1563 if con_type == 'Filter': 1564 int_filter = self[category].get('_category_construct_local.integer_filter',None) 1565 text_filter = self[category].get('_category_construct_local.text_filter',None) 1566 if int_filter is not None: 1567 morph_values.append([int(a) for a in int_filter]) 1568 if text_filter is not None: 1569 morph_values.append(text_filter) 1570 cat_values.append(range(len(morph_values[-1]))) 1571 # create the mathematical product filtered by equality of dataname values 1572 pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \ 1573 if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]] 1574 # now prepare for return 1575 if len(pullback_ids)==0: 1576 return {} 1577 newids = 
self[category]['_category_construct_local.new_ids'] 1578 fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids] 1579 if con_type == 'Pullback': 1580 final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]} 1581 final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) 1582 final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids)) 1583 elif con_type == 'Filter': #simple filter 1584 final_results = {fullnewids[0]:[x[0] for x in pullback_ids]} 1585 final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) 1586 if store_value: 1587 self.store_new_cat_values(cifdata,final_results,category) 1588 return final_results 1589 1590 def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True): 1591 """Each of the categories in source_categories are pullbacks that include 1592 the target_category""" 1593 target_key = self[target_category]['_category.key_id'] 1594 result = {target_key:[]} 1595 first_time = True 1596 # for each source category, determine which element goes to the target 1597 for sc in source_categories: 1598 components = self[sc]['_category_construct_local.components'] 1599 comp_cats = [self[c]['_name.category_id'] for c in components] 1600 new_ids = self[sc]['_category_construct_local.new_ids'] 1601 source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids] 1602 if len(components) == 2: # not a filter 1603 element_pos = comp_cats.index(target_category) 1604 old_id = source_ids[element_pos] 1605 print('Using %s to populate %s' % (old_id,target_key)) 1606 result[target_key].extend(cifdata[old_id]) 1607 # project through all identical names 1608 extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key]) 1609 # 
we only include keys that are common to all categories 1610 if first_time: 1611 result.update(extra_result) 1612 else: 1613 for k in extra_result.keys(): 1614 if k in result: 1615 print('Updating %s: was %s' % (k,repr(result[k]))) 1616 result[k].extend(extra_result[k]) 1617 else: 1618 extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids) 1619 if len(extra_result)>0 or source_ids[0] in cifdata: #something is present 1620 result[target_key].extend(cifdata[source_ids[0]]) 1621 for k in extra_result.keys(): 1622 if k in result: 1623 print('Reverse filter: Updating %s: was %s' % (k,repr(result[k]))) 1624 result[k].extend(extra_result[k]) 1625 else: 1626 result[k]=extra_result[k] 1627 # Bonus derivation if there is a singleton filter 1628 if self[sc]['_category_construct_local.type'] == 'Filter': 1629 int_filter = self[sc].get('_category_construct_local.integer_filter',None) 1630 text_filter = self[sc].get('_category_construct_local.text_filter',None) 1631 if int_filter is not None: 1632 filter_values = int_filter 1633 else: 1634 filter_values = text_filter 1635 if len(filter_values)==1: #a singleton 1636 extra_dataname = self[sc]['_category_construct_local.components'][0] 1637 if int_filter is not None: 1638 new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]]) 1639 else: 1640 new_value = filter_values * len(cifdata[source_ids[0]]) 1641 if extra_dataname not in result: 1642 result[extra_dataname] = new_value 1643 else: 1644 result[extra_dataname].extend(new_value) 1645 else: 1646 raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type']) 1647 first_time = False 1648 # check for sanity - all dataname lengths must be identical 1649 datalen = len(set([len(a) for a in result.values()])) 1650 if datalen != 1: 1651 raise AssertionError('Failed to construct equal-length category items,'+ repr(result)) 1652 if store_value: 1653 print('Now storing ' + repr(result)) 1654 
self.store_new_cat_values(cifdata,result,target_category) 1655 return result 1656 1657 def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]): 1658 """Copy across datanames for which the from_category key equals [[key_vals]]""" 1659 result = {} 1660 s_names_in_cat = set(self.names_in_cat(from_category,names_only=True)) 1661 t_names_in_cat = set(self.names_in_cat(to_category,names_only=True)) 1662 can_project = s_names_in_cat & t_names_in_cat 1663 can_project -= set(skip_names) #already dealt with 1664 source_key = self[from_category]['_category.key_id'] 1665 print('Source dataname set: ' + repr(s_names_in_cat)) 1666 print('Target dataname set: ' + repr(t_names_in_cat)) 1667 print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project)) 1668 for project_name in can_project: 1669 full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0] 1670 full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0] 1671 if key_vals is None: 1672 try: 1673 result[full_to_name] = cifdata[full_from_name] 1674 except StarFile.StarDerivationError: 1675 pass 1676 else: 1677 all_key_vals = cifdata[source_key] 1678 filter_pos = [all_key_vals.index(a) for a in key_vals] 1679 try: 1680 all_data_vals = cifdata[full_from_name] 1681 except StarFile.StarDerivationError: 1682 pass 1683 result[full_to_name] = [all_data_vals[i] for i in filter_pos] 1684 return result 1685 1686 def store_new_cat_values(self,cifdata,result,the_category): 1687 """Store the values in [[result]] into [[cifdata]]""" 1688 the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key'] 1689 double_names = [a for a in result.keys() if a in cifdata] 1690 if len(double_names)>0: 1691 already_present = [a for a in self.names_in_cat(the_category) if a in cifdata] 1692 if set(already_present) != set(result.keys()): 1693 print("Category %s not updated, mismatched 
datanames: %s" % (the_category, repr(set(already_present)^set(result.keys())))) 1694 return 1695 #check key values 1696 old_keys = set(cifdata[the_key]) 1697 common_keys = old_keys & set(result[the_key]) 1698 if len(common_keys)>0: 1699 print("Category %s not updated, key values in common:" % (common_keys)) 1700 return 1701 #extend result values with old values 1702 for one_name,one_value in result.items(): 1703 result[one_name].extend(cifdata[one_name]) 1704 for one_name, one_value in result.items(): 1705 try: 1706 self.store_new_looped_value(one_name,cifdata,one_value,False) 1707 except StarFile.StarError: 1708 print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value))) 1709 #put the key as the first item 1710 print('Fixing item order for {}'.format(repr(the_key))) 1711 for one_key in the_key: #should only be one 1712 cifdata.ChangeItemOrder(one_key,0) 1713 1714 1715 def generate_default_packet(self,catname,catkey,keyvalue): 1716 """Return a StarPacket with items from ``catname`` and a key value 1717 of ``keyvalue``""" 1718 newpack = StarPacket() 1719 for na in self.names_in_cat(catname): 1720 def_val = self[na].get("_enumeration.default","") 1721 if def_val: 1722 final_val = self.change_type(na,def_val) 1723 newpack.extend(final_val) 1724 setattr(newpack,na,final_val) 1725 if len(newpack)>0: 1726 newpack.extend(keyvalue) 1727 setattr(newpack,catkey,keyvalue) 1728 return newpack 1729 1730 1731 def switch_numpy(self,to_val): 1732 pass 941 1733 942 1734 def change_type(self,itemname,inval): 943 1735 import numpy 944 # we need to iterate over the structure description. 
For now we deal only with 945 # Single and Array containers, with types that are a simple specification 946 item_type = self[itemname]["_type.contents"] 947 item_container = self[itemname]["_type.container"] 948 isnumeric = (item_type == "Real" or \ 949 item_type == "Float" or \ 950 item_type == "Count" or \ 951 item_type == "Integer" or \ 952 item_type == "Digit") 953 if not isnumeric: return inval # we don't attempt any changes 954 # even for a 'Single' container, it may be looped 955 # print 'Changing type for %s' % `inval` 956 if StarFile.get_dim(inval)[0] == 0: 957 if item_container == 'Single': return float_with_esd(inval) 958 if item_container == 'Array': 959 return self.recursive_numerify(inval) 960 else: 961 if item_container == 'Single': return map(float_with_esd,inval) 962 if item_container == 'Array': return map(self.recursive_numerify,inval) 963 964 # A utility function to recursively make all atomic values numeric 965 # All embedded values will be either StarTuples or StarLists 966 def normal_numerify(self,valarray): 967 # print 'Recursive evaluation of %s' % `valarray` 968 if isinstance(valarray,StarFile.StarTuple): 969 return StarFile.StarTuple(map(self.recursive_numerify,valarray)) 970 if isinstance(valarray,StarFile.StarList): 971 return StarFile.StarList(map(self.recursive_numerify,valarray)) 972 if isinstance(valarray,(StringType,IntType,LongType)): 973 return float_with_esd(valarray) 974 else: 975 return valarray #assume is OK 976 977 # Identical to the above except that a numpy array is returned. We 978 # do the normal_numerify call in order to perform the float conversion. 
979 # 980 def numpy_numerify(self,valarray): 981 import numpy 982 return numpy.array(self.normal_numerify(valarray)) 1736 if inval == "?": return inval 1737 change_function = convert_type(self[itemname]) 1738 if isinstance(inval,list) and not isinstance(inval,StarFile.StarList): #from a loop 1739 newval = list([change_function(a) for a in inval]) 1740 else: 1741 newval = change_function(inval) 1742 return newval 1743 1744 def install_validation_functions(self): 1745 """Install the DDL-appropriate validation checks""" 1746 if self.diclang != 'DDLm': 1747 self.item_validation_funs = [ 1748 self.validate_item_type, 1749 self.validate_item_esd, 1750 self.validate_item_enum, # functions which check conformance 1751 self.validate_enum_range, 1752 self.validate_looping] 1753 self.loop_validation_funs = [ 1754 self.validate_loop_membership, 1755 self.validate_loop_key, 1756 self.validate_loop_references] # functions checking loop values 1757 self.global_validation_funs = [ 1758 self.validate_exclusion, 1759 self.validate_parent, 1760 self.validate_child, 1761 self.validate_dependents, 1762 self.validate_uniqueness] # where we need to look at other values 1763 self.block_validation_funs = [ # where only a full block will do 1764 self.validate_mandatory_category] 1765 self.global_remove_validation_funs = [ 1766 self.validate_remove_parent_child] # removal is quicker with special checks 1767 elif self.diclang == 'DDLm': 1768 self.item_validation_funs = [ 1769 self.validate_item_enum, 1770 self.validate_item_esd_ddlm, 1771 ] 1772 self.loop_validation_funs = [ 1773 self.validate_looping_ddlm, 1774 self.validate_loop_key_ddlm, 1775 self.validate_loop_membership 1776 ] 1777 self.global_validation_funs = [] 1778 self.block_validation_funs = [ 1779 self.check_mandatory_items, 1780 self.check_prohibited_items 1781 ] 1782 self.global_remove_validation_funs = [] 1783 self.optimize = False # default value 1784 self.done_parents = [] 1785 self.done_children = [] 1786 self.done_keys = [] 
983 1787 984 1788 def validate_item_type(self,item_name,item_value): 985 def mymatch(m,a): 1789 def mymatch(m,a): 986 1790 res = m.match(a) 987 if res != None: return res.group() 1791 if res != None: return res.group() 988 1792 else: return "" 989 target_type = self[item_name].get(self.type_spec) 1793 target_type = self[item_name].get(self.type_spec) 990 1794 if target_type == None: # e.g. a category definition 991 1795 return {"result":True} # not restricted in any way … … 993 1797 item_values = listify(item_value) 994 1798 #for item in item_values: 995 #print "Type match " + item_name + " " + item + ":",1799 #print("Type match " + item_name + " " + item + ":",) 996 1800 #skip dots and question marks 997 check_all = filter(lambda a: a !="." and a != "?",item_values)998 check_all = filter(lambda a: mymatch(matchexpr,a) != a, check_all)1801 check_all = [a for a in item_values if a !="." and a != "?"] 1802 check_all = [a for a in check_all if mymatch(matchexpr,a) != a] 999 1803 if len(check_all)>0: return {"result":False,"bad_values":check_all} 1000 1804 else: return {"result":True} 1805 1806 def decide(self,result_list): 1807 """Construct the return list""" 1808 if len(result_list)==0: 1809 return {"result":True} 1810 else: 1811 return {"result":False,"bad_values":result_list} 1812 1813 def validate_item_container(self, item_name,item_value): 1814 container_type = self[item_name]['_type.container'] 1815 item_values = listify(item_value) 1816 if container_type == 'Single': 1817 okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))] 1818 return decide(okcheck) 1819 if container_type in ('Multiple','List'): 1820 okcheck = [a for a in item_values if not isinstance(a,StarList)] 1821 return decide(okcheck) 1822 if container_type == 'Array': #A list with numerical values 1823 okcheck = [a for a in item_values if not isinstance(a,StarList)] 1824 first_check = decide(okcheck) 1825 if not first_check['result']: return first_check 1826 #num_check = [a 
for a in item_values if len([b for b in a if not isinstance 1001 1827 1002 1828 def validate_item_esd(self,item_name,item_value): 1003 1829 if self[item_name].get(self.primitive_type) != 'numb': 1004 1830 return {"result":None} 1005 can_esd = self[item_name].get(self.esd_spec,"none") == "esd" 1831 can_esd = self[item_name].get(self.esd_spec,"none") == "esd" 1006 1832 if can_esd: return {"result":True} #must be OK! 1007 1833 item_values = listify(item_value) 1008 check_all = filter(lambda a: get_number_with_esd(a)[1] != None, item_values)1834 check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None]) 1009 1835 if len(check_all)>0: return {"result":False,"bad_values":check_all} 1010 1836 return {"result":True} 1011 1837 1838 def validate_item_esd_ddlm(self,item_name,item_value): 1839 if self[item_name].get('self.primitive_type') not in \ 1840 ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']: 1841 return {"result":None} 1842 can_esd = True 1843 if self[item_name].get('_type.purpose') != 'Measurand': 1844 can_esd = False 1845 item_values = listify(item_value) 1846 check_all = [get_number_with_esd(a)[1] for a in item_values] 1847 check_all = [v for v in check_all if (can_esd and v == None) or \ 1848 (not can_esd and v != None)] 1849 if len(check_all)>0: return {"result":False,"bad_values":check_all} 1850 return {"result":True} 1851 1012 1852 def validate_enum_range(self,item_name,item_value): 1013 if not self[item_name].has_key("_item_range.minimum")and \1014 not self[item_name].has_key("_item_range.maximum"):1853 if "_item_range.minimum" not in self[item_name] and \ 1854 "_item_range.maximum" not in self[item_name]: 1015 1855 return {"result":None} 1016 1856 minvals = self[item_name].get("_item_range.minimum",default = ["."]) … … 1021 1861 maxvals = map(makefloat, maxvals) 1022 1862 minvals = map(makefloat, minvals) 1023 rangelist = map(None,minvals,maxvals)1863 rangelist = list(zip(minvals,maxvals)) 1024 1864 
item_values = listify(item_value) 1025 1865 def map_check(rangelist,item_value): … … 1034 1874 if upper == lower and iv == upper: return True 1035 1875 # debug 1036 # print "Value %s fails range check %d < x < %d" % (item_value,lower,upper)1876 # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper)) 1037 1877 return False 1038 check_all = filter(lambda a,b=rangelist: map_check(b,a) != True, item_values)1878 check_all = [a for a in item_values if map_check(rangelist,a) != True] 1039 1879 if len(check_all)>0: return {"result":False,"bad_values":check_all} 1040 1880 else: return {"result":True} 1041 1881 1042 1882 def validate_item_enum(self,item_name,item_value): 1043 try: 1883 try: 1044 1884 enum_list = self[item_name][self.enum_spec][:] 1045 1885 except KeyError: … … 1048 1888 enum_list.append("?") #unknown 1049 1889 item_values = listify(item_value) 1050 #print "Enum check: %s in %s" % (`item_values`,`enum_list`)1051 check_all = filter(lambda a: a not in enum_list,item_values)1890 #print("Enum check: {!r} in {!r}".format(item_values, enum_list)) 1891 check_all = [a for a in item_values if a not in enum_list] 1052 1892 if len(check_all)>0: return {"result":False,"bad_values":check_all} 1053 1893 else: return {"result":True} … … 1058 1898 except KeyError: 1059 1899 return {"result":None} 1060 if must_loop == 'yes' and isinstance(item_value, StringType): # not looped1900 if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped 1061 1901 return {"result":False} #this could be triggered 1062 if must_loop == 'no' and not isinstance(item_value, StringType):1902 if must_loop == 'no' and not isinstance(item_value,(unicode,str)): 1063 1903 return {"result":False} 1064 1904 return {"result":True} 1065 1905 1906 def validate_looping_ddlm(self,loop_names): 1907 """Check that all names are loopable""" 1908 truly_loopy = self.get_final_cats(loop_names) 1909 if len(truly_loopy)<len(loop_names): #some are bad 1910 categories = 
[(a,self[a][self.cat_spec].lower()) for a in loop_names] 1911 not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()] 1912 return {"result":False,"bad_items":not_looped} 1913 return {"result":True} 1914 1066 1915 1067 1916 def validate_loop_membership(self,loop_names): 1068 try: 1069 categories = map(lambda a:self[a][self.cat_spec],loop_names) 1070 except KeyError: #category is mandatory 1071 raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0])) 1072 bad_items = filter(lambda a:a != categories[0],categories) 1917 final_cat = self.get_final_cats(loop_names) 1918 bad_items = [a for a in final_cat if a != final_cat[0]] 1073 1919 if len(bad_items)>0: 1074 1920 return {"result":False,"bad_items":bad_items} 1075 1921 else: return {"result":True} 1076 1922 1923 def get_final_cats(self,loop_names): 1924 """Return a list of the uppermost parent categories for the loop_names. Names 1925 that are not from loopable categories are ignored.""" 1926 try: 1927 categories = [self[a][self.cat_spec].lower() for a in loop_names] 1928 except KeyError: #category is mandatory 1929 raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0])) 1930 truly_looped = [a for a in categories if a in self.parent_lookup.keys()] 1931 return [self.parent_lookup[a] for a in truly_looped] 1932 1077 1933 def validate_loop_key(self,loop_names): 1078 1934 category = self[loop_names[0]][self.cat_spec] 1079 1935 # find any unique values which must be present 1080 entry_name = self.cat_map[category] 1081 key_spec = self[entry_name].get("_category_mandatory.name",[]) 1936 key_spec = self[category].get(self.key_spec,[]) 1082 1937 for names_to_check in key_spec: 1083 if isinstance(names_to_check, StringType): #only one1938 if isinstance(names_to_check,unicode): #only one 1084 1939 names_to_check = [names_to_check] 1085 1940 for loop_key in 
names_to_check: 1086 if loop_key not in loop_names: 1941 if loop_key not in loop_names: 1087 1942 #is this one of those dang implicit items? 1088 1943 if self[loop_key].get(self.must_exist_spec,None) == "implicit": 1089 1944 continue #it is virtually there... 1090 1945 alternates = self.get_alternates(loop_key) 1091 if alternates == []: 1946 if alternates == []: 1092 1947 return {"result":False,"bad_items":loop_key} 1093 1948 for alt_names in alternates: 1094 alt = filter(lambda a:a in loop_names,alt_names)1095 if len(alt) == 0: 1096 return {"result":False,"bad_items":loop_key} # no alternates 1949 alt = [a for a in alt_names if a in loop_names] 1950 if len(alt) == 0: 1951 return {"result":False,"bad_items":loop_key} # no alternates 1097 1952 return {"result":True} 1098 1953 1954 def validate_loop_key_ddlm(self,loop_names): 1955 """Make sure at least one of the necessary keys are available""" 1956 final_cats = self.get_final_cats(loop_names) 1957 if len(final_cats)>0: 1958 poss_keys = self.cat_key_table[final_cats[0]] 1959 found_keys = [a for a in poss_keys if a in loop_names] 1960 if len(found_keys)>0: 1961 return {"result":True} 1962 else: 1963 return {"result":False,"bad_items":poss_keys} 1964 else: 1965 return {"result":True} 1966 1099 1967 def validate_loop_references(self,loop_names): 1100 must_haves = map(lambda a:self[a].get(self.list_ref_spec,None),loop_names)1101 must_haves = filter(lambda a:a != None,must_haves)1968 must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names] 1969 must_haves = [a for a in must_haves if a != None] 1102 1970 # build a flat list. For efficiency we don't remove duplicates,as 1103 1971 # we expect no more than the order of 10 or 20 looped names. 
1104 def flat_func(a,b): 1105 if isinstance(b, StringType):1972 def flat_func(a,b): 1973 if isinstance(b,unicode): 1106 1974 a.append(b) #single name 1107 1975 else: 1108 1976 a.extend(b) #list of names 1109 1977 return a 1110 flat_mh = reduce(flat_func,must_haves,[]) 1978 flat_mh = [] 1979 [flat_func(flat_mh,a) for a in must_haves] 1111 1980 group_mh = filter(lambda a:a[-1]=="_",flat_mh) 1112 1981 single_mh = filter(lambda a:a[-1]!="_",flat_mh) 1113 res = filter(lambda a: a not in loop_names,single_mh)1982 res = [a for a in single_mh if a not in loop_names] 1114 1983 def check_gr(s_item, name_list): 1115 1984 nl = map(lambda a:a[:len(s_item)],name_list) 1116 1985 if s_item in nl: return True 1117 1986 return False 1118 res_g = filter(lambda a:check_gr(a,loop_names),group_mh)1987 res_g = [a for a in group_mh if check_gr(a,loop_names)] 1119 1988 if len(res) == 0 and len(res_g) == 0: return {"result":True} 1120 1989 # construct alternate list 1121 1990 alternates = map(lambda a: (a,self.get_alternates(a)),res) 1122 alternates = filter(lambda a:a[1] != [], alternates) 1123 # next two lines purely for error reporting 1124 missing_alts = filter(lambda a: a[1] == [], alternates) 1125 missing_alts = map(lambda a:a[0],missing_alts) 1126 if len(alternates) != len(res): 1991 alternates = [a for a in alternates if a[1] != []] 1992 # next line purely for error reporting 1993 missing_alts = [a[0] for a in alternates if a[1] == []] 1994 if len(alternates) != len(res): 1127 1995 return {"result":False,"bad_items":missing_alts} #short cut; at least one 1128 1996 #doesn't have an altern 1129 1997 #loop over alternates 1130 1998 for orig_name,alt_names in alternates: 1131 alt = filter(lambda a:a in loop_names,alt_names)1132 if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates 1999 alt = [a for a in alt_names if a in loop_names] 2000 if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates 1133 2001 return {"result":True} #found 
alternates 1134 2002 1135 2003 def get_alternates(self,main_name,exclusive_only=False): 1136 2004 alternates = self[main_name].get(self.related_func,None) 1137 2005 alt_names = [] 1138 if alternates != None: 2006 if alternates != None: 1139 2007 alt_names = self[main_name].get(self.related_item,None) 1140 if isinstance(alt_names, StringType):2008 if isinstance(alt_names,unicode): 1141 2009 alt_names = [alt_names] 1142 2010 alternates = [alternates] 1143 together = map(None,alt_names,alternates)2011 together = zip(alt_names,alternates) 1144 2012 if exclusive_only: 1145 alt_names = filter(lambda a:a[1]=="alternate_exclusive" \1146 or a[1]=="replace" , together)2013 alt_names = [a for a in together if a[1]=="alternate_exclusive" \ 2014 or a[1]=="replace"] 1147 2015 else: 1148 alt_names = filter(lambda a:a[1]=="alternate" or a[1]=="replace",together)1149 alt_names = map(lambda a:a[0],alt_names)2016 alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"] 2017 alt_names = list([a[0] for a in alt_names]) 1150 2018 # now do the alias thing 1151 2019 alias_names = listify(self[main_name].get("_item_aliases.alias_name",[])) 1152 2020 alt_names.extend(alias_names) 1153 # print "Alternates for %s: %s" % (main_name,`alt_names`)2021 # print("Alternates for {}: {!r}".format(main_name, alt_names)) 1154 2022 return alt_names 1155 2023 1156 2024 1157 2025 def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}): 1158 alternates = map(lambda a:a.lower(),self.get_alternates(item_name,exclusive_only=True)) 1159 item_name_list = map(lambda a:a.lower(),whole_block.keys()) 1160 item_name_list.extend(map(lambda a:a.lower(),provisional_items.keys())) 1161 item_name_list.extend(map(lambda a:a.lower(),globals.keys())) 1162 bad = filter(lambda a:a in item_name_list,alternates) 2026 alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)] 2027 item_name_list = [a.lower() for a in whole_block.keys()] 2028 
item_name_list.extend([a.lower() for a in provisional_items.keys()]) 2029 bad = [a for a in alternates if a in item_name_list] 1163 2030 if len(bad)>0: 1164 print "Bad: %s, alternates %s" % (`bad`,`alternates`)2031 print("Bad: %s, alternates %s" % (repr(bad),repr(alternates))) 1165 2032 return {"result":False,"bad_items":bad} 1166 2033 else: return {"result":True} … … 1170 2037 parent_item = self[item_name].get(self.parent_spec) 1171 2038 if not parent_item: return {"result":None} #no parent specified 1172 if isinstance(parent_item, ListType):2039 if isinstance(parent_item,list): 1173 2040 parent_item = parent_item[0] 1174 2041 if self.optimize: 1175 2042 if parent_item in self.done_parents: 1176 2043 return {"result":None} 1177 else: 2044 else: 1178 2045 self.done_parents.append(parent_item) 1179 print "Done parents %s" % `self.done_parents`2046 print("Done parents %s" % repr(self.done_parents)) 1180 2047 # initialise parent/child values 1181 if isinstance(item_value, StringType):2048 if isinstance(item_value,unicode): 1182 2049 child_values = [item_value] 1183 2050 else: child_values = item_value[:] #copy for safety 1184 2051 # track down the parent 1185 # print "Looking for %s parent item %s in %s" % (item_name,parent_item,`whole_block`)1186 # if globals contains the parent values, we are doing a DDL2 dictionary, and so 2052 # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block)) 2053 # if globals contains the parent values, we are doing a DDL2 dictionary, and so 1187 2054 # we have collected all parent values into the global block - so no need to search 1188 # for them elsewhere. 1189 # print "Looking for %s" % `parent_item`2055 # for them elsewhere. 
2056 # print("Looking for {!r}".format(parent_item)) 1190 2057 parent_values = globals.get(parent_item) 1191 2058 if not parent_values: 1192 2059 parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) 1193 if not parent_values: 2060 if not parent_values: 1194 2061 # go for alternates 1195 2062 namespace = whole_block.keys() … … 1198 2065 alt_names = filter_present(self.get_alternates(parent_item),namespace) 1199 2066 if len(alt_names) == 0: 1200 if len( filter(lambda a:a != "." and a != "?",child_values))>0:2067 if len([a for a in child_values if a != "." and a != "?"])>0: 1201 2068 return {"result":False,"parent":parent_item}#no parent available -> error 1202 2069 else: 1203 2070 return {"result":None} #maybe True is more appropriate?? 1204 parent_item = alt_names[0] #should never be more than one?? 2071 parent_item = alt_names[0] #should never be more than one?? 1205 2072 parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) 1206 2073 if not parent_values: # check global block 1207 2074 parent_values = globals.get(parent_item) 1208 if isinstance(parent_values, StringType):1209 parent_values = [parent_values] 1210 #print "Checking parent %s against %s, values %s/%s" % (parent_item,1211 # item_name, `parent_values`,`child_values`)2075 if isinstance(parent_values,unicode): 2076 parent_values = [parent_values] 2077 #print("Checking parent %s against %s, values %r/%r" % (parent_item, 2078 # item_name, parent_values, child_values)) 1212 2079 missing = self.check_parent_child(parent_values,child_values) 1213 2080 if len(missing) > 0: … … 1221 2088 return {"result":None} #not relevant 1222 2089 # special case for dictionaries -> we check parents of children only 1223 if globals.has_key(item_name): #dictionary so skip2090 if item_name in globals: #dictionary so skip 1224 2091 return {"result":None} 1225 if isinstance(child_items, StringType): # only one child2092 if isinstance(child_items,unicode): # only one child 1226 
2093 child_items = [child_items] 1227 if isinstance(item_value, StringType): # single value2094 if isinstance(item_value,unicode): # single value 1228 2095 parent_values = [item_value] 1229 2096 else: parent_values = item_value[:] … … 1236 2103 if child_item in self.done_children: 1237 2104 return {"result":None} 1238 else: 2105 else: 1239 2106 self.done_children.append(child_item) 1240 print "Done children %s" % `self.done_children`1241 if provisional_items.has_key(child_item):2107 print("Done children %s" % repr(self.done_children)) 2108 if child_item in provisional_items: 1242 2109 child_values = provisional_items[child_item][:] 1243 elif whole_block.has_key(child_item):2110 elif child_item in whole_block: 1244 2111 child_values = whole_block[child_item][:] 1245 else: continue 1246 if isinstance(child_values, StringType):2112 else: continue 2113 if isinstance(child_values,unicode): 1247 2114 child_values = [child_values] 1248 # print "Checking child %s against %s, values %s/%s" % (child_item,1249 # item_name,`child_values`,`parent_values`)2115 # print("Checking child %s against %s, values %r/%r" % (child_item, 2116 # item_name, child_values, parent_values)) 1250 2117 missing = self.check_parent_child(parent_values,child_values) 1251 2118 if len(missing)>0: 1252 2119 return {"result":False,"bad_values":missing,"child":child_item} 1253 2120 return {"result":True} #could mean that no child items present 1254 2121 1255 2122 #a generic checker: all child vals should appear in parent_vals 1256 2123 def check_parent_child(self,parent_vals,child_vals): … … 1258 2125 pv = parent_vals[:] 1259 2126 pv.extend([".","?"]) 1260 res = filter(lambda a:a not in pv,child_vals)1261 #print "Missing: %s" % res2127 res = [a for a in child_vals if a not in pv] 2128 #print("Missing: %s" % res) 1262 2129 return res 1263 2130 … … 1267 2134 except KeyError: 1268 2135 return {"result":None} 1269 if isinstance(child_items, StringType): # only one child2136 if isinstance(child_items,unicode): 
# only one child 1270 2137 child_items = [child_items] 1271 2138 for child_item in child_items: 1272 if whole_block.has_key(child_item):2139 if child_item in whole_block: 1273 2140 return {"result":False,"child":child_item} 1274 2141 return {"result":True} 1275 2142 1276 2143 def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}): 1277 2144 try: … … 1279 2146 except KeyError: 1280 2147 return {"result":None} #not relevant 1281 if isinstance(dep_items, StringType):2148 if isinstance(dep_items,unicode): 1282 2149 dep_items = [dep_items] 1283 2150 actual_names = whole_block.keys() 1284 2151 actual_names.extend(prov.keys()) 1285 2152 actual_names.extend(globals.keys()) 1286 missing = filter(lambda a:a not in actual_names,dep_items)2153 missing = [a for a in dep_items if a not in actual_names] 1287 2154 if len(missing) > 0: 1288 2155 alternates = map(lambda a:[self.get_alternates(a),a],missing) 1289 # compact way to get a list of alternative items which are 2156 # compact way to get a list of alternative items which are 1290 2157 # present 1291 have_check = map(lambda b:[filter_present(b[0],actual_names),1292 b[1] ],alternates)1293 have_check = filter(lambda a:len(a[0])==0,have_check)2158 have_check = [(filter_present(b[0],actual_names), 2159 b[1]) for b in alternates] 2160 have_check = list([a for a in have_check if len(a[0])==0]) 1294 2161 if len(have_check) > 0: 1295 have_check = map(lambda a:a[1],have_check)2162 have_check = [a[1] for a in have_check] 1296 2163 return {"result":False,"bad_items":have_check} 1297 2164 return {"result":True} 1298 2165 1299 2166 def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={}, 1300 2167 globals={}): 1301 2168 category = self[item_name].get(self.cat_spec) 1302 2169 if category == None: 1303 print "No category found for %s" % item_name2170 print("No category found for %s" % item_name) 1304 2171 return {"result":None} 1305 # print "Category %s for item %s" % 
(`category`,item_name) 1306 catentry = self.cat_map[category] 2172 # print("Category {!r} for item {}".format(category, item_name)) 1307 2173 # we make a copy in the following as we will be removing stuff later! 1308 unique_i = self[cate ntry].get("_category_key.name",[])[:]1309 if isinstance(unique_i, StringType):2174 unique_i = self[category].get("_category_key.name",[])[:] 2175 if isinstance(unique_i,unicode): 1310 2176 unique_i = [unique_i] 1311 2177 if item_name not in unique_i: #no need to verify 1312 2178 return {"result":None} 1313 if isinstance(item_value, StringType): #not looped2179 if isinstance(item_value,unicode): #not looped 1314 2180 return {"result":None} 1315 # print "Checking %s -> %s -> %s ->Unique: " % (item_name,category,catentry) + `unique_i`2181 # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i)) 1316 2182 # check that we can't optimize by not doing this check 1317 2183 if self.optimize: … … 1329 2195 # the logic being that anything in the provisional dict overrides the 1330 2196 # main block 1331 if provisional_items.has_key(other_name):1332 other_data.append(provisional_items[other_name]) 1333 elif whole_block.has_key(other_name):2197 if other_name in provisional_items: 2198 other_data.append(provisional_items[other_name]) 2199 elif other_name in whole_block: 1334 2200 other_data.append(whole_block[other_name]) 1335 2201 elif self[other_name].get(self.must_exist_spec)=="implicit": … … 1339 2205 # ok, so we go through all of our values 1340 2206 # this works by comparing lists of strings to one other, and 1341 # so could be fooled if you think that '1.' and '1' are 2207 # so could be fooled if you think that '1.' and '1' are 1342 2208 # identical 1343 2209 for i in range(len(item_value)): 1344 #print "Value no. %d" % i ,2210 #print("Value no. 
%d" % i, end=" ") 1345 2211 this_entry = item_value[i] 1346 2212 for j in range(len(other_data)): 1347 this_entry = " ".join([this_entry,other_data[j][i]]) 1348 #print "Looking for %s in %s: " % (`this_entry`,`val_list`)1349 if this_entry in val_list: 2213 this_entry = " ".join([this_entry,other_data[j][i]]) 2214 #print("Looking for {!r} in {!r}: ".format(this_entry, val_list)) 2215 if this_entry in val_list: 1350 2216 return {"result":False,"bad_values":this_entry} 1351 2217 val_list.append(this_entry) … … 1353 2219 1354 2220 1355 def validate_mandatory_category(self,whole_block,globals={},fake_mand=False): 1356 if fake_mand: 2221 def validate_mandatory_category(self,whole_block): 2222 mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"] 2223 if len(mand_cats) == 0: 1357 2224 return {"result":True} 1358 mand_cats = filter(lambda a:self[a].get("_category.mandatory_code","no")=="yes", 1359 self.keys()) 1360 # map to actual ids 1361 catlist = self.cat_map.items() 1362 # print "Mandatory categories - %s" % `mand_cats` 1363 all_keys = whole_block.keys() #non-save block keys 1364 if globals: # 1365 all_keys.extend(globals.abs_all_keys) 1366 for mand_cat in mand_cats: 1367 cat_id = filter(lambda a:a[1]==mand_cat,catlist)[0][0] 1368 no_of_items = len(filter(lambda a:self[a].get(self.cat_spec)==cat_id, 1369 all_keys)) 1370 if no_of_items == 0: 1371 return {"result":False,"bad_items":cat_id} 2225 # print("Mandatory categories - {!r}".format(mand_cats) 2226 # find which categories each of our datanames belongs to 2227 all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()] 2228 missing = set(mand_cats) - set(all_cats) 2229 if len(missing) > 0: 2230 return {"result":False,"bad_items":repr(missing)} 1372 2231 return {"result":True} 1373 2232 1374 def find_prob_cats(self,whole_block): 1375 mand_cats = filter(lambda a:self[a].get("_category.mandatory_code","no")=="yes", 1376 self.keys()) 1377 # map to 
actual ids 1378 catlist = self.cat_map.items() 1379 # find missing categories 1380 wbs = whole_block["saves"] 1381 abs_all_keys = whole_block.keys() 1382 abs_all_keys.extend(reduce(lambda a,b:a+(wbs[b].keys()),wbs.keys(),[])) 1383 prob_cats = [] 1384 for mand_cat in mand_cats: 1385 cat_id = filter(lambda a:a[1]==mand_cat,catlist)[0][0] 1386 1387 if len(filter(lambda a:self[a].get(self.cat_spec)==cat_id,abs_all_keys))==0: 1388 prob_cats.append(cat_id) 1389 if len(prob_cats) > 0: 1390 return (False,{'whole_block':[('validate_mandatory_category',{"result":False,"bad_items":problem_cats})]}) 1391 else: 1392 return (True,{}) 2233 def check_mandatory_items(self,whole_block,default_scope='Item'): 2234 """Return an error if any mandatory items are missing""" 2235 if len(self.scopes_mandatory)== 0: return {"result":True} 2236 if default_scope == 'Datablock': 2237 return {"result":True} #is a data file 2238 scope = whole_block.get('_definition.scope',default_scope) 2239 if '_dictionary.title' in whole_block: 2240 scope = 'Dictionary' 2241 missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block]) 2242 if len(missing)==0: 2243 return {"result":True} 2244 else: 2245 return {"result":False,"bad_items":missing} 2246 2247 def check_prohibited_items(self,whole_block,default_scope='Item'): 2248 """Return an error if any prohibited items are present""" 2249 if len(self.scopes_naughty)== 0: return {"result":True} 2250 if default_scope == 'Datablock': 2251 return {"result":True} #is a data file 2252 scope = whole_block.get('_definition.scope',default_scope) 2253 if '_dictionary.title' in whole_block: 2254 scope = 'Dictionary' 2255 present = list([a for a in self.scopes_naughty[scope] if a in whole_block]) 2256 if len(present)==0: 2257 return {"result":True} 2258 else: 2259 return {"result":False,"bad_items":present} 1393 2260 1394 2261 1395 2262 def run_item_validation(self,item_name,item_value): 1396 return {item_name: map(lambda 
f:(f.__name__,f(item_name,item_value)),self.item_validation_funs)}2263 return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])} 1397 2264 1398 2265 def run_loop_validation(self,loop_names): 1399 return {loop_names[0]: map(lambda f:(f.__name__,f(loop_names)),self.loop_validation_funs)}2266 return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])} 1400 2267 1401 2268 def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}): 1402 results = map(lambda f:(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)),self.global_validation_funs)2269 results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs]) 1403 2270 return {item_name:results} 1404 2271 1405 def run_block_validation(self,whole_block, globals={},fake_mand=False):1406 results = map(lambda f:(f.__name__,f(whole_block,globals,fake_mand)),self.block_validation_funs)2272 def run_block_validation(self,whole_block,block_scope='Item'): 2273 results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs]) 1407 2274 # fix up the return values 1408 2275 return {"whole_block":results} … … 1421 2288 1422 2289 2290 1423 2291 class ValidCifBlock(CifBlock): 2292 """A `CifBlock` that is valid with respect to a given CIF dictionary. Methods 2293 of `CifBlock` are overridden where necessary to disallow addition of invalid items to the 2294 `CifBlock`. 2295 2296 ## Initialisation 2297 2298 * `dic` is a `CifDic` object to be used for validation. 
2299 2300 """ 1424 2301 def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords): 1425 CifBlock.__init__(self,*args,**kwords) 2302 CifBlock.__init__(self,*args,**kwords) 1426 2303 if dic and diclist: 1427 print "Warning: diclist argument ignored when initialising ValidCifBlock"2304 print("Warning: diclist argument ignored when initialising ValidCifBlock") 1428 2305 if isinstance(dic,CifDic): 1429 2306 self.fulldic = dic … … 1443 2320 update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname])) 1444 2321 update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self)) 1445 for loop in self.loops:1446 update_value(self.v_result,self.fulldic.run_loop_validation(loop .keys()))2322 for loop_names in self.loops.values(): 2323 update_value(self.v_result,self.fulldic.run_loop_validation(loop_names)) 1447 2324 # now run block-level checks 1448 2325 update_value(self.v_result,self.fulldic.run_block_validation(self)) 1449 2326 # return false and list of baddies if anything didn't match 1450 2327 self.fulldic.optimize_off() 1451 for test_key in self.v_result.keys(): 1452 #print "%s: %s" % (test_key,`self.v_result[test_key]`) 1453 self.v_result[test_key] = filter(lambda a:a[1]["result"]==False,self.v_result[test_key]) 1454 if len(self.v_result[test_key]) == 0: 2328 all_keys = list(self.v_result.keys()) #dictionary will change 2329 for test_key in all_keys: 2330 #print("%s: %r" % (test_key, self.v_result[test_key])) 2331 self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False] 2332 if len(self.v_result[test_key]) == 0: 1455 2333 del self.v_result[test_key] 1456 2334 isvalid = len(self.v_result)==0 1457 2335 #if not isvalid: 1458 # print "Baddies:" + `self.v_result`2336 # print("Baddies: {!r}".format(self.v_result)) 1459 2337 return isvalid,self.v_result 1460 2338 1461 2339 def single_item_check(self,item_name,item_value): 1462 2340 #self.match_single_item(item_name) 1463 if 
not self.fulldic.has_key(item_name):2341 if item_name not in self.fulldic: 1464 2342 result = {item_name:[]} 1465 2343 else: 1466 2344 result = self.fulldic.run_item_validation(item_name,item_value) 1467 baddies = filter(lambda a:a[1]["result"]==False, result[item_name])2345 baddies = list([a for a in result[item_name] if a[1]["result"]==False]) 1468 2346 # if even one false one is found, this should trigger 1469 2347 isvalid = (len(baddies) == 0) 1470 # if not isvalid: print "Failures for %s:" % item_name + `baddies`2348 # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) 1471 2349 return isvalid,baddies 1472 2350 1473 2351 def loop_item_check(self,loop_names): 1474 in_dic_names = filter(lambda a:self.fulldic.has_key(a),loop_names)2352 in_dic_names = list([a for a in loop_names if a in self.fulldic]) 1475 2353 if len(in_dic_names)==0: 1476 2354 result = {loop_names[0]:[]} 1477 2355 else: 1478 2356 result = self.fulldic.run_loop_validation(in_dic_names) 1479 baddies = filter(lambda a:a[1]["result"]==False,result[in_dic_names[0]])2357 baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False]) 1480 2358 # if even one false one is found, this should trigger 1481 2359 isvalid = (len(baddies) == 0) 1482 # if not isvalid: print "Failures for %s:" % `loop_names` + `baddies`2360 # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies)) 1483 2361 return isvalid,baddies 1484 2362 1485 2363 def global_item_check(self,item_name,item_value,provisional_items={}): 1486 if not self.fulldic.has_key(item_name):2364 if item_name not in self.fulldic: 1487 2365 result = {item_name:[]} 1488 2366 else: 1489 2367 result = self.fulldic.run_global_validation(item_name, 1490 2368 item_value,self,provisional_items = provisional_items) 1491 baddies = filter(lambda a:a[1]["result"]==False,result[item_name])2369 baddies = list([a for a in result[item_name] if a[1]["result"] is False]) 1492 2370 # if even one false one is found, this 
should trigger 1493 2371 isvalid = (len(baddies) == 0) 1494 # if not isvalid: print "Failures for %s:" % item_name + `baddies`2372 # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) 1495 2373 return isvalid,baddies 1496 2374 1497 2375 def remove_global_item_check(self,item_name): 1498 if not self.fulldic.has_key(item_name):2376 if item_name not in self.fulldic: 1499 2377 result = {item_name:[]} 1500 2378 else: 1501 2379 result = self.fulldic.run_remove_global_validation(item_name,self,False) 1502 baddies = filter(lambda a:a[1]["result"]==False,result[item_name])2380 baddies = list([a for a in result[item_name] if a[1]["result"]==False]) 1503 2381 # if even one false one is found, this should trigger 1504 2382 isvalid = (len(baddies) == 0) 1505 # if not isvalid: print "Failures for %s:" % item_name + `baddies`2383 # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) 1506 2384 return isvalid,baddies 1507 2385 … … 1510 2388 paired_data = loopdata.items() 1511 2389 for name,value in paired_data: 1512 valid,problems = self.single_item_check(name,value) 2390 valid,problems = self.single_item_check(name,value) 1513 2391 self.report_if_invalid(valid,problems) 1514 2392 # loop item checks; merge with current loop 1515 2393 found = 0 1516 2394 for aloop in self.block["loops"]: 1517 if aloop.has_key(dataname):2395 if dataname in aloop: 1518 2396 loopnames = aloop.keys() 1519 2397 for new_name in loopdata.keys(): … … 1522 2400 self.report_if_invalid(valid,problems) 1523 2401 prov_dict = loopdata.copy() 1524 for name,value in paired_data: 2402 for name,value in paired_data: 1525 2403 del prov_dict[name] # remove temporarily 1526 2404 valid,problems = self.global_item_check(name,value,prov_dict) … … 1528 2406 self.report_if_invalid(valid,problems) 1529 2407 CifBlock.AddToLoop(self,dataname,loopdata) 1530 2408 1531 2409 def AddCifItem(self,data): 1532 if isinstance(data[0], StringType): # single item2410 if 
isinstance(data[0],(unicode,str)): # single item 1533 2411 valid,problems = self.single_item_check(data[0],data[1]) 1534 2412 self.report_if_invalid(valid,problems,data[0]) 1535 2413 valid,problems = self.global_item_check(data[0],data[1]) 1536 2414 self.report_if_invalid(valid,problems,data[0]) 1537 elif isinstance(data[0], TupleType) or isinstance(data[0],ListType):1538 paired_data = map(None,data[0],data[1])2415 elif isinstance(data[0],tuple) or isinstance(data[0],list): 2416 paired_data = list(zip(data[0],data[1])) 1539 2417 for name,value in paired_data: 1540 valid,problems = self.single_item_check(name,value) 2418 valid,problems = self.single_item_check(name,value) 1541 2419 self.report_if_invalid(valid,problems,name) 1542 2420 valid,problems = self.loop_item_check(data[0]) … … 1544 2422 prov_dict = {} # for storing temporary items 1545 2423 for name,value in paired_data: prov_dict[name]=value 1546 for name,value in paired_data: 2424 for name,value in paired_data: 1547 2425 del prov_dict[name] # remove temporarily 1548 2426 valid,problems = self.global_item_check(name,value,prov_dict) 1549 2427 prov_dict[name] = value # add back in 1550 2428 self.report_if_invalid(valid,problems,name) 1551 CifBlock.AddCifItem(self,data) 2429 else: 2430 raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item") 2431 super(ValidCifBlock,self).AddCifItem(data) 2432 2433 def AddItem(self,key,value,**kwargs): 2434 """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary""" 2435 valid,problems = self.single_item_check(key,value) 2436 self.report_if_invalid(valid,problems,key) 2437 valid,problems = self.global_item_check(key,value) 2438 self.report_if_invalid(valid,problems,key) 2439 super(ValidCifBlock,self).AddItem(key,value,**kwargs) 1552 2440 1553 2441 # utility function 1554 2442 def report_if_invalid(self,valid,bad_list,data_name): 1555 2443 if not valid: 1556 error_string = reduce(lambda a,b: a + "," + b[0], 
bad_list, "") 1557 error_string = `data_name` + " fails following validity checks: " + error_string 2444 bad_tests = [a[0] for a in bad_list] 2445 error_string = ",".join(bad_tests) 2446 error_string = repr(data_name) + " fails following validity checks: " + error_string 1558 2447 raise ValidCifError( error_string) 1559 2448 … … 1561 2450 # we don't need to run single item checks; we do need to run loop and 1562 2451 # global checks. 1563 if self.has_key(key):1564 try: 2452 if key in self: 2453 try: 1565 2454 loop_items = self.GetLoop(key) 1566 2455 except TypeError: 1567 2456 loop_items = [] 1568 2457 if loop_items: #need to check loop conformance 1569 loop_names = map(lambda a:a[0],loop_items) 1570 loop_names = filter(lambda a:a != key,loop_names) 2458 loop_names = [a[0] for a in loop_items if a[0] != key] 1571 2459 valid,problems = self.loop_item_check(loop_names) 1572 2460 self.report_if_invalid(valid,problems) … … 1577 2465 1578 2466 def report(self): 1579 import cStringIO 1580 outstr = cStringIO.StringIO() 2467 outstr = StringIO() 1581 2468 outstr.write( "Validation results\n") 1582 2469 outstr.write( "------------------\n") 1583 print "%d invalid items found\n" % len(self.v_result)2470 print("%d invalid items found\n" % len(self.v_result)) 1584 2471 for item_name,val_func_list in self.v_result.items(): 1585 2472 outstr.write("%s fails following tests:\n" % item_name) … … 1590 2477 1591 2478 class ValidCifFile(CifFile): 2479 """A CIF file for which all datablocks are valid. 
Argument `dic` to 2480 initialisation specifies a `CifDic` object to use for validation.""" 1592 2481 def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs): 1593 2482 if not diclist and not dic and not hasattr(self,'bigdic'): … … 1598 2487 self.bigdic = dic 1599 2488 CifFile.__init__(self,*args,**kwargs) 1600 #for blockname in self.keys():1601 #self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)2489 for blockname in self.keys(): 2490 self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic) 1602 2491 1603 2492 def NewBlock(self,blockname,blockcontents,**kwargs): … … 1610 2499 1611 2500 class ValidationResult: 1612 """Represents validation result """2501 """Represents validation result. It is initialised with """ 1613 2502 def __init__(self,results): 1614 2503 """results is return value of validate function""" … … 1632 2521 valid = True 1633 2522 return valid 1634 2523 1635 2524 def has_no_match_items(self,block_name=None): 1636 2525 """Return true if some items are not found in dictionary""" … … 1638 2527 block_names = [block_name] 1639 2528 else: 1640 block_names = self.no_matches.iter_keys() 2529 block_names = self.no_matches.iter_keys() 1641 2530 for block_name in block_names: 1642 2531 if self.no_matches[block_name]: … … 1646 2535 has_no_match_items = False 1647 2536 return has_no_match_items 1648 1649 1650 1651 def validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False,fake_mand=True): 1652 check_file = CifFile(ciffile) 2537 2538 2539 2540 def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False): 2541 """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing, 2542 to the results of merging the `CifDic` objects in `diclist` according to `mergemode`. 
Flag 2543 `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be 2544 accessed for validation and that mandatory_category should be interpreted differently for DDL2.""" 2545 if not isinstance(ciffile,CifFile): 2546 check_file = CifFile(ciffile) 2547 else: 2548 check_file = ciffile 1653 2549 if not dic: 1654 2550 fulldic = merge_dic(diclist,mergemode) … … 1658 2554 valid_result = {} 1659 2555 if isdic: #assume one block only 1660 blockname = check_file.keys()[0]1661 check_bc = check_file[blockname]["saves"]1662 check_ globals = check_file[blockname]2556 check_file.scoping = 'instance' #only data blocks visible 2557 top_level = check_file.keys()[0] 2558 check_file.scoping = 'dictionary' #all blocks visible 1663 2559 # collect a list of parents for speed 1664 poss_parents = fulldic.get_all("_item_linked.parent_name") 1665 for parent in poss_parents: 1666 curr_parent = listify(check_globals.get(parent,[])) 1667 new_vals = check_bc.get_all(parent) 1668 new_vals.extend(curr_parent) 1669 if len(new_vals)>0: 1670 check_globals[parent] = new_vals 1671 # print "Added %s (len %d)" % (parent,len(check_globals[parent])) 1672 # next dictionary problem: the main DDL2 dictionary has what 1673 # I would characterise as a mandatory_category problem, but 1674 # in order to gloss over it, we allow a different 1675 # interpretation, which requires only a single check for one 1676 # block. 
1677 if fake_mand: 1678 valid_result[blockname] = fulldic.find_prob_cats(check_globals) 1679 no_matches[blockname] = filter(lambda a:not fulldic.has_key(a),check_globals.keys()) 1680 else: 1681 check_bc = check_file 1682 check_globals = CifBlock() #empty 1683 for block in check_bc.keys(): 1684 #print "Validating block %s" % block 1685 no_matches[block] = filter(lambda a:not fulldic.has_key(a),check_bc[block].keys()) 2560 if fulldic.diclang == 'DDL2': 2561 poss_parents = fulldic.get_all("_item_linked.parent_name") 2562 for parent in poss_parents: 2563 curr_parent = listify(check_file.get(parent,[])) 2564 new_vals = check_file.get_all(parent) 2565 new_vals.extend(curr_parent) 2566 if len(new_vals)>0: 2567 check_file[parent] = new_vals 2568 print("Added %s (len %d)" % (parent,len(check_file[parent]))) 2569 # now run the validations 2570 for block in check_file.keys(): 2571 if isdic and block == top_level: 2572 block_scope = 'Dictionary' 2573 elif isdic: 2574 block_scope = 'Item' 2575 else: 2576 block_scope = 'Datablock' 2577 no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic] 1686 2578 # remove non-matching items 1687 # print "Not matched: " + `no_matches[block]`2579 print("Not matched: " + repr(no_matches[block])) 1688 2580 for nogood in no_matches[block]: 1689 del check_bc[block][nogood] 1690 valid_result[block] = run_data_checks(check_bc[block],fulldic,globals=check_globals,fake_mand=fake_mand) 2581 del check_file[block][nogood] 2582 print("Validating block %s, scope %s" % (block,block_scope)) 2583 valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope) 1691 2584 return valid_result,no_matches 1692 2585 1693 2586 def validate_report(val_result,use_html=False): 1694 import cStringIO1695 2587 valid_result,no_matches = val_result 1696 outstr = cStringIO.StringIO()2588 outstr = StringIO() 1697 2589 if use_html: 1698 2590 outstr.write("<h2>Validation results</h2>") … … 1700 2592 outstr.write( "Validation 
results\n") 1701 2593 outstr.write( "------------------\n") 1702 if len(valid_result) > 10: 2594 if len(valid_result) > 10: 1703 2595 suppress_valid = True #don't clutter with valid messages 1704 2596 if use_html: … … 1722 2614 outstr.write(" (note that this does not invalidate the data block):</p>") 1723 2615 outstr.write("<p><table>\n") 1724 map(lambda it:outstr.write("<tr><td>%s</td></tr>" % it),no_matches[block])2616 [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]] 1725 2617 outstr.write("</table>\n") 1726 2618 else: 1727 2619 outstr.write( "\n The following items were not found in the dictionary:\n") 1728 2620 outstr.write("Note that this does not invalidate the data block\n") 1729 map(lambda it:outstr.write("%s\n" % it),no_matches[block])2621 [outstr.write("%s\n" % it) for it in no_matches[block]] 1730 2622 # now organise our results by type of error, not data item... 1731 2623 error_type_dic = {} … … 1753 2645 'validate_loop_key':\ 1754 2646 "A required dataname for this category is missing from the loop\n containing the dataname", 2647 'validate_loop_key_ddlm':\ 2648 "A loop key is missing for the category containing the dataname", 1755 2649 'validate_loop_references':\ 1756 2650 "A dataname required by the item is missing from the loop", … … 1766 2660 "Both dataname and exclusive alternates or aliases are present in data block", 1767 2661 'validate_mandatory_category':\ 1768 "A required category is missing from this block"} 2662 "A required category is missing from this block", 2663 'check_mandatory_items':\ 2664 "A required data attribute is missing from this block", 2665 'check_prohibited_items':\ 2666 "A prohibited data attribute is present in this block"} 1769 2667 1770 2668 for test_name,test_results in error_type_dic.items(): 1771 2669 if use_html: 1772 outstr.write(html_error_report(test_name,info_table[test_name],test_results)) 2670 outstr.write(html_error_report(test_name,info_table[test_name],test_results)) 1773 2671 else: 
1774 outstr.write(error_report(test_name,info_table[test_name],test_results)) 2672 outstr.write(error_report(test_name,info_table[test_name],test_results)) 1775 2673 outstr.write("\n\n") 1776 2674 return outstr.getvalue() 1777 2675 1778 2676 # A function to lay out a single error report. We are passed 1779 2677 # the name of the error (one of our validation functions), the 1780 # explanation to print out, and a dictionary with the error 2678 # explanation to print out, and a dictionary with the error 1781 2679 # information. We print no more than 50 characters of the item 1782 2680 … … 1785 2683 headstring = "%-32s" % "Item name" 1786 2684 bodystring = "" 1787 if error_dics[0].has_key("bad_values"):2685 if "bad_values" in error_dics[0]: 1788 2686 headstring += "%-20s" % "Bad value(s)" 1789 if error_dics[0].has_key("bad_items"):2687 if "bad_items" in error_dics[0]: 1790 2688 headstring += "%-20s" % "Bad dataname(s)" 1791 if error_dics[0].has_key("child"):2689 if "child" in error_dics[0]: 1792 2690 headstring += "%-20s" % "Child" 1793 if error_dics[0].has_key("parent"):1794 headstring += "%-20s" % "Parent" 2691 if "parent" in error_dics[0]: 2692 headstring += "%-20s" % "Parent" 1795 2693 headstring +="\n" 1796 2694 for error in error_dics: 1797 2695 bodystring += "\n%-32s" % error["item_name"] 1798 if error.has_key("bad_values"):1799 out_vals = map(lambda a:a[:50],error["bad_values"])1800 bodystring += "%-20s" % out_vals 1801 if error.has_key("bad_items"):1802 bodystring += "%-20s" % error["bad_items"]1803 if error.has_key("child"):1804 bodystring += "%-20s" % error["child"]1805 if error.has_key("parent"):1806 bodystring += "%-20s" % error["parent"]1807 return retstring + headstring + bodystring 2696 if "bad_values" in error: 2697 out_vals = [repr(a)[:50] for a in error["bad_values"]] 2698 bodystring += "%-20s" % out_vals 2699 if "bad_items" in error: 2700 bodystring += "%-20s" % repr(error["bad_items"]) 2701 if "child" in error: 2702 bodystring += "%-20s" % 
repr(error["child"]) 2703 if "parent" in error: 2704 bodystring += "%-20s" % repr(error["parent"]) 2705 return retstring + headstring + bodystring 1808 2706 1809 2707 # This lays out an HTML error report … … 1814 2712 headstring = "<th>Item name</th>" 1815 2713 bodystring = "" 1816 if error_dics[0].has_key("bad_values"):2714 if "bad_values" in error_dics[0]: 1817 2715 headstring += "<th>Bad value(s)</th>" 1818 if error_dics[0].has_key("bad_items"):2716 if "bad_items" in error_dics[0]: 1819 2717 headstring += "<th>Bad dataname(s)</th>" 1820 if error_dics[0].has_key("child"):2718 if "child" in error_dics[0]: 1821 2719 headstring += "<th>Child</th>" 1822 if error_dics[0].has_key("parent"):1823 headstring += "<th>Parent</th>" 2720 if "parent" in error_dics[0]: 2721 headstring += "<th>Parent</th>" 1824 2722 headstring +="</tr>\n" 1825 2723 for error in error_dics: 1826 2724 bodystring += "<tr><td><tt>%s</tt></td>" % error["item_name"] 1827 if error.has_key("bad_values"):2725 if "bad_values" in error: 1828 2726 bodystring += "<td>%s</td>" % error["bad_values"] 1829 if error.has_key("bad_items"):2727 if "bad_items" in error: 1830 2728 bodystring += "<td><tt>%s</tt></td>" % error["bad_items"] 1831 if error.has_key("child"):2729 if "child" in error: 1832 2730 bodystring += "<td><tt>%s</tt></td>" % error["child"] 1833 if error.has_key("parent"):2731 if "parent" in error: 1834 2732 bodystring += "<td><tt>%s</tt></td>" % error["parent"] 1835 2733 bodystring += "</tr>\n" 1836 2734 return retstring + headstring + bodystring + "</table>\n" 1837 2735 1838 def run_data_checks(check_block,fulldic, globals={},fake_mand=False):2736 def run_data_checks(check_block,fulldic,block_scope='Item'): 1839 2737 v_result = {} 1840 2738 for key in check_block.keys(): 1841 2739 update_value(v_result, fulldic.run_item_validation(key,check_block[key])) 1842 update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block ,globals=globals))1843 for loop in check_block.loops:1844 
update_value(v_result, fulldic.run_loop_validation(loop .keys()))1845 update_value(v_result,fulldic.run_block_validation(check_block, globals=globals,fake_mand=fake_mand))2740 update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block)) 2741 for loopnames in check_block.loops.values(): 2742 update_value(v_result, fulldic.run_loop_validation(loopnames)) 2743 update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope)) 1846 2744 # return false and list of baddies if anything didn't match 1847 for test_key in v_result.keys(): 1848 v_result[test_key] = filter(lambda a:a[1]["result"]==False,v_result[test_key]) 1849 if len(v_result[test_key]) == 0: 2745 all_keys = list(v_result.keys()) 2746 for test_key in all_keys: 2747 v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False] 2748 if len(v_result[test_key]) == 0: 1850 2749 del v_result[test_key] 1851 2750 # if even one false one is found, this should trigger 1852 # print "Baddies:" + `v_result`2751 # print("Baddies: {!r}".format(v_result)) 1853 2752 isvalid = len(v_result)==0 1854 2753 return isvalid,v_result 1855 2754 1856 2755 1857 2756 def get_number_with_esd(numstring): 1858 2757 import string 1859 numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' 2758 numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' 1860 2759 our_match = re.match(numb_re,numstring) 1861 2760 if our_match: 1862 2761 a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups() 1863 # print "Debug: %s -> %s" % (numstring, `our_match.groups()`)2762 # print("Debug: {} -> {!r}".format(numstring, our_match.groups())) 1864 2763 else: 1865 2764 return None,None 1866 2765 if dot or q: return None,None #a dot or question mark 1867 if exp: #has exponent 1868 exp = string.replace(exp,"d","e") # mop up old fashioned numbers1869 exp = string.replace(exp,"D","e")2766 if exp: #has exponent 2767 exp 
= exp.replace("d","e") # mop up old fashioned numbers 2768 exp = exp.replace("D","e") 1870 2769 base_num = base_num + exp 1871 # print "Debug: have %s for base_num from %s" % (base_num,numstring)2770 # print("Debug: have %s for base_num from %s" % (base_num,numstring)) 1872 2771 base_num = float(base_num) 1873 2772 # work out esd, if present. … … 1881 2780 1882 2781 def float_with_esd(inval): 1883 if isinstance(inval, StringType):2782 if isinstance(inval,unicode): 1884 2783 j = inval.find("(") 1885 2784 if j>=0: return float(inval[:j]) 1886 2785 return float(inval) 1887 1888 1889 2786 2787 2788 2789 def convert_type(definition): 2790 """Convert value to have the type given by definition""" 2791 #extract the actual required type information 2792 container = definition['_type.container'] 2793 dimension = definition.get('_type.dimension',StarFile.StarList([])) 2794 structure = interpret_structure(definition['_type.contents']) 2795 if container == 'Single': #a single value to convert 2796 return convert_single_value(structure) 2797 elif container == 'List': #lots of the same value 2798 return convert_list_values(structure,dimension) 2799 elif container == 'Multiple': #no idea 2800 return None 2801 elif container in ('Array','Matrix'): #numpy array 2802 return convert_matrix_values(structure) 2803 return lambda a:a #unable to convert 2804 2805 def convert_single_value(type_spec): 2806 """Convert a single item according to type_spec""" 2807 if type_spec == 'Real': 2808 return float_with_esd 2809 if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'): 2810 return int 2811 if type_spec == 'Complex': 2812 return complex 2813 if type_spec == 'Imag': 2814 return lambda a:complex(0,a) 2815 if type_spec in ('Code','Name','Tag'): #case-insensitive -> lowercase 2816 return lambda a:a.lower() 2817 return lambda a:a #can't do anything numeric 2818 2819 def convert_list_values(structure,dimension): 2820 """Convert the values according to the element 2821 
structure given in [[structure]]""" 2822 if isinstance(structure,(unicode,str)): #simple repetition 2823 func_def = "element_convert = convert_single_value('%s')" % structure 2824 else: 2825 func_def = "def element_convert(element):\n" 2826 func_def += " final_val = []\n" 2827 for pos_no in range(len(structure)): 2828 func_def += " final_val.append(" 2829 type_spec = structure[pos_no] 2830 if type_spec == 'Real': 2831 cf = "float_with_esd(" 2832 elif type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'): 2833 cf = 'int(' 2834 elif type_spec == 'Complex': 2835 cf = 'complex(' 2836 elif type_spec == 'Imag': 2837 cf = 'complex(0,' 2838 elif type_spec in ('Code','Name','Tag'): 2839 cf = '(' 2840 else: cf = '' 2841 func_def += cf 2842 func_def += "element[%d]" % pos_no 2843 if "(" in cf: func_def +=")" 2844 if type_spec in ('Code','Name','Tag'): 2845 func_def +=".lower()" 2846 func_def +=")\n" # close append 2847 func_def += " return final_val\n" 2848 print(func_def) 2849 exec(func_def, globals()) #(re)defines element_convert in global namespace 2850 if len(dimension)> 0 and int(dimension[0]) != 1: 2851 return lambda a: list(map(element_convert,a)) 2852 else: return element_convert 2853 2854 def convert_matrix_values(valtype): 2855 """Convert a dREL String or Float valued List structure to a numpy matrix structure""" 2856 # first convert to numpy array, then let numpy do the work 2857 try: import numpy 2858 except: 2859 return lambda a:a #cannot do it 2860 func_def = "def matrix_convert(a):\n" 2861 func_def += " import numpy\n" 2862 func_def += " p = numpy.array(a)\n" 2863 if valtype == 'Real': 2864 func_def+= " return p.astype('float')\n" 2865 elif valtype == 'Integer': 2866 func_def +=" return p.astype('int')\n" 2867 elif valtype == 'Complex': 2868 func_def +=" return p.astype('complex')\n" 2869 else: 2870 raise ValueError('Unknown matrix value type') 2871 exec(func_def,globals()) #matrix convert is defined 2872 return matrix_convert 2873 2874 def 
interpret_structure(struc_spec): 2875 """Interpret a DDLm structure specification""" 2876 from . import TypeContentsParser as t 2877 p = t.TypeParser(t.TypeParserScanner(struc_spec)) 2878 return getattr(p,"input")() 2879 2880 1890 2881 # A utility function to append to item values rather than replace them 1891 2882 def update_value(base_dict,new_items): 1892 2883 for new_key in new_items.keys(): 1893 if base_dict.has_key(new_key):2884 if new_key in base_dict: 1894 2885 base_dict[new_key].extend(new_items[new_key]) 1895 2886 else: … … 1902 2893 opt_range = range(full_length) 1903 2894 for i in range(len(base_list[0])): 1904 new_packet = [] 2895 new_packet = [] 1905 2896 for j in opt_range: 1906 2897 new_packet.append(base_list[j][i]) … … 1910 2901 # listify strings - used surprisingly often 1911 2902 def listify(item): 1912 if isinstance(item, StringType): return [item]2903 if isinstance(item,(unicode,str)): return [item] 1913 2904 else: return item 1914 2905 1915 # given a list of search items, return a list of items 2906 # given a list of search items, return a list of items 1916 2907 # actually contained in the given data block 1917 2908 def filter_present(namelist,datablocknames): 1918 return filter(lambda a:a in datablocknames,namelist) 2909 return [a for a in namelist if a in datablocknames] 2910 2911 # Make an item immutable, used if we want a list to be a key 2912 def make_immutable(values): 2913 """Turn list of StarList values into a list of immutable items""" 2914 if not isinstance(values[0],StarList): 2915 return values 2916 else: 2917 return [tuple(a) for a in values] 1919 2918 1920 2919 # merge ddl dictionaries. 
We should be passed filenames or CifFile … … 1924 2923 for dic in diclist: 1925 2924 if not isinstance(dic,CifFile) and \ 1926 not isinstance(dic, StringType):1927 raise TypeError , "Require list of CifFile names/objects for dictionary merging"2925 not isinstance(dic,(unicode,str)): 2926 raise TypeError("Require list of CifFile names/objects for dictionary merging") 1928 2927 if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic)) 1929 2928 else: dic_as_cif_list.append(dic) 1930 2929 # we now merge left to right 1931 2930 basedic = dic_as_cif_list[0] 1932 if basedic.has_key("on_this_dictionary"): #DDL1 style only2931 if "on_this_dictionary" in basedic: #DDL1 style only 1933 2932 for dic in dic_as_cif_list[1:]: 1934 2933 basedic.merge(dic,mode=mergemode,match_att=["_name"]) 1935 elif len(basedic.keys()) == 1: #One block: DDL2 style2934 elif len(basedic.keys()) == 1: #One block: DDL2/m style 1936 2935 old_block = basedic[basedic.keys()[0]] 1937 2936 for dic in dic_as_cif_list[1:]: … … 1943 2942 1944 2943 def find_parent(ddl2_def): 1945 if not ddl2_def.has_key("_item.name"):1946 return None 1947 if isinstance(ddl2_def["_item.name"], StringType):2944 if "_item.name" not in ddl2_def: 2945 return None 2946 if isinstance(ddl2_def["_item.name"],unicode): 1948 2947 return ddl2_def["_item.name"] 1949 if not ddl2_def.has_key("_item_linked.child_name"):2948 if "_item_linked.child_name" not in ddl2_def: 1950 2949 raise CifError("Asked to find parent in block with no child_names") 1951 if not ddl2_def.has_key("_item_linked.parent_name"):2950 if "_item_linked.parent_name" not in ddl2_def: 1952 2951 raise CifError("Asked to find parent in block with no parent_names") 1953 result = filter(lambda a:a not in ddl2_def["_item_linked.child_name"],ddl2_def["_item.name"])2952 result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]]) 1954 2953 if len(result)>1 or len(result)==0: 1955 2954 raise CifError("Unable to find single unique 
parent data item") … … 1957 2956 1958 2957 1959 def ReadCif(filename,strict=1,maxlength=2048,scantype="standard",grammar="1.1"): 1960 proto_cif = StarFile.ReadStar(filename,maxlength,scantype=scantype,grammar=grammar) 1961 # convert to CifFile 1962 proto_cif = CifFile(proto_cif) 1963 # check for nested loops 1964 for bname,bvalue in proto_cif.items(): 1965 nests = filter(lambda a:len(a.loops)>0,bvalue.loops) 1966 if len(nests) > 0: 1967 raise CifError( "Block %s contains nested loops") 1968 # check for save frame references (not yet implemented in PySTARRW) 1969 # check for global blocks (not yet implemented in PySTARRW) 1970 return proto_cif 1971 1972 2958 def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF'): 2959 """ Read in a CIF file, returning a `CifFile` object. 2960 2961 * `filename` may be a URL, a file 2962 path on the local system, or any object with a `read` method. 2963 2964 * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1` 2965 is identical except for the exclusion of square brackets as the first characters in 2966 undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will 2967 read files according to the STAR2 publication. If grammar is `None`, autodetection 2968 will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for 2969 properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual 2970 plane are recognised (this will be fixed when PyCIFRW is ported to Python 3). 2971 2972 * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the 2973 cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using 2974 fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in 2975 Jython uses native Java regular expressions 2976 to provide a speedup regardless of this argument (and does not yet support CIF2). 
2977 2978 * `scoping` is only relevant where nested save frames are expected (STAR2 only). 2979 `instance` scoping makes nested save frames 2980 invisible outside their hierarchy, allowing duplicate save frame names in separate 2981 hierarchies. `dictionary` scoping makes all save frames within a data block visible to each 2982 other, thereby restricting all save frames to have unique names. 2983 Currently the only recognised value for `standard` is `CIF`, which when set enforces a 2984 maximum length of 75 characters for datanames and has no other effect. """ 2985 2986 finalcif = CifFile(scoping=scoping,standard=standard) 2987 return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype) 2988 #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs) 2989 2990 class CifLoopBlock(StarFile.LoopBlock): 2991 def __init__(self,data=(),**kwargs): 2992 super(CifLoopBlock,self).__init__(data,**kwargs) 2993 2994 #No documentation flags 2995 -
trunk/CifFile/StarFile.py
r469 r3137 1 # To maximize python3/python2 compatibility 2 from __future__ import print_function 3 from __future__ import unicode_literals 4 from __future__ import division 5 from __future__ import absolute_import 6 7 __copyright = """ 8 PYCIFRW License Agreement (Python License, Version 2) 9 ----------------------------------------------------- 10 11 1. This LICENSE AGREEMENT is between the Australian Nuclear Science 12 and Technology Organisation ("ANSTO"), and the Individual or 13 Organization ("Licensee") accessing and otherwise using this software 14 ("PyCIFRW") in source or binary form and its associated documentation. 15 16 2. Subject to the terms and conditions of this License Agreement, 17 ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide 18 license to reproduce, analyze, test, perform and/or display publicly, 19 prepare derivative works, distribute, and otherwise use PyCIFRW alone 20 or in any derivative version, provided, however, that this License 21 Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 22 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or 23 in any derivative version prepared by Licensee. 24 25 3. In the event Licensee prepares a derivative work that is based on 26 or incorporates PyCIFRW or any part thereof, and wants to make the 27 derivative work available to others as provided herein, then Licensee 28 hereby agrees to include in any such work a brief summary of the 29 changes made to PyCIFRW. 30 31 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" 32 basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 33 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND 34 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 35 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT 36 INFRINGE ANY THIRD PARTY RIGHTS. 37 38 5. 
ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW 39 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A 40 RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY 41 DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 42 43 6. This License Agreement will automatically terminate upon a material 44 breach of its terms and conditions. 45 46 7. Nothing in this License Agreement shall be deemed to create any 47 relationship of agency, partnership, or joint venture between ANSTO 48 and Licensee. This License Agreement does not grant permission to use 49 ANSTO trademarks or trade name in a trademark sense to endorse or 50 promote products or services of Licensee, or any third party. 51 52 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees 53 to be bound by the terms and conditions of this License Agreement. 54 1 55 """ 2 1.This Software copyright \u00A9 Australian Synchrotron Research Program Inc, ("ASRP"). 3 4 2.Subject to ensuring that this copyright notice and licence terms 5 appear on all copies and all modified versions, of PyCIFRW computer 6 code ("this Software"), a royalty-free non-exclusive licence is hereby 7 given (i) to use, copy and modify this Software including the use of 8 reasonable portions of it in other software and (ii) to publish, 9 bundle and otherwise re-distribute this Software or modified versions 10 of this Software to third parties, provided that this copyright notice 11 and terms are clearly shown as applying to all parts of software 12 derived from this Software on each occasion it is published, bundled 13 or re-distributed. You are encouraged to communicate useful 14 modifications to ASRP for inclusion for future versions. 15 16 3.No part of this Software may be sold as a standalone package. 
17 18 4.If any part of this Software is bundled with Software that is sold, 19 a free copy of the relevant version of this Software must be made 20 available through the same distribution channel (be that web server, 21 tape, CD or otherwise). 22 23 5.It is a term of exercise of any of the above royalty free licence 24 rights that ASRP gives no warranty, undertaking or representation 25 whatsoever whether express or implied by statute, common law, custom 26 or otherwise, in respect of this Software or any part of it. Without 27 limiting the generality of the preceding sentence, ASRP will not be 28 liable for any injury, loss or damage (including consequential loss or 29 damage) or other loss, loss of profits, costs, charges or expenses 30 however caused which may be suffered, incurred or arise directly or 31 indirectly in respect of this Software. 32 33 6. This Software is not licenced for use in medical applications. 34 """ 35 36 from types import * 37 from urllib import * # for arbitrary opening 38 import re 56 57 58 # Python 2,3 compatibility 59 try: 60 from urllib import urlopen # for arbitrary opening 61 from urlparse import urlparse, urlunparse 62 except: 63 from urllib.request import urlopen 64 from urllib.parse import urlparse,urlunparse 65 import re,os 39 66 import copy 67 import textwrap 68 69 try: 70 from StringIO import StringIO #not cStringIO as we cannot subclass 71 except ImportError: 72 from io import StringIO 73 74 if isinstance(u"abc",str): #Python 3 75 unicode = str 76 77 try: 78 import numpy 79 have_numpy = True 80 except ImportError: 81 have_numpy = False 82 40 83 class StarList(list): 41 pass 42 43 # Because DDLm makes a tuple from a tuple... 
44 class StarTuple(tuple): 45 def __new__(cls,*arglist): 46 return tuple.__new__(cls,arglist) 84 def __getitem__(self,args): 85 if isinstance(args,(int,slice)): 86 return super(StarList,self).__getitem__(args) 87 elif isinstance(args,tuple) and len(args)>1: #extended comma notation 88 return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:]) 89 else: 90 return super(StarList,self).__getitem__(args[0]) 91 92 def __str__(self): 93 return "SL("+super(StarList,self).__str__() + ")" 47 94 48 95 class StarDict(dict): 49 96 pass 50 97 51 class LoopBlock: 52 def __init__(self,data = (), dimension = 0, maxoutlength=2048, wraplength=80, overwrite=True): 53 # print 'Creating new loop block, dimension %d' % dimension 54 self.block = {} 55 self.loops = [] 56 self.no_packets = 0 57 self.item_order = [] 58 self.lower_keys = [] #for efficiency 59 self.comment_list = {} 60 self.dimension = dimension 61 self.popout = False #used during load iteration 62 self.curitem = -1 #used during iteration 63 self.maxoutlength = maxoutlength 64 self.wraplength = wraplength 65 self.overwrite = overwrite 66 if not hasattr(self,'loopclass'): #in case are derived class 67 self.loopclass = LoopBlock #when making new loops 68 self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M) 69 if isinstance(data,(TupleType,ListType)): 70 for item in data: 71 self.AddLoopItem(item) 72 elif isinstance(data,LoopBlock): 73 self.block = data.block.copy() 74 self.item_order = data.item_order[:] 75 self.lower_keys = data.lower_keys[:] 76 self.comment_list = data.comment_list.copy() 77 self.dimension = data.dimension 78 # loops as well; change loop class 79 for loopno in range(len(data.loops)): 80 try: 81 placeholder = self.item_order.index(data.loops[loopno]) 82 except ValueError: 83 print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`data.loops[loopno]`,str(data.loops[loopno]),`self.item_order`) 84 placeholder = -1 85 
self.item_order.remove(data.loops[loopno]) #gone 86 newobject = self.loopclass(data.loops[loopno]) 87 # print "Recasting and adding loop %s -> %s" % (`data.loops[loopno]`,`newobject`) 88 self.insert_loop(newobject,position=placeholder) 89 90 def __str__(self): 91 return self.printsection() 92 93 def __setitem__(self,key,value): 94 # catch a one member loop, for convenience 95 # we assume the key is a string value only 96 self.AddLoopItem((key,value)) 97 98 def __getitem__(self,key): 99 if isinstance(key,IntType): #return a packet!! 100 return self.GetPacket(key) 101 return self.GetLoopItem(key) 102 103 def __delitem__(self,key): 104 self.RemoveLoopItem(key) 105 106 def __len__(self): 107 blen = len(self.block) 108 for aloop in self.loops: 109 # print 'Aloop is %s' % `aloop` 110 blen = blen + len(aloop) # also a LoopBlock 111 return blen 112 113 def __nonzero__(self): 114 if self.__len__() > 0: return 1 115 return 0 116 117 # keys returns all internal keys 98 99 class LoopBlock(object): 100 def __init__(self,parent_block,dataname): 101 self.loop_no = parent_block.FindLoop(dataname) 102 if self.loop_no < 0: 103 raise KeyError('%s is not in a loop structure' % dataname) 104 self.parent_block = parent_block 105 118 106 def keys(self): 119 thesekeys = self.block.keys() 120 for aloop in self.loops: 121 thesekeys.extend(aloop.keys()) 122 return thesekeys 107 return self.parent_block.loops[self.loop_no] 123 108 124 109 def values(self): 125 ourkeys = self.keys()126 return map(lambda a:self[a],ourkeys) 127 110 return [self.parent_block[a] for a in self.keys()] 111 112 #Avoid iterator even though that is Python3-esque 128 113 def items(self): 129 ourkeys = self.keys() 130 return map(lambda a,b:(a,b),self.keys(),self.values()) 114 return list(zip(self.keys(),self.values())) 115 116 def __getitem__(self,dataname): 117 if isinstance(dataname,int): #a packet request 118 return self.GetPacket(dataname) 119 if dataname in self.keys(): 120 return self.parent_block[dataname] 121 
else: 122 raise KeyError('%s not in loop block' % dataname) 123 124 def __setitem__(self,dataname,value): 125 self.parent_block[dataname] = value 126 self.parent_block.AddLoopName(self.keys()[0],dataname) 127 128 def __contains__(self,key): 129 return key in self.parent_block.loops[self.loop_no] 131 130 132 131 def has_key(self,key): 133 if key.lower() in self.lower_keys: 134 return 1 135 for aloop in self.loops: 136 if aloop.has_key(key): return 1 137 return 0 138 139 def get(self,key,default=None): 140 if self.has_key(key): 141 retval = self.GetLoopItem(key) 142 else: 143 retval = default 144 return retval 145 146 def clear(self): 147 self.block = {} 148 self.loops = [] 149 self.item_order = [] 150 self.lower_keys = [] 151 self.no_packets = 0 152 153 # doesn't appear to work 154 def copy(self): 155 newcopy = self.copy.im_class(dimension = self.dimension) 156 newcopy.block = self.block.copy() 157 newcopy.loops = [] 158 newcopy.no_packets = self.no_packets 159 newcopy.item_order = self.item_order[:] 160 newcopy.lower_keys = self.lower_keys[:] 161 for loop in self.loops: 162 try: 163 placeholder = self.item_order.index(loop) 164 except ValueError: 165 print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`loop`,str(loop),`self.item_order`) 166 placeholder = -1 167 newcopy.item_order.remove(loop) #gone 168 newobject = loop.copy() 169 # print "Adding loop %s -> %s" % (`loop`,`newobject`) 170 newcopy.insert_loop(newobject,position=placeholder) 171 return newcopy 172 173 # this is not appropriate for subloops. 
Instead, the loop block 174 # should be accessed directly for update 175 176 def update(self,adict): 177 for key in adict.keys(): 178 self.AddLoopItem((key,adict[key])) 132 return key in self 133 134 def __iter__(self): 135 packet_list = zip(*self.values()) 136 names = self.keys() 137 for p in packet_list: 138 r = StarPacket(p) 139 for n in range(len(names)): 140 setattr(r,names[n].lower(),r[n]) 141 yield r 142 143 # for compatibility 144 def __getattr__(self,attname): 145 return getattr(self.parent_block,attname) 179 146 180 147 def load_iter(self,coords=[]): 181 count = 0 #to create packet index 148 count = 0 #to create packet index 182 149 while not self.popout: 183 150 # ok, we have a new packet: append a list to our subloops … … 217 184 for iname in self.keys(): #includes lower levels 218 185 target_list = self[iname] 219 for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 186 for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 220 187 target_list = target_list[-1] 221 188 target_list.append([]) … … 232 199 drill_values=drill_values[0] #drill in 233 200 else: 234 raise StarError("Malformed loop packet %s" % `top_items[0]`)235 my_length = len(drill_values )201 raise StarError("Malformed loop packet %s" % repr( top_items[0] )) 202 my_length = len(drill_values[0]) #length of 'string' entry 236 203 if self.dimension == 0: #top level 237 204 for aloop in self.loops: 238 205 for apacket in aloop.recursive_iter(): 239 # print "Recursive yielding %s" % `dict(top_items + apacket.items())`206 # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) ) 240 207 prep_yield = StarPacket(top_values+apacket.values()) #straight list 241 208 for name,value in top_items + apacket.items(): … … 246 213 kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys()) 247 214 kvvals = map(lambda a:a[1],kvpairs) #just values 248 # print "Recursive kvpairs at %d: %s" % (i, `kvpairs`)215 # print "Recursive kvpairs 
at %d: %s" % (i,repr( kvpairs )) 249 216 if self.loops: 250 217 for aloop in self.loops: 251 218 for apacket in aloop.recursive_iter(coord=coord+[i]): 252 # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`219 # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) ) 253 220 prep_yield = StarPacket(kvvals+apacket.values()) 254 221 for name,value in kvpairs + apacket.items(): … … 256 223 yield prep_yield 257 224 else: # we're at the bottom of the tree 258 # print "Recursive yielding %s" % `dict(kvpairs)`225 # print "Recursive yielding %s" % repr( dict(kvpairs) ) 259 226 prep_yield = StarPacket(kvvals) 260 227 for name,value in kvpairs: … … 262 229 yield prep_yield 263 230 264 # small function to use the coordinates. 231 # small function to use the coordinates. 265 232 def coord_to_group(self,dataname,coords): 266 if not isinstance(dataname, StringType):233 if not isinstance(dataname,unicode): 267 234 return dataname # flag inner loop processing 268 235 newm = self[dataname] # newm must be a list or tuple 269 236 for c in coords: 270 # print "Coord_to_group: %s ->" % ( `newm`),237 # print "Coord_to_group: %s ->" % (repr( newm )), 271 238 newm = newm[c] 272 # print `newm`273 return newm 239 # print repr( newm ) 240 return newm 274 241 275 242 def flat_iterator(self): 276 if self.dimension == 0:277 yield copy.copy(self)278 else:279 243 my_length = 0 280 244 top_keys = self.block.keys() … … 283 247 for pack_no in range(my_length): 284 248 yield(self.collapse(pack_no)) 285 286 287 def insert_loop(self,newloop,position=-1,audit=True): 288 # check that new loop is kosher 289 if newloop.dimension != self.dimension + 1: 290 raise StarError( 'Insertion of loop of wrong nesting level %d, should be %d' % (newloop.dimension, self.dimension+1)) 291 self.loops.append(newloop) 292 if audit: 293 dupes = self.audit() 294 if dupes: 295 dupenames = map(lambda a:a[0],dupes) 296 raise StarError( 'Duplicate names: %s' % `dupenames`) 297 if position >= 0: 
298 self.item_order.insert(position,newloop) 299 else: 300 self.item_order.append(newloop) 301 # print "Insert loop: item_order now" + `self.item_order` 302 303 def remove_loop(self,oldloop): 304 # print "Removing %s: item_order %s" % (`oldloop`,self.item_order) 305 # print "Length %d" % len(oldloop) 306 self.item_order.remove(oldloop) 307 self.loops.remove(oldloop) 308 309 def AddComment(self,itemname,comment): 310 self.comment_list[itemname.lower()] = comment 311 312 def RemoveComment(self,itemname): 313 del self.comment_list[itemname.lower()] 314 315 def GetLoopItem(self,itemname): 316 # assume case is correct first 317 try: 318 return self.block[itemname] 319 except KeyError: 320 for loop in self.loops: 321 try: 322 return loop[itemname] 323 except KeyError: 324 pass 325 if itemname.lower() not in self.lower_keys: 326 raise KeyError, 'Item %s not in block' % itemname 327 # it is there somewhere, now we need to find it 328 real_keys = self.block.keys() 329 lower_keys = map(lambda a:a.lower(),self.block.keys()) 330 try: 331 k_index = lower_keys.index(itemname.lower()) 332 except ValueError: 333 raise KeyError, 'Item %s not in block' % itemname 334 return self.block[real_keys[k_index]] 249 250 251 def RemoveItem(self,itemname): 252 """Remove `itemname` from the block.""" 253 # first check any loops 254 loop_no = self.FindLoop(itemname) 255 testkey = itemname.lower() 256 if testkey in self: 257 del self.block[testkey] 258 del self.true_case[testkey] 259 # now remove from loop 260 if loop_no >= 0: 261 self.loops[loop_no].remove(testkey) 262 if len(self.loops[loop_no])==0: 263 del self.loops[loop_no] 264 self.item_order.remove(loop_no) 265 else: #will appear in order list 266 self.item_order.remove(testkey) 335 267 336 268 def RemoveLoopItem(self,itemname): 337 if self.has_key(itemname): 338 testkey = itemname.lower() 339 real_keys = self.block.keys() 340 lower_keys = map(lambda a:a.lower(),real_keys) 341 try: 342 k_index = lower_keys.index(testkey) 343 except 
ValueError: #must be in a lower loop 344 for aloop in self.loops: 345 if aloop.has_key(itemname): 346 # print "Deleting %s (%s)" % (itemname,aloop[itemname]) 347 del aloop[itemname] 348 if len(aloop)==0: # all gone 349 self.remove_loop(aloop) 350 break 351 else: 352 del self.block[real_keys[k_index]] 353 self.lower_keys.remove(testkey) 354 # now remove the key in the order list 355 for i in range(len(self.item_order)): 356 if isinstance(self.item_order[i],StringType): #may be loop 357 if self.item_order[i].lower()==testkey: 358 del self.item_order[i] 359 break 360 if len(self.block)==0: #no items in loop, length -> 0 361 self.no_packets = 0 362 return #no duplicates, no more checking needed 363 364 def AddLoopItem(self,data,precheck=False,maxlength=-1): 365 # print "Received data %s" % `data` 366 # we accept only tuples, strings and lists!! 367 if isinstance(data[0],(TupleType,ListType)): 368 # internal loop 369 # first we remove any occurences of these datanames in 370 # other loops 371 for one_item in data[0]: 372 if self.has_key(one_item): 373 if not self.overwrite: 374 raise StarError( 'Attempt to insert duplicate item name %s' % data[0]) 375 else: 376 del self[one_item] 377 newloop = self.loopclass(dimension = self.dimension+1) 378 keyvals = zip(data[0],data[1]) 379 for key,val in keyvals: 380 newloop.AddLoopItem((key,val)) 381 self.insert_loop(newloop) 382 elif not isinstance(data[0],StringType): 383 raise TypeError, 'Star datanames are strings only (got %s)' % `data[0]` 384 else: 385 if data[1] == [] or get_dim(data[1])[0] == self.dimension: 386 if not precheck: 387 self.check_data_name(data[0],maxlength) # make sure no nasty characters 388 # check that we can replace data 389 if not self.overwrite: 390 if self.has_key(data[0]): 391 raise StarError( 'Attempt to insert duplicate item name %s' % data[0]) 392 # now make sure the data is OK type 393 regval = self.regularise_data(data[1]) 394 if not precheck: 395 try: 396 self.check_item_value(regval) 397 except 
StarError, errmes: 398 raise StarError( "Item name " + data[0] + " " + `errmes`) 399 if self.dimension > 0: 400 if self.no_packets <= 0: 401 self.no_packets = len(data[1]) #first item in this loop 402 if len(data[1]) != self.no_packets: 403 raise StarLengthError, 'Not enough values supplied for %s' % (data[0]) 404 try: 405 oldpos = self.GetItemPosition(data[0]) 406 except ValueError: 407 oldpos = len(self.item_order)#end of list 408 self.RemoveLoopItem(data[0]) # may be different case, so have to do this 409 self.block.update({data[0]:regval}) # trust the data is OK 410 self.lower_keys.insert(oldpos,data[0].lower()) 411 self.item_order.insert(oldpos,data[0]) 412 # self.lower_keys.append(data[0].lower()) 413 # self.item_order.append(data[0]) 414 415 else: #dimension mismatch 416 raise StarLengthError, "input data dim %d != required dim %d: %s %s" % (get_dim(data[1])[0],self.dimension,data[0],`data[1]`) 417 418 def check_data_name(self,dataname,maxlength=-1): 419 if maxlength > 0: 420 if len(dataname)>maxlength: 421 raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength)) 422 if dataname[0]!='_': 423 raise StarError( 'Dataname ' + dataname + ' does not begin with _') 424 if len (filter (lambda a: ord(a) < 33 or ord(a) > 126, dataname)) > 0: 425 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters') 426 427 def check_item_value(self,item): 428 test_item = item 429 if type(item) != TupleType and type(item) != ListType: 430 test_item = [item] #single item list 431 def check_one (it): 432 if type(it) == StringType: 433 if it=='': return 434 me = self.char_check.match(it) 435 if not me: 436 raise StarError( 'Bad character in %s' % it) 437 else: 438 if me.span() != (0,len(it)): 439 raise StarError('Data item "' + it + '"... 
contains forbidden characters') 440 map(check_one,test_item) 441 442 def regularise_data(self,dataitem): 443 alrighttypes = [IntType, LongType, 444 FloatType, StringType] 445 okmappingtypes = [TupleType, ListType] 446 thistype = type(dataitem) 447 if thistype in alrighttypes or thistype in okmappingtypes: 448 return dataitem 449 if isinstance(dataitem,StarTuple) or \ 450 isinstance(dataitem,StarList) or \ 451 isinstance(dataitem,StarDict): 452 return dataitem 453 # so try to make into a list 454 try: 455 regval = list(dataitem) 456 except TypeError, value: 457 raise StarError( str(dataitem) + ' is wrong type for data value\n' ) 458 return regval 459 269 """*Deprecated*. Use `RemoveItem` instead""" 270 self.RemoveItem(itemname) 271 460 272 def GetLoop(self,keyname): 461 if keyname in self.block: #python 2.2 or above 462 return self 463 for aloop in self.loops: 464 try: 465 return aloop.GetLoop(keyname) 466 except KeyError: 467 pass 468 raise KeyError, 'Item %s does not exist' % keyname 273 """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. 
274 `keyname` is only significant as a way to specify the loop.""" 275 return LoopBlock(self,keyname) 469 276 470 277 def GetPacket(self,index): 471 278 thispack = StarPacket([]) 472 for myitem in self.item_order: 473 if isinstance(myitem,LoopBlock): 474 pack_list = map(lambda b:myitem[b][index],myitem.item_order) 475 # print 'Pack_list -> %s' % `pack_list` 476 thispack.append(pack_list) 477 elif self.dimension==0: 478 thispack.append(self[myitem]) 479 else: 480 thispack.append(self[myitem][index]) 481 setattr(thispack,myitem,thispack[-1]) 482 return thispack 279 for myitem in self.parent_block.loops[self.loop_no]: 280 thispack.append(self[myitem][index]) 281 setattr(thispack,myitem,thispack[-1]) 282 return thispack 483 283 484 284 def AddPacket(self,packet): 485 if self.dimension==0: 486 raise StarError,"Attempt to add packet to top level block" 487 for myitem in self.item_order: 488 self[myitem] = list(self[myitem]) #in case we have stored a tuple 489 self[myitem].append(packet.__getattribute__(myitem)) 490 self.no_packets +=1 491 # print "%s now %s" % (myitem,`self[myitem]`) 492 493 def RemoveKeyedPacket(self,keyname,keyvalue): 494 packet_coord = list(self[keyname]).index(keyvalue) 495 loophandle = self.GetLoop(keyname) 496 for packet_entry in loophandle.item_order: 497 loophandle[packet_entry] = list(loophandle[packet_entry]) 498 del loophandle[packet_entry][packet_coord] 499 self.no_packets -= 1 500 501 def GetKeyedPacket(self,keyname,keyvalue): 502 #print "Looking for %s in %s" % (keyvalue, self[keyname]) 503 one_pack= filter(lambda a:getattr(a,keyname)==keyvalue,self) 504 if len(one_pack)!=1: 505 raise KeyError, "Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)) 506 #print "Keyed packet: %s" % one_pack[0] 507 return one_pack[0] 285 for myitem in self.parent_block.loops[self.loop_no]: 286 old_values = self.parent_block[myitem] 287 old_values.append(packet.__getattribute__(myitem)) 288 self.parent_block[myitem] = old_values 508 
289 509 290 def GetItemOrder(self): 510 return self.item_order[:] 291 """Return a list of datanames in this `LoopBlock` in the order that they will be 292 printed""" 293 return self.parent_block.loops[self.loop_no][:] 294 295 296 def GetItemOrder(self): 297 """Return a list of datanames in this `LoopBlock` in the order that they will be 298 printed""" 299 return self.parent_block.loops[self.loop_no][:] 511 300 512 301 def ChangeItemOrder(self,itemname,newpos): 513 testpos = self.GetItemPosition(itemname) 514 del self.item_order[testpos] 515 # so we have an object ready for action 516 self.item_order.insert(newpos,itemname) 302 """Change the position at which `itemname` appears when printing out to `newpos`.""" 303 self.parent_block.loops[self.loop_no].remove(itemname.lower()) 304 self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower()) 517 305 518 306 def GetItemPosition(self,itemname): 307 """A utility function to get the numerical order in the printout 308 of `itemname`. An item has coordinate `(loop_no,pos)` with 309 the top level having a `loop_no` of -1. 
If an integer is passed to 310 the routine then it will return the position of the loop 311 referenced by that number.""" 519 312 import string 520 def low_case(item): 521 try: 522 return string.lower(item) 523 except AttributeError: 524 return item 525 try: 526 testname = string.lower(itemname) 527 except AttributeError: 528 testname = itemname 529 lowcase_order = map(low_case,self.item_order) 530 return lowcase_order.index(testname) 531 532 def collapse(self,packet_no): 533 if self.dimension == 0: 534 raise StarError( "Attempt to select non-existent packet") 535 newlb = LoopBlock(dimension=self.dimension-1) 536 for one_item in self.item_order: 537 if isinstance(one_item,LoopBlock): 538 newlb.insert_loop(one_item.collapse(packet_no)) 539 else: 540 # print "Collapse: %s -> %s" % (one_item,`self[one_item][packet_no]`) 541 newlb[one_item] = self[one_item][packet_no] 542 return newlb 543 544 def audit(self): 545 import sets 546 allkeys = self.keys() 547 uniquenames = sets.Set(allkeys) 548 if len(uniquenames) == len(allkeys): return [] 549 else: 550 keycount = map(lambda a:(a,allkeys.count(a)),uniquenames) 551 return filter(lambda a:a[1]>1,keycount) 552 313 if isinstance(itemname,int): 314 # return loop position 315 return (-1, self.item_order.index(itemname)) 316 if not itemname in self: 317 raise ValueError('No such dataname %s' % itemname) 318 testname = itemname.lower() 319 if testname in self.item_order: 320 return (-1,self.item_order.index(testname)) 321 loop_no = self.FindLoop(testname) 322 loop_pos = self.loops[loop_no].index(testname) 323 return loop_no,loop_pos 324 553 325 def GetLoopNames(self,keyname): 554 326 if keyname in self: 555 327 return self.keys() 556 328 for aloop in self.loops: 557 try: 329 try: 558 330 return aloop.GetLoopNames(keyname) 559 331 except KeyError: 560 332 pass 561 raise KeyError, 'Item does not exist' 333 raise KeyError('Item does not exist') 334 335 def GetLoopNames(self,keyname): 336 """Return all datanames appearing together 
with `keyname`""" 337 loop_no = self.FindLoop(keyname) 338 if loop_no >= 0: 339 return self.loops[loop_no] 340 else: 341 raise KeyError('%s is not in any loop' % keyname) 562 342 563 343 def AddToLoop(self,dataname,loopdata): … … 566 346 thisloop[itemname] = itemvalue 567 347 348 def AddToLoop(self,dataname,loopdata): 349 """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. 350 351 Add multiple columns to the loop containing `dataname`. `loopdata` is a 352 collection of (key,value) pairs, where `key` is the new dataname and `value` 353 is a list of values for that dataname""" 354 # check lengths 355 thisloop = self.FindLoop(dataname) 356 loop_len = len(self[dataname]) 357 bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] 358 if len(bad_vals)>0: 359 raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ 360 % (repr( bad_vals ),loop_len)) 361 self.update(loopdata) 362 self.loops[thisloop]+=loopdata.keys() 363 364 365 class StarBlock(object): 366 def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True, 367 characterset='ascii',maxnamelength=-1): 368 self.block = {} #the actual data storage (lower case keys) 369 self.loops = {} #each loop is indexed by a number and contains a list of datanames 370 self.item_order = [] #lower case, loops referenced by integer 371 self.formatting_hints = {} 372 self.true_case = {} #transform lower case to supplied case 373 self.provide_value = False #prefer string version always 374 self.dictionary = None #DDLm dictionary 375 self.popout = False #used during load iteration 376 self.curitem = -1 #used during iteration 377 self.cache_vals = True #store all calculated values 378 self.maxoutlength = maxoutlength 379 self.setmaxnamelength(maxnamelength) #to enforce CIF limit of 75 characters 380 self.set_characterset(characterset) #to check input names 381 self.wraplength = wraplength 382 self.overwrite = overwrite 383 self.string_delimiters = ["'",'"',"\n;"] 
#universal CIF set 384 self.list_delimiter = " " #CIF2 default 385 self.wrapper = textwrap.TextWrapper() 386 if isinstance(data,(tuple,list)): 387 for item in data: 388 self.AddLoopItem(item) 389 elif isinstance(data,StarBlock): 390 self.block = data.block.copy() 391 self.item_order = data.item_order[:] 392 self.true_case = data.true_case.copy() 393 # loops as well 394 self.loops = data.loops.copy() 395 396 def setmaxnamelength(self,maxlength): 397 """Set the maximum allowable dataname length (-1 for no check)""" 398 self.maxnamelength = maxlength 399 if maxlength > 0: 400 bad_names = [a for a in self.keys() if len(a)>self.maxnamelength] 401 if len(bad_names)>0: 402 raise StarError('Datanames too long: ' + repr( bad_names )) 403 404 def set_characterset(self,characterset): 405 """Set the characterset for checking datanames: may be `ascii` or `unicode`""" 406 import sys 407 self.characterset = characterset 408 if characterset == 'ascii': 409 self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M) 410 elif characterset == 'unicode': 411 if sys.maxunicode < 1114111: 412 self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M) 413 else: 414 self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M) 415 416 def __str__(self): 417 return self.printsection() 418 419 def __setitem__(self,key,value): 420 if key == "saves": 421 raise StarError("""Setting the saves key is deprecated. Add the save block to 422 an enclosing block collection (e.g. CIF or STAR file) with this block as child""") 423 self.AddItem(key,value) 424 425 def __getitem__(self,key): 426 if key == "saves": 427 raise StarError("""The saves key is deprecated. Access the save block from 428 the enclosing block collection (e.g. 
CIF or STAR file object)""") 429 try: 430 rawitem,is_value = self.GetFullItemValue(key) 431 except KeyError: 432 if self.dictionary: 433 # send the dictionary the required key and a pointer to us 434 try: 435 new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False) 436 except StarDerivationFailure: #try now with defaults included 437 try: 438 new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True) 439 except StarDerivationFailure as s: 440 print("In StarBlock.__getitem__, " + repr(s)) 441 raise KeyError('No such item: %s' % key) 442 print('Set %s to derived value %s' % (key, repr(new_value))) 443 return new_value 444 else: 445 raise KeyError('No such item: %s' % key) 446 # we now have an item, we can try to convert it to a number if that is appropriate 447 # note numpy values are never stored but are converted to lists 448 if not self.dictionary or not key in self.dictionary: return rawitem 449 print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem ))) 450 if is_value: 451 if self.provide_value: return rawitem 452 else: 453 print('Turning %s into string' % repr( rawitem )) 454 return self.convert_to_string(key) 455 else: # a string 456 if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \ 457 (isinstance(rawitem,list) and '?' not in rawitem and '.' 
not in rawitem)): 458 return self.dictionary.change_type(key,rawitem) 459 elif self.provide_value: # catch the question marks 460 do_calculate = False 461 if isinstance(rawitem,(list,tuple)): 462 known = [a for a in rawitem if a != '?'] 463 if len(known) == 0: #all questions 464 do_calculate = True 465 elif rawitem == '?': 466 do_calculate = True 467 if do_calculate: 468 # remove old value 469 del self[key] 470 try: 471 new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False) 472 except StarDerivationFailure as s: 473 try: 474 new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True) 475 except StarDerivationFailure as s: 476 477 print("Could not turn %s into a value:" + repr(s)) 478 return rawitem 479 else: 480 print('Set %s to derived value %s' % (key, repr( new_value ))) 481 return new_value 482 return rawitem #can't do anything 483 484 def __delitem__(self,key): 485 self.RemoveItem(key) 486 487 def __len__(self): 488 blen = len(self.block) 489 return blen 490 491 def __nonzero__(self): 492 if self.__len__() > 0: return 1 493 return 0 494 495 # keys returns all internal keys 496 def keys(self): 497 return list(self.block.keys()) #always lower case 498 499 def values(self): 500 return [self[a] for a in self.keys()] 501 502 def items(self): 503 return list(zip(self.keys(),self.values())) 504 505 def __contains__(self,key): 506 if isinstance(key,(unicode,str)) and key.lower() in self.keys(): 507 return True 508 return False 509 510 def has_key(self,key): 511 return key in self 512 513 def has_key_or_alias(self,key): 514 """Check if a dataname or alias is available in the block""" 515 initial_test = key in self 516 if initial_test: return True 517 elif self.dictionary: 518 aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)] 519 if len(aliases)>0: 520 return True 521 return False 522 523 def get(self,key,default=None): 524 if key in self: 525 retval = self.__getitem__(key) 526 
else: 527 retval = default 528 return retval 529 530 def clear(self): 531 self.block = {} 532 self.loops = {} 533 self.item_order = [] 534 self.true_case = {} 535 536 # doesn't appear to work 537 def copy(self): 538 newcopy = StarBlock() 539 newcopy.block = self.block.copy() 540 newcopy.loops = [] 541 newcopy.item_order = self.item_order[:] 542 newcopy.true_case = self.true_case.copy() 543 newcopy.loops = self.loops.copy() 544 # return self.copy.im_class(newcopy) #catch inheritance 545 return newcopy 546 547 def update(self,adict): 548 for key in adict.keys(): 549 self.AddItem(key,adict[key]) 550 551 def GetItemPosition(self,itemname): 552 """A utility function to get the numerical order in the printout 553 of `itemname`. An item has coordinate `(loop_no,pos)` with 554 the top level having a `loop_no` of -1. If an integer is passed to 555 the routine then it will return the position of the loop 556 referenced by that number.""" 557 import string 558 if isinstance(itemname,int): 559 # return loop position 560 return (-1, self.item_order.index(itemname)) 561 if not itemname in self: 562 raise ValueError('No such dataname %s' % itemname) 563 testname = itemname.lower() 564 if testname in self.item_order: 565 return (-1,self.item_order.index(testname)) 566 loop_no = self.FindLoop(testname) 567 loop_pos = self.loops[loop_no].index(testname) 568 return loop_no,loop_pos 569 570 def ChangeItemOrder(self,itemname,newpos): 571 """Move the printout order of `itemname` to `newpos`. 
If `itemname` is 572 in a loop, `newpos` refers to the order within the loop.""" 573 if isinstance(itemname,(unicode,str)): 574 true_name = itemname.lower() 575 else: 576 true_name = itemname 577 loopno = self.FindLoop(true_name) 578 if loopno < 0: #top level 579 self.item_order.remove(true_name) 580 self.item_order.insert(newpos,true_name) 581 else: 582 self.loops[loopno].remove(true_name) 583 self.loops[loopno].insert(newpos,true_name) 584 585 def GetItemOrder(self): 586 """Return a list of datanames in the order in which they will be printed. Loops are 587 referred to by numerical index""" 588 return self.item_order[:] 589 590 def AddItem(self,key,value,precheck=False): 591 """Add dataname `key` to block with value `value`. `value` may be 592 a single value, a list or a tuple. If `precheck` is False (the default), 593 all values will be checked and converted to unicode strings as necessary. If 594 `precheck` is True, this checking is bypassed. No checking is necessary 595 when values are read from a CIF file as they are already in correct form.""" 596 if not isinstance(key,(unicode,str)): 597 raise TypeError('Star datanames are strings only (got %s)' % repr( key )) 598 key = unicode(key) #everything is unicode internally 599 if not precheck: 600 self.check_data_name(key,self.maxnamelength) # make sure no nasty characters 601 # check for overwriting 602 if key in self: 603 if not self.overwrite: 604 raise StarError( 'Attempt to insert duplicate item name %s' % key) 605 if not precheck: #need to sanitise 606 regval,empty_val = self.regularise_data(value) 607 pure_string = check_stringiness(regval) 608 self.check_item_value(regval) 609 else: 610 regval,empty_val = value,None 611 pure_string = True 612 # update ancillary information first 613 lower_key = key.lower() 614 if not lower_key in self and self.FindLoop(lower_key)<0: #need to add to order 615 self.item_order.append(lower_key) 616 # always remove from our case table in case the case is different 617 try: 618 
del self.true_case[lower_key] 619 except KeyError: 620 pass 621 self.true_case[lower_key] = key 622 if pure_string: 623 self.block.update({lower_key:[regval,empty_val]}) 624 else: 625 self.block.update({lower_key:[empty_val,regval]}) 626 627 def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): 628 """*Deprecated*. Use `AddItem` followed by `CreateLoop` if 629 necessary.""" 630 # print "Received data %s" % `incomingdata` 631 # we accept tuples, strings, lists and dicts!! 632 # Direct insertion: we have a string-valued key, with an array 633 # of values -> single-item into our loop 634 if isinstance(incomingdata[0],(tuple,list)): 635 # a whole loop 636 keyvallist = zip(incomingdata[0],incomingdata[1]) 637 for key,value in keyvallist: 638 self.AddItem(key,value) 639 self.CreateLoop(incomingdata[0]) 640 elif not isinstance(incomingdata[0],(unicode,str)): 641 raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] )) 642 else: 643 self.AddItem(incomingdata[0],incomingdata[1]) 644 645 def check_data_name(self,dataname,maxlength=-1): 646 if maxlength > 0: 647 self.check_name_length(dataname,maxlength) 648 if dataname[0]!='_': 649 raise StarError( 'Dataname ' + dataname + ' does not begin with _') 650 if self.characterset=='ascii': 651 if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0: 652 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters') 653 else: 654 # print 'Checking %s for unicode characterset conformance' % dataname 655 if len ([a for a in dataname if ord(a) < 33]) > 0: 656 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)') 657 if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0: 658 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)') 659 if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0: 660 raise StarError( 'Dataname ' + dataname + ' contains 
unsupported characters (between U+D800 and U+E000)') 661 if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0: 662 raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)') 663 if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0: 664 raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)') 665 if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xE == 0xE)]) > 0: 666 print('%s fails' % dataname) 667 for a in dataname: print('%x' % ord(a),end="") 668 print() 669 raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)') 670 671 def check_name_length(self,dataname,maxlength): 672 if len(dataname)>maxlength: 673 raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength)) 674 return 675 676 def check_item_value(self,item): 677 test_item = item 678 if not isinstance(item,(list,dict,tuple)): 679 test_item = [item] #single item list 680 def check_one (it): 681 if isinstance(it,unicode): 682 if it=='': return 683 me = self.char_check.match(it) 684 if not me: 685 print("Fail value check: %s" % it) 686 raise StarError('Bad character in %s' % it) 687 else: 688 if me.span() != (0,len(it)): 689 print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it ))) 690 raise StarError('Data item "' + repr( it ) + u'"... 
contains forbidden characters') 691 [check_one(a) for a in test_item] 692 693 def regularise_data(self,dataitem): 694 """Place dataitem into a list if necessary""" 695 from numbers import Number 696 if isinstance(dataitem,str): 697 return unicode(dataitem),None 698 if isinstance(dataitem,(Number,unicode,StarList,StarDict)): 699 return dataitem,None #assume StarList/StarDict contain unicode if necessary 700 if isinstance(dataitem,(tuple,list)): 701 v,s = zip(*list([self.regularise_data(a) for a in dataitem])) 702 return list(v),list(s) 703 #return dataitem,[None]*len(dataitem) 704 # so try to make into a list 705 try: 706 regval = list(dataitem) 707 except TypeError as value: 708 raise StarError( str(dataitem) + ' is wrong type for data value\n' ) 709 v,s = zip(*list([self.regularise_data(a) for a in regval])) 710 return list(v),list(s) 711 712 def RemoveItem(self,itemname): 713 """Remove `itemname` from the block.""" 714 # first check any loops 715 loop_no = self.FindLoop(itemname) 716 testkey = itemname.lower() 717 if testkey in self: 718 del self.block[testkey] 719 del self.true_case[testkey] 720 # now remove from loop 721 if loop_no >= 0: 722 self.loops[loop_no].remove(testkey) 723 if len(self.loops[loop_no])==0: 724 del self.loops[loop_no] 725 self.item_order.remove(loop_no) 726 else: #will appear in order list 727 self.item_order.remove(testkey) 728 729 def RemoveLoopItem(self,itemname): 730 """*Deprecated*. Use `RemoveItem` instead""" 731 self.RemoveItem(itemname) 732 733 def GetItemValue(self,itemname): 734 """Return value of `itemname`. If `itemname` is looped, a list 735 of all values will be returned.""" 736 return self.GetFullItemValue(itemname)[0] 737 738 def GetFullItemValue(self,itemname): 739 """Return the value associated with `itemname`, and a boolean flagging whether 740 (True) or not (False) it is in a form suitable for calculation. 
False is 741 always returned for strings and `StarList` objects.""" 742 try: 743 s,v = self.block[itemname.lower()] 744 except KeyError: 745 raise KeyError('Itemname %s not in datablock' % itemname) 746 # prefer string value unless all are None 747 # are we a looped value? 748 if not isinstance(s,(tuple,list)) or isinstance(s,StarList): 749 if not_none(s): 750 return s,False #a string value 751 else: 752 return v,not isinstance(v,StarList) #a StarList is not calculation-ready 753 elif not_none(s): 754 return s,False #a list of string values 755 else: 756 if len(v)>0: 757 return v,not isinstance(v[0],StarList) 758 return v,True 759 760 def CreateLoop(self,datanames,order=-1,length_check=True): 761 """Create a loop in the datablock. `datanames` is a list of datanames that 762 together form a loop. If length_check is True, they should have been initialised in the block 763 to have the same number of elements (possibly 0). If `order` is given, 764 the loop will appear at this position in the block when printing 765 out. 
A loop counts as a single position.""" 766 767 if length_check: 768 # check lengths: these datanames should exist 769 listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)] 770 if len(listed_values) == len(datanames): 771 len_set = set([len(self[a]) for a in datanames]) 772 if len(len_set)>1: 773 raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set ))) 774 elif len(listed_values) != 0: 775 raise ValueError('Request to loop datanames where some are single values and some are not') 776 # store as lower case 777 lc_datanames = [d.lower() for d in datanames] 778 # remove these datanames from all other loops 779 [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]] 780 # remove empty loops 781 empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0] 782 for a in empty_loops: 783 self.item_order.remove(a) 784 del self.loops[a] 785 if len(self.loops)>0: 786 loopno = max(self.loops.keys()) + 1 787 else: 788 loopno = 1 789 self.loops[loopno] = list(lc_datanames) 790 if order >= 0: 791 self.item_order.insert(order,loopno) 792 else: 793 self.item_order.append(loopno) 794 # remove these datanames from item ordering 795 self.item_order = [a for a in self.item_order if a not in lc_datanames] 796 797 def AddLoopName(self,oldname, newname): 798 """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no 799 error is raised. If `newname` is in a different loop, it is removed from that loop. 
800 The number of values associated with `newname` must match the number of values associated 801 with all other columns of the new loop or a `ValueError` will be raised.""" 802 lower_newname = newname.lower() 803 loop_no = self.FindLoop(oldname) 804 if loop_no < 0: 805 raise KeyError('%s not in loop' % oldname) 806 if lower_newname in self.loops[loop_no]: 807 return 808 # check length 809 old_provides = self.provide_value 810 self.provide_value = False 811 loop_len = len(self[oldname]) 812 self.provide_value = old_provides 813 if len(self[newname]) != loop_len: 814 raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) 815 # remove from any other loops 816 [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] 817 # and add to this loop 818 self.loops[loop_no].append(lower_newname) 819 # remove from item_order if present 820 try: 821 self.item_order.remove(lower_newname) 822 except ValueError: 823 pass 824 825 def FindLoop(self,keyname): 826 """Find the loop that contains `keyname` and return its numerical index or 827 -1 if not present. The numerical index can be used to refer to the loop in 828 other routines.""" 829 loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]] 830 if len(loop_no)>0: 831 return loop_no[0] 832 else: 833 return -1 834 835 def GetLoop(self,keyname): 836 """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. 
837 `keyname` is only significant as a way to specify the loop.""" 838 return LoopBlock(self,keyname) 839 840 def GetLoopNames(self,keyname): 841 if keyname in self: 842 return self.keys() 843 for aloop in self.loops: 844 try: 845 return aloop.GetLoopNames(keyname) 846 except KeyError: 847 pass 848 raise KeyError('Item does not exist') 849 850 def GetLoopNames(self,keyname): 851 """Return all datanames appearing together with `keyname`""" 852 loop_no = self.FindLoop(keyname) 853 if loop_no >= 0: 854 return self.loops[loop_no] 855 else: 856 raise KeyError('%s is not in any loop' % keyname) 857 858 def AddLoopName(self,oldname, newname): 859 """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no 860 error is raised. If `newname` is in a different loop, it is removed from that loop. 861 The number of values associated with `newname` must match the number of values associated 862 with all other columns of the new loop or a `ValueError` will be raised.""" 863 lower_newname = newname.lower() 864 loop_no = self.FindLoop(oldname) 865 if loop_no < 0: 866 raise KeyError('%s not in loop' % oldname) 867 if lower_newname in self.loops[loop_no]: 868 return 869 # check length 870 old_provides = self.provide_value 871 self.provide_value = False 872 loop_len = len(self[oldname]) 873 self.provide_value = old_provides 874 if len(self[newname]) != loop_len: 875 raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) 876 # remove from any other loops 877 [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] 878 # and add to this loop 879 self.loops[loop_no].append(lower_newname) 880 # remove from item_order if present 881 try: 882 self.item_order.remove(lower_newname) 883 except ValueError: 884 pass 885 886 def AddToLoop(self,dataname,loopdata): 887 thisloop = self.GetLoop(dataname) 888 for itemname,itemvalue in loopdata.items(): 889 thisloop[itemname] = itemvalue 890 891 def 
AddToLoop(self,dataname,loopdata): 892 """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. 893 894 Add multiple columns to the loop containing `dataname`. `loopdata` is a 895 collection of (key,value) pairs, where `key` is the new dataname and `value` 896 is a list of values for that dataname""" 897 # check lengths 898 thisloop = self.FindLoop(dataname) 899 loop_len = len(self[dataname]) 900 bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] 901 if len(bad_vals)>0: 902 raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ 903 % (repr( bad_vals ),loop_len)) 904 self.update(loopdata) 905 self.loops[thisloop]+=loopdata.keys() 906 907 def RemoveKeyedPacket(self,keyname,keyvalue): 908 """Remove the packet for which dataname `keyname` takes 909 value `keyvalue`. Only the first such occurrence is 910 removed.""" 911 packet_coord = list(self[keyname]).index(keyvalue) 912 loopnames = self.GetLoopNames(keyname) 913 for dataname in loopnames: 914 self.block[dataname][0] = list(self.block[dataname][0]) 915 del self.block[dataname][0][packet_coord] 916 self.block[dataname][1] = list(self.block[dataname][1]) 917 del self.block[dataname][1][packet_coord] 918 919 def GetKeyedPacket(self,keyname,keyvalue,no_case=False): 920 """Return the loop packet (a `StarPacket` object) where `keyname` has value 921 `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. 
`ValueError` 922 is raised if no packet is found or more than one packet is found.""" 923 my_loop = self.GetLoop(keyname) 924 #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block)) 925 #print('Packet check on:' + keyname) 926 #[print(repr(getattr(a,keyname))) for a in my_loop] 927 if no_case: 928 one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()] 929 else: 930 one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue] 931 if len(one_pack)!=1: 932 raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))) 933 print("Keyed packet: %s" % one_pack[0]) 934 return one_pack[0]