Changeset 3137 for trunk/CifFile/StarFile.py
- Timestamp: Oct 24, 2017 11:53:41 AM (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
Tabular | Unified trunk/CifFile/StarFile.py ¶
r469 r3137 1 # To maximize python3/python2 compatibility 2 from __future__ import print_function 3 from __future__ import unicode_literals 4 from __future__ import division 5 from __future__ import absolute_import 6 7 __copyright = """ 8 PYCIFRW License Agreement (Python License, Version 2) 9 ----------------------------------------------------- 10 11 1. This LICENSE AGREEMENT is between the Australian Nuclear Science 12 and Technology Organisation ("ANSTO"), and the Individual or 13 Organization ("Licensee") accessing and otherwise using this software 14 ("PyCIFRW") in source or binary form and its associated documentation. 15 16 2. Subject to the terms and conditions of this License Agreement, 17 ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide 18 license to reproduce, analyze, test, perform and/or display publicly, 19 prepare derivative works, distribute, and otherwise use PyCIFRW alone 20 or in any derivative version, provided, however, that this License 21 Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 22 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or 23 in any derivative version prepared by Licensee. 24 25 3. In the event Licensee prepares a derivative work that is based on 26 or incorporates PyCIFRW or any part thereof, and wants to make the 27 derivative work available to others as provided herein, then Licensee 28 hereby agrees to include in any such work a brief summary of the 29 changes made to PyCIFRW. 30 31 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" 32 basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 33 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND 34 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 35 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT 36 INFRINGE ANY THIRD PARTY RIGHTS. 37 38 5. 
ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW 39 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A 40 RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY 41 DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 42 43 6. This License Agreement will automatically terminate upon a material 44 breach of its terms and conditions. 45 46 7. Nothing in this License Agreement shall be deemed to create any 47 relationship of agency, partnership, or joint venture between ANSTO 48 and Licensee. This License Agreement does not grant permission to use 49 ANSTO trademarks or trade name in a trademark sense to endorse or 50 promote products or services of Licensee, or any third party. 51 52 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees 53 to be bound by the terms and conditions of this License Agreement. 54 1 55 """ 2 1.This Software copyright \u00A9 Australian Synchrotron Research Program Inc, ("ASRP"). 3 4 2.Subject to ensuring that this copyright notice and licence terms 5 appear on all copies and all modified versions, of PyCIFRW computer 6 code ("this Software"), a royalty-free non-exclusive licence is hereby 7 given (i) to use, copy and modify this Software including the use of 8 reasonable portions of it in other software and (ii) to publish, 9 bundle and otherwise re-distribute this Software or modified versions 10 of this Software to third parties, provided that this copyright notice 11 and terms are clearly shown as applying to all parts of software 12 derived from this Software on each occasion it is published, bundled 13 or re-distributed. You are encouraged to communicate useful 14 modifications to ASRP for inclusion for future versions. 15 16 3.No part of this Software may be sold as a standalone package. 
17 18 4.If any part of this Software is bundled with Software that is sold, 19 a free copy of the relevant version of this Software must be made 20 available through the same distribution channel (be that web server, 21 tape, CD or otherwise). 22 23 5.It is a term of exercise of any of the above royalty free licence 24 rights that ASRP gives no warranty, undertaking or representation 25 whatsoever whether express or implied by statute, common law, custom 26 or otherwise, in respect of this Software or any part of it. Without 27 limiting the generality of the preceding sentence, ASRP will not be 28 liable for any injury, loss or damage (including consequential loss or 29 damage) or other loss, loss of profits, costs, charges or expenses 30 however caused which may be suffered, incurred or arise directly or 31 indirectly in respect of this Software. 32 33 6. This Software is not licenced for use in medical applications. 34 """ 35 36 from types import * 37 from urllib import * # for arbitrary opening 38 import re 56 57 58 # Python 2,3 compatibility 59 try: 60 from urllib import urlopen # for arbitrary opening 61 from urlparse import urlparse, urlunparse 62 except: 63 from urllib.request import urlopen 64 from urllib.parse import urlparse,urlunparse 65 import re,os 39 66 import copy 67 import textwrap 68 69 try: 70 from StringIO import StringIO #not cStringIO as we cannot subclass 71 except ImportError: 72 from io import StringIO 73 74 if isinstance(u"abc",str): #Python 3 75 unicode = str 76 77 try: 78 import numpy 79 have_numpy = True 80 except ImportError: 81 have_numpy = False 82 40 83 class StarList(list): 41 pass 42 43 # Because DDLm makes a tuple from a tuple... 
44 class StarTuple(tuple): 45 def __new__(cls,*arglist): 46 return tuple.__new__(cls,arglist) 84 def __getitem__(self,args): 85 if isinstance(args,(int,slice)): 86 return super(StarList,self).__getitem__(args) 87 elif isinstance(args,tuple) and len(args)>1: #extended comma notation 88 return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:]) 89 else: 90 return super(StarList,self).__getitem__(args[0]) 91 92 def __str__(self): 93 return "SL("+super(StarList,self).__str__() + ")" 47 94 48 95 class StarDict(dict): 49 96 pass 50 97 51 class LoopBlock: 52 def __init__(self,data = (), dimension = 0, maxoutlength=2048, wraplength=80, overwrite=True): 53 # print 'Creating new loop block, dimension %d' % dimension 54 self.block = {} 55 self.loops = [] 56 self.no_packets = 0 57 self.item_order = [] 58 self.lower_keys = [] #for efficiency 59 self.comment_list = {} 60 self.dimension = dimension 61 self.popout = False #used during load iteration 62 self.curitem = -1 #used during iteration 63 self.maxoutlength = maxoutlength 64 self.wraplength = wraplength 65 self.overwrite = overwrite 66 if not hasattr(self,'loopclass'): #in case are derived class 67 self.loopclass = LoopBlock #when making new loops 68 self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M) 69 if isinstance(data,(TupleType,ListType)): 70 for item in data: 71 self.AddLoopItem(item) 72 elif isinstance(data,LoopBlock): 73 self.block = data.block.copy() 74 self.item_order = data.item_order[:] 75 self.lower_keys = data.lower_keys[:] 76 self.comment_list = data.comment_list.copy() 77 self.dimension = data.dimension 78 # loops as well; change loop class 79 for loopno in range(len(data.loops)): 80 try: 81 placeholder = self.item_order.index(data.loops[loopno]) 82 except ValueError: 83 print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`data.loops[loopno]`,str(data.loops[loopno]),`self.item_order`) 84 placeholder = -1 85 
self.item_order.remove(data.loops[loopno]) #gone 86 newobject = self.loopclass(data.loops[loopno]) 87 # print "Recasting and adding loop %s -> %s" % (`data.loops[loopno]`,`newobject`) 88 self.insert_loop(newobject,position=placeholder) 89 90 def __str__(self): 91 return self.printsection() 92 93 def __setitem__(self,key,value): 94 # catch a one member loop, for convenience 95 # we assume the key is a string value only 96 self.AddLoopItem((key,value)) 97 98 def __getitem__(self,key): 99 if isinstance(key,IntType): #return a packet!! 100 return self.GetPacket(key) 101 return self.GetLoopItem(key) 102 103 def __delitem__(self,key): 104 self.RemoveLoopItem(key) 105 106 def __len__(self): 107 blen = len(self.block) 108 for aloop in self.loops: 109 # print 'Aloop is %s' % `aloop` 110 blen = blen + len(aloop) # also a LoopBlock 111 return blen 112 113 def __nonzero__(self): 114 if self.__len__() > 0: return 1 115 return 0 116 117 # keys returns all internal keys 98 99 class LoopBlock(object): 100 def __init__(self,parent_block,dataname): 101 self.loop_no = parent_block.FindLoop(dataname) 102 if self.loop_no < 0: 103 raise KeyError('%s is not in a loop structure' % dataname) 104 self.parent_block = parent_block 105 118 106 def keys(self): 119 thesekeys = self.block.keys() 120 for aloop in self.loops: 121 thesekeys.extend(aloop.keys()) 122 return thesekeys 107 return self.parent_block.loops[self.loop_no] 123 108 124 109 def values(self): 125 ourkeys = self.keys()126 return map(lambda a:self[a],ourkeys) 127 110 return [self.parent_block[a] for a in self.keys()] 111 112 #Avoid iterator even though that is Python3-esque 128 113 def items(self): 129 ourkeys = self.keys() 130 return map(lambda a,b:(a,b),self.keys(),self.values()) 114 return list(zip(self.keys(),self.values())) 115 116 def __getitem__(self,dataname): 117 if isinstance(dataname,int): #a packet request 118 return self.GetPacket(dataname) 119 if dataname in self.keys(): 120 return self.parent_block[dataname] 121 
else: 122 raise KeyError('%s not in loop block' % dataname) 123 124 def __setitem__(self,dataname,value): 125 self.parent_block[dataname] = value 126 self.parent_block.AddLoopName(self.keys()[0],dataname) 127 128 def __contains__(self,key): 129 return key in self.parent_block.loops[self.loop_no] 131 130 132 131 def has_key(self,key): 133 if key.lower() in self.lower_keys: 134 return 1 135 for aloop in self.loops: 136 if aloop.has_key(key): return 1 137 return 0 138 139 def get(self,key,default=None): 140 if self.has_key(key): 141 retval = self.GetLoopItem(key) 142 else: 143 retval = default 144 return retval 145 146 def clear(self): 147 self.block = {} 148 self.loops = [] 149 self.item_order = [] 150 self.lower_keys = [] 151 self.no_packets = 0 152 153 # doesn't appear to work 154 def copy(self): 155 newcopy = self.copy.im_class(dimension = self.dimension) 156 newcopy.block = self.block.copy() 157 newcopy.loops = [] 158 newcopy.no_packets = self.no_packets 159 newcopy.item_order = self.item_order[:] 160 newcopy.lower_keys = self.lower_keys[:] 161 for loop in self.loops: 162 try: 163 placeholder = self.item_order.index(loop) 164 except ValueError: 165 print "Warning: loop %s (%s) in loops, but not in item_order (%s)" % (`loop`,str(loop),`self.item_order`) 166 placeholder = -1 167 newcopy.item_order.remove(loop) #gone 168 newobject = loop.copy() 169 # print "Adding loop %s -> %s" % (`loop`,`newobject`) 170 newcopy.insert_loop(newobject,position=placeholder) 171 return newcopy 172 173 # this is not appropriate for subloops. 
Instead, the loop block 174 # should be accessed directly for update 175 176 def update(self,adict): 177 for key in adict.keys(): 178 self.AddLoopItem((key,adict[key])) 132 return key in self 133 134 def __iter__(self): 135 packet_list = zip(*self.values()) 136 names = self.keys() 137 for p in packet_list: 138 r = StarPacket(p) 139 for n in range(len(names)): 140 setattr(r,names[n].lower(),r[n]) 141 yield r 142 143 # for compatibility 144 def __getattr__(self,attname): 145 return getattr(self.parent_block,attname) 179 146 180 147 def load_iter(self,coords=[]): 181 count = 0 #to create packet index 148 count = 0 #to create packet index 182 149 while not self.popout: 183 150 # ok, we have a new packet: append a list to our subloops … … 217 184 for iname in self.keys(): #includes lower levels 218 185 target_list = self[iname] 219 for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 186 for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 220 187 target_list = target_list[-1] 221 188 target_list.append([]) … … 232 199 drill_values=drill_values[0] #drill in 233 200 else: 234 raise StarError("Malformed loop packet %s" % `top_items[0]`)235 my_length = len(drill_values )201 raise StarError("Malformed loop packet %s" % repr( top_items[0] )) 202 my_length = len(drill_values[0]) #length of 'string' entry 236 203 if self.dimension == 0: #top level 237 204 for aloop in self.loops: 238 205 for apacket in aloop.recursive_iter(): 239 # print "Recursive yielding %s" % `dict(top_items + apacket.items())`206 # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) ) 240 207 prep_yield = StarPacket(top_values+apacket.values()) #straight list 241 208 for name,value in top_items + apacket.items(): … … 246 213 kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys()) 247 214 kvvals = map(lambda a:a[1],kvpairs) #just values 248 # print "Recursive kvpairs at %d: %s" % (i, `kvpairs`)215 # print "Recursive kvpairs 
at %d: %s" % (i,repr( kvpairs )) 249 216 if self.loops: 250 217 for aloop in self.loops: 251 218 for apacket in aloop.recursive_iter(coord=coord+[i]): 252 # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`219 # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) ) 253 220 prep_yield = StarPacket(kvvals+apacket.values()) 254 221 for name,value in kvpairs + apacket.items(): … … 256 223 yield prep_yield 257 224 else: # we're at the bottom of the tree 258 # print "Recursive yielding %s" % `dict(kvpairs)`225 # print "Recursive yielding %s" % repr( dict(kvpairs) ) 259 226 prep_yield = StarPacket(kvvals) 260 227 for name,value in kvpairs: … … 262 229 yield prep_yield 263 230 264 # small function to use the coordinates. 231 # small function to use the coordinates. 265 232 def coord_to_group(self,dataname,coords): 266 if not isinstance(dataname, StringType):233 if not isinstance(dataname,unicode): 267 234 return dataname # flag inner loop processing 268 235 newm = self[dataname] # newm must be a list or tuple 269 236 for c in coords: 270 # print "Coord_to_group: %s ->" % ( `newm`),237 # print "Coord_to_group: %s ->" % (repr( newm )), 271 238 newm = newm[c] 272 # print `newm`273 return newm 239 # print repr( newm ) 240 return newm 274 241 275 242 def flat_iterator(self): 276 if self.dimension == 0:277 yield copy.copy(self)278 else:279 243 my_length = 0 280 244 top_keys = self.block.keys() … … 283 247 for pack_no in range(my_length): 284 248 yield(self.collapse(pack_no)) 285 286 287 def insert_loop(self,newloop,position=-1,audit=True): 288 # check that new loop is kosher 289 if newloop.dimension != self.dimension + 1: 290 raise StarError( 'Insertion of loop of wrong nesting level %d, should be %d' % (newloop.dimension, self.dimension+1)) 291 self.loops.append(newloop) 292 if audit: 293 dupes = self.audit() 294 if dupes: 295 dupenames = map(lambda a:a[0],dupes) 296 raise StarError( 'Duplicate names: %s' % `dupenames`) 297 if position >= 0: 
298 self.item_order.insert(position,newloop) 299 else: 300 self.item_order.append(newloop) 301 # print "Insert loop: item_order now" + `self.item_order` 302 303 def remove_loop(self,oldloop): 304 # print "Removing %s: item_order %s" % (`oldloop`,self.item_order) 305 # print "Length %d" % len(oldloop) 306 self.item_order.remove(oldloop) 307 self.loops.remove(oldloop) 308 309 def AddComment(self,itemname,comment): 310 self.comment_list[itemname.lower()] = comment 311 312 def RemoveComment(self,itemname): 313 del self.comment_list[itemname.lower()] 314 315 def GetLoopItem(self,itemname): 316 # assume case is correct first 317 try: 318 return self.block[itemname] 319 except KeyError: 320 for loop in self.loops: 321 try: 322 return loop[itemname] 323 except KeyError: 324 pass 325 if itemname.lower() not in self.lower_keys: 326 raise KeyError, 'Item %s not in block' % itemname 327 # it is there somewhere, now we need to find it 328 real_keys = self.block.keys() 329 lower_keys = map(lambda a:a.lower(),self.block.keys()) 330 try: 331 k_index = lower_keys.index(itemname.lower()) 332 except ValueError: 333 raise KeyError, 'Item %s not in block' % itemname 334 return self.block[real_keys[k_index]] 249 250 251 def RemoveItem(self,itemname): 252 """Remove `itemname` from the block.""" 253 # first check any loops 254 loop_no = self.FindLoop(itemname) 255 testkey = itemname.lower() 256 if testkey in self: 257 del self.block[testkey] 258 del self.true_case[testkey] 259 # now remove from loop 260 if loop_no >= 0: 261 self.loops[loop_no].remove(testkey) 262 if len(self.loops[loop_no])==0: 263 del self.loops[loop_no] 264 self.item_order.remove(loop_no) 265 else: #will appear in order list 266 self.item_order.remove(testkey) 335 267 336 268 def RemoveLoopItem(self,itemname): 337 if self.has_key(itemname): 338 testkey = itemname.lower() 339 real_keys = self.block.keys() 340 lower_keys = map(lambda a:a.lower(),real_keys) 341 try: 342 k_index = lower_keys.index(testkey) 343 except 
ValueError: #must be in a lower loop 344 for aloop in self.loops: 345 if aloop.has_key(itemname): 346 # print "Deleting %s (%s)" % (itemname,aloop[itemname]) 347 del aloop[itemname] 348 if len(aloop)==0: # all gone 349 self.remove_loop(aloop) 350 break 351 else: 352 del self.block[real_keys[k_index]] 353 self.lower_keys.remove(testkey) 354 # now remove the key in the order list 355 for i in range(len(self.item_order)): 356 if isinstance(self.item_order[i],StringType): #may be loop 357 if self.item_order[i].lower()==testkey: 358 del self.item_order[i] 359 break 360 if len(self.block)==0: #no items in loop, length -> 0 361 self.no_packets = 0 362 return #no duplicates, no more checking needed 363 364 def AddLoopItem(self,data,precheck=False,maxlength=-1): 365 # print "Received data %s" % `data` 366 # we accept only tuples, strings and lists!! 367 if isinstance(data[0],(TupleType,ListType)): 368 # internal loop 369 # first we remove any occurences of these datanames in 370 # other loops 371 for one_item in data[0]: 372 if self.has_key(one_item): 373 if not self.overwrite: 374 raise StarError( 'Attempt to insert duplicate item name %s' % data[0]) 375 else: 376 del self[one_item] 377 newloop = self.loopclass(dimension = self.dimension+1) 378 keyvals = zip(data[0],data[1]) 379 for key,val in keyvals: 380 newloop.AddLoopItem((key,val)) 381 self.insert_loop(newloop) 382 elif not isinstance(data[0],StringType): 383 raise TypeError, 'Star datanames are strings only (got %s)' % `data[0]` 384 else: 385 if data[1] == [] or get_dim(data[1])[0] == self.dimension: 386 if not precheck: 387 self.check_data_name(data[0],maxlength) # make sure no nasty characters 388 # check that we can replace data 389 if not self.overwrite: 390 if self.has_key(data[0]): 391 raise StarError( 'Attempt to insert duplicate item name %s' % data[0]) 392 # now make sure the data is OK type 393 regval = self.regularise_data(data[1]) 394 if not precheck: 395 try: 396 self.check_item_value(regval) 397 except 
StarError, errmes: 398 raise StarError( "Item name " + data[0] + " " + `errmes`) 399 if self.dimension > 0: 400 if self.no_packets <= 0: 401 self.no_packets = len(data[1]) #first item in this loop 402 if len(data[1]) != self.no_packets: 403 raise StarLengthError, 'Not enough values supplied for %s' % (data[0]) 404 try: 405 oldpos = self.GetItemPosition(data[0]) 406 except ValueError: 407 oldpos = len(self.item_order)#end of list 408 self.RemoveLoopItem(data[0]) # may be different case, so have to do this 409 self.block.update({data[0]:regval}) # trust the data is OK 410 self.lower_keys.insert(oldpos,data[0].lower()) 411 self.item_order.insert(oldpos,data[0]) 412 # self.lower_keys.append(data[0].lower()) 413 # self.item_order.append(data[0]) 414 415 else: #dimension mismatch 416 raise StarLengthError, "input data dim %d != required dim %d: %s %s" % (get_dim(data[1])[0],self.dimension,data[0],`data[1]`) 417 418 def check_data_name(self,dataname,maxlength=-1): 419 if maxlength > 0: 420 if len(dataname)>maxlength: 421 raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength)) 422 if dataname[0]!='_': 423 raise StarError( 'Dataname ' + dataname + ' does not begin with _') 424 if len (filter (lambda a: ord(a) < 33 or ord(a) > 126, dataname)) > 0: 425 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters') 426 427 def check_item_value(self,item): 428 test_item = item 429 if type(item) != TupleType and type(item) != ListType: 430 test_item = [item] #single item list 431 def check_one (it): 432 if type(it) == StringType: 433 if it=='': return 434 me = self.char_check.match(it) 435 if not me: 436 raise StarError( 'Bad character in %s' % it) 437 else: 438 if me.span() != (0,len(it)): 439 raise StarError('Data item "' + it + '"... 
contains forbidden characters') 440 map(check_one,test_item) 441 442 def regularise_data(self,dataitem): 443 alrighttypes = [IntType, LongType, 444 FloatType, StringType] 445 okmappingtypes = [TupleType, ListType] 446 thistype = type(dataitem) 447 if thistype in alrighttypes or thistype in okmappingtypes: 448 return dataitem 449 if isinstance(dataitem,StarTuple) or \ 450 isinstance(dataitem,StarList) or \ 451 isinstance(dataitem,StarDict): 452 return dataitem 453 # so try to make into a list 454 try: 455 regval = list(dataitem) 456 except TypeError, value: 457 raise StarError( str(dataitem) + ' is wrong type for data value\n' ) 458 return regval 459 269 """*Deprecated*. Use `RemoveItem` instead""" 270 self.RemoveItem(itemname) 271 460 272 def GetLoop(self,keyname): 461 if keyname in self.block: #python 2.2 or above 462 return self 463 for aloop in self.loops: 464 try: 465 return aloop.GetLoop(keyname) 466 except KeyError: 467 pass 468 raise KeyError, 'Item %s does not exist' % keyname 273 """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. 
274 `keyname` is only significant as a way to specify the loop.""" 275 return LoopBlock(self,keyname) 469 276 470 277 def GetPacket(self,index): 471 278 thispack = StarPacket([]) 472 for myitem in self.item_order: 473 if isinstance(myitem,LoopBlock): 474 pack_list = map(lambda b:myitem[b][index],myitem.item_order) 475 # print 'Pack_list -> %s' % `pack_list` 476 thispack.append(pack_list) 477 elif self.dimension==0: 478 thispack.append(self[myitem]) 479 else: 480 thispack.append(self[myitem][index]) 481 setattr(thispack,myitem,thispack[-1]) 482 return thispack 279 for myitem in self.parent_block.loops[self.loop_no]: 280 thispack.append(self[myitem][index]) 281 setattr(thispack,myitem,thispack[-1]) 282 return thispack 483 283 484 284 def AddPacket(self,packet): 485 if self.dimension==0: 486 raise StarError,"Attempt to add packet to top level block" 487 for myitem in self.item_order: 488 self[myitem] = list(self[myitem]) #in case we have stored a tuple 489 self[myitem].append(packet.__getattribute__(myitem)) 490 self.no_packets +=1 491 # print "%s now %s" % (myitem,`self[myitem]`) 492 493 def RemoveKeyedPacket(self,keyname,keyvalue): 494 packet_coord = list(self[keyname]).index(keyvalue) 495 loophandle = self.GetLoop(keyname) 496 for packet_entry in loophandle.item_order: 497 loophandle[packet_entry] = list(loophandle[packet_entry]) 498 del loophandle[packet_entry][packet_coord] 499 self.no_packets -= 1 500 501 def GetKeyedPacket(self,keyname,keyvalue): 502 #print "Looking for %s in %s" % (keyvalue, self[keyname]) 503 one_pack= filter(lambda a:getattr(a,keyname)==keyvalue,self) 504 if len(one_pack)!=1: 505 raise KeyError, "Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)) 506 #print "Keyed packet: %s" % one_pack[0] 507 return one_pack[0] 285 for myitem in self.parent_block.loops[self.loop_no]: 286 old_values = self.parent_block[myitem] 287 old_values.append(packet.__getattribute__(myitem)) 288 self.parent_block[myitem] = old_values 508 
289 509 290 def GetItemOrder(self): 510 return self.item_order[:] 291 """Return a list of datanames in this `LoopBlock` in the order that they will be 292 printed""" 293 return self.parent_block.loops[self.loop_no][:] 294 295 296 def GetItemOrder(self): 297 """Return a list of datanames in this `LoopBlock` in the order that they will be 298 printed""" 299 return self.parent_block.loops[self.loop_no][:] 511 300 512 301 def ChangeItemOrder(self,itemname,newpos): 513 testpos = self.GetItemPosition(itemname) 514 del self.item_order[testpos] 515 # so we have an object ready for action 516 self.item_order.insert(newpos,itemname) 302 """Change the position at which `itemname` appears when printing out to `newpos`.""" 303 self.parent_block.loops[self.loop_no].remove(itemname.lower()) 304 self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower()) 517 305 518 306 def GetItemPosition(self,itemname): 307 """A utility function to get the numerical order in the printout 308 of `itemname`. An item has coordinate `(loop_no,pos)` with 309 the top level having a `loop_no` of -1. 
If an integer is passed to 310 the routine then it will return the position of the loop 311 referenced by that number.""" 519 312 import string 520 def low_case(item): 521 try: 522 return string.lower(item) 523 except AttributeError: 524 return item 525 try: 526 testname = string.lower(itemname) 527 except AttributeError: 528 testname = itemname 529 lowcase_order = map(low_case,self.item_order) 530 return lowcase_order.index(testname) 531 532 def collapse(self,packet_no): 533 if self.dimension == 0: 534 raise StarError( "Attempt to select non-existent packet") 535 newlb = LoopBlock(dimension=self.dimension-1) 536 for one_item in self.item_order: 537 if isinstance(one_item,LoopBlock): 538 newlb.insert_loop(one_item.collapse(packet_no)) 539 else: 540 # print "Collapse: %s -> %s" % (one_item,`self[one_item][packet_no]`) 541 newlb[one_item] = self[one_item][packet_no] 542 return newlb 543 544 def audit(self): 545 import sets 546 allkeys = self.keys() 547 uniquenames = sets.Set(allkeys) 548 if len(uniquenames) == len(allkeys): return [] 549 else: 550 keycount = map(lambda a:(a,allkeys.count(a)),uniquenames) 551 return filter(lambda a:a[1]>1,keycount) 552 313 if isinstance(itemname,int): 314 # return loop position 315 return (-1, self.item_order.index(itemname)) 316 if not itemname in self: 317 raise ValueError('No such dataname %s' % itemname) 318 testname = itemname.lower() 319 if testname in self.item_order: 320 return (-1,self.item_order.index(testname)) 321 loop_no = self.FindLoop(testname) 322 loop_pos = self.loops[loop_no].index(testname) 323 return loop_no,loop_pos 324 553 325 def GetLoopNames(self,keyname): 554 326 if keyname in self: 555 327 return self.keys() 556 328 for aloop in self.loops: 557 try: 329 try: 558 330 return aloop.GetLoopNames(keyname) 559 331 except KeyError: 560 332 pass 561 raise KeyError, 'Item does not exist' 333 raise KeyError('Item does not exist') 334 335 def GetLoopNames(self,keyname): 336 """Return all datanames appearing together 
with `keyname`""" 337 loop_no = self.FindLoop(keyname) 338 if loop_no >= 0: 339 return self.loops[loop_no] 340 else: 341 raise KeyError('%s is not in any loop' % keyname) 562 342 563 343 def AddToLoop(self,dataname,loopdata): … … 566 346 thisloop[itemname] = itemvalue 567 347 348 def AddToLoop(self,dataname,loopdata): 349 """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. 350 351 Add multiple columns to the loop containing `dataname`. `loopdata` is a 352 collection of (key,value) pairs, where `key` is the new dataname and `value` 353 is a list of values for that dataname""" 354 # check lengths 355 thisloop = self.FindLoop(dataname) 356 loop_len = len(self[dataname]) 357 bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] 358 if len(bad_vals)>0: 359 raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ 360 % (repr( bad_vals ),loop_len)) 361 self.update(loopdata) 362 self.loops[thisloop]+=loopdata.keys() 363 364 365 class StarBlock(object): 366 def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True, 367 characterset='ascii',maxnamelength=-1): 368 self.block = {} #the actual data storage (lower case keys) 369 self.loops = {} #each loop is indexed by a number and contains a list of datanames 370 self.item_order = [] #lower case, loops referenced by integer 371 self.formatting_hints = {} 372 self.true_case = {} #transform lower case to supplied case 373 self.provide_value = False #prefer string version always 374 self.dictionary = None #DDLm dictionary 375 self.popout = False #used during load iteration 376 self.curitem = -1 #used during iteration 377 self.cache_vals = True #store all calculated values 378 self.maxoutlength = maxoutlength 379 self.setmaxnamelength(maxnamelength) #to enforce CIF limit of 75 characters 380 self.set_characterset(characterset) #to check input names 381 self.wraplength = wraplength 382 self.overwrite = overwrite 383 self.string_delimiters = ["'",'"',"\n;"] 
#universal CIF set 384 self.list_delimiter = " " #CIF2 default 385 self.wrapper = textwrap.TextWrapper() 386 if isinstance(data,(tuple,list)): 387 for item in data: 388 self.AddLoopItem(item) 389 elif isinstance(data,StarBlock): 390 self.block = data.block.copy() 391 self.item_order = data.item_order[:] 392 self.true_case = data.true_case.copy() 393 # loops as well 394 self.loops = data.loops.copy() 395 396 def setmaxnamelength(self,maxlength): 397 """Set the maximum allowable dataname length (-1 for no check)""" 398 self.maxnamelength = maxlength 399 if maxlength > 0: 400 bad_names = [a for a in self.keys() if len(a)>self.maxnamelength] 401 if len(bad_names)>0: 402 raise StarError('Datanames too long: ' + repr( bad_names )) 403 404 def set_characterset(self,characterset): 405 """Set the characterset for checking datanames: may be `ascii` or `unicode`""" 406 import sys 407 self.characterset = characterset 408 if characterset == 'ascii': 409 self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M) 410 elif characterset == 'unicode': 411 if sys.maxunicode < 1114111: 412 self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M) 413 else: 414 self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M) 415 416 def __str__(self): 417 return self.printsection() 418 419 def __setitem__(self,key,value): 420 if key == "saves": 421 raise StarError("""Setting the saves key is deprecated. Add the save block to 422 an enclosing block collection (e.g. CIF or STAR file) with this block as child""") 423 self.AddItem(key,value) 424 425 def __getitem__(self,key): 426 if key == "saves": 427 raise StarError("""The saves key is deprecated. Access the save block from 428 the enclosing block collection (e.g. 
CIF or STAR file object)""") 429 try: 430 rawitem,is_value = self.GetFullItemValue(key) 431 except KeyError: 432 if self.dictionary: 433 # send the dictionary the required key and a pointer to us 434 try: 435 new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False) 436 except StarDerivationFailure: #try now with defaults included 437 try: 438 new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True) 439 except StarDerivationFailure as s: 440 print("In StarBlock.__getitem__, " + repr(s)) 441 raise KeyError('No such item: %s' % key) 442 print('Set %s to derived value %s' % (key, repr(new_value))) 443 return new_value 444 else: 445 raise KeyError('No such item: %s' % key) 446 # we now have an item, we can try to convert it to a number if that is appropriate 447 # note numpy values are never stored but are converted to lists 448 if not self.dictionary or not key in self.dictionary: return rawitem 449 print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem ))) 450 if is_value: 451 if self.provide_value: return rawitem 452 else: 453 print('Turning %s into string' % repr( rawitem )) 454 return self.convert_to_string(key) 455 else: # a string 456 if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \ 457 (isinstance(rawitem,list) and '?' not in rawitem and '.' 
not in rawitem)): 458 return self.dictionary.change_type(key,rawitem) 459 elif self.provide_value: # catch the question marks 460 do_calculate = False 461 if isinstance(rawitem,(list,tuple)): 462 known = [a for a in rawitem if a != '?'] 463 if len(known) == 0: #all questions 464 do_calculate = True 465 elif rawitem == '?': 466 do_calculate = True 467 if do_calculate: 468 # remove old value 469 del self[key] 470 try: 471 new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False) 472 except StarDerivationFailure as s: 473 try: 474 new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True) 475 except StarDerivationFailure as s: 476 477 print("Could not turn %s into a value:" + repr(s)) 478 return rawitem 479 else: 480 print('Set %s to derived value %s' % (key, repr( new_value ))) 481 return new_value 482 return rawitem #can't do anything 483 484 def __delitem__(self,key): 485 self.RemoveItem(key) 486 487 def __len__(self): 488 blen = len(self.block) 489 return blen 490 491 def __nonzero__(self): 492 if self.__len__() > 0: return 1 493 return 0 494 495 # keys returns all internal keys 496 def keys(self): 497 return list(self.block.keys()) #always lower case 498 499 def values(self): 500 return [self[a] for a in self.keys()] 501 502 def items(self): 503 return list(zip(self.keys(),self.values())) 504 505 def __contains__(self,key): 506 if isinstance(key,(unicode,str)) and key.lower() in self.keys(): 507 return True 508 return False 509 510 def has_key(self,key): 511 return key in self 512 513 def has_key_or_alias(self,key): 514 """Check if a dataname or alias is available in the block""" 515 initial_test = key in self 516 if initial_test: return True 517 elif self.dictionary: 518 aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)] 519 if len(aliases)>0: 520 return True 521 return False 522 523 def get(self,key,default=None): 524 if key in self: 525 retval = self.__getitem__(key) 526 
else: 527 retval = default 528 return retval 529 530 def clear(self): 531 self.block = {} 532 self.loops = {} 533 self.item_order = [] 534 self.true_case = {} 535 536 # doesn't appear to work 537 def copy(self): 538 newcopy = StarBlock() 539 newcopy.block = self.block.copy() 540 newcopy.loops = [] 541 newcopy.item_order = self.item_order[:] 542 newcopy.true_case = self.true_case.copy() 543 newcopy.loops = self.loops.copy() 544 # return self.copy.im_class(newcopy) #catch inheritance 545 return newcopy 546 547 def update(self,adict): 548 for key in adict.keys(): 549 self.AddItem(key,adict[key]) 550 551 def GetItemPosition(self,itemname): 552 """A utility function to get the numerical order in the printout 553 of `itemname`. An item has coordinate `(loop_no,pos)` with 554 the top level having a `loop_no` of -1. If an integer is passed to 555 the routine then it will return the position of the loop 556 referenced by that number.""" 557 import string 558 if isinstance(itemname,int): 559 # return loop position 560 return (-1, self.item_order.index(itemname)) 561 if not itemname in self: 562 raise ValueError('No such dataname %s' % itemname) 563 testname = itemname.lower() 564 if testname in self.item_order: 565 return (-1,self.item_order.index(testname)) 566 loop_no = self.FindLoop(testname) 567 loop_pos = self.loops[loop_no].index(testname) 568 return loop_no,loop_pos 569 570 def ChangeItemOrder(self,itemname,newpos): 571 """Move the printout order of `itemname` to `newpos`. 
If `itemname` is 572 in a loop, `newpos` refers to the order within the loop.""" 573 if isinstance(itemname,(unicode,str)): 574 true_name = itemname.lower() 575 else: 576 true_name = itemname 577 loopno = self.FindLoop(true_name) 578 if loopno < 0: #top level 579 self.item_order.remove(true_name) 580 self.item_order.insert(newpos,true_name) 581 else: 582 self.loops[loopno].remove(true_name) 583 self.loops[loopno].insert(newpos,true_name) 584 585 def GetItemOrder(self): 586 """Return a list of datanames in the order in which they will be printed. Loops are 587 referred to by numerical index""" 588 return self.item_order[:] 589 590 def AddItem(self,key,value,precheck=False): 591 """Add dataname `key` to block with value `value`. `value` may be 592 a single value, a list or a tuple. If `precheck` is False (the default), 593 all values will be checked and converted to unicode strings as necessary. If 594 `precheck` is True, this checking is bypassed. No checking is necessary 595 when values are read from a CIF file as they are already in correct form.""" 596 if not isinstance(key,(unicode,str)): 597 raise TypeError('Star datanames are strings only (got %s)' % repr( key )) 598 key = unicode(key) #everything is unicode internally 599 if not precheck: 600 self.check_data_name(key,self.maxnamelength) # make sure no nasty characters 601 # check for overwriting 602 if key in self: 603 if not self.overwrite: 604 raise StarError( 'Attempt to insert duplicate item name %s' % key) 605 if not precheck: #need to sanitise 606 regval,empty_val = self.regularise_data(value) 607 pure_string = check_stringiness(regval) 608 self.check_item_value(regval) 609 else: 610 regval,empty_val = value,None 611 pure_string = True 612 # update ancillary information first 613 lower_key = key.lower() 614 if not lower_key in self and self.FindLoop(lower_key)<0: #need to add to order 615 self.item_order.append(lower_key) 616 # always remove from our case table in case the case is different 617 try: 618 
del self.true_case[lower_key] 619 except KeyError: 620 pass 621 self.true_case[lower_key] = key 622 if pure_string: 623 self.block.update({lower_key:[regval,empty_val]}) 624 else: 625 self.block.update({lower_key:[empty_val,regval]}) 626 627 def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): 628 """*Deprecated*. Use `AddItem` followed by `CreateLoop` if 629 necessary.""" 630 # print "Received data %s" % `incomingdata` 631 # we accept tuples, strings, lists and dicts!! 632 # Direct insertion: we have a string-valued key, with an array 633 # of values -> single-item into our loop 634 if isinstance(incomingdata[0],(tuple,list)): 635 # a whole loop 636 keyvallist = zip(incomingdata[0],incomingdata[1]) 637 for key,value in keyvallist: 638 self.AddItem(key,value) 639 self.CreateLoop(incomingdata[0]) 640 elif not isinstance(incomingdata[0],(unicode,str)): 641 raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] )) 642 else: 643 self.AddItem(incomingdata[0],incomingdata[1]) 644 645 def check_data_name(self,dataname,maxlength=-1): 646 if maxlength > 0: 647 self.check_name_length(dataname,maxlength) 648 if dataname[0]!='_': 649 raise StarError( 'Dataname ' + dataname + ' does not begin with _') 650 if self.characterset=='ascii': 651 if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0: 652 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters') 653 else: 654 # print 'Checking %s for unicode characterset conformance' % dataname 655 if len ([a for a in dataname if ord(a) < 33]) > 0: 656 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)') 657 if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0: 658 raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)') 659 if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0: 660 raise StarError( 'Dataname ' + dataname + ' contains 
unsupported characters (between U+D800 and U+E000)') 661 if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0: 662 raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)') 663 if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0: 664 raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)') 665 if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xE == 0xE)]) > 0: 666 print('%s fails' % dataname) 667 for a in dataname: print('%x' % ord(a),end="") 668 print() 669 raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)') 670 671 def check_name_length(self,dataname,maxlength): 672 if len(dataname)>maxlength: 673 raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength)) 674 return 675 676 def check_item_value(self,item): 677 test_item = item 678 if not isinstance(item,(list,dict,tuple)): 679 test_item = [item] #single item list 680 def check_one (it): 681 if isinstance(it,unicode): 682 if it=='': return 683 me = self.char_check.match(it) 684 if not me: 685 print("Fail value check: %s" % it) 686 raise StarError('Bad character in %s' % it) 687 else: 688 if me.span() != (0,len(it)): 689 print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it ))) 690 raise StarError('Data item "' + repr( it ) + u'"... 
contains forbidden characters') 691 [check_one(a) for a in test_item] 692 693 def regularise_data(self,dataitem): 694 """Place dataitem into a list if necessary""" 695 from numbers import Number 696 if isinstance(dataitem,str): 697 return unicode(dataitem),None 698 if isinstance(dataitem,(Number,unicode,StarList,StarDict)): 699 return dataitem,None #assume StarList/StarDict contain unicode if necessary 700 if isinstance(dataitem,(tuple,list)): 701 v,s = zip(*list([self.regularise_data(a) for a in dataitem])) 702 return list(v),list(s) 703 #return dataitem,[None]*len(dataitem) 704 # so try to make into a list 705 try: 706 regval = list(dataitem) 707 except TypeError as value: 708 raise StarError( str(dataitem) + ' is wrong type for data value\n' ) 709 v,s = zip(*list([self.regularise_data(a) for a in regval])) 710 return list(v),list(s) 711 712 def RemoveItem(self,itemname): 713 """Remove `itemname` from the block.""" 714 # first check any loops 715 loop_no = self.FindLoop(itemname) 716 testkey = itemname.lower() 717 if testkey in self: 718 del self.block[testkey] 719 del self.true_case[testkey] 720 # now remove from loop 721 if loop_no >= 0: 722 self.loops[loop_no].remove(testkey) 723 if len(self.loops[loop_no])==0: 724 del self.loops[loop_no] 725 self.item_order.remove(loop_no) 726 else: #will appear in order list 727 self.item_order.remove(testkey) 728 729 def RemoveLoopItem(self,itemname): 730 """*Deprecated*. Use `RemoveItem` instead""" 731 self.RemoveItem(itemname) 732 733 def GetItemValue(self,itemname): 734 """Return value of `itemname`. If `itemname` is looped, a list 735 of all values will be returned.""" 736 return self.GetFullItemValue(itemname)[0] 737 738 def GetFullItemValue(self,itemname): 739 """Return the value associated with `itemname`, and a boolean flagging whether 740 (True) or not (False) it is in a form suitable for calculation. 
False is 741 always returned for strings and `StarList` objects.""" 742 try: 743 s,v = self.block[itemname.lower()] 744 except KeyError: 745 raise KeyError('Itemname %s not in datablock' % itemname) 746 # prefer string value unless all are None 747 # are we a looped value? 748 if not isinstance(s,(tuple,list)) or isinstance(s,StarList): 749 if not_none(s): 750 return s,False #a string value 751 else: 752 return v,not isinstance(v,StarList) #a StarList is not calculation-ready 753 elif not_none(s): 754 return s,False #a list of string values 755 else: 756 if len(v)>0: 757 return v,not isinstance(v[0],StarList) 758 return v,True 759 760 def CreateLoop(self,datanames,order=-1,length_check=True): 761 """Create a loop in the datablock. `datanames` is a list of datanames that 762 together form a loop. If length_check is True, they should have been initialised in the block 763 to have the same number of elements (possibly 0). If `order` is given, 764 the loop will appear at this position in the block when printing 765 out. 
A loop counts as a single position.""" 766 767 if length_check: 768 # check lengths: these datanames should exist 769 listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)] 770 if len(listed_values) == len(datanames): 771 len_set = set([len(self[a]) for a in datanames]) 772 if len(len_set)>1: 773 raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set ))) 774 elif len(listed_values) != 0: 775 raise ValueError('Request to loop datanames where some are single values and some are not') 776 # store as lower case 777 lc_datanames = [d.lower() for d in datanames] 778 # remove these datanames from all other loops 779 [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]] 780 # remove empty loops 781 empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0] 782 for a in empty_loops: 783 self.item_order.remove(a) 784 del self.loops[a] 785 if len(self.loops)>0: 786 loopno = max(self.loops.keys()) + 1 787 else: 788 loopno = 1 789 self.loops[loopno] = list(lc_datanames) 790 if order >= 0: 791 self.item_order.insert(order,loopno) 792 else: 793 self.item_order.append(loopno) 794 # remove these datanames from item ordering 795 self.item_order = [a for a in self.item_order if a not in lc_datanames] 796 797 def AddLoopName(self,oldname, newname): 798 """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no 799 error is raised. If `newname` is in a different loop, it is removed from that loop. 
800 The number of values associated with `newname` must match the number of values associated 801 with all other columns of the new loop or a `ValueError` will be raised.""" 802 lower_newname = newname.lower() 803 loop_no = self.FindLoop(oldname) 804 if loop_no < 0: 805 raise KeyError('%s not in loop' % oldname) 806 if lower_newname in self.loops[loop_no]: 807 return 808 # check length 809 old_provides = self.provide_value 810 self.provide_value = False 811 loop_len = len(self[oldname]) 812 self.provide_value = old_provides 813 if len(self[newname]) != loop_len: 814 raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) 815 # remove from any other loops 816 [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] 817 # and add to this loop 818 self.loops[loop_no].append(lower_newname) 819 # remove from item_order if present 820 try: 821 self.item_order.remove(lower_newname) 822 except ValueError: 823 pass 824 825 def FindLoop(self,keyname): 826 """Find the loop that contains `keyname` and return its numerical index or 827 -1 if not present. The numerical index can be used to refer to the loop in 828 other routines.""" 829 loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]] 830 if len(loop_no)>0: 831 return loop_no[0] 832 else: 833 return -1 834 835 def GetLoop(self,keyname): 836 """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. 
837 `keyname` is only significant as a way to specify the loop.""" 838 return LoopBlock(self,keyname) 839 840 def GetLoopNames(self,keyname): 841 if keyname in self: 842 return self.keys() 843 for aloop in self.loops: 844 try: 845 return aloop.GetLoopNames(keyname) 846 except KeyError: 847 pass 848 raise KeyError('Item does not exist') 849 850 def GetLoopNames(self,keyname): 851 """Return all datanames appearing together with `keyname`""" 852 loop_no = self.FindLoop(keyname) 853 if loop_no >= 0: 854 return self.loops[loop_no] 855 else: 856 raise KeyError('%s is not in any loop' % keyname) 857 858 def AddLoopName(self,oldname, newname): 859 """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no 860 error is raised. If `newname` is in a different loop, it is removed from that loop. 861 The number of values associated with `newname` must match the number of values associated 862 with all other columns of the new loop or a `ValueError` will be raised.""" 863 lower_newname = newname.lower() 864 loop_no = self.FindLoop(oldname) 865 if loop_no < 0: 866 raise KeyError('%s not in loop' % oldname) 867 if lower_newname in self.loops[loop_no]: 868 return 869 # check length 870 old_provides = self.provide_value 871 self.provide_value = False 872 loop_len = len(self[oldname]) 873 self.provide_value = old_provides 874 if len(self[newname]) != loop_len: 875 raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)) 876 # remove from any other loops 877 [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]] 878 # and add to this loop 879 self.loops[loop_no].append(lower_newname) 880 # remove from item_order if present 881 try: 882 self.item_order.remove(lower_newname) 883 except ValueError: 884 pass 885 886 def AddToLoop(self,dataname,loopdata): 887 thisloop = self.GetLoop(dataname) 888 for itemname,itemvalue in loopdata.items(): 889 thisloop[itemname] = itemvalue 890 891 def 
AddToLoop(self,dataname,loopdata): 892 """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. 893 894 Add multiple columns to the loop containing `dataname`. `loopdata` is a 895 collection of (key,value) pairs, where `key` is the new dataname and `value` 896 is a list of values for that dataname""" 897 # check lengths 898 thisloop = self.FindLoop(dataname) 899 loop_len = len(self[dataname]) 900 bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len] 901 if len(bad_vals)>0: 902 raise StarLengthError("Number of values for looped datanames %s not equal to %d" \ 903 % (repr( bad_vals ),loop_len)) 904 self.update(loopdata) 905 self.loops[thisloop]+=loopdata.keys() 906 907 def RemoveKeyedPacket(self,keyname,keyvalue): 908 """Remove the packet for which dataname `keyname` takes 909 value `keyvalue`. Only the first such occurrence is 910 removed.""" 911 packet_coord = list(self[keyname]).index(keyvalue) 912 loopnames = self.GetLoopNames(keyname) 913 for dataname in loopnames: 914 self.block[dataname][0] = list(self.block[dataname][0]) 915 del self.block[dataname][0][packet_coord] 916 self.block[dataname][1] = list(self.block[dataname][1]) 917 del self.block[dataname][1][packet_coord] 918 919 def GetKeyedPacket(self,keyname,keyvalue,no_case=False): 920 """Return the loop packet (a `StarPacket` object) where `keyname` has value 921 `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. 
`ValueError` 922 is raised if no packet is found or more than one packet is found.""" 923 my_loop = self.GetLoop(keyname) 924 #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block)) 925 #print('Packet check on:' + keyname) 926 #[print(repr(getattr(a,keyname))) for a in my_loop] 927 if no_case: 928 one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()] 929 else: 930 one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue] 931 if len(one_pack)!=1: 932 raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))) 933 print("Keyed packet: %s" % one_pack[0]) 934 return one_pack[0] 935 936 def GetCompoundKeyedPacket(self,keydict): 937 """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs 938 in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is 939 True. `ValueError` is raised if no packet is found or more than one packet is found.""" 940 #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname]) 941 keynames = list(keydict.keys()) 942 my_loop = self.GetLoop(keynames[0]) 943 for one_key in keynames: 944 keyval,no_case = keydict[one_key] 945 if no_case: 946 my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()]) 947 else: 948 my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval]) 949 if len(my_loop)!=1: 950 raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop))) 951 print("Compound keyed packet: %s" % my_loop[0]) 952 return my_loop[0] 953 954 def GetKeyedSemanticPacket(self,keyvalue,cat_id): 955 """Return a complete packet for category `cat_id` where the 956 category key for the category equals `keyvalue`. This routine 957 will understand any joined loops, so if separate loops in the 958 datafile belong to the 959 same category hierarchy (e.g. 
`_atom_site` and `_atom_site_aniso`), 960 the returned `StarPacket` object will contain datanames from 961 both categories.""" 962 target_keys = self.dictionary.cat_key_table[cat_id] 963 target_keys = [k[0] for k in target_keys] #one only in each list 964 p = StarPacket() 965 # set case-sensitivity flag 966 lcase = False 967 if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']: 968 lcase = True 969 for cat_key in target_keys: 970 try: 971 extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) 972 except KeyError: #missing key 973 try: 974 test_key = self[cat_key] #generate key if possible 975 print('Test key is %s' % repr( test_key )) 976 if test_key is not None and\ 977 not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)): 978 print('Getting packet for key %s' % repr( keyvalue )) 979 extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) 980 except: #cannot be generated 981 continue 982 except ValueError: #none/more than one, assume none 983 continue 984 #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue) 985 p.merge_packet(extra_packet) 986 # the following attributes used to calculate missing values 987 for keyname in target_keys: 988 if hasattr(p,keyname): 989 p.key = [keyname] 990 break 991 if not hasattr(p,"key"): 992 raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p))) 993 p.cif_dictionary = self.dictionary 994 p.fulldata = self 995 return p 996 997 def GetMultiKeyedSemanticPacket(self,keydict,cat_id): 998 """Return a complete packet for category `cat_id` where the keyvalues are 999 provided as a dictionary of key:(value,caseless) pairs 1000 This routine 1001 will understand any joined loops, so if separate loops in the 1002 datafile belong to the 1003 same category hierarchy (e.g. 
`_atom_site` and `_atom_site_aniso`), 1004 the returned `StarPacket` object will contain datanames from 1005 the requested category and any children.""" 1006 #if len(keyvalues)==1: #simplification 1007 # return self.GetKeyedSemanticPacket(keydict[1][0],cat_id) 1008 target_keys = self.dictionary.cat_key_table[cat_id] 1009 # update the dictionary passed to us with all equivalents, for 1010 # simplicity. 1011 parallel_keys = list(zip(*target_keys)) #transpose 1012 print('Parallel keys:' + repr(parallel_keys)) 1013 print('Keydict:' + repr(keydict)) 1014 start_keys = list(keydict.keys()) 1015 for one_name in start_keys: 1016 key_set = [a for a in parallel_keys if one_name in a] 1017 for one_key in key_set: 1018 keydict[one_key] = keydict[one_name] 1019 # target_keys is a list of lists, each of which is a compound key 1020 p = StarPacket() 1021 # a little function to return the dataname for a key 1022 def find_key(key): 1023 for one_key in self.dictionary.key_equivs.get(key,[])+[key]: 1024 if self.has_key(one_key): 1025 return one_key 1026 return None 1027 for one_set in target_keys: #loop down the categories 1028 true_keys = [find_key(k) for k in one_set] 1029 true_keys = [k for k in true_keys if k is not None] 1030 if len(true_keys)==len(one_set): 1031 truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)]) 1032 try: 1033 extra_packet = self.GetCompoundKeyedPacket(truekeydict) 1034 except KeyError: #one or more are missing 1035 continue #should try harder? 
1036 except ValueError: 1037 continue 1038 else: 1039 continue 1040 print('Merging packet for keys ' + repr(one_set)) 1041 p.merge_packet(extra_packet) 1042 # the following attributes used to calculate missing values 1043 p.key = true_keys 1044 p.cif_dictionary = self.dictionary 1045 p.fulldata = self 1046 return p 1047 1048 1049 def set_grammar(self,new_grammar): 1050 self.string_delimiters = ["'",'"',"\n;",None] 1051 if new_grammar in ['STAR2','2.0']: 1052 self.string_delimiters += ['"""',"'''"] 1053 if new_grammar == '2.0': 1054 self.list_delimiter = " " 1055 elif new_grammar == 'STAR2': 1056 self.list_delimiter = ", " 1057 elif new_grammar not in ['1.0','1.1']: 1058 raise StarError('Request to set unknown grammar %s' % new_grammar) 1059 568 1060 def SetOutputLength(self,wraplength=80,maxoutlength=2048): 1061 """Set the maximum output line length (`maxoutlength`) and the line length to 1062 wrap at (`wraplength`). The wrap length is a target only and may not always be 1063 possible.""" 569 1064 if wraplength > maxoutlength: 570 1065 raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength)) 571 1066 self.wraplength = wraplength 572 1067 self.maxoutlength = maxoutlength 573 for loop in self.loops: 574 loop.SetOutputLength(wraplength,maxoutlength) 575 576 def printsection(self,instring='',blockstart="",blockend="",indent=0,coord=[]): 577 import cStringIO 1068 1069 def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''): 578 1070 import string 1071 self.provide_value = False 579 1072 # first make an ordering 580 order = self.item_order[:]1073 self.create_ordering(finish_at,start_from) #create self.output_order 581 1074 # now do it... 
582 1075 if not instring: 583 outstring = cStringIO.StringIO() # the returned string1076 outstring = CIFStringIO(target_width=80) # the returned string 584 1077 else: 585 1078 outstring = instring 586 if not coord: 587 coords = [0]*(self.dimension-1) 588 else: 589 coords = coord 590 if(len(coords)<self.dimension-1): 591 raise StarError("Not enough block packet coordinates to uniquely define data") 592 # print loop delimiter 593 outstring.write(blockstart) 594 while len(order)>0: 595 # print "Order now: " + `order` 596 itemname = order.pop(0) 597 if self.dimension == 0: # ie value next to tag 598 if not isinstance(itemname,LoopBlock): #no loop 599 # grab any comment 600 thiscomment = self.comment_list.get(itemname.lower(),'') 1079 # print block delimiter 1080 outstring.write(blockstart,canbreak=True) 1081 while len(self.output_order)>0: 1082 #print "Remaining to output " + `self.output_order` 1083 itemname = self.output_order.pop(0) 1084 if not isinstance(itemname,int): #no loop 1085 item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()] 1086 if len(item_spec)>0: 1087 item_spec = item_spec[0] 1088 col_pos = item_spec.get('column',-1) 1089 name_pos = item_spec.get('name_pos',-1) 1090 else: 1091 col_pos = -1 1092 item_spec = {} 1093 name_pos = -1 1094 if col_pos < 0: col_pos = 40 1095 outstring.set_tab(col_pos) 601 1096 itemvalue = self[itemname] 602 if isinstance(itemvalue,StringType): #need to sanitize 603 thisstring = self._formatstring(itemvalue) 604 else: thisstring = str(itemvalue) 605 # try for a tabstop at 40 606 if len(itemname)<40 and (len(thisstring)-40 < self.wraplength-1): 607 itemname = itemname + ' '*(40-len(itemname)) 608 else: itemname = itemname + ' ' 609 if len(thisstring) + len(itemname) < (self.wraplength-1): 610 outstring.write('%s%s' % (itemname,thisstring)) 611 if thiscomment: 612 if len(thiscomment)+len(thisstring)+len(itemname)< (self.wraplength-3): 613 outstring.write(' #'+thiscomment) 614 else: 615 
outstring.write('%s\n %s' % (itemname, thisstring)) 616 if thiscomment: 617 if len(thiscomment)+len(thisstring)<(self.wraplength-3): 618 outstring.write(' #'+thiscomment) 619 else: 620 outstring.write('\n#'+thiscomment) 621 outstring.write('\n') 622 else: # we are asked to print an internal loop block 623 #first make sure we have sensible coords. Length should be one 624 #less than the current dimension 625 outstring.write(' '*indent); outstring.write('loop_\n') 626 itemname.format_names(outstring,indent+2) 627 itemname.format_packets(outstring,coords,indent+2) 628 else: # we are a nested loop 629 outstring.write(' '*indent); outstring.write('loop_\n') 630 self.format_names(outstring,indent+2) 631 self.format_packets(outstring,coords,indent+2) 632 if instring: return #inside a recursion 1097 outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos) 1098 outstring.write(' ',canbreak=True,do_tab=False,delimiter=True) #space after itemname 1099 self.format_value(itemvalue,outstring,hints=item_spec) 1100 else:# we are asked to print a loop block 1101 outstring.set_tab(10) #guess this is OK? 
1102 loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop'] 1103 if loop_spec: 1104 loop_indent = max(loop_spec[0],0) 1105 else: 1106 loop_indent = indent 1107 outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent) 1108 self.format_names(outstring,indent+2,loop_no=itemname) 1109 self.format_packets(outstring,indent+2,loop_no=itemname) 633 1110 else: 634 1111 returnstring = outstring.getvalue() … … 636 1113 return returnstring 637 1114 638 def format_names(self,outstring,indent=0): 639 temp_order = self.item_order[:] 1115 def format_names(self,outstring,indent=0,loop_no=-1): 1116 """Print datanames from `loop_no` one per line""" 1117 temp_order = self.loops[loop_no][:] #copy 1118 format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in temp_order]) 640 1119 while len(temp_order)>0: 641 1120 itemname = temp_order.pop(0) 642 if isinstance(itemname,StringType): #(not loop) 643 outstring.write(' ' * indent) 644 outstring.write(itemname) 645 outstring.write("\n") 646 else: # a loop 647 outstring.write(' ' * indent) 648 outstring.write("loop_\n") 649 itemname.format_names(outstring,indent+2) 650 outstring.write(" stop_\n") 651 652 def format_packets(self,outstring,coordinates,indent=0): 653 import cStringIO 1121 req_indent = format_hints.get(itemname,{}).get('name_pos',indent) 1122 outstring.write(' ' * req_indent,do_tab=False) 1123 outstring.write(self.true_case[itemname],do_tab=False) 1124 outstring.write("\n",do_tab=False) 1125 1126 def format_packets(self,outstring,indent=0,loop_no=-1): 654 1127 import string 655 # get our current group of data656 # print 'Coords: %s' % `coordinates`657 alldata = map(lambda a:self.coord_to_group(a,coordinates),self.item_order)658 # print 'Alldata: %s' % `alldata`659 packet_data = apply(zip,alldata)660 # print 'Packet data: %s' % `packet_data`661 curstring = ''1128 alldata = [self[a] for a in self.loops[loop_no]] 1129 loopnames = self.loops[loop_no] 1130 
#print 'Alldata: %s' % `alldata` 1131 packet_data = list(zip(*alldata)) 1132 #print 'Packet data: %s' % `packet_data` 1133 #create a dictionary for quick lookup of formatting requirements 1134 format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in loopnames]) 662 1135 for position in range(len(packet_data)): 1136 if position > 0: 1137 outstring.write("\n") #new line each packet except first 663 1138 for point in range(len(packet_data[position])): 664 1139 datapoint = packet_data[position][point] 665 packstring = self.format_packet_item(datapoint,indent) 666 if len(curstring) + len(packstring)> self.wraplength-2: #past end of line with space 667 curstring = curstring + '\n' + ' '*indent + packstring 668 elif curstring == '': 669 curstring = curstring + ' '*indent + packstring 670 else: 671 curstring = curstring + ' ' + packstring 672 outstring.write(curstring + '\n') #end of one packet 673 curstring = '' 674 outstring.write(' ' + curstring + '\n') #last time through 675 676 def format_packet_item(self,pack_item,indent): 677 # print 'Formatting %s' % `pack_item` 678 curstring = '' 679 if isinstance(pack_item,(StringType,IntType,FloatType,LongType,StarTuple,StarList)): 680 if isinstance(pack_item,StringType): 681 thisstring = self._formatstring(pack_item) #no spaces yet 682 if '\n' in thisstring: #must have semicolon digraph then 683 curstring = curstring + thisstring 684 curstring = curstring + (' ' * indent) 685 thisstring = '' 686 else: 687 thisstring = '%s' % str(pack_item) 688 if len(curstring) + len(thisstring)> self.wraplength-2: #past end of line with space 689 curstring = curstring + '\n' #add the space 690 curstring = curstring + (' ' * indent) + thisstring 691 else: 692 curstring = curstring + ' ' + thisstring 693 # Now, for each nested loop we call ourselves again 694 # After first outputting the current line 695 else: # a nested packet 696 if not isinstance(pack_item[0],(ListType,TupleType)): #base packet 697 item_list = 
pack_item 1140 format_hint = format_hints.get(loopnames[point],{}) 1141 packstring = self.format_packet_item(datapoint,indent,outstring,format_hint) 1142 outstring.write(' ',canbreak=True,do_tab=False,delimiter=True) 1143 1144 def format_packet_item(self,pack_item,indent,outstring,format_hint): 1145 # print 'Formatting %s' % `pack_item` 1146 # temporary check for any non-unicode items 1147 if isinstance(pack_item,str) and not isinstance(pack_item,unicode): 1148 raise StarError("Item {0!r} is not unicode".format(pack_item)) 1149 if isinstance(pack_item,unicode): 1150 delimiter = format_hint.get('delimiter',None) 1151 startcol = format_hint.get('column',-1) 1152 outstring.write(self._formatstring(pack_item,delimiter=delimiter),startcol=startcol) 698 1153 else: 699 item_list = apply(zip,pack_item) 700 for sub_item in item_list: 701 curstring = curstring + ' ' + self.format_packet_item(sub_item,indent) 702 # stop_ is not issued at the end of each innermost packet 703 if isinstance(pack_item[0],(ListType,TupleType)): 704 curstring = curstring + ' stop_ ' 705 return curstring 706 707 def _formatstring(self,instring): 1154 self.format_value(pack_item,outstring,hints = format_hint) 1155 1156 def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}): 708 1157 import string 709 if len(instring)==0: return "''" 710 if len(instring)< (self.maxoutlength-2) and '\n' not in instring and not ('"' in instring and '\'' in instring): 711 if not ' ' in instring and not '\t' in instring and not '\v' \ 712 in instring and not '_' in instring and not (instring[0]=="'" or \ 713 instring[0]=='"'): # no blanks 714 return instring 715 if not "'" in instring: #use apostrophes 716 return "'%s'" % (instring) 717 elif not "\"" in instring: 718 return '"%s"' % (instring) 719 # is a long one or one that needs semicolons due to carriage returns 720 outstring = "\n;" 721 # if there are returns in the string, try to work with them 722 while 1: 723 retin = 
string.find(instring,'\n')+1 724 if retin < self.maxoutlength and retin > 0: # honour this break 725 outstring = outstring + instring[:retin] 726 instring = instring[retin:] 727 elif len(instring)<self.maxoutlength: # finished 728 outstring = outstring + instring + '\n;\n' 729 break 730 else: # find a space 731 for letter in range(self.maxoutlength-1,self.wraplength-1,-1): 732 if instring[letter] in ' \t\f': break 733 outstring = outstring + instring[:letter+1] 734 outstring = outstring + '\n' 735 instring = instring[letter+1:] 736 return outstring 737 738 739 740 class StarBlock(LoopBlock): 741 def __init__(self,*pos_args,**keyword_args): 742 LoopBlock.__init__(self,*pos_args,**keyword_args) 743 self.saves = BlockCollection(element_class=LoopBlock,type_tag="save") 744 745 def __getitem__(self,key): 746 if key == "saves": 747 return self.saves 748 else: 749 return LoopBlock.__getitem__(self,key) 750 751 def __setitem__(self,key,value): 752 if key == "saves": 753 self.saves[key] = value 754 else: 755 LoopBlock.__setitem__(self,key,value) 756 757 def clear(self): 758 LoopBlock.clear(self) 759 self.saves = BlockCollection(element_class=LoopBlock,type_tag="save_") 760 761 def copy(self): 762 newblock = LoopBlock.copy(self) 763 newblock.saves = self.saves.copy() 764 return self.copy.im_class(newblock) #catch inheritance 765 766 def has_key(self,key): 767 if key == "saves": return 1 768 else: return LoopBlock.has_key(self,key) 769 770 def __str__(self): 771 retstr = '' 772 for sb in self.saves.keys(): 773 retstr = retstr + '\nsave_%s\n\n' % sb 774 self.saves[sb].SetOutputLength(self.wraplength,self.maxoutlength) 775 retstr = retstr + str(self.saves[sb]) 776 retstr = retstr + '\nsave_\n\n' 777 return retstr + LoopBlock.__str__(self) 1158 if hints.get("reformat",False) and "\n" in instring: 1159 instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"]) 1160 allowed_delimiters = set(self.string_delimiters) 1161 if len(instring)==0: 
allowed_delimiters.difference_update([None]) 1162 if len(instring) > (self.maxoutlength-2) or '\n' in instring: 1163 allowed_delimiters.intersection_update(["\n;","'''",'"""']) 1164 if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring: 1165 allowed_delimiters.difference_update([None]) 1166 if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'): 1167 allowed_delimiters.difference_update([None]) 1168 if len(instring)>5 and instring[:6].lower()=='global': 1169 allowed_delimiters.difference_update([None]) 1170 if '"' in instring: allowed_delimiters.difference_update(['"',None]) 1171 if "'" in instring: allowed_delimiters.difference_update(["'",None]) 1172 out_delimiter = "\n;" #default (most conservative) 1173 if delimiter in allowed_delimiters: 1174 out_delimiter = delimiter 1175 elif "'" in allowed_delimiters: out_delimiter = "'" 1176 elif '"' in allowed_delimiters: out_delimiter = '"' 1177 if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter 1178 elif out_delimiter is None: return instring 1179 # we are left with semicolon strings 1180 # use our protocols: 1181 maxlinelength = max([len(a) for a in instring.split('\n')]) 1182 if maxlinelength > self.maxoutlength: 1183 protocol_string = apply_line_folding(instring) 1184 else: 1185 protocol_string = instring 1186 # now check for embedded delimiters 1187 if "\n;" in protocol_string: 1188 prefix = "CIF:" 1189 while prefix in protocol_string: prefix = prefix + ":" 1190 protocol_string = apply_line_prefix(protocol_string,prefix+"> ") 1191 return "\n;" + protocol_string + "\n;" 1192 1193 def format_value(self,itemvalue,stringsink,compound=False,hints={}): 1194 """Format a Star data value""" 1195 global have_numpy 1196 delimiter = hints.get('delimiter',None) 1197 startcol = hints.get('column',-1) 1198 if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed 
1199 raise StarError("Non-unicode value {0} found in block".format(itemvalue)) 1200 if isinstance(itemvalue,unicode): #need to sanitize 1201 stringsink.write(self._formatstring(itemvalue,delimiter=delimiter,hints=hints),canbreak = True,startcol=startcol) 1202 elif isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy 1203 stringsink.set_tab(0) 1204 stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=startcol) 1205 if len(itemvalue)>0: 1206 self.format_value(itemvalue[0],stringsink) 1207 for listval in itemvalue[1:]: 1208 # print 'Formatting %s' % `listval` 1209 stringsink.write(self.list_delimiter,do_tab=False) 1210 self.format_value(listval,stringsink,compound=True) 1211 stringsink.write(']',unindent=True) 1212 elif isinstance(itemvalue,dict): 1213 stringsink.set_tab(0) 1214 stringsink.write('{',newindent=True,mustbreak=compound,startcol=startcol) #start a new line inside 1215 items = list(itemvalue.items()) 1216 if len(items)>0: 1217 stringsink.write("'"+items[0][0]+"'"+':',canbreak=True) 1218 self.format_value(items[0][1],stringsink) 1219 for key,value in items[1:]: 1220 stringsink.write(self.list_delimiter) 1221 stringsink.write("'"+key+"'"+":",canbreak=True) 1222 self.format_value(value,stringsink) #never break between key and value 1223 stringsink.write('}',unindent=True) 1224 elif isinstance(itemvalue,(float,int)) or \ 1225 (have_numpy and isinstance(itemvalue,(numpy.number))): #TODO - handle uncertainties 1226 stringsink.write(str(itemvalue),canbreak=True,startcol=startcol) #numbers 1227 else: 1228 raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue )) 1229 1230 def create_ordering(self,finish_at,start_from): 1231 """Create a canonical ordering that includes loops using our formatting hints dictionary""" 1232 requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop']) 1233 new_order = [] 1234 for item in requested_order: 
1235 if isinstance(item,unicode) and item.lower() in self.item_order: 1236 new_order.append(item.lower()) 1237 elif item in self: #in a loop somewhere 1238 target_loop = self.FindLoop(item) 1239 if target_loop not in new_order: 1240 new_order.append(target_loop) 1241 # adjust loop name order 1242 loopnames = self.loops[target_loop] 1243 loop_order = [i for i in requested_order if i in loopnames] 1244 unordered = [i for i in loopnames if i not in loop_order] 1245 self.loops[target_loop] = loop_order + unordered 1246 extras = list([i for i in self.item_order if i not in new_order]) 1247 self.output_order = new_order + extras 1248 # now handle partial output 1249 if start_from != '': 1250 if start_from in requested_order: 1251 sfi = requested_order.index(start_from) 1252 loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0] 1253 candidates = list([k for k in self.output_order if k in requested_order[sfi:]]) 1254 cand_pos = len(new_order) 1255 if len(candidates)>0: 1256 cand_pos = self.output_order.index(candidates[0]) 1257 if len(loop_order)>0: 1258 cand_pos = min(cand_pos,self.output_order.index(loop_order[0])) 1259 if cand_pos < len(self.output_order): 1260 print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from)) 1261 self.output_order = self.output_order[cand_pos:] 1262 else: 1263 print('Start is beyond end of output list') 1264 self.output_order = [] 1265 elif start_from in extras: 1266 self.output_order = self.output_order[self.output_order.index(start_from):] 1267 else: 1268 self.output_order = [] 1269 if finish_at != '': 1270 if finish_at in requested_order: 1271 fai = requested_order.index(finish_at) 1272 loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0]) 1273 candidates = list([k for k in self.output_order if k in requested_order[fai:]]) 1274 cand_pos = len(new_order) 1275 if len(candidates)>0: 1276 cand_pos = self.output_order.index(candidates[0]) 1277 if 
len(loop_order)>0: 1278 cand_pos = min(cand_pos,self.output_order.index(loop_order[0])) 1279 if cand_pos < len(self.output_order): 1280 print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at)) 1281 self.output_order = self.output_order[:cand_pos] 1282 else: 1283 print('All of block output') 1284 elif finish_at in extras: 1285 self.output_order = self.output_order[:self.output_order.index(finish_at)] 1286 #print('Final order: ' + repr(self.output_order)) 1287 1288 def convert_to_string(self,dataname): 1289 """Convert values held in dataname value fork to string version""" 1290 v,is_value = self.GetFullItemValue(dataname) 1291 if not is_value: 1292 return v 1293 if check_stringiness(v): return v #already strings 1294 # TODO...something else 1295 return v 1296 1297 def do_wrapping(self,instring,indent=3): 1298 """Wrap the provided string""" 1299 if " " in instring: #already formatted 1300 return instring 1301 self.wrapper.initial_indent = ' '*indent 1302 self.wrapper.subsequent_indent = ' '*indent 1303 # remove leading and trailing space 1304 instring = instring.strip() 1305 # split into paragraphs 1306 paras = instring.split("\n\n") 1307 wrapped_paras = [self.wrapper.fill(p) for p in paras] 1308 return "\n".join(wrapped_paras) 1309 1310 1311 def merge(self,new_block,mode="strict",match_att=[],match_function=None, 1312 rel_keys = []): 1313 if mode == 'strict': 1314 for key in new_block.keys(): 1315 if key in self and key not in match_att: 1316 raise StarError( "Identical keys %s in strict merge mode" % key) 1317 elif key not in match_att: #a new dataname 1318 self[key] = new_block[key] 1319 # we get here if there are no keys in common, so we can now copy 1320 # the loops and not worry about overlaps 1321 for one_loop in new_block.loops.values(): 1322 self.CreateLoop(one_loop) 1323 # we have lost case information 1324 self.true_case.update(new_block.true_case) 1325 elif mode == 'replace': 1326 newkeys = list(new_block.keys()) 
1327 for ma in match_att: 1328 try: 1329 newkeys.remove(ma) #don't touch the special ones 1330 except ValueError: 1331 pass 1332 for key in new_block.keys(): 1333 if isinstance(key,unicode): 1334 self[key] = new_block[key] 1335 # creating the loop will remove items from other loops 1336 for one_loop in new_block.loops.values(): 1337 self.CreateLoop(one_loop) 1338 # we have lost case information 1339 self.true_case.update(new_block.true_case) 1340 elif mode == 'overlay': 1341 print('Overlay mode, current overwrite is %s' % self.overwrite) 1342 raise StarError('Overlay block merge mode not implemented') 1343 save_overwrite = self.overwrite 1344 self.overwrite = True 1345 for attribute in new_block.keys(): 1346 if attribute in match_att: continue #ignore this one 1347 new_value = new_block[attribute] 1348 #non-looped items 1349 if new_block.FindLoop(attribute)<0: #not looped 1350 self[attribute] = new_value 1351 my_loops = self.loops.values() 1352 perfect_overlaps = [a for a in new_block.loops if a in my_loops] 1353 for po in perfect_overlaps: 1354 loop_keys = [a for a in po if a in rel_keys] #do we have a key? 
1355 try: 1356 newkeypos = map(lambda a:newkeys.index(a),loop_keys) 1357 newkeypos = newkeypos[0] #one key per loop for now 1358 loop_keys = loop_keys[0] 1359 except (ValueError,IndexError): 1360 newkeypos = [] 1361 overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data 1362 new_data = map(lambda a:new_block[a],overlaps) #new packet data 1363 packet_data = transpose(overlap_data) 1364 new_p_data = transpose(new_data) 1365 # remove any packets for which the keys match between old and new; we 1366 # make the arbitrary choice that the old data stays 1367 if newkeypos: 1368 # get matching values in new list 1369 print("Old, new data:\n%s\n%s" % (repr(overlap_data[newkeypos]),repr(new_data[newkeypos]))) 1370 key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos]) 1371 # filter out any new data with these key values 1372 new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data) 1373 if new_p_data: 1374 new_data = transpose(new_p_data) 1375 else: new_data = [] 1376 # wipe out the old data and enter the new stuff 1377 byebyeloop = self.GetLoop(overlaps[0]) 1378 # print "Removing '%s' with overlaps '%s'" % (`byebyeloop`,`overlaps`) 1379 # Note that if, in the original dictionary, overlaps are not 1380 # looped, GetLoop will return the block itself. So we check 1381 # for this case... 
1382 if byebyeloop != self: 1383 self.remove_loop(byebyeloop) 1384 self.AddLoopItem((overlaps,overlap_data)) #adding old packets 1385 for pd in new_p_data: #adding new packets 1386 if pd not in packet_data: 1387 for i in range(len(overlaps)): 1388 #don't do this at home; we are appending 1389 #to something in place 1390 self[overlaps[i]].append(pd[i]) 1391 self.overwrite = save_overwrite 1392 1393 def assign_dictionary(self,dic): 1394 if not dic.diclang=="DDLm": 1395 print("Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri) 1396 return 1397 self.dictionary = dic 1398 1399 def unassign_dictionary(self): 1400 """Remove dictionary-dependent behaviour""" 1401 self.dictionary = None 1402 778 1403 779 1404 780 1405 class StarPacket(list): 781 pass 782 783 class BlockCollection: 784 def __init__(self,datasource=None,element_class=StarBlock,type_tag=''): 1406 def merge_packet(self,incoming): 1407 """Merge contents of incoming packet with this packet""" 1408 new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"] 1409 self.extend(incoming) 1410 for na in new_attrs: 1411 setattr(self,na,getattr(incoming,na)) 1412 1413 def __getattr__(self,att_name): 1414 """Derive a missing attribute""" 1415 if att_name.lower() in self.__dict__: 1416 return getattr(self,att_name.lower()) 1417 if att_name in ('cif_dictionary','fulldata','key'): 1418 raise AttributeError('Programming error: can only assign value of %s' % att_name) 1419 d = self.cif_dictionary 1420 c = self.fulldata 1421 k = self.key 1422 assert isinstance(k,list) 1423 d.derive_item(att_name,c,store_value=True) 1424 # 1425 # now pick out the new value 1426 # self.key is a list of the key values 1427 keydict = dict([(v,(getattr(self,v),True)) for v in k]) 1428 full_pack = c.GetCompoundKeyedPacket(keydict) 1429 return getattr(full_pack,att_name) 1430 1431 class BlockCollection(object): 1432 """A container for StarBlock objects. 
The constructor takes 1433 one non-keyword argument `datasource` to set the initial data. If 1434 `datasource` is a Python dictionary, the values must be `StarBlock` 1435 objects and the keys will be blocknames in the new object. Keyword 1436 arguments: 1437 1438 standard: 1439 `CIF` or `Dic`. `CIF` enforces 75-character blocknames, and will 1440 print block contents before that block's save frame. 1441 1442 blocktype: 1443 The type of blocks held in this container. Normally `StarBlock` 1444 or `CifBlock`. 1445 1446 characterset: 1447 `ascii` or `unicode`. Blocknames and datanames appearing within 1448 blocks are restricted to the appropriate characterset. Note that 1449 only characters in the basic multilingual plane are accepted. This 1450 restriction will be lifted when PyCIFRW is ported to Python3. 1451 1452 scoping: 1453 `instance` or `dictionary`: `instance` implies that save frames are 1454 hidden from save frames lower in the hierarchy or in sibling 1455 hierarchies. `dictionary` makes all save frames visible everywhere 1456 within a data block. This setting is only relevant for STAR2 dictionaries and 1457 STAR2 data files, as save frames are currently not used in plain CIF data 1458 files. 
1459 1460 """ 1461 def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock, 1462 characterset='ascii',scoping='instance',**kwargs): 1463 import collections 785 1464 self.dictionary = {} 786 self.type_tag = type_tag 787 self.lower_keys = [] # for efficiency 788 self.element_class = element_class 789 if isinstance(datasource,(DictType,BlockCollection)): 1465 self.standard = standard 1466 self.lower_keys = set() # short_cuts 1467 self.renamed = {} 1468 self.PC = collections.namedtuple('PC',['block_id','parent']) 1469 self.child_table = {} 1470 self.visible_keys = [] # for efficiency 1471 self.block_input_order = [] # to output in same order 1472 self.scoping = scoping #will trigger setting of child table 1473 self.blocktype = blocktype 1474 self.master_template = {} #for outputting 1475 self.set_grammar('2.0') 1476 self.set_characterset(characterset) 1477 if isinstance(datasource,BlockCollection): 1478 self.merge_fast(datasource) 1479 self.scoping = scoping #reset visibility 1480 elif isinstance(datasource,dict): 790 1481 for key,value in datasource.items(): 791 if value.__class__ == element_class: 792 self[key]=value 793 else: 794 self[key]= element_class(value) 1482 self[key]= value 795 1483 self.header_comment = '' 796 1484 1485 def set_grammar(self,new_grammar): 1486 """Set the syntax and grammar for output to `new_grammar`""" 1487 if new_grammar not in ['1.1','1.0','2.0','STAR2']: 1488 raise StarError('Unrecognised output grammar %s' % new_grammar) 1489 self.grammar = new_grammar 1490 1491 def set_characterset(self,characterset): 1492 """Set the allowed characters for datanames and datablocks: may be `ascii` or `unicode`. 
If datanames 1493 have already been added to any datablocks, they are not checked.""" 1494 self.characterset = characterset 1495 for one_block in self.lower_keys: 1496 self[one_block].set_characterset(characterset) 1497 1498 def unlock(self): 1499 """Allow overwriting of all blocks in this collection""" 1500 for a in self.lower_keys: 1501 self[a].overwrite=True 1502 1503 def lock(self): 1504 """Disallow overwriting for all blocks in this collection""" 1505 for a in self.lower_keys: 1506 self[a].overwrite = False 1507 797 1508 def __str__(self): 798 1509 return self.WriteOut() 799 1510 800 1511 def __setitem__(self,key,value): 801 if isinstance(value,(self.element_class,DictType)): 802 self.NewBlock(key,value,replace=True) 803 else: raise TypeError 804 self.lower_keys.append(key.lower()) 805 806 # due to attempt to get upper/lower case treated as identical 807 # we have a bit of cruft here 1512 self.NewBlock(key,value,parent=None) 1513 808 1514 def __getitem__(self,key): 1515 if isinstance(key,(unicode,str)): 1516 lowerkey = key.lower() 1517 if lowerkey in self.lower_keys: 1518 return self.dictionary[lowerkey] 1519 #print 'Visible keys:' + `self.visible_keys` 1520 #print 'All keys' + `self.lower_keys` 1521 #print 'Child table' + `self.child_table` 1522 raise KeyError('No such item %s' % key) 1523 1524 # we have to get an ordered list of the current keys, 1525 # as we'll have to delete one of them anyway. 
1526 # Deletion will delete any key regardless of visibility 1527 1528 def __delitem__(self,key): 1529 dummy = self[key] #raise error if not present 1530 lowerkey = key.lower() 1531 # get rid of all children recursively as well 1532 children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey] 1533 for child in children: 1534 del self[child] #recursive call 1535 del self.dictionary[lowerkey] 1536 del self.child_table[lowerkey] 809 1537 try: 810 return self.dictionary[key]1538 self.visible_keys.remove(lowerkey) 811 1539 except KeyError: 812 if key.lower() not in self.lower_keys: 813 raise KeyError, "No such item: %s" % key 814 curr_keys = self.dictionary.keys() 815 lower_ordered = map(lambda a:a.lower(),curr_keys) 816 keyindex = lower_ordered.index(key.lower()) 817 return self.dictionary[curr_keys[keyindex]] 818 819 # we have to get an ordered list of the current keys, 820 # as we'll have to delete one of them anyway 821 def __delitem__(self,key): 822 try: 823 del self.dictionary[key] 824 self.lower_keys.remove(key.lower()) 825 except KeyError: 826 if not self.has_key(key): 827 raise KeyError 828 curr_keys = self.dictionary.keys() 829 lower_ordered = map(lambda a:a.lower(),curr_keys) 830 keyindex = lower_ordered.index(key.lower()) 831 del self.dictionary[curr_keys[keyindex]] 832 1540 pass 1541 self.lower_keys.remove(lowerkey) 1542 self.block_input_order.remove(lowerkey) 1543 833 1544 def __len__(self): 834 return len(self.dictionary) 835 1545 return len(self.visible_keys) 1546 1547 def __contains__(self,item): 1548 """Support the 'in' operator""" 1549 if not isinstance(item,(unicode,str)): return False 1550 if item.lower() in self.visible_keys: 1551 return True 1552 return False 1553 1554 # We iterate over all visible 1555 def __iter__(self): 1556 for one_block in self.keys(): 1557 yield self[one_block] 1558 1559 # TODO: handle different case 836 1560 def keys(self): 837 return self. 
dictionary.keys()838 839 # changes to take case independence into account1561 return self.visible_keys 1562 1563 # Note that has_key does not exist in 3.5 840 1564 def has_key(self,key): 841 if not isinstance(key,StringType): return 0 842 if self.dictionary.has_key(key): 843 return 1 844 if key.lower() in self.lower_keys: 845 return 1 846 return 0 1565 return key in self 847 1566 848 1567 def get(self,key,default=None): 849 if self.dictionary.has_key(key): 850 return self.dictionary[key] 851 elif self.has_key(key): # take account of case 1568 if key in self: # take account of case 852 1569 return self.__getitem__(key) 853 1570 else: … … 856 1573 def clear(self): 857 1574 self.dictionary.clear() 858 self.lower_keys = [] 859 860 def copy(self): 861 newcopy = self.dictionary.copy() 862 return BlockCollection('',newcopy) 863 1575 self.lower_keys = set() 1576 self.child_table = {} 1577 self.visible_keys = [] 1578 self.block_input_order = [] 1579 1580 def copy(self): 1581 newcopy = self.dictionary.copy() #all blocks 1582 for k,v in self.dictionary.items(): 1583 newcopy[k] = v.copy() 1584 newcopy = BlockCollection(newcopy) 1585 newcopy.child_table = self.child_table.copy() 1586 newcopy.lower_keys = self.lower_keys.copy() 1587 newcopy.block_input_order = self.block_input_order.copy() 1588 newcopy.characterset = self.characterset 1589 newcopy.SetTemplate(self.master_template.copy()) 1590 newcopy.scoping = self.scoping #this sets visible keys 1591 return newcopy 1592 864 1593 def update(self,adict): 865 1594 for key in adict.keys(): 866 self.dictionary[key] = adict[key] 867 self.lower_keys.extend(map(lambda a:a.lower(),adict.keys())) 1595 self[key] = adict[key] 868 1596 869 1597 def items(self): 870 return self.dictionary.items()1598 return [(a,self[a]) for a in self.keys()] 871 1599 872 1600 def first_block(self): 1601 """Return the 'first' block. 
This is not necessarily the first block in the file.""" 873 1602 if self.keys(): 874 1603 return self[self.keys()[0]] 875 1604 876 def NewBlock(self,blockname,blockcontents=(),replace=False,fix=True): 877 if not blockcontents: 878 blockcontents = self.element_class() 879 elif isinstance(blockcontents,DictType): 880 blockcontents = self.element_class(blockcontents) 881 if not isinstance(blockcontents,self.element_class): 882 raise StarError( 'Block is not of required type %s, is %s' % self.element_class.__name__,blockcontents.__class__.__name__) 1605 def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None): 1606 """Add a new block named `blockname` with contents `blockcontents`. If `fix` 1607 is True, `blockname` will have spaces and tabs replaced by underscores. `parent` 1608 allows a parent block to be set so that block hierarchies can be created. Depending on 1609 the output standard, these blocks will be printed out as nested save frames or 1610 ignored.""" 1611 if blockcontents is None: 1612 blockcontents = StarBlock() 1613 if self.standard == "CIF": 1614 blockcontents.setmaxnamelength(75) 1615 if len(blockname)>75: 1616 raise StarError('Blockname %s is longer than 75 characters' % blockname) 883 1617 if fix: 884 1618 newblockname = re.sub('[ \t]','_',blockname) 885 1619 else: newblockname = blockname 886 1620 new_lowerbn = newblockname.lower() 887 if self.lower_keys.count(new_lowerbn): #already in CIF 888 if not replace: 889 raise StarError( "Attempt to replace existing block" + blockname) 890 # generate a list of lower-case keys in correct order 891 current_keys = self.dictionary.keys() 892 blocknames = map(lambda a:a.lower(),current_keys) 893 location = blocknames.index(new_lowerbn) 894 del self.dictionary[current_keys[location]] 895 self.lower_keys.remove(new_lowerbn) 896 self.dictionary.update({blockname:blockcontents}) 897 self.lower_keys.append(new_lowerbn) 898 899 def merge(self,new_bc,mode="strict",single_block=[], 1621 if new_lowerbn in 
self.lower_keys: #already there 1622 if self.standard is not None: 1623 toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None] 1624 if parent is None and new_lowerbn not in toplevelnames: #can give a new key to this one 1625 while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+' 1626 elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one 1627 replace_name = new_lowerbn 1628 while replace_name in self.lower_keys: replace_name = replace_name + '+' 1629 self._rekey(new_lowerbn,replace_name) 1630 # now continue on to add in the new block 1631 if parent.lower() == new_lowerbn: #the new block's requested parent just got renamed!! 1632 parent = replace_name 1633 else: 1634 raise StarError( "Attempt to replace existing block " + blockname) 1635 else: 1636 del self[new_lowerbn] 1637 self.dictionary.update({new_lowerbn:blockcontents}) 1638 self.lower_keys.add(new_lowerbn) 1639 self.block_input_order.append(new_lowerbn) 1640 if parent is None: 1641 self.child_table[new_lowerbn]=self.PC(newblockname,None) 1642 self.visible_keys.append(new_lowerbn) 1643 else: 1644 if parent.lower() in self.lower_keys: 1645 if self.scoping == 'instance': 1646 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) 1647 else: 1648 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) 1649 self.visible_keys.append(new_lowerbn) 1650 else: 1651 print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname)) 1652 self[new_lowerbn].set_grammar(self.grammar) 1653 self[new_lowerbn].set_characterset(self.characterset) 1654 self[new_lowerbn].formatting_hints = self.master_template 1655 return new_lowerbn #in case calling routine wants to know 1656 1657 def _rekey(self,oldname,newname,block_id=''): 1658 """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name 1659 does not change unless [[block_id]] is given. 
Prefer [[rename]] for a safe version.""" 1660 move_block = self[oldname] #old block 1661 is_visible = oldname in self.visible_keys 1662 move_block_info = self.child_table[oldname] #old info 1663 move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname] 1664 # now rewrite the necessary bits 1665 self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children])) 1666 oldpos = self.block_input_order.index(oldname) 1667 del self[oldname] #do this after updating child table so we don't delete children 1668 self.dictionary.update({newname:move_block}) 1669 self.lower_keys.add(newname) 1670 #print 'Block input order was: ' + `self.block_input_order` 1671 self.block_input_order[oldpos:oldpos]=[newname] 1672 if block_id == '': 1673 self.child_table.update({newname:move_block_info}) 1674 else: 1675 self.child_table.update({newname:self.PC(block_id,move_block_info.parent)}) 1676 if is_visible: self.visible_keys += [newname] 1677 1678 def rename(self,oldname,newname): 1679 """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed. 
def merge_fast(self,new_bc,parent=None):
    """Do a fast merge of the blocks in `new_bc` into this collection.

    WARNING: this may change one or more frame headers (in this collection
    or in `new_bc`) in order to remove duplicate frames.  Please keep a
    handle to the block object instead of the text of the header.

    `new_bc` is another block collection.  If `parent` (a block object
    already held in `self.dictionary`) is given, every top-level block of
    `new_bc` becomes a child of that block.

    The merge is done in 'replace' mode when `self.standard` is None, so
    incoming blocks simply overwrite ours, and in 'strict' mode otherwise,
    where name clashes are resolved by appending '+' to one of the
    clashing keys.

    Raises `StarError` if `parent` does not match exactly one of our blocks
    or if a clash cannot be resolved in strict mode.
    """
    if self.standard is None:
        mode = 'replace'
    else:
        mode = 'strict'
    overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
    if parent is not None:
        parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent]
        if len(parent_name) != 1:
            raise StarError("Unable to find unique parent block name: have %s" % str(parent_name))
        parent_name = parent_name[0]
    else:
        parent_name = None    # no reparenting requested
    if overlap_flag and mode != 'replace':
        double_keys = self.lower_keys.intersection(new_bc.lower_keys)
        for dup_key in double_keys:
            our_parent = self.child_table[dup_key].parent
            their_parent = new_bc.child_table[dup_key].parent
            if (our_parent is None and their_parent is not None and parent is None) or\
                parent is not None:  #rename our block
                start_key = dup_key
                while start_key in self.lower_keys: start_key = start_key+'+'
                self._rekey(dup_key,start_key)
                # parent_name is None when no parent was supplied, so it
                # must be checked before being treated as a string
                if parent_name is not None and parent_name.lower() == dup_key:
                    #we just renamed the prospective parent!
                    parent_name = start_key
            elif our_parent is not None and their_parent is None and parent is None:
                # rename the incoming block instead
                start_key = dup_key
                while start_key in new_bc.lower_keys: start_key = start_key+'+'
                new_bc._rekey(dup_key,start_key)
            else:
                raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
    self.dictionary.update(new_bc.dictionary)
    self.lower_keys.update(new_bc.lower_keys)
    self.visible_keys += (list(new_bc.lower_keys))
    self.block_input_order += new_bc.block_input_order
    self.child_table.update(new_bc.child_table)
    if parent_name is not None:     #redo the child_table entries
        # incoming top-level blocks now hang off the requested parent
        reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent is None]
        reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list]
        self.child_table.update(dict(reparent_dict))
a:self[a].get(match_att[0],None),self.keys())1769 block_to_item = [self[a].get(match_att[0],None) for a in self.keys()] 915 1770 #print `block_to_item` 916 for key in new_keys: 917 if key == idblock: continue 918 basekey = key #default value 919 attval = new_bc[key].get(match_att[0],0) 1771 for key in new_keys: #run over incoming blocknames 1772 if key == idblock: continue #skip dictionary id 1773 basekey = key #default value 1774 if len(match_att)>0: 1775 attval = new_bc[key].get(match_att[0],0) #0 if ignoring matching 1776 else: 1777 attval = 0 920 1778 for ii in range(len(block_to_item)): #do this way to get looped names 921 thisatt = block_to_item[ii] 1779 thisatt = block_to_item[ii] #keyname in old block 922 1780 #print "Looking for %s in %s" % (attval,thisatt) 923 1781 if attval == thisatt or \ 924 (isinstance(thisatt, ListType) and attval in thisatt):1782 (isinstance(thisatt,list) and attval in thisatt): 925 1783 basekey = base_keys.pop(ii) 926 1784 block_to_item.remove(thisatt) 927 1785 break 928 if not self.dictionary.has_key(basekey) or mode=="replace": 929 self.dictionary[basekey] = new_bc[key] 1786 if not basekey in self or mode=="replace": 1787 new_parent = new_bc.get_parent(key) 1788 if parent is not None and new_parent is None: 1789 new_parent = parent 1790 self.NewBlock(basekey,new_bc[key],parent=new_parent) #add the block 930 1791 else: 931 1792 if mode=="strict": … … 933 1794 elif mode=="overlay": 934 1795 # print "Merging block %s with %s" % (basekey,key) 935 self .dictionary[basekey].merge(new_bc[key],mode,match_att=match_att)936 else: 1796 self[basekey].merge(new_bc[key],mode,match_att=match_att) 1797 else: 937 1798 raise StarError( "Merge called with unknown mode %s" % mode) 938 1799 1800 def checknamelengths(self,target_block,maxlength=-1): 1801 if maxlength < 0: 1802 return 1803 else: 1804 toolong = [a for a in target_block.keys() if len(a)>maxlength] 1805 outstring = "" 1806 if toolong: 1807 outstring = "\n".join(toolong) 1808 raise 
def get_parent(self,blockname):
    """Return the name of the block enclosing [[blockname]] in canonical form (lower case).

    The lookup is case-insensitive: `blockname` is lower-cased before it is
    used as a key into `self.child_table`.  The value returned is the
    `parent` field stored for that block, which is `None` for a block that
    has no enclosing block.

    Raises `StarError` if `blockname` is not present in `self.child_table`.
    """
    # child_table is keyed by lower-case block name, so a direct lookup
    # replaces scanning every entry; only a missing key is reported as
    # 'no parent', other errors are allowed to propagate.
    lookup_key = blockname.lower()
    try:
        return self.child_table[lookup_key].parent
    except KeyError:
        raise StarError('no parent for %s' % blockname)
If [[include_parent]] is 1852 True, the parent block will also be included in the block collection as the root.""" 1853 newbc = BlockCollection() 1854 block_lower = blockname.lower() 1855 proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)] 1856 newbc.child_table = dict(proto_child_table) 1857 if not include_parent: 1858 newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower])) 1859 newbc.lower_keys = set([a[0] for a in proto_child_table]) 1860 newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table) 1861 if include_parent: 1862 newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)}) 1863 newbc.lower_keys.add(block_lower) 1864 newbc.dictionary.update({block_lower:self.dictionary[block_lower]}) 1865 newbc.scoping = scoping 1866 return newbc 1867 1868 def get_immediate_children(self,parentname): 1869 """Get the next level of children of the given block as a list, without nested levels""" 1870 child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()] 1871 return child_handles 1872 1873 # This takes time 1874 def get_child_list(self,parentname): 1875 """Get a list of all child categories in alphabetical order""" 1876 child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])] 1877 child_handles.sort() 1878 return child_handles 1879 1880 def is_child_of_parent(self,parentname,blockname): 1881 """Return `True` if `blockname` is a child of `parentname`""" 1882 checkname = parentname.lower() 1883 more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname] 1884 if blockname.lower() in more_children: 1885 return True 1886 else: 1887 for one_child in more_children: 1888 if self.is_child_of_parent(one_child,blockname): return True 1889 return False 1890 1891 def 
def SetTemplate(self,template_file):
    """Use `template_file` as a template for all block output.

    The result of `process_template(template_file)` is stored as
    `self.master_template` and also assigned to the `formatting_hints`
    attribute of every block currently held in `self.dictionary`.
    """
    hints = process_template(template_file)
    self.master_template = hints
    for block in self.dictionary.values():
        block.formatting_hints = hints
Useful in conjunction with a 1917 template for dictionary files.""" 1918 if maxoutlength != 0: 1919 self.SetOutputLength(maxoutlength) 953 1920 if not comment: 954 1921 comment = self.header_comment 955 outstring = cStringIO.StringIO() 1922 outstring = StringIO() 1923 if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0": 1924 outstring.write(r"#\#CIF_2.0" + "\n") 956 1925 outstring.write(comment) 957 for datablock in self.dictionary.keys(): 958 outstring.write('\n' + self.type_tag +datablock+'\n') 959 self.dictionary[datablock].SetOutputLength(wraplength,maxoutlength) 960 outstring.write(str(self.dictionary[datablock])) 1926 # prepare all blocks 1927 for b in self.dictionary.values(): 1928 b.set_grammar(self.grammar) 1929 b.formatting_hints = self.master_template 1930 b.SetOutputLength(wraplength,self.maxoutlength) 1931 # loop over top-level 1932 # monitor output 1933 all_names = list(self.child_table.keys()) #i.e. lower case 1934 if blockorder is None: 1935 blockorder = self.block_input_order 1936 top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None] 1937 for blockref,blockname in top_block_names: 1938 print('Writing %s, ' % blockname + repr(self[blockref])) 1939 outstring.write('\n' + 'data_' +blockname+'\n') 1940 all_names.remove(blockref) 1941 if self.standard == 'Dic': #put contents before save frames 1942 outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application')) 1943 if self.grammar == 'STAR2': #nested save frames 1944 child_refs = self.get_immediate_children(blockref) 1945 for child_ref,child_info in child_refs: 1946 child_name = child_info.block_id 1947 outstring.write('\n\n' + 'save_' + child_name + '\n') 1948 self.block_to_string_nested(child_ref,child_name,outstring,4) 1949 outstring.write('\n' + 'save_'+ '\n') 1950 elif self.grammar in ('1.0','1.1','2.0'): #non-nested save frames 1951 child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)] 
def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0):
    """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children,
    and syntactically nesting save frames.

    Text is written to `outstring` (anything with a `write` method).  When
    `self.standard` is 'Dic' the block's own contents are written before
    its child save frames, otherwise after them.  `indentlevel` is the
    number of spaces placed before each closing `save_`; `block_id` is not
    referenced in the body.
    """
    children = self.get_immediate_children(block_ref)
    self[block_ref].set_grammar(self.grammar)
    contents_first = self.standard == 'Dic'
    if contents_first:
        outstring.write(str(self[block_ref]))
    frame_close = '\n' + ' '*indentlevel + 'save_' + '\n'
    for child_ref,child_info in children:
        child_name = child_info.block_id
        outstring.write('\n' + 'save_' + child_name + '\n')
        # the child writes its own contents and any frames nested inside it
        self.block_to_string_nested(child_ref,child_name,outstring,indentlevel)
        outstring.write(frame_close)
    if not contents_first:
        outstring.write(str(self[block_ref]))
__init__(self,datasource=None,maxinlength=-1,maxoutlength=0, 1990 scoping='instance',grammar='1.1',scantype='standard', 1991 **kwargs): 1992 super(StarFile,self).__init__(datasource=datasource,**kwargs) 1993 self.my_uri = getattr(datasource,'my_uri','') 972 1994 if maxoutlength == 0: 973 self.maxoutlength = 2048 1995 self.maxoutlength = 2048 974 1996 else: 975 1997 self.maxoutlength = maxoutlength 976 if type(datasource) is StringType or hasattr(datasource,"read"):977 newself = ReadStar(datasource,self.maxinlength,**kwargs)978 # print "Reinjecting by calling %s.__init__ with kwargs %s" % (`self.__init__.im_class`,kwargs)979 self.__init__.im_class.__init__(self,datasource=newself,maxoutlength=maxoutlength,**kwargs)1998 self.scoping = scoping 1999 if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"): 2000 ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype, 2001 maxlength = maxinlength) 980 2002 self.header_comment = \ 981 2003 """#\\#STAR 982 2004 ########################################################################## 983 # STAR Format file 2005 # STAR Format file 984 2006 # Produced by PySTARRW module 985 # 2007 # 986 2008 # This is a STAR file. STAR is a superset of the CIF file type. For 987 2009 # more information, please refer to International Tables for Crystallography, … … 993 2015 994 2016 2017 import math 2018 class CIFStringIO(StringIO): 2019 def __init__(self,target_width=80,**kwargs): 2020 StringIO.__init__(self,**kwargs) 2021 self.currentpos = 0 2022 self.target_width = target_width 2023 self.tabwidth = -1 2024 self.indentlist = [0] 2025 self.last_char = "" 2026 2027 def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False, 2028 delimiter=False,startcol=-1): 2029 """Write a string with correct linebreak, tabs and indents""" 2030 # do we need to break? 
2031 if delimiter: 2032 if len(outstring)>1: 2033 raise ValueError('Delimiter %s is longer than one character' % repr( outstring )) 2034 output_delimiter = True 2035 if mustbreak: #insert a new line and indent 2036 temp_string = '\n' + ' ' * self.indentlist[-1] 2037 StringIO.write(self,temp_string) 2038 self.currentpos = self.indentlist[-1] 2039 self.last_char = temp_string[-1] 2040 if self.currentpos+len(outstring)>self.target_width: #try to break 2041 if not delimiter and outstring[0]!='\n': #ie <cr>; 2042 if canbreak: 2043 temp_string = '\n' + ' ' * self.indentlist[-1] 2044 StringIO.write(self,temp_string) 2045 self.currentpos = self.indentlist[-1] 2046 self.last_char = temp_string[-1] 2047 else: #assume a break will be forced on next value 2048 output_delimiter = False #the line break becomes the delimiter 2049 #try to match requested column 2050 if startcol > 0: 2051 if self.currentpos < startcol: 2052 StringIO.write(self,(startcol - self.currentpos)* ' ') 2053 self.currentpos = startcol 2054 self.last_char = ' ' 2055 else: 2056 print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos)) 2057 startcol = -1 #so that tabbing works as a backup 2058 #handle tabs 2059 if self.tabwidth >0 and do_tab and startcol < 0: 2060 next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth 2061 #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop) 2062 if self.currentpos < next_stop: 2063 StringIO.write(self,(next_stop-self.currentpos)*' ') 2064 self.currentpos = next_stop 2065 self.last_char = ' ' 2066 #calculate indentation after tabs and col setting applied 2067 if newindent: #indent by current amount 2068 if self.indentlist[-1] == 0: #first time 2069 self.indentlist.append(self.currentpos) 2070 # print 'Indentlist: ' + `self.indentlist` 2071 else: 2072 self.indentlist.append(self.indentlist[-1]+2) 2073 elif unindent: 2074 if len(self.indentlist)>1: 2075 self.indentlist.pop() 2076 else: 2077 print('Warning: 
class StarDerivationError(Exception):
    """Error reporting that derivation of an item failed and None was returned.

    `fail_name` is the name interpolated into the message and is kept on
    the instance as the `fail_name` attribute.
    """
    def __init__(self,fail_name):
        # Pass the name up so that `args` is populated; without this the
        # exception cannot be rebuilt by copy/pickle, which call cls(*args).
        super(StarDerivationError,self).__init__(fail_name)
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed, None returned" % self.fail_name
2132 2133 * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename` 2134 will be added to. 2135 2136 * `maxlength` is the maximum allowable line length in the input file. This has been set at 2137 2048 characters for CIF but is unlimited (-1) for STAR files. 2138 2139 * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1` 2140 is identical except for the exclusion of square brackets as the first characters in 2141 undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will 2142 read files according to the STAR2 publication. If grammar is `None` or `auto`, autodetection 2143 will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files. 2144 Note that (nested) save frames are read in all grammar variations and then flagged afterwards if 2145 they do not match the requested grammar. 2146 2147 * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the 2148 cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using 2149 fast C routines. Note that running PyCIFRW in Jython uses native Java regular expressions 2150 to provide a speedup regardless of this argument. 2151 2152 * `CBF` flags that the input file is in Crystallographic Binary File format. The binary block is 2153 excised from the input data stream before parsing and is not available in the returned object. 2154 """ 2155 1007 2156 import string 1008 if grammar=="1.1": 1009 import YappsStarParser_1_1 as Y 1010 elif grammar=="1.0": 1011 import YappsStarParser_1_0 as Y 1012 elif grammar=="DDLm": 1013 import YappsStarParser_DDLm as Y 1014 if isinstance(filename,basestring): 1015 filestream = urlopen(filename) 2157 import codecs 2158 # save desired scoping 2159 save_scoping = prepared.scoping 2160 from . import YappsStarParser_1_1 as Y11 2161 from . import YappsStarParser_1_0 as Y10 2162 from . 
import YappsStarParser_2_0 as Y20 2163 from . import YappsStarParser_STAR2 as YST 2164 if prepared is None: 2165 prepared = StarFile() 2166 if grammar == "auto" or grammar is None: 2167 try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)] 2168 elif grammar == '1.0': 2169 try_list = [('1.0',Y10)] 2170 elif grammar == '1.1': 2171 try_list = [('1.1',Y11)] 2172 elif grammar == '2.0': 2173 try_list = [('2.0',Y20)] 2174 elif grammar == 'STAR2': 2175 try_list = [('STAR2',YST)] 2176 else: 2177 raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar )) 2178 if isinstance(filename,(unicode,str)): 2179 # create an absolute URL 2180 relpath = urlparse(filename) 2181 if relpath.scheme == "": 2182 if not os.path.isabs(filename): 2183 fullpath = os.path.join(os.getcwd(),filename) 2184 else: 2185 fullpath = filename 2186 newrel = list(relpath) 2187 newrel[0] = "file" 2188 newrel[2] = fullpath 2189 my_uri = urlunparse(newrel) 2190 else: 2191 my_uri = urlunparse(relpath) 2192 # print("Full URL is: " + my_uri) 2193 filestream = urlopen(my_uri) 2194 text = filestream.read().decode('utf8') 2195 filestream.close() 1016 2196 else: 1017 2197 filestream = filename #already opened for us 1018 my_uri = "" 1019 if hasattr(filestream,"geturl"): 1020 my_uri = filestream.geturl() 1021 text = filestream.read() 1022 if isinstance(filename,basestring): #we opened it, we close it 1023 filestream.close() 2198 text = filestream.read() 2199 if not isinstance(text,unicode): 2200 text = text.decode('utf8') #CIF is always ascii/utf8 2201 my_uri = "" 1024 2202 if not text: # empty file, return empty block 1025 dest.set_uri(my_uri) 1026 return dest 2203 return prepared.set_uri(my_uri) 2204 # filter out non-ASCII characters in CBF files if required. We assume 2205 # that the binary is enclosed in a fixed string that occurs 2206 # nowhere else. 
2207 if CBF: 2208 text_bits = text.split("-BINARY-FORMAT-SECTION-") 2209 text = text_bits[0] 2210 for section in range(2,len(text_bits),2): 2211 text = text+" (binary omitted)"+text_bits[section] 1027 2212 # we recognise ctrl-Z as end of file 1028 endoffile = text.find( '\x1a')1029 if endoffile >= 0: 2213 endoffile = text.find(chr(26)) 2214 if endoffile >= 0: 1030 2215 text = text[:endoffile] 1031 split = string.split(text,'\n')2216 split = text.split('\n') 1032 2217 if maxlength > 0: 1033 toolong = filter(lambda a:len(a)>maxlength,split)2218 toolong = [a for a in split if len(a)>maxlength] 1034 2219 if toolong: 1035 2220 pos = split.index(toolong[0]) 1036 2221 raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength)) 1037 try: 1038 if scantype == 'standard': 2222 # honour the header string 2223 if text[:10] != "#\#CIF_2.0" and ('2.0',Y20) in try_list: 2224 try_list.remove(('2.0',Y20),) 2225 if not try_list: 2226 raise StarError('File %s missing CIF2.0 header' % (filename)) 2227 for grammar_name,Y in try_list: 2228 if scantype == 'standard' or grammar_name in ['2.0','STAR2']: 1039 2229 parser = Y.StarParser(Y.StarParserScanner(text)) 1040 2230 else: 1041 2231 parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex')) 1042 proto_star = getattr(parser,"input")() 1043 except Y.yappsrt.SyntaxError: 2232 # handle encoding switch 2233 if grammar_name in ['2.0','STAR2']: 2234 prepared.set_characterset('unicode') 2235 else: 2236 prepared.set_characterset('ascii') 2237 proto_star = None 2238 try: 2239 proto_star = getattr(parser,"input")(prepared) 2240 except Y.yappsrt.SyntaxError as e: 2241 input = parser._scanner.input 2242 Y.yappsrt.print_error(input, e, parser._scanner) 2243 except Y.yappsrt.NoMoreTokens: 2244 print('Could not complete parsing; stopped around here:',file=sys.stderr) 2245 print(parser._scanner,file=sys.stderr) 2246 except ValueError: 2247 print('Unexpected error:') 2248 import traceback 2249 traceback.print_exc() 2250 
def apply_line_folding(instring,minwraplength=60,maxwraplength=80):
    """Insert line folding characters into instring between min/max wraplength.

    `instring` is returned unchanged when every line is shorter than
    `maxwraplength` and the text does not itself begin with something that
    looks like a fold header (a backslash, optional blanks, then a newline).
    Otherwise the result starts with the fold header (backslash, newline)
    and each over-long line is split into pieces ending in a backslash.  A
    split is made at the first run of whitespace found at or after
    `minwraplength` when that keeps the piece within `maxwraplength`;
    failing that the line is cut so that the piece plus its backslash is
    exactly `maxwraplength` long.  A line that genuinely ends in a backslash
    gets an extra fold so the backslash is not mistaken for a fold marker.
    """
    lines = instring.split('\n')
    longest = max(len(l) for l in lines)
    # raw string: the pattern must start with a literal backslash
    looks_folded = re.match(r"\\[ \v\t\f]*\n",instring) is not None
    if longest < maxwraplength and not looks_folded:
        return instring
    pieces = ["\\\n"]   #header
    for l in lines:
        if len(l) < maxwraplength:
            pieces.append(l)
            if l.endswith('\\'):    #a line ending with a backslash
                pieces.append("\\\n")
            pieces.append("\n")     #put back the split character
        else:
            current_bit = l
            while len(current_bit) > maxwraplength:
                space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:])
                cut = maxwraplength - 1     #default: just blindly insert
                if space_pos is not None:
                    # the match offset is relative to minwraplength
                    space_cut = minwraplength + space_pos.start()
                    if space_cut < maxwraplength:
                        cut = space_cut
                pieces.append(current_bit[:cut] + "\\\n")
                current_bit = current_bit[cut:]
            pieces.append(current_bit)
            if current_bit.endswith('\\'):  #a backslash just happens to be here
                pieces.append("\\\n")
            pieces.append('\n')
    return "".join(pieces)[:-1]     #remove final newline
def transpose(base_list):
    """Transpose the list of lists passed to us.

    Element `i` of the result collects item `i` from every entry of
    `base_list`, in order.  The number of result rows is taken from the
    length of the first entry, so `base_list` must not be empty.
    """
    width = len(base_list[0])
    return [[row[col] for row in base_list] for col in range(width)]
def not_none(itemlist):
    """Return true only if no values of None are present.

    `None` itself gives False.  Tuples and lists are searched recursively
    and give False as soon as a `None` is found at any depth.  Any other
    value gives True without being examined further.
    """
    if itemlist is None:
        return False
    if isinstance(itemlist,(tuple,list)):
        # all() stops at the first nested value that contains a None
        return all(map(not_none,itemlist))
    return True
hint_dict = {"dataname":item} 2415 # find the line in the file 2416 start_pos = re.search("(^[ \t]*(?P<name>" + item + ")[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M) 2417 if start_pos.group("spec") != None: 2418 spec_pos = start_pos.start("spec")-start_pos.start(0) 2419 spec_char = template_string[start_pos.start("spec"):start_pos.start("spec")+3] 2420 if spec_char[0] in '\'";': 2421 hint_dict.update({"delimiter":spec_char[0]}) 2422 if spec_char == '"""' or spec_char == "'''": 2423 hint_dict.update({"delimiter":spec_char}) 2424 if spec_char[0] != ";": #so we need to work out the column number 2425 hint_dict.update({"column":spec_pos}) 2426 else: #need to put in the carriage return 2427 hint_dict.update({"delimiter":"\n;"}) 2428 # can we format the text? 2429 text_val = template_as_cif[item] 2430 hint_dict["reformat"] = "\n\t" in text_val or "\n " in text_val 2431 if hint_dict["reformat"]: #find the indentation 2432 p = re.search(find_indent,text_val,re.M) 2433 if p.group() is not None: 2434 hint_dict["reformat_indent"]=p.end() - p.start() 2435 if start_pos.group('name') != None: 2436 name_pos = start_pos.start('name') - start_pos.start(0) 2437 hint_dict.update({"name_pos":name_pos}) 2438 #print '%s: %s' % (item,`hint_dict`) 2439 form_hints.append(hint_dict) 2440 else: #loop block 2441 testnames = template_as_cif.loops[item] 2442 total_items = len(template_as_cif.loops[item]) 2443 testname = testnames[0] 2444 #find the loop spec line in the file 2445 loop_regex = "(^[ \t]*(?P<loop>loop_)[ \t\n\r]+(?P<name>" + testname + ")([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1) 2446 loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S) 2447 loop_so_far = loop_line.end() 2448 packet_text = loop_line.group('packet') 2449 loop_indent = loop_line.start('loop') - loop_line.start(0) 2450 form_hints.append({"dataname":'loop','name_pos':loop_indent}) 2451 packet_regex = "[ 
\t]*(?P<all>(?P<sqqq>'''([^\n\r\f']*)''')|(?P<sq>'([^\n\r\f']*)'+)|(?P<dq>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))" 2452 packet_pos = re.finditer(packet_regex,packet_text) 2453 line_end_pos = re.finditer("^",packet_text,re.M) 2454 next_end = next(line_end_pos).end() 2455 last_end = next_end 2456 for loopname in testnames: 2457 #find the name in the file for name pos 2458 name_regex = "(^[ \t]*(?P<name>" + loopname + "))" 2459 name_match = re.search(name_regex,template_string,re.I|re.M|re.S) 2460 loop_name_indent = name_match.start('name')-name_match.start(0) 2461 hint_dict = {"dataname":loopname,"name_pos":loop_name_indent} 2462 #find the value 2463 thismatch = next(packet_pos) 2464 while thismatch.start('all') > next_end: 2465 try: 2466 last_end = next_end 2467 next_end = next(line_end_pos).start() 2468 print('next end %d' % next_end) 2469 except StopIteration: 2470 break 2471 print('Start %d, last_end %d' % (thismatch.start('all'),last_end)) 2472 col_pos = thismatch.start('all') - last_end + 1 2473 if thismatch.group('none') is None: 2474 if thismatch.group('sqqq') is not None: 2475 hint_dict.update({'delimiter':"'''"}) 2476 else: 2477 hint_dict.update({'delimiter':thismatch.groups()[0][0]}) 2478 hint_dict.update({'column':col_pos}) 2479 print('%s: %s' % (loopname,repr( hint_dict ))) 2480 form_hints.append(hint_dict) 2481 return form_hints 2482 2483 2484 #No documentation flags 2485
Note: See TracChangeset
for help on using the changeset viewer.