source: trunk/CifFile/CifFile.py @ 4082

Last change on this file since 4082 was 4082, checked in by vondreele, 4 years ago
File size: 147.4 KB
Line 
1# To maximize python3/python2 compatibility
2from __future__ import print_function
3from __future__ import unicode_literals
4from __future__ import division
5from __future__ import absolute_import
6
7try:
8    from cStringIO import StringIO
9except ImportError:
10    from io import StringIO
11
12# Python 2,3 compatibility
13try:
14    from urllib import urlopen         # for arbitrary opening
15    from urlparse import urlparse, urlunparse,urljoin
16except:
17    from urllib.request import urlopen
18    from urllib.parse import urlparse,urlunparse,urljoin
19
20# The unicode type does not exist in Python3 as the str type
21# encompasses unicode.  PyCIFRW tests for 'unicode' would fail
22# Suggestions for a better approach welcome.
23
24if isinstance(u"abc",str):   #Python3
25    unicode = str
26   
27__copyright = """
28PYCIFRW License Agreement (Python License, Version 2)
29-----------------------------------------------------
30
311. This LICENSE AGREEMENT is between the Australian Nuclear Science
32and Technology Organisation ("ANSTO"), and the Individual or
33Organization ("Licensee") accessing and otherwise using this software
34("PyCIFRW") in source or binary form and its associated documentation.
35
362. Subject to the terms and conditions of this License Agreement,
37ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
38license to reproduce, analyze, test, perform and/or display publicly,
39prepare derivative works, distribute, and otherwise use PyCIFRW alone
40or in any derivative version, provided, however, that this License
41Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
422001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
43in any derivative version prepared by Licensee.
44
453. In the event Licensee prepares a derivative work that is based on
46or incorporates PyCIFRW or any part thereof, and wants to make the
47derivative work available to others as provided herein, then Licensee
48hereby agrees to include in any such work a brief summary of the
49changes made to PyCIFRW.
50
514. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
52basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
53IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
54DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
55FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
56INFRINGE ANY THIRD PARTY RIGHTS.
57
585. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
59FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
60RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
61DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
62
636. This License Agreement will automatically terminate upon a material
64breach of its terms and conditions.
65
667. Nothing in this License Agreement shall be deemed to create any
67relationship of agency, partnership, or joint venture between ANSTO
68and Licensee. This License Agreement does not grant permission to use
69ANSTO trademarks or trade name in a trademark sense to endorse or
70promote products or services of Licensee, or any third party.
71
728. By copying, installing or otherwise using PyCIFRW, Licensee agrees
73to be bound by the terms and conditions of this License Agreement.
74
75"""
76
77
78import re,sys
79from . import StarFile
80from .StarFile import StarList  #put in global scope for exec statement
81try:
82    import numpy                   #put in global scope for exec statement
83    from .drel import drel_runtime  #put in global scope for exec statement
84except ImportError:
85    pass                       #will fail when using dictionaries for calcs
86from copy import copy          #must be in global scope for exec statement
87
def track_recursion(in_this_func):
    """Decorator that watches a function for recursion on its key argument.

    The key argument is the second positional argument of the wrapped
    call (``args[1]``; ``args[0]`` is normally ``self``).  Keys of the
    calls currently in progress are kept in ``wrapper.called_list``.

    * If the key is already in that list, `CifRecursionError` is raised.
    * The ``allow_defaults`` keyword of the outermost call is remembered
      and forced onto every nested call.
    * A `StarFile.StarDerivationError` escaping the outermost call is
      converted into `StarFile.StarDerivationFailure`; in nested calls it
      is re-raised unchanged.
    """
    # Imported locally so that this block does not depend on a new
    # module-level import.
    from functools import wraps

    @wraps(in_this_func)
    def wrapper(*args,**kwargs):
        key_arg = args[1]
        if key_arg in wrapper.called_list:
            print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg)))
            raise CifRecursionError( key_arg,wrapper.called_list[:])    #failure
        if len(wrapper.called_list) == 0:   #first time
            wrapper.stored_use_defaults = kwargs.get("allow_defaults",False)
            print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults))
        else:
            # nested call: inherit the setting of the outermost call
            kwargs["allow_defaults"] = wrapper.stored_use_defaults
        wrapper.called_list.append(key_arg)
        print('Recursion watch: call stack: ' + repr(wrapper.called_list))
        try:
            result = in_this_func(*args,**kwargs)
        except StarFile.StarDerivationError:
            if len(wrapper.called_list) == 1: #no more
                raise StarFile.StarDerivationFailure(wrapper.called_list[0])
            else:
                raise
        finally:
            wrapper.called_list.pop()
            if len(wrapper.called_list) == 0:
                # Outermost call finished: invalidate the remembered value.
                # (Previously a misspelt attribute name was assigned here,
                # so the stored value was never actually reset.)
                wrapper.stored_use_defaults = 'error'
        return result
    wrapper.called_list = []
    return wrapper
117
class CifBlock(StarFile.StarBlock):
    """
    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
    a Python dictionary, in particular, individual items can be accessed using square
    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.

    ## Initialisation

    When provided, `data` should be another `CifBlock` whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
    """
    def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
        """When provided, `data` should be another CifBlock whose contents will be copied to
        this block.

        * if `strict` is set, maximum name lengths will be enforced

        * `maxoutlength` is the maximum length for output lines

        * `wraplength` is the ideal length to make output lines

        * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
        is raised).

        * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
        the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
        after setting the dataitem value.
        """
        # strict mode passes a name-length limit of 75 to the base class, -1 otherwise
        if strict:
            maxnamelength = 75
        else:
            maxnamelength = -1
        super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
        self.dictionary = None   #DDL dictionary referring to this block
        self.compat_mode = compat_mode   #old-style behaviour of setitem

    def RemoveCifItem(self,itemname):
        """Remove `itemname` from the CifBlock"""
        self.RemoveItem(itemname)

    def __setitem__(self,key,value):
        """Store `value` under `key` via `AddItem`.

        In `compat_mode`, a plain list or tuple value (not a `StarList`) is
        additionally turned into a single-dataname loop."""
        self.AddItem(key,value)
        # for backwards compatibility make a single-element loop
        if self.compat_mode:
            if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
                # single element loop
                self.CreateLoop([key])

    def copy(self):
        """Return a copy of this block, constructed as an instance of the
        same class as `self` so that subclasses are preserved."""
        newblock = super(CifBlock,self).copy()
        # `self.copy.im_class` only exists on Python 2 bound methods;
        # `self.__class__` gives the same class on both Python 2 and 3.
        return self.__class__(newblock)   #catch inheritance

    def AddCifItem(self,data):
        """ *DEPRECATED*. Use `AddItem` instead.

        `data[0]` is either a single dataname (string) with its value in
        `data[1]`, or a nested sequence in which `data[0][0]` holds the
        datanames of a loop and `data[1][0]` the matching value columns.

        Raises `TypeError` if `data[0]` is not a string, tuple or list."""
        # we accept only tuples, strings and lists!!
        if not (isinstance(data[0],(unicode,tuple,list,str))):
            raise TypeError('Cif datanames are either a string, tuple or list')
        # we catch single item loops as well...
        if isinstance(data[0],(unicode,str)):
            self.AddSingleCifItem(data[0],list(data[1]))
            if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
                self.CreateLoop([data[0]])
            return
        # otherwise, we loop over the datanames
        for dataname,column in zip(data[0][0],[list(a) for a in data[1][0]]):
            self.AddSingleCifItem(dataname,column)
        # and create the loop
        self.CreateLoop(data[0][0])

    def AddSingleCifItem(self,key,value):
        """*Deprecated*. Use `AddItem` instead.

        Add a single data item. If it is part of a loop, a separate call should be made"""
        self.AddItem(key,value)

    def loopnames(self):
        """Return a list holding the value stored in `self.loops` for each loop key."""
        return [self.loops[a] for a in self.loops]
207
208
class CifFile(StarFile.StarFile):
    """A `StarFile.StarFile` whose `standard` defaults to 'CIF' and which
    carries a CIF banner in `header_comment`."""
    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
        """Pass `datasource`, `standard` and any extra keywords on to the
        base class, then record `strict` and install the header comment."""
        cif_banner = \
"""
##########################################################################
#               Crystallographic Information Format file
#               Produced by PyCifRW module
#
#  This is a CIF file.  CIF has been adopted by the International
#  Union of Crystallography as the standard for data archiving and
#  transmission.
#
#  For information on this file format, follow the CIF links at
#  http://www.iucr.org
##########################################################################
"""
        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
        self.strict = strict
        self.header_comment = cif_banner
227
228
class CifError(Exception):
    """Raised for CIF format errors; `value` holds the error description."""
    def __init__(self,value):
        super(CifError,self).__init__(value)
        self.value = value
    def __str__(self):
        # %-formatting (rather than '+') so that a non-string `value`
        # is rendered instead of raising TypeError.
        return '\nCif Format error: %s' % (self.value,)
234
class ValidCifError(Exception):
    """Raised for CIF validity errors; `value` holds the error description."""
    def __init__(self,value):
        super(ValidCifError,self).__init__(value)
        self.value = value
    def __str__(self):
        # %-formatting (rather than '+') so that a non-string `value`
        # is rendered instead of raising TypeError.
        return '\nCif Validity error: %s' % (self.value,)
240
class CifRecursionError(Exception):
    """Signals that a derivation revisited a key that was already in progress.

    `key_value` is the repeated key and `call_stack` the sequence of keys
    that were in progress when it was seen again."""
    def __init__(self,key_value,call_stack):
        self.call_stack = call_stack
        self.key_value = key_value

    def __str__(self):
        template = "Derivation has recursed, %s seen twice (call stack %s)"
        stack_text = repr(self.call_stack)
        return template % (self.key_value,stack_text)
247
248
249class CifDic(StarFile.StarFile):
250    """Create a Cif Dictionary object from the provided source, which can
251    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
252    only):
253
254    * do_minimum (Boolean):
255         Do not set up the dREL system for auto-calculation or perform
256         imports.  This implies do_imports=False and do_dREL=False
257
258    * do_imports = No/Full/Contents/All:
259         If not 'No', replace _import.get statements with the imported contents for
260         Full mode/Contents mode/Both respectively.
261
262    * do_dREL = True/False:
263         Parse and convert all dREL methods to Python. Implies do_imports=All
264
265    """
266    def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
267                                                             grammar='auto',**kwargs):
268        self.do_minimum = do_minimum
269        if do_minimum:
270            do_imports = 'No'
271            do_dREL = False
272        if do_dREL: do_imports = 'All'
273        self.template_cache = {}    #for DDLm imports
274        self.ddlm_functions = {}    #for DDLm functions
275        self.switch_numpy(False)    #no Numpy arrays returned
276        super(CifDic,self).__init__(datasource=dic,grammar=grammar,**kwargs)
277        self.standard = 'Dic'    #for correct output order
278        self.scoping = 'dictionary'
279        (self.dicname,self.diclang) = self.dic_determine()
280        print('%s is a %s dictionary' % (self.dicname,self.diclang))
281        self.scopes_mandatory = {}
282        self.scopes_naughty = {}
283        # rename and expand out definitions using "_name" in DDL dictionaries
284        if self.diclang == "DDL1":
285            self.DDL1_normalise()   #this removes any non-definition entries
286        self.create_def_block_table() #From now on, [] uses definition_id
287        if self.diclang == "DDL1":
288            self.ddl1_cat_load()
289        elif self.diclang == "DDL2":
290            self.DDL2_normalise()   #iron out some DDL2 tricky bits
291        elif self.diclang == "DDLm":
292            self.scoping = 'dictionary'   #expose all save frames
293            if do_imports is not 'No':
294               self.ddlm_import(import_mode=do_imports)#recursively calls this routine
295            self.create_alias_table()
296            self.create_cat_obj_table()
297            self.create_cat_key_table()
298            if do_dREL:
299                print('Doing full dictionary initialisation')
300                self.initialise_drel()
301        self.add_category_info(full=do_dREL)
302        # initialise type information
303        self.typedic={}
304        self.primdic = {}   #typecode<->primitive type translation
305        self.add_type_info()
306        self.install_validation_functions()
307
308    def dic_determine(self):
309        if "on_this_dictionary" in self: 
310            self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
311            self.def_id_spec = "_name"
312            self.cat_id_spec = "_category.id"   #we add this ourselves
313            self.type_spec = "_type"
314            self.enum_spec = "_enumeration"
315            self.cat_spec = "_category"
316            self.esd_spec = "_type_conditions"
317            self.must_loop_spec = "_list"
318            self.must_exist_spec = "_list_mandatory"
319            self.list_ref_spec = "_list_reference"
320            self.key_spec = "_list_mandatory"
321            self.unique_spec = "_list_uniqueness"
322            self.child_spec = "_list_link_child"
323            self.parent_spec = "_list_link_parent"
324            self.related_func = "_related_function"
325            self.related_item = "_related_item"
326            self.primitive_type = "_type"
327            self.dep_spec = "xxx"
328            self.cat_list = []   #to save searching all the time
329            name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
330            version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
331            return (name+version,"DDL1")
332        elif len(self.get_roots()) == 1:              # DDL2/DDLm
333            self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])     
334            # now change to dictionary scoping
335            self.scoping = 'dictionary'
336            name = self.master_block["_dictionary.title"]
337            version = self.master_block["_dictionary.version"]
338            if self.master_block.has_key("_dictionary.class"):   #DDLm
339                self.enum_spec = '_enumeration_set.state'
340                self.key_spec = '_category.key_id'
341                self.must_exist_spec = None
342                self.cat_spec = '_name.category_id'
343                self.primitive_type = '_type.contents'
344                self.cat_id_spec = "_definition.id"
345                self.def_id_spec = "_definition.id"
346                return(name+version,"DDLm") 
347            else:   #DDL2
348                self.cat_id_spec = "_category.id"
349                self.def_id_spec = "_item.name"
350                self.key_spec = "_category_mandatory.name"
351                self.type_spec = "_item_type.code"
352                self.enum_spec = "_item_enumeration.value"
353                self.esd_spec = "_item_type_conditions.code"
354                self.cat_spec = "_item.category_id"
355                self.loop_spec = "there_is_no_loop_spec!"
356                self.must_loop_spec = "xxx"
357                self.must_exist_spec = "_item.mandatory_code"
358                self.child_spec = "_item_linked.child_name"
359                self.parent_spec = "_item_linked.parent_name"
360                self.related_func = "_item_related.function_code"
361                self.related_item = "_item_related.related_name"
362                self.unique_spec = "_category_key.name"
363                self.list_ref_spec = "xxx"
364                self.primitive_type = "_type"
365                self.dep_spec = "_item_dependent.dependent_name"
366                return (name+version,"DDL2")
367        else:
368            raise CifError("Unable to determine dictionary DDL version")
369
370    def DDL1_normalise(self):
371        # switch off block name collision checks
372        self.standard = None
373        # add default type information in DDL2 style
374        # initial types and constructs
375        base_types = ["char","numb","null"]
376        prim_types = base_types[:]
377        base_constructs = [".*",
378            '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
379            "\"\" "]
380        for key,value in self.items():
381           newnames = [key]  #keep by default
382           if "_name" in value:
383               real_name = value["_name"]
384               if isinstance(real_name,list):        #looped values
385                   for looped_name in real_name:
386                      new_value = value.copy()
387                      new_value["_name"] = looped_name  #only looped name
388                      self[looped_name] = new_value
389                   newnames = real_name
390               else:
391                      self[real_name] = value
392                      newnames = [real_name]
393           # delete the old one
394           if key not in newnames:
395              del self[key]
396        # loop again to normalise the contents of each definition
397        for key,value in self.items():
398           #unlock the block
399           save_overwrite = value.overwrite
400           value.overwrite = True
401           # deal with a missing _list, _type_conditions
402           if "_list" not in value: value["_list"] = 'no'
403           if "_type_conditions" not in value: value["_type_conditions"] = 'none'
404           # deal with enumeration ranges
405           if "_enumeration_range" in value:
406               max,min = self.getmaxmin(value["_enumeration_range"])
407               if min == ".":
408                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
409               elif max == ".":
410                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
411               else:
412                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
413           #add any type construct information
414           if "_type_construct" in value:
415               base_types.append(value["_name"]+"_type")   #ie dataname_type
416               base_constructs.append(value["_type_construct"]+"$")
417               prim_types.append(value["_type"])     #keep a record
418               value["_type"] = base_types[-1]   #the new type name
419
420        #make categories conform with ddl2
421        #note that we must remove everything from the last underscore
422           if value.get("_category",None) == "category_overview":
423                last_under = value["_name"].rindex("_")
424                catid = value["_name"][1:last_under]
425                value["_category.id"] = catid  #remove square bracks
426                if catid not in self.cat_list: self.cat_list.append(catid)
427           value.overwrite = save_overwrite
428        # we now add any missing categories before filling in the rest of the
429        # information
430        for key,value in self.items():
431            #print('processing ddl1 definition %s' % key)
432            if "_category" in self[key]:
433                if self[key]["_category"] not in self.cat_list:
434                    # rogue category, add it in
435                    newcat = self[key]["_category"]
436                    fake_name = "_" + newcat + "_[]"
437                    newcatdata = CifBlock()
438                    newcatdata["_category"] = "category_overview"
439                    newcatdata["_category.id"] = newcat
440                    newcatdata["_type"] = "null"
441                    self[fake_name] = newcatdata
442                    self.cat_list.append(newcat)
443        # write out the type information in DDL2 style
444        self.master_block.AddLoopItem((
445            ("_item_type_list.code","_item_type_list.construct",
446              "_item_type_list.primitive_code"),
447            (base_types,base_constructs,prim_types)
448            ))
449
450    def ddl1_cat_load(self):
451        deflist = self.keys()       #slight optimization
452        cat_mand_dic = {}
453        cat_unique_dic = {}
454        # a function to extract any necessary information from each definition
455        def get_cat_info(single_def):
456            if self[single_def].get(self.must_exist_spec)=='yes':
457                thiscat = self[single_def]["_category"]
458                curval = cat_mand_dic.get(thiscat,[])
459                curval.append(single_def)
460                cat_mand_dic[thiscat] = curval
461            # now the unique items...
462            # cif_core.dic throws us a curly one: the value of list_uniqueness is
463            # not the same as the defined item for publ_body_label, so we have
464            # to collect both together.  We assume a non-listed entry, which
465            # is true for all current (May 2005) ddl1 dictionaries.
466            if self[single_def].get(self.unique_spec,None)!=None:
467                thiscat = self[single_def]["_category"]
468                new_unique = self[single_def][self.unique_spec]
469                uis = cat_unique_dic.get(thiscat,[])
470                if single_def not in uis: uis.append(single_def)
471                if new_unique not in uis: uis.append(new_unique)
472                cat_unique_dic[thiscat] = uis
473
474        [get_cat_info(a) for a in deflist] # apply the above function
475        for cat in cat_mand_dic.keys():
476            self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
477        for cat in cat_unique_dic.keys():
478            self[cat]["_category_key.name"] = cat_unique_dic[cat]
479
480    def create_pcloop(self,definition):
481        old_children = self[definition].get('_item_linked.child_name',[])
482        old_parents = self[definition].get('_item_linked.parent_name',[])
483        if isinstance(old_children,unicode):
484             old_children = [old_children]
485        if isinstance(old_parents,unicode):
486             old_parents = [old_parents]
487        if (len(old_children)==0 and len(old_parents)==0) or \
488           (len(old_children) > 1 and len(old_parents)>1):
489             return
490        if len(old_children)==0:
491             old_children = [definition]*len(old_parents)
492        if len(old_parents)==0:
493             old_parents = [definition]*len(old_children)
494        newloop = CifLoopBlock(dimension=1)
495        newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
496        newloop.AddLoopItem(('_item_linked.child_name',old_children))
497        try:
498            del self[definition]['_item_linked.parent_name']
499            del self[definition]['_item_linked.child_name']
500        except KeyError:
501            pass
502        self[definition].insert_loop(newloop)
503
504
505
506    def DDL2_normalise(self):
507       listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys())
508       # now filter out all the single element lists!
509       dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs)
510       for item_def in dodgy_defs:
511                # print("DDL2 norm: processing %s" % item_def)
512                thisdef = self[item_def]
513                packet_no = thisdef['_item.name'].index(item_def)
514                realcat = thisdef['_item.category_id'][packet_no]
515                realmand = thisdef['_item.mandatory_code'][packet_no]
516                # first add in all the missing categories
517                # we don't replace the entry in the list corresponding to the
518                # current item, as that would wipe out the information we want
519                for child_no in range(len(thisdef['_item.name'])):
520                    if child_no == packet_no: continue
521                    child_name = thisdef['_item.name'][child_no]
522                    child_cat = thisdef['_item.category_id'][child_no]
523                    child_mand = thisdef['_item.mandatory_code'][child_no]
524                    if child_name not in self:
525                        self[child_name] = CifBlock()
526                        self[child_name]['_item.name'] = child_name
527                    self[child_name]['_item.category_id'] = child_cat
528                    self[child_name]['_item.mandatory_code'] = child_mand
529                self[item_def]['_item.name'] = item_def
530                self[item_def]['_item.category_id'] = realcat
531                self[item_def]['_item.mandatory_code'] = realmand
532
533       target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
534                                     '_item_linked.parent_name' in self[a]]
535       # now dodgy_defs contains all definition blocks with more than one child/parent link
536       for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
537       for item_def in dodgy_defs:
538             print('Processing %s' % item_def)
539             thisdef = self[item_def]
540             child_list = thisdef['_item_linked.child_name']
541             parents = thisdef['_item_linked.parent_name']
542             # for each parent, find the list of children.
543             family = list(zip(parents,child_list))
544             notmychildren = family         #We aim to remove non-children
545             # Loop over the parents, relocating as necessary
546             while len(notmychildren):
547                # get all children of first entry
548                mychildren = [a for a in family if a[0]==notmychildren[0][0]]
549                print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
550                for parent,child in mychildren:   #parent is the same for all
551                         # Make sure that we simply add in the new entry for the child, not replace it,
552                         # otherwise we might spoil the child entry loop structure
553                         try:
554                             childloop = self[child].GetLoop('_item_linked.parent_name')
555                         except KeyError:
556                             print('Creating new parent entry %s for definition %s' % (parent,child))
557                             self[child]['_item_linked.parent_name'] = [parent]
558                             childloop = self[child].GetLoop('_item_linked.parent_name')
559                             childloop.AddLoopItem(('_item_linked.child_name',[child]))
560                             continue
561                         else:
562                             # A parent loop already exists and so will a child loop due to the
563                             # call to create_pcloop above
564                             pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
565                             goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
566                             if len(goodpars)>0:   #no need to add it
567                                 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
568                                 continue
569                             print('Adding %s to %s entry' % (parent,child))
570                             newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
571                             setattr(newpacket,'_item_linked.child_name',child)
572                             setattr(newpacket,'_item_linked.parent_name',parent)
573                             childloop.AddPacket(newpacket)
574                #
575                # Make sure the parent also points to the children.  We get
576                # the current entry, then add our
577                # new values if they are not there already
578                #
579                parent_name = mychildren[0][0]
580                old_children = self[parent_name].get('_item_linked.child_name',[])
581                old_parents = self[parent_name].get('_item_linked.parent_name',[])
582                oldfamily = zip(old_parents,old_children)
583                newfamily = []
584                print('Old parents -> %s' % repr(old_parents))
585                for jj, childname in mychildren:
586                    alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
587                    if len(alreadythere)>0: continue
588                    'Adding new child %s to parent definition at %s' % (childname,parent_name)
589                    old_children.append(childname)
590                    old_parents.append(parent_name)
591                # Now output the loop, blowing away previous definitions.  If there is something
592                # else in this category, we are destroying it.
593                newloop = CifLoopBlock(dimension=1)
594                newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
595                newloop.AddLoopItem(('_item_linked.child_name',old_children))
596                del self[parent_name]['_item_linked.parent_name']
597                del self[parent_name]['_item_linked.child_name']
598                self[parent_name].insert_loop(newloop)
599                print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
600                # now make a new,smaller list
601                notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]
602
603       # now flatten any single element lists
604       single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs)
605       for flat_def in single_defs:
606           flat_keys = self[flat_def].GetLoop('_item.name').keys()
607           for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
608       # now deal with the multiple lists
609       # next we do aliases
610       all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
611       for aliased in all_aliases:
612          my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
613          for alias in my_aliases:
614              self[alias] = self[aliased].copy()   #we are going to delete stuff...
615              del self[alias]["_item_aliases.alias_name"]
616 
617    def ddlm_parse_valid(self):
618        if "_dictionary_valid.application" not in self.master_block:
619            return
620        for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
621            scope = getattr(scope_pack,"_dictionary_valid.application")
622            valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
623            if scope[1] == "Mandatory":
624                self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
625            elif scope[1] == "Prohibited":
626                self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)
627               
    def ddlm_import(self,import_mode='All'):
        """Resolve the `_import.get` attributes found in this dictionary.

        `import_mode` selects which import references are acted on: with the
        default 'All' every reference is processed, otherwise only those whose
        'mode' entry (default 'Contents') equals `import_mode` exactly.

        Each reference is a mapping with the keys "file" and "save" and the
        optional keys "mode" (default 'Contents'), "dupl" (default 'Exit')
        and "miss" (default 'Exit').  The referenced file is read, wrapped in
        a CifDic and cached in `self.template_cache` under its resolved URI;
        the frame named by "save" is then merged in 'contents' mode (attributes
        merged into the importing block) or 'full' mode (the frame and its
        children are added to this dictionary and reparented).

        Raises CifError if the requested frame is not found and "miss" is
        'Exit', or if a full-mode import names a frame already in this
        dictionary and "dupl" is 'Exit'."""
        # list of (importing definition, list of import references)
        import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
        print ('Import mode %s applied to following frames' % import_mode)
        print (str([a[0] for a in import_frames]))
        if import_mode != 'All':
           # keep only the references requesting this mode; note that this
           # comparison is case-sensitive whereas `mode` below is lowercased
           for i in range(len(import_frames)):
                import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents') == import_mode])
           print('Importing following frames in mode %s' % import_mode)
           print(str(import_frames))
        #resolve all references
        for parent_block,import_list in import_frames:
          for import_ref in import_list:
            file_loc = import_ref["file"]
            full_uri = self.resolve_path(file_loc)
            if full_uri not in self.template_cache:
                # NOTE(review): the object returned by urlopen is not closed
                # here - confirm whether CifFile takes care of that
                dic_as_cif = CifFile(urlopen(full_uri),grammar=self.grammar)
                self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,do_dREL=False)  #this will recurse internal imports
                print('Added %s to cached dictionaries' % full_uri)
            import_from = self.template_cache[full_uri]
            dupl = import_ref.get('dupl','Exit')
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                   raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if target_key in self and mode=='full':  #so blockname will be duplicated
                if dupl == 'Exit':
                    raise CifError('Import frame %s already in dictionary' % target_key)
                elif dupl == 'Ignore':
                    continue
            if mode == 'contents':   #merge attributes only
                self[parent_block].merge(import_target)
            elif mode =="full":
                # Do the syntactic merge
                syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
                from_cat_head = import_target['_name.object_id']
                # ddlm_all_children returns the children followed by the category itself
                child_frames = import_from.ddlm_all_children(from_cat_head)
                # Check for Head merging Head
                if self[parent_block].get('_definition.class','Datum')=='Head' and \
                   import_target.get('_definition.class','Datum')=='Head':
                      head_to_head = True
                else:
                      head_to_head = False
                      child_frames.remove(from_cat_head)
                # As we are in syntax land, we call the CifFile methods
                child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
                child_blocks = super(CifDic,import_from).makebc(child_blocks)
                # Prune out any datablocks that have identical definitions
                from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
                double_defs = list([b for b in from_defs.items() if self.has_key(b[1])])
                print ('Definitions for %s superseded' % repr(double_defs))
                for b in double_defs:
                    del child_blocks[b[0]]
                super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      # add the surviving blocks under the syntactic head
                print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
                   mode,len(self)))
                # Now the semantic merge
                # First expand our definition <-> blockname tree
                self.create_def_block_table()
                merging_cat = self[parent_block]['_name.object_id']      #new parent
                if head_to_head:
                    child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                    #the new parent is the importing category for all old children
                    for f in child_frames:
                        # temporarily allow the existing category_id to be replaced
                        self[f].overwrite = True
                        self[f]['_name.category_id'] = merging_cat
                        self[f].overwrite = False
                    # remove the old head
                    del self[from_cat_head]
                    print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
                else:  #imported category is only child
                    from_frame = import_from[target_key]['_definition.id'] #so we can find it
                    child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                    self[child_frame]['_name.category_id'] = merging_cat
                    print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
            # it will never happen again...
            # NOTE(review): this deletion sits inside the per-reference loop and
            # is skipped by the `continue` statements above - confirm the
            # intended behaviour for frames with more than one import reference
            del self[parent_block]["_import.get"]
710
711    def resolve_path(self,file_loc):
712        url_comps = urlparse(file_loc)
713        if url_comps[0]: return file_loc    #already full URI
714        new_url = urljoin(self.my_uri,file_loc)
715        #print("Transformed %s to %s for import " % (file_loc,new_url))
716        return new_url
717
718
719
720    def create_def_block_table(self):
721        """ Create an internal table matching definition to block id """
722        proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
723        # now get the actual ids instead of blocks
724        proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
725        # remove non-definitions
726        if self.diclang != "DDL1":
727            top_blocks = list([a[0].lower() for a in self.get_roots()])
728        else:
729            top_blocks = ["on_this_dictionary"]
730        # catch dodgy duplicates
731        uniques = set([a[0] for a in proto_table])
732        if len(uniques)<len(proto_table):
733            def_names = list([a[0] for a in proto_table])
734            dodgy = [a for a in def_names if def_names.count(a)>1]
735            raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
736        self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])
737       
738    def __getitem__(self,key):
739        """Access a datablock by definition id, after the lookup has been created"""
740        try:
741            return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
742        except AttributeError:   #block_id_table not present yet
743            return super(CifDic,self).__getitem__(key)
744        except KeyError: # key is missing
745            # print(Definition for %s not found, reverting to CifFile' % key)
746            return super(CifDic,self).__getitem__(key)
747
748    def __setitem__(self,key,value):
749        """Add a new definition block"""
750        super(CifDic,self).__setitem__(key,value)
751        try:
752            self.block_id_table[value['_definition.id']]=key
753        except AttributeError:   #does not exist yet
754            pass
755
756    def __delitem__(self,key):
757        """Remove a definition"""
758        try:
759            super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
760            del self.block_id_table[key.lower()]
761        except (AttributeError,KeyError):   #block_id_table not present yet
762            super(CifDic,self).__delitem__(key)
763            return
764        # fix other datastructures
765        # cat_obj table
766       
767    def keys(self):
768        """Return all definitions"""
769        try:
770            return self.block_id_table.keys()
771        except AttributeError:
772            return super(CifDic,self).keys()
773
774    def has_key(self,key):
775        return key in self
776
777    def __contains__(self,key):
778        try:
779            return key.lower() in self.block_id_table
780        except AttributeError:
781            return super(CifDic,self).__contains__(key)
782           
783    def items(self):
784        """Return (key,value) pairs"""
785        return list([(a,self[a]) for a in self.keys()])
786
787    def unlock(self):
788        """Allow overwriting of all definitions in this collection"""
789        for a in self.keys():
790            self[a].overwrite=True
791
792    def lock(self):
793        """Disallow changes in definitions"""
794        for a in self.keys():
795            self[a].overwrite=False
796
797    def rename(self,oldname,newname,blockname_as_well=True):
798        """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
799        change the underlying blockname too."""
800        if blockname_as_well:
801            super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)       
802            self.block_id_table[newname.lower()]=newname
803            if oldname.lower() in self.block_id_table: #not removed
804               del self.block_id_table[oldname.lower()]
805        else:
806            self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
807            del self.block_id_table[oldname.lower()]
808            return
809                                                 
810    def get_root_category(self):
811        """Get the single 'Head' category of this dictionary"""
812        root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
813        if len(root_cats)>1 or len(root_cats)==0:
814            raise CifError("Cannot determine a unique Head category, got" % repr(root_cats))
815        return root_cats[0]
816
817    def ddlm_immediate_children(self,catname):
818        """Return a list of datanames for the immediate children of catname.  These are
819        semantic children (i.e. based on _name.category_id), not structural children as
820        in the case of StarFile.get_immediate_children"""
821                                                 
822        straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
823        return list(straight_children)
824
825    def ddlm_all_children(self,catname):
826        """Return a list of all children, including the `catname`"""
827        all_children = self.ddlm_immediate_children(catname)
828        cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
829        for c in cat_children:
830            all_children.remove(c)
831            all_children += self.ddlm_all_children(c)
832        return all_children + [catname]
833
834    def is_semantic_child(self,parent,maybe_child):
835        """Return true if `maybe_child` is a child of `parent`"""
836        all_children = self.ddlm_all_children(parent)
837        return maybe_child in all_children
838
839    def ddlm_danglers(self):
840        """Return a list of definitions that do not have a category defined
841        for them, or are children of an unattached category"""
842        top_block = self.get_root_category()
843        connected = set(self.ddlm_all_children(top_block))
844        all_keys = set(self.keys())
845        unconnected = all_keys - connected
846        return list(unconnected)
847
848    def get_ddlm_parent(self,itemname):
849        """Get the parent category of itemname"""
850        parent = self[itemname].get('_name.category_id','')
851        if parent == '':  # use the top block by default
852            raise CifError("%s has no parent" % itemname)
853        return parent
854
855    def expand_category_opt(self,name_list):
856        """Return a list of all non-category items in a category or return the name
857           if the name is not a category"""
858        new_list = []
859        for name in name_list:
860          if self.get(name,{}).get('_definition.scope','Item') == 'Category':
861            new_list += self.expand_category_opt([a for a in self.keys() if \
862                     self[a].get('_name.category_id','').lower() == name.lower()])
863          else:
864            new_list.append(name)
865        return new_list
866
867    def get_categories(self):
868        """Return a list of category names"""
869        return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])
870
871    def names_in_cat(self,cat,names_only=False):
872        names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
873        if not names_only:
874            return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
875        else:
876            return list([self[a]["_name.object_id"] for a in names])
877
878                           
879
880    def create_alias_table(self):
881        """Populate an alias table that we can look up when searching for a dataname"""
882        all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]]
883        self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases])
884
    def create_cat_obj_table(self):
        """Populate a table indexed by (cat,obj) and returning the correct dataname.

        Sets `self.cat_obj_lookup_table`, whose keys are (category, object)
        tuples and whose values are lists of definition ids, and
        `self.loop_expand_list`, which maps each looped category having looped
        child categories to the list of those children.  The names found in
        child categories are additionally entered under the parent category."""
        # one entry per definition with scope 'Item', keyed by lowercased (cat,obj)
        base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                           for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
        loopable = self.get_loopable_cats() 
        loopers = [self.ddlm_immediate_children(a) for a in loopable]
        print('Loopable cats:' + repr(loopable))
        # for each looped category, those immediate children that are looped categories
        loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
        expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
        print("Expansion list:" + repr(expand_list))
        extra_table = {}   #for debugging we keep it separate from base_table until the end
        def expand_base_table(parent_cat,child_cats):
            # Enter the non-key names of `child_cats` (and, recursively, of
            # their own children) into extra_table under `parent_cat`.
            # Returns a list of (object, definition id) pairs.
            extra_names = []
            # first deal with all the child categories
            for child_cat in child_cats:
              nn = []
              if child_cat in expand_list:  # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
                # store child names
                extra_names += nn
              # add all child names to the table
              child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                             for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
              child_names += extra_names
              # NOTE(review): the membership test uses (parent_cat,name) although
              # the keys being added are (parent_cat,obj) - confirm which is meant
              extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,name) not in extra_table]))
            # and the repeated ones get appended instead
            # NOTE(review): `child_names` here is whatever the last loop
            # iteration left behind, and its (obj,name) pairs are tested against
            # the (cat,obj) keys of extra_table - confirm this is intended
            repeats = [a for a in child_names if a in extra_table]
            for obj,name in repeats:
                extra_table[(parent_cat,obj)] += [name]
            # and finally, add our own names to the return list
            child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return child_names
        # called for its effect on extra_table only; the returned lists are discarded
        [expand_base_table(parent,child) for parent,child in expand_list.items()]
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list
928
929    def get_loopable_cats(self):
930        """A short utility function which returns a list of looped categories. This
931        is preferred to a fixed attribute as that fixed attribute would need to be
932        updated after any edits"""
933        return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']
934
935    def create_cat_key_table(self):
936        """Create a utility table with a list of keys applicable to each category. A key is
937        a compound key, that is, it is a list"""
938        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
939            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
940        def collect_keys(parent_cat,child_cats):
941                kk = []
942                for child_cat in child_cats:
943                    if child_cat in self.loop_expand_list:
944                        kk += collect_keys(child_cat)
945                    # add these keys to our list
946                    kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
947                self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
948                return kk
949        for k,v in self.loop_expand_list.items():
950            collect_keys(k,v)
951        print('Keys for categories' + repr(self.cat_key_table))
952
953    def add_type_info(self):
954        if "_item_type_list.construct" in self.master_block:
955            types = self.master_block["_item_type_list.code"]
956            prim_types = self.master_block["_item_type_list.primitive_code"]
957            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
958            # add in \r wherever we see \n, and change \{ to \\{
959            def regex_fiddle(mm_regex):
960                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
961                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
962                fixed_regexp = mm_regex[:]  #copy
963                # fix the brackets
964                bm = re.match(brack_match,mm_regex)
965                if bm != None:
966                    fixed_regexp = bm.expand(r"\2\\\\{\4")
967                # fix missing \r
968                rm = re.match(ret_match,fixed_regexp)
969                if rm != None:
970                    fixed_regexp = rm.expand(r"\2\3\\r\4")
971                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
972                return fixed_regexp
973            constructs = map(regex_fiddle,constructs)
974            for typecode,construct in zip(types,constructs):
975                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
976            # now make a primitive <-> type construct mapping
977            for typecode,primtype in zip(types,prim_types):
978                self.primdic[typecode] = primtype
979
    def add_category_info(self,full=True):
        """Set up `self.parent_lookup` and, if `full` is true, `self.key_equivs`.

        For a DDLm dictionary, `parent_lookup` maps every category of class
        'Loop' to the topmost 'Loop' category reached by following
        `_name.category_id` upwards (itself, if its parent is not looped).
        `key_equivs` maps each key dataname of a looped category to the list
        of datanames it is linked to in successive parent categories.  When
        `full` is false, `key_equivs` is not assigned for DDLm dictionaries.
        For any other dictionary language both are set to empty dictionaries.

        Raises CifError if a category has more than one looped parent or if
        the linked keys do not match the keys of the parent."""
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                # climb for as long as the parent is itself a looped category
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        # the category holding the current keys, judged by the first key
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print(Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                            # now add in our information
                        # note that `parent` is rebound here to a key dataname
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys  #preserves order of start keys

        else:
            self.parent_lookup = {}
            self.key_equivs = {}
1021
1022    def change_category_name(self,oldname,newname):
1023        self.unlock()
1024        """Change the category name from [[oldname]] to [[newname]]"""
1025        if oldname not in self:
1026            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
1027        if newname in self:
1028            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,oldname))
1029        child_defs = self.ddlm_immediate_children(oldname)
1030        self.rename(oldname,newname)   #NB no name integrity checks
1031        self[newname]['_name.object_id']=newname
1032        self[newname]['_definition.id']=newname
1033        for child_def in child_defs:
1034            self[child_def]['_name.category_id'] = newname
1035            if self[child_def].get('_definition.scope','Item')=='Item':
1036                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
1037                self[child_def]['_definition.id']=newid
1038                self.rename(child_def,newid[1:])  #no underscore at the beginning
1039        self.lock()
1040
1041    def create_catobj_name(self,cat,obj):
1042        """Combine category and object in approved fashion to create id"""
1043        return ('_'+cat+'.'+obj)
1044
1045    def change_category(self,itemname,catname):
1046        """Move itemname into catname, return new handle"""
1047        defid = self[itemname]
1048        if defid['_name.category_id'].lower()==catname.lower():
1049            print('Already in category, no change')
1050            return itemname
1051        if catname not in self:    #don't have it
1052            print('No such category %s' % catname)
1053            return itemname
1054        self.unlock()
1055        objid = defid['_name.object_id']
1056        defid['_name.category_id'] = catname
1057        newid = itemname # stays the same for categories
1058        if defid.get('_definition.scope','Item') == 'Item':
1059            newid = self.create_catobj_name(catname,objid)
1060            defid['_definition.id']= newid
1061            self.rename(itemname,newid)
1062        self.set_parent(catname,newid)
1063        self.lock()
1064        return newid
1065
1066    def change_name(self,one_def,newobj):
1067        """Change the object_id of one_def to newobj. This is not used for
1068        categories, but can be used for dictionaries"""
1069        if '_dictionary.title' not in self[one_def]:  #a dictionary block
1070            newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
1071            self.unlock()
1072            self.rename(one_def,newid)
1073            self[newid]['_definition.id']=newid
1074            self[newid]['_name.object_id']=newobj
1075        else:
1076            self.unlock()
1077            newid = newobj
1078            self.rename(one_def,newobj)
1079            self[newid]['_dictionary.title'] = newid
1080        self.lock()
1081        return newid
1082
1083    # Note that our semantic parent is given by catparent, but our syntactic parent is
1084    # always just the root block
1085    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
1086        """Add a new category to the dictionary with name [[catname]].
1087           If [[catparent]] is None, the category will be a child of
1088           the topmost 'Head' category or else the top data block. If
1089           [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
1090           is true, the parent category does not have to exist."""
1091        if catname in self:
1092            raise CifError('Attempt to add existing category %s' % catname)
1093        self.unlock()
1094        syntactic_root = self.get_roots()[0][0]
1095        if catparent is None:
1096            semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
1097            if len(semantic_root)>0:
1098                semantic_root = semantic_root[0]
1099            else:
1100                semantic_root = syntactic_root
1101        else:
1102            semantic_root = catparent
1103        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
1104        self.block_id_table[catname.lower()]=realname
1105        self[catname]['_name.object_id'] = catname
1106        if not allow_dangler or catparent is None:
1107            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
1108        else:
1109            self[catname]['_name.category_id'] = catparent
1110        self[catname]['_definition.id'] = catname
1111        self[catname]['_definition.scope'] = 'Category'
1112        if is_loop:
1113            self[catname]['_definition.class'] = 'Loop'
1114        else:
1115            self[catname]['_definition.class'] = 'Set'
1116        self[catname]['_description.text'] = 'No definition provided'
1117        self.lock()
1118        return catname
1119
1120    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
1121        """Add itemname to category [[catparent]]. If itemname contains periods,
1122        all text before the final period is ignored. If [[allow_dangler]] is True,
1123        no check for a parent category is made."""
1124        self.unlock()
1125        if '.' in itemname:
1126            objname = itemname.split('.')[-1]
1127        else:
1128            objname = itemname
1129        objname = objname.strip('_')
1130        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
1131            raise CifError('No category %s in dictionary' % catparent)
1132        fullname = '_'+catparent.lower()+'.'+objname
1133        print('New name: %s' % fullname)
1134        syntactic_root = self.get_roots()[0][0]
1135        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
1136        # update our dictionary structures
1137        self.block_id_table[fullname]=realname
1138        self[fullname]['_definition.id']=fullname
1139        self[fullname]['_name.object_id']=objname
1140        self[fullname]['_name.category_id']=catparent
1141        self[fullname]['_definition.class']='Datum'
1142        self[fullname]['_description.text']=def_text
1143       
1144    def remove_definition(self,defname):
1145        """Remove a definition from the dictionary."""
1146        if defname not in self:
1147            return
1148        if self[defname].get('_definition.scope')=='Category':
1149            children = self.ddlm_immediate_children(defname)
1150            [self.remove_definition(a) for a in children]
1151            cat_id = self[defname]['_definition.id'].lower()
1152        del self[defname]
1153
1154    def get_cat_obj(self,name):
1155        """Return (cat,obj) tuple. [[name]] must contain only a single period"""
1156        cat,obj = name.split('.')
1157        return (cat.strip('_'),obj)
1158
1159    def get_name_by_cat_obj(self,category,object,give_default=False):
1160        """Return the dataname corresponding to the given category and object"""
1161        if category[0] == '_':    #accidentally left in
1162           true_cat = category[1:].lower()
1163        else:
1164           true_cat = category.lower()
1165        try:
1166            return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
1167        except KeyError:
1168            if give_default:
1169               return '_'+true_cat+'.'+object
1170        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))
1171
1172
1173    def WriteOut(self,**kwargs):
1174        myblockorder = self.get_full_child_list()
1175        self.set_grammar(self.grammar)
1176        self.standard = 'Dic'
1177        return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)
1178
1179    def get_full_child_list(self):
1180        """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
1181        top_block = self.get_roots()[0][0]
1182        root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
1183        if len(root_cat) == 1:
1184            all_names = [top_block] + self.recurse_child_list(root_cat[0])
1185            unrooted = self.ddlm_danglers()
1186            double_names =  set(unrooted).intersection(set(all_names))
1187            if len(double_names)>0:
1188                raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
1189            remaining = unrooted[:]
1190            for no_root in unrooted:
1191                if self[no_root].get('_definition.scope','Item')=='Category':
1192                    all_names += [no_root]
1193                    remaining.remove(no_root)
1194                    these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
1195                    all_names += these_children
1196                    [remaining.remove(n) for n in these_children]
1197            # now sort by category
1198            ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
1199            for e in ext_cats:
1200                cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e]
1201                [remaining.remove(n) for n in cat_items]
1202                all_names += cat_items
1203            if len(remaining)>0:
1204                print('WARNING: following items do not seem to belong to a category??')
1205                print(repr(remaining))
1206                all_names += remaining
1207            print('Final block order: ' + repr(all_names))
1208            return all_names
1209        raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')
1210
1211    def cat_from_name(self,one_name):
1212        """Guess the category from the name. This should be used only when this is not important semantic information,
1213        for example, when printing out"""
1214        (cat,obj) = one_name.split(".")
1215        if cat[0] == "_": cat = cat[1:]
1216        return cat
1217
1218    def recurse_child_list(self,parentname):
1219        """Recursively expand the logical child list of [[parentname]]"""
1220        final_list = [parentname]
1221        child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
1222        child_blocks.sort()    #we love alphabetical order
1223        child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
1224        final_list += child_items
1225        child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
1226        for child_cat in child_cats:
1227            final_list += self.recurse_child_list(child_cat)
1228        return final_list
1229
1230
1231
1232    def get_key_pack(self,category,value,data):
1233        keyname = self[category][self.unique_spec]
1234        onepack = data.GetPackKey(keyname,value)
1235        return onepack
1236
1237    def get_number_with_esd(numstring):
1238        import string
1239        numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
1240        our_match = re.match(numb_re,numstring)
1241        if our_match:
1242            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
1243            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
1244        else:
1245            return None,None
1246        if dot or q: return None,None     #a dot or question mark
1247        if exp:          #has exponent
1248           exp = exp.replace("d","e")     # mop up old fashioned numbers
1249           exp = exp.replace("D","e")
1250           base_num = base_num + exp
1251        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
1252        base_num = float(base_num)
1253        # work out esd, if present.
1254        if esd:
1255            esd = float(esd[1:-1])    # no brackets
1256            if dad:                   # decimal point + digits
1257                esd = esd * (10 ** (-1* len(dad)))
1258            if exp:
1259                esd = esd * (10 ** (float(exp[1:])))
1260        return base_num,esd
1261
1262    def getmaxmin(self,rangeexp):
1263        regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
1264        regexp = regexp + ":" + regexp
1265        regexp = re.match(regexp,rangeexp)
1266        try:
1267            minimum = regexp.group(1)
1268            maximum = regexp.group(7)
1269        except AttributeError:
1270            print("Can't match %s" % rangeexp)
1271        if minimum == None: minimum = "."
1272        else: minimum = float(minimum)
1273        if maximum == None: maximum = "."
1274        else: maximum = float(maximum)
1275        return maximum,minimum
1276
1277    def initialise_drel(self):
1278        """Parse drel functions and prepare data structures in dictionary"""
1279        self.ddlm_parse_valid() #extract validity information from data block
1280        self.transform_drel()   #parse the drel functions
1281        self.add_drel_funcs()   #put the drel functions into the namespace
1282
1283    def transform_drel(self):
1284        from .drel import drel_ast_yacc
1285        from .drel import py_from_ast
1286        import traceback
1287        parser = drel_ast_yacc.parser
1288        lexer = drel_ast_yacc.lexer
1289        my_namespace = self.keys()
1290        my_namespace = dict(zip(my_namespace,my_namespace))
1291        # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...})
1292        loopable_cats = self.get_loopable_cats()
1293        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
1294        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
1295        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
1296        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
1297        # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")]
1298        derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
1299                              and self[a].get("_name.category_id","")!= "function"]
1300        for derivable in derivable_list:
1301            target_id = derivable
1302            # reset the list of visible names for parser
1303            special_ids = [dict(zip(self.keys(),self.keys()))]
1304            print("Target id: %s" % derivable)
1305            drel_exprs = self[derivable]["_method.expression"]
1306            drel_purposes = self[derivable]["_method.purpose"]
1307            all_methods = []
1308            if not isinstance(drel_exprs,list):
1309                drel_exprs = [drel_exprs]
1310                drel_purposes = [drel_purposes]
1311            for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
1312                if drel_purpose != 'Evaluation':
1313                    continue
1314                drel_expr = "\n".join(drel_expr.splitlines())
1315                # print("Transforming %s" % drel_expr)
1316                # List categories are treated differently...
1317                try:
1318                    meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
1319                except:
1320                    print('Syntax error in method for %s; leaving as is' % derivable)
1321                    a,b = sys.exc_info()[:2]
1322                    print((repr(a),repr(b)))
1323                    print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
1324                    # reset the lexer
1325                    lexer.begin('INITIAL')
1326                    continue
1327                # Construct the python method
1328                cat_meth = False
1329                if self[derivable].get('_definition.scope','Item') == 'Category':
1330                    cat_meth = True
1331                pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
1332                                                                           loopable=loop_info,
1333                                                             cif_dic = self,cat_meth=cat_meth)
1334                all_methods.append(pyth_meth)
1335            if len(all_methods)>0:
1336                save_overwrite = self[derivable].overwrite
1337                self[derivable].overwrite = True
1338                self[derivable]["_method.py_expression"] = all_methods
1339                self[derivable].overwrite = save_overwrite
1340            #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))
1341
1342    def add_drel_funcs(self):
1343        from .drel import drel_ast_yacc
1344        from .drel import py_from_ast
1345        funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
1346        funcnames = [(self[a]["_name.object_id"],
1347                      getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
1348        # create executable python code...
1349        parser = drel_ast_yacc.parser
1350        # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...})
1351        loopable_cats = self.get_loopable_cats()
1352        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
1353        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
1354        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
1355        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
1356        for funcname,funcbody in funcnames:
1357            newline_body = "\n".join(funcbody.splitlines())
1358            parser.target_id = funcname
1359            res_ast = parser.parse(newline_body)
1360            py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
1361            #print('dREL library function ->\n' + py_function)
1362            global_table = globals()
1363            exec(py_function, global_table)    #add to namespace
1364        #print('Globals after dREL functions added:' + repr(globals()))
1365        self.ddlm_functions = globals()  #for outside access
1366
1367    @track_recursion
1368    def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
1369        key = start_key   #starting value
1370        result = None     #success is a non-None value
1371        default_result = False #we have not used a default value
1372        # check for aliases
1373        # check for an older form of a new value
1374        found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
1375        if len(found_it)>0:
1376            corrected_type = self.change_type(key,cifdata[found_it[0]])
1377            return corrected_type
1378        # now do the reverse check - any alternative form
1379        alias_name = [a for a in self.alias_table.items() if key in a[1]]
1380        print('Aliases for %s: %s' % (key,repr(alias_name)))
1381        if len(alias_name)==1:
1382            key = alias_name[0][0]   #actual definition name
1383            if key in cifdata: return self.change_type(key,cifdata[key])
1384            found_it = [k for k in alias_name[0][1] if k in cifdata]
1385            if len(found_it)>0:
1386                return self.change_type(key,cifdata[found_it[0]])
1387        elif len(alias_name)>1:
1388            raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))
1389
1390        the_category = self[key]["_name.category_id"]
1391        cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
1392        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
1393        # store any default value in case we have a problem
1394        def_val = self[key].get("_enumeration.default","")
1395        def_index_val = self[key].get("_enumeration.def_index_id","")
1396        if len(has_cat_names)==0: # try category method
1397            cat_result = {}
1398            pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
1399            pulled_from_cats = [(k,[
1400                                  self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
1401                               ) for k in pulled_from_cats]
1402            pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
1403            if '_category_construct_local.type' in self[the_category]:
1404                print("**Now constructing category %s using DDLm attributes**" % the_category)
1405                try:
1406                    cat_result = self.construct_category(the_category,cifdata,store_value=True)
1407                except (CifRecursionError,StarFile.StarDerivationError):
1408                    print('** Failed to construct category %s (error)' % the_category)
1409            # Trying a pull-back when the category is partially populated
1410            # will not work, hence we test that cat_result has no keys
1411            if len(pulled_to_cats)>0 and len(cat_result)==0:
1412                print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
1413                try:
1414                    cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
1415                except (CifRecursionError,StarFile.StarDerivationError):
1416                    print('** Failed to construct category %s from pullback information (error)' % the_category)
1417            if '_method.py_expression' in self[the_category] and key not in cat_result:
1418                print("**Now applying category method for %s in search of %s**" % (the_category,key))
1419                cat_result = self.derive_item(the_category,cifdata,store_value=True)
1420            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
1421            # do we now have our value?
1422            if key in cat_result:
1423                return cat_result[key]
1424
1425        # Recalculate in case it actually worked
1426        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
1427        the_funcs = self[key].get('_method.py_expression',"")
1428        if the_funcs:   #attempt to calculate it
1429            #global_table = globals()
1430            #global_table.update(self.ddlm_functions)
1431            for one_func in the_funcs:
1432                print('Executing function for %s:' % key)
1433                #print(one_func)
1434                exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
1435                # print('in following global environment: ' + repr(global_table))
1436                stored_setting = cifdata.provide_value
1437                cifdata.provide_value = True
1438                try:
1439                    result = pyfunc(cifdata)
1440                except CifRecursionError as s:
1441                    print(s)
1442                    result = None
1443                except StarFile.StarDerivationError as s:
1444                    print(s)
1445                    result = None
1446                finally:
1447                    cifdata.provide_value = stored_setting
1448                if result is not None:
1449                    break
1450                #print("Function returned {!r}".format(result))
1451
1452        if result is None and allow_defaults:   # try defaults
1453            if def_val:
1454                result = self.change_type(key,def_val)
1455                default_result = True
1456            elif def_index_val:            #derive a default value
1457                index_vals = self[key]["_enumeration_default.index"]
1458                val_to_index = cifdata[def_index_val]     #what we are keying on
1459                if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
1460                    lcase_comp = True
1461                    index_vals = [a.lower() for a in index_vals]
1462                # Handle loops
1463                if isinstance(val_to_index,list):
1464                    if lcase_comp:
1465                        val_to_index = [a.lower() for a in val_to_index]
1466                    keypos = [index_vals.index(a) for a in val_to_index]
1467                    result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
1468                else:
1469                    if lcase_comp:
1470                        val_to_index = val_to_index.lower()
1471                    keypos = index_vals.index(val_to_index)   #value error if no such value available
1472                    result = self[key]["_enumeration_default.value"][keypos]
1473                    default_result = True   #flag that it must be extended
1474                result = self.change_type(key,result)
1475                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))
1476
1477        # read it in
1478        if result is None:   #can't do anything else
1479            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
1480            raise StarFile.StarDerivationError(start_key)
1481        is_looped = False
1482        if self[the_category].get('_definition.class','Set')=='Loop':
1483            is_looped = True
1484            if len(has_cat_names)>0:   #this category already exists
1485                if result is None or default_result: #need to create a list of values
1486                    loop_len = len(cifdata[has_cat_names[0]])
1487                    out_result = [result]*loop_len
1488                    result = out_result
1489            else:   #nothing exists in this category, we can't store this at all
1490                print('Resetting result %s for %s to null list as category is empty' % (key,result))
1491                result = []
1492
1493        # now try to insert the new information into the right place
1494        # find if items of this category already appear...
1495        # Never cache empty values
1496        if not (isinstance(result,list) and len(result)==0) and\
1497          store_value:
1498            if self[key].get("_definition.scope","Item")=='Item':
1499                if is_looped:
1500                    result = self.store_new_looped_value(key,cifdata,result,default_result)
1501                else:
1502                    result = self.store_new_unlooped_value(key,cifdata,result)
1503            else:
1504                self.store_new_cat_values(cifdata,result,the_category)
1505        return result
1506
1507    def store_new_looped_value(self,key,cifdata,result,default_result):
1508          """Store a looped value from the dREL system into a CifFile"""
1509          # try to change any matrices etc. to lists
1510          the_category = self[key]["_name.category_id"]
1511          out_result = result
1512          if result is not None and not default_result:
1513                  # find any numpy arrays
1514                  def conv_from_numpy(one_elem):
1515                      if not hasattr(one_elem,'dtype'):
1516                         if isinstance(one_elem,(list,tuple)):
1517                            return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
1518                         return one_elem
1519                      if one_elem.size > 1:   #so is not a float
1520                         return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
1521                      else:
1522                          try:
1523                            return one_elem.item(0)
1524                          except:
1525                            return one_elem
1526                  out_result = [conv_from_numpy(a) for a in result]
1527          # so out_result now contains a value suitable for storage
1528          cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
1529          has_cat_names = [a for a in cat_names if a in cifdata]
1530          print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
1531          if len(has_cat_names)>0:   #this category already exists
1532              cifdata[key] = out_result      #lengths must match or else!!
1533              cifdata.AddLoopName(has_cat_names[0],key)
1534          else:
1535              cifdata[key] = out_result
1536              cifdata.CreateLoop([key])
1537          print('Loop info:' + repr(cifdata.loops))
1538          return out_result
1539
1540    def store_new_unlooped_value(self,key,cifdata,result):
1541          """Store a single value from the dREL system"""
1542          if result is not None and hasattr(result,'dtype'):
1543              if result.size > 1:
1544                  out_result = StarFile.StarList(result.tolist())
1545                  cifdata[key] = out_result
1546              else:
1547                  cifdata[key] = result.item(0)
1548          else:
1549              cifdata[key] = result
1550          return result
1551
1552    def construct_category(self,category,cifdata,store_value=True):
1553        """Construct a category using DDLm attributes"""
1554        con_type = self[category].get('_category_construct_local.type',None)
1555        if con_type == None:
1556            return {}
1557        if con_type == 'Pullback' or con_type == 'Filter':
1558            morphisms  = self[category]['_category_construct_local.components']
1559            morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
1560            cats = [self[a]['_name.category_id'] for a in morphisms]
1561            cat_keys = [self[a]['_category.key_id'] for a in cats]
1562            cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
1563            if con_type == 'Filter':
1564                int_filter = self[category].get('_category_construct_local.integer_filter',None)
1565                text_filter = self[category].get('_category_construct_local.text_filter',None)
1566                if int_filter is not None:
1567                    morph_values.append([int(a) for a in int_filter])
1568                if text_filter is not None:
1569                    morph_values.append(text_filter)
1570                cat_values.append(range(len(morph_values[-1])))
1571            # create the mathematical product filtered by equality of dataname values
1572            pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
1573                            if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
1574            # now prepare for return
1575            if len(pullback_ids)==0:
1576                return {}
1577            newids = self[category]['_category_construct_local.new_ids']
1578            fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
1579            if con_type == 'Pullback':
1580                final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
1581                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
1582                final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
1583            elif con_type == 'Filter':   #simple filter
1584                final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
1585                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
1586            if store_value:
1587                self.store_new_cat_values(cifdata,final_results,category)
1588            return final_results
1589
    def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
        """Each of the categories in source_categories are pullbacks that include
        the target_category

        The key dataname of [[target_category]] and any datanames it shares with
        the source categories are filled in from the values that the source
        categories have in [[cifdata]]. Returns a dictionary of dataname to list
        of values, which is also stored in [[cifdata]] if [[store_value]] is set.
        Raises AssertionError if the collected lists differ in length and
        ValueError for a single-component source that is not a 'Filter'."""
        target_key = self[target_category]['_category.key_id']
        result = {target_key:[]}
        first_time = True
        # for each source category, determine which element goes to the target
        for sc in source_categories:
            components = self[sc]['_category_construct_local.components']
            comp_cats = [self[c]['_name.category_id'] for c in components]
            new_ids = self[sc]['_category_construct_local.new_ids']
            source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
            if len(components) == 2:  # not a filter
                # the new id at the same position as the target category holds
                # the target's key values
                element_pos = comp_cats.index(target_category)
                old_id = source_ids[element_pos]
                print('Using %s to populate %s' % (old_id,target_key))
                result[target_key].extend(cifdata[old_id])
                # project through all identical names
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
                # we only include keys that are common to all categories
                if first_time:
                    result.update(extra_result)
                else:
                    for k in extra_result.keys():
                        if k in result:
                            print('Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
            else:
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
                if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
                    result[target_key].extend(cifdata[source_ids[0]])
                    for k in extra_result.keys():
                        if k in result:
                            print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
                        else:
                            result[k]=extra_result[k]
                    # Bonus derivation if there is a singleton filter
                    if self[sc]['_category_construct_local.type'] == 'Filter':
                        int_filter = self[sc].get('_category_construct_local.integer_filter',None)
                        text_filter = self[sc].get('_category_construct_local.text_filter',None)
                        if int_filter is not None:
                            filter_values = int_filter
                        else:
                            filter_values = text_filter
                        # NOTE(review): filter_values is None when neither filter
                        # attribute is present, in which case len() below fails
                        if len(filter_values)==1:    #a singleton
                            # every row that passed a one-value filter must hold
                            # that value, so the filtered dataname can be filled in
                            extra_dataname = self[sc]['_category_construct_local.components'][0]
                            if int_filter is not None:
                                new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
                            else:
                                new_value = filter_values * len(cifdata[source_ids[0]])
                            if extra_dataname not in result:
                                result[extra_dataname] = new_value
                            else:
                                result[extra_dataname].extend(new_value)
                    else:
                        raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type'])
            first_time = False
        # check for sanity - all dataname lengths must be identical
        datalen = len(set([len(a) for a in result.values()]))
        if datalen != 1:
            raise AssertionError('Failed to construct equal-length category items,'+ repr(result))
        if store_value:
            print('Now storing ' + repr(result))
            self.store_new_cat_values(cifdata,result,target_category)
        return result
1656
1657    def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
1658        """Copy across datanames for which the from_category key equals [[key_vals]]"""
1659        result = {}
1660        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
1661        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
1662        can_project = s_names_in_cat & t_names_in_cat
1663        can_project -= set(skip_names)  #already dealt with
1664        source_key = self[from_category]['_category.key_id']
1665        print('Source dataname set: ' + repr(s_names_in_cat))
1666        print('Target dataname set: ' + repr(t_names_in_cat))
1667        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
1668        for project_name in can_project:
1669            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
1670            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
1671            if key_vals is None:
1672                try:
1673                    result[full_to_name] = cifdata[full_from_name]
1674                except StarFile.StarDerivationError:
1675                    pass
1676            else:
1677                all_key_vals = cifdata[source_key]
1678                filter_pos = [all_key_vals.index(a) for a in key_vals]
1679                try:
1680                    all_data_vals = cifdata[full_from_name]
1681                except StarFile.StarDerivationError:
1682                    pass
1683                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
1684        return result
1685
1686    def store_new_cat_values(self,cifdata,result,the_category):
1687        """Store the values in [[result]] into [[cifdata]]"""
1688        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
1689        double_names = [a for a in result.keys() if a in cifdata]
1690        if len(double_names)>0:
1691            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
1692            if set(already_present) != set(result.keys()):
1693                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
1694                return
1695            #check key values
1696            old_keys = set(cifdata[the_key])
1697            common_keys = old_keys & set(result[the_key])
1698            if len(common_keys)>0:
1699                print("Category %s not updated, key values in common:" % (common_keys))
1700                return
1701            #extend result values with old values
1702            for one_name,one_value in result.items():
1703                result[one_name].extend(cifdata[one_name])
1704        for one_name, one_value in result.items():
1705            try:
1706                self.store_new_looped_value(one_name,cifdata,one_value,False)
1707            except StarFile.StarError:
1708                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
1709        #put the key as the first item
1710        print('Fixing item order for {}'.format(repr(the_key)))
1711        for one_key in the_key:  #should only be one
1712            cifdata.ChangeItemOrder(one_key,0)
1713
1714
1715    def generate_default_packet(self,catname,catkey,keyvalue):
1716        """Return a StarPacket with items from ``catname`` and a key value
1717        of ``keyvalue``"""
1718        newpack = StarPacket()
1719        for na in self.names_in_cat(catname):
1720            def_val = self[na].get("_enumeration.default","")
1721            if def_val:
1722                final_val = self.change_type(na,def_val)
1723                newpack.extend(final_val)
1724                setattr(newpack,na,final_val)
1725        if len(newpack)>0:
1726            newpack.extend(keyvalue)
1727            setattr(newpack,catkey,keyvalue)
1728        return newpack
1729
1730
1731    def switch_numpy(self,to_val):
1732        pass
1733
1734    def change_type(self,itemname,inval):
1735        import numpy
1736        if inval == "?": return inval
1737        change_function = convert_type(self[itemname])
1738        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
1739            newval = list([change_function(a) for a in inval])
1740        else:
1741            newval = change_function(inval)
1742        return newval
1743
1744    def install_validation_functions(self):
1745        """Install the DDL-appropriate validation checks"""
1746        if self.diclang != 'DDLm':
1747          self.item_validation_funs = [
1748            self.validate_item_type,
1749            self.validate_item_esd,
1750            self.validate_item_enum,   # functions which check conformance
1751            self.validate_enum_range,
1752            self.validate_looping]
1753          self.loop_validation_funs = [
1754            self.validate_loop_membership,
1755            self.validate_loop_key,
1756            self.validate_loop_references]    # functions checking loop values
1757          self.global_validation_funs = [
1758            self.validate_exclusion,
1759            self.validate_parent,
1760            self.validate_child,
1761            self.validate_dependents,
1762            self.validate_uniqueness] # where we need to look at other values
1763          self.block_validation_funs = [  # where only a full block will do
1764            self.validate_mandatory_category]
1765          self.global_remove_validation_funs = [
1766            self.validate_remove_parent_child] # removal is quicker with special checks
1767        elif self.diclang == 'DDLm':
1768            self.item_validation_funs = [
1769                self.validate_item_enum,
1770                self.validate_item_esd_ddlm,
1771                ]
1772            self.loop_validation_funs = [
1773                self.validate_looping_ddlm,
1774                self.validate_loop_key_ddlm,
1775                self.validate_loop_membership
1776                ]
1777            self.global_validation_funs = []
1778            self.block_validation_funs = [
1779                self.check_mandatory_items,
1780                self.check_prohibited_items
1781                ]
1782            self.global_remove_validation_funs = []
1783        self.optimize = False        # default value
1784        self.done_parents = []
1785        self.done_children = []
1786        self.done_keys = []
1787
1788    def validate_item_type(self,item_name,item_value):
1789        def mymatch(m,a):
1790            res = m.match(a)
1791            if res != None: return res.group()
1792            else: return ""
1793        target_type = self[item_name].get(self.type_spec)
1794        if target_type == None:          # e.g. a category definition
1795            return {"result":True}                  # not restricted in any way
1796        matchexpr = self.typedic[target_type]
1797        item_values = listify(item_value)
1798        #for item in item_values:
1799            #print("Type match " + item_name + " " + item + ":",)
1800        #skip dots and question marks
1801        check_all = [a for a in item_values if a !="." and a != "?"]
1802        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
1803        if len(check_all)>0: return {"result":False,"bad_values":check_all}
1804        else: return {"result":True}
1805
1806    def decide(self,result_list):
1807        """Construct the return list"""
1808        if len(result_list)==0:
1809               return {"result":True}
1810        else:
1811               return {"result":False,"bad_values":result_list}
1812
1813    def validate_item_container(self, item_name,item_value):
1814        container_type = self[item_name]['_type.container']
1815        item_values = listify(item_value)
1816        if container_type == 'Single':
1817           okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
1818           return decide(okcheck)
1819        if container_type in ('Multiple','List'):
1820           okcheck = [a for a in item_values if not isinstance(a,StarList)]
1821           return decide(okcheck)
1822        if container_type == 'Array':    #A list with numerical values
1823           okcheck = [a for a in item_values if not isinstance(a,StarList)]
1824           first_check = decide(okcheck)
1825           if not first_check['result']: return first_check
1826           #num_check = [a for a in item_values if len([b for b in a if not isinstance
1827
1828    def validate_item_esd(self,item_name,item_value):
1829        if self[item_name].get(self.primitive_type) != 'numb':
1830            return {"result":None}
1831        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
1832        if can_esd: return {"result":True}         #must be OK!
1833        item_values = listify(item_value)
1834        check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
1835        if len(check_all)>0: return {"result":False,"bad_values":check_all}
1836        return {"result":True}
1837
1838    def validate_item_esd_ddlm(self,item_name,item_value):
1839        if self[item_name].get('self.primitive_type') not in \
1840        ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
1841            return {"result":None}
1842        can_esd = True
1843        if self[item_name].get('_type.purpose') != 'Measurand':
1844            can_esd = False
1845        item_values = listify(item_value)
1846        check_all = [get_number_with_esd(a)[1] for a in item_values]
1847        check_all = [v for v in check_all if (can_esd and v == None) or \
1848                 (not can_esd and v != None)]
1849        if len(check_all)>0: return {"result":False,"bad_values":check_all}
1850        return {"result":True}
1851
1852    def validate_enum_range(self,item_name,item_value):
1853        if "_item_range.minimum" not in self[item_name] and \
1854           "_item_range.maximum" not in self[item_name]:
1855            return {"result":None}
1856        minvals = self[item_name].get("_item_range.minimum",default = ["."])
1857        maxvals = self[item_name].get("_item_range.maximum",default = ["."])
1858        def makefloat(a):
1859            if a == ".": return a
1860            else: return float(a)
1861        maxvals = map(makefloat, maxvals)
1862        minvals = map(makefloat, minvals)
1863        rangelist = list(zip(minvals,maxvals))
1864        item_values = listify(item_value)
1865        def map_check(rangelist,item_value):
1866            if item_value == "?" or item_value == ".": return True
1867            iv,esd = get_number_with_esd(item_value)
1868            if iv==None: return None  #shouldn't happen as is numb type
1869            for lower,upper in rangelist:
1870                #check the minima
1871                if lower == ".": lower = iv - 1
1872                if upper == ".": upper = iv + 1
1873                if iv > lower and iv < upper: return True
1874                if upper == lower and iv == upper: return True
1875            # debug
1876            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
1877            return False
1878        check_all = [a for a in item_values if map_check(rangelist,a) != True]
1879        if len(check_all)>0: return {"result":False,"bad_values":check_all}
1880        else: return {"result":True}
1881
1882    def validate_item_enum(self,item_name,item_value):
1883        try:
1884            enum_list = self[item_name][self.enum_spec][:]
1885        except KeyError:
1886            return {"result":None}
1887        enum_list.append(".")   #default value
1888        enum_list.append("?")   #unknown
1889        item_values = listify(item_value)
1890        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
1891        check_all = [a for a in item_values if a not in enum_list]
1892        if len(check_all)>0: return {"result":False,"bad_values":check_all}
1893        else: return {"result":True}
1894
1895    def validate_looping(self,item_name,item_value):
1896        try:
1897            must_loop = self[item_name][self.must_loop_spec]
1898        except KeyError:
1899            return {"result":None}
1900        if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
1901            return {"result":False}      #this could be triggered
1902        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
1903            return {"result":False}
1904        return {"result":True}
1905
1906    def validate_looping_ddlm(self,loop_names):
1907        """Check that all names are loopable"""
1908        truly_loopy = self.get_final_cats(loop_names)
1909        if len(truly_loopy)<len(loop_names):  #some are bad
1910            categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
1911            not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
1912            return {"result":False,"bad_items":not_looped}
1913        return {"result":True}
1914
1915
1916    def validate_loop_membership(self,loop_names):
1917        final_cat = self.get_final_cats(loop_names)
1918        bad_items =  [a for a in final_cat if a != final_cat[0]]
1919        if len(bad_items)>0:
1920            return {"result":False,"bad_items":bad_items}
1921        else: return {"result":True}
1922
1923    def get_final_cats(self,loop_names):
1924        """Return a list of the uppermost parent categories for the loop_names. Names
1925        that are not from loopable categories are ignored."""
1926        try:
1927            categories = [self[a][self.cat_spec].lower() for a in loop_names]
1928        except KeyError:       #category is mandatory
1929            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
1930        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
1931        return [self.parent_lookup[a] for a in truly_looped]
1932
1933    def validate_loop_key(self,loop_names):
1934        category = self[loop_names[0]][self.cat_spec]
1935        # find any unique values which must be present
1936        key_spec = self[category].get(self.key_spec,[])
1937        for names_to_check in key_spec:
1938            if isinstance(names_to_check,unicode):   #only one
1939                names_to_check = [names_to_check]
1940            for loop_key in names_to_check:
1941                if loop_key not in loop_names:
1942                    #is this one of those dang implicit items?
1943                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
1944                        continue          #it is virtually there...
1945                    alternates = self.get_alternates(loop_key)
1946                    if alternates == []:
1947                        return {"result":False,"bad_items":loop_key}
1948                    for alt_names in alternates:
1949                        alt = [a for a in alt_names if a in loop_names]
1950                        if len(alt) == 0:
1951                            return {"result":False,"bad_items":loop_key}  # no alternates
1952        return {"result":True}
1953
1954    def validate_loop_key_ddlm(self,loop_names):
1955        """Make sure at least one of the necessary keys are available"""
1956        final_cats = self.get_final_cats(loop_names)
1957        if len(final_cats)>0:
1958            poss_keys = self.cat_key_table[final_cats[0]]
1959            found_keys = [a for a in poss_keys if a in loop_names]
1960            if len(found_keys)>0:
1961                return {"result":True}
1962            else:
1963                return {"result":False,"bad_items":poss_keys}
1964        else:
1965            return {"result":True}
1966
1967    def validate_loop_references(self,loop_names):
1968        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
1969        must_haves = [a for a in must_haves if a != None]
1970        # build a flat list.  For efficiency we don't remove duplicates,as
1971        # we expect no more than the order of 10 or 20 looped names.
1972        def flat_func(a,b):
1973            if isinstance(b,unicode):
1974               a.append(b)       #single name
1975            else:
1976               a.extend(b)       #list of names
1977            return a
1978        flat_mh = []
1979        [flat_func(flat_mh,a) for a in must_haves]
1980        group_mh = filter(lambda a:a[-1]=="_",flat_mh)
1981        single_mh = filter(lambda a:a[-1]!="_",flat_mh)
1982        res = [a for a in single_mh if a not in loop_names]
1983        def check_gr(s_item, name_list):
1984            nl = map(lambda a:a[:len(s_item)],name_list)
1985            if s_item in nl: return True
1986            return False
1987        res_g = [a for a in group_mh if check_gr(a,loop_names)]
1988        if len(res) == 0 and len(res_g) == 0: return {"result":True}
1989        # construct alternate list
1990        alternates = map(lambda a: (a,self.get_alternates(a)),res)
1991        alternates = [a for a in alternates if a[1] != []]
1992        # next line purely for error reporting
1993        missing_alts = [a[0] for a in alternates if a[1] == []]
1994        if len(alternates) != len(res):
1995           return {"result":False,"bad_items":missing_alts}   #short cut; at least one
1996                                                       #doesn't have an altern
1997        #loop over alternates
1998        for orig_name,alt_names in alternates:
1999             alt = [a for a in alt_names if a in loop_names]
2000             if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
2001        return {"result":True}        #found alternates
2002
2003    def get_alternates(self,main_name,exclusive_only=False):
2004        alternates = self[main_name].get(self.related_func,None)
2005        alt_names = []
2006        if alternates != None:
2007            alt_names =  self[main_name].get(self.related_item,None)
2008            if isinstance(alt_names,unicode):
2009                alt_names = [alt_names]
2010                alternates = [alternates]
2011            together = zip(alt_names,alternates)
2012            if exclusive_only:
2013                alt_names = [a for a in together if a[1]=="alternate_exclusive" \
2014                                             or a[1]=="replace"]
2015            else:
2016                alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
2017            alt_names = list([a[0] for a in alt_names])
2018        # now do the alias thing
2019        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
2020        alt_names.extend(alias_names)
2021        # print("Alternates for {}: {!r}".format(main_name, alt_names))
2022        return alt_names
2023
2024
2025    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
2026       alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
2027       item_name_list = [a.lower() for a in whole_block.keys()]
2028       item_name_list.extend([a.lower() for a in provisional_items.keys()])
2029       bad = [a for a in alternates if a in item_name_list]
2030       if len(bad)>0:
2031           print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
2032           return {"result":False,"bad_items":bad}
2033       else: return {"result":True}
2034
2035    # validate that parent exists and contains matching values
2036    def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
2037        parent_item = self[item_name].get(self.parent_spec)
2038        if not parent_item: return {"result":None}   #no parent specified
2039        if isinstance(parent_item,list):
2040            parent_item = parent_item[0]
2041        if self.optimize:
2042            if parent_item in self.done_parents:
2043                return {"result":None}
2044            else:
2045                self.done_parents.append(parent_item)
2046                print("Done parents %s" % repr(self.done_parents))
2047        # initialise parent/child values
2048        if isinstance(item_value,unicode):
2049            child_values = [item_value]
2050        else: child_values = item_value[:]    #copy for safety
2051        # track down the parent
2052        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
2053        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
2054        # we have collected all parent values into the global block - so no need to search
2055        # for them elsewhere.
2056        # print("Looking for {!r}".format(parent_item))
2057        parent_values = globals.get(parent_item)
2058        if not parent_values:
2059            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
2060        if not parent_values:
2061            # go for alternates
2062            namespace = whole_block.keys()
2063            namespace.extend(provisional_items.keys())
2064            namespace.extend(globals.keys())
2065            alt_names = filter_present(self.get_alternates(parent_item),namespace)
2066            if len(alt_names) == 0:
2067                if len([a for a in child_values if a != "." and a != "?"])>0:
2068                    return {"result":False,"parent":parent_item}#no parent available -> error
2069                else:
2070                    return {"result":None}       #maybe True is more appropriate??
2071            parent_item = alt_names[0]           #should never be more than one??
2072            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
2073            if not parent_values:   # check global block
2074                parent_values = globals.get(parent_item)
2075        if isinstance(parent_values,unicode):
2076            parent_values = [parent_values]
2077        #print("Checking parent %s against %s, values %r/%r" % (parent_item,
2078        #                                          item_name, parent_values, child_values))
2079        missing = self.check_parent_child(parent_values,child_values)
2080        if len(missing) > 0:
2081            return {"result":False,"bad_values":missing,"parent":parent_item}
2082        return {"result":True}
2083
2084    def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}):
2085        try:
2086            child_items = self[item_name][self.child_spec][:]  #copy
2087        except KeyError:
2088            return {"result":None}    #not relevant
2089        # special case for dictionaries  -> we check parents of children only
2090        if item_name in globals:  #dictionary so skip
2091            return {"result":None}
2092        if isinstance(child_items,unicode): # only one child
2093            child_items = [child_items]
2094        if isinstance(item_value,unicode): # single value
2095            parent_values = [item_value]
2096        else: parent_values = item_value[:]
2097        # expand child list with list of alternates
2098        for child_item in child_items[:]:
2099            child_items.extend(self.get_alternates(child_item))
2100        # now loop over the children
2101        for child_item in child_items:
2102            if self.optimize:
2103                if child_item in self.done_children:
2104                    return {"result":None}
2105                else:
2106                    self.done_children.append(child_item)
2107                    print("Done children %s" % repr(self.done_children))
2108            if child_item in provisional_items:
2109                child_values = provisional_items[child_item][:]
2110            elif child_item in whole_block:
2111                child_values = whole_block[child_item][:]
2112            else:  continue
2113            if isinstance(child_values,unicode):
2114                child_values = [child_values]
2115                # print("Checking child %s against %s, values %r/%r" % (child_item,
2116                #       item_name, child_values, parent_values))
2117            missing = self.check_parent_child(parent_values,child_values)
2118            if len(missing)>0:
2119                return {"result":False,"bad_values":missing,"child":child_item}
2120        return {"result":True}       #could mean that no child items present
2121
2122    #a generic checker: all child vals should appear in parent_vals
2123    def check_parent_child(self,parent_vals,child_vals):
2124        # shield ourselves from dots and question marks
2125        pv = parent_vals[:]
2126        pv.extend([".","?"])
2127        res =  [a for a in child_vals if a not in pv]
2128        #print("Missing: %s" % res)
2129        return res
2130
2131    def validate_remove_parent_child(self,item_name,whole_block):
2132        try:
2133            child_items = self[item_name][self.child_spec]
2134        except KeyError:
2135            return {"result":None}
2136        if isinstance(child_items,unicode): # only one child
2137            child_items = [child_items]
2138        for child_item in child_items:
2139            if child_item in whole_block:
2140                return {"result":False,"child":child_item}
2141        return {"result":True}
2142
2143    def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
2144        try:
2145            dep_items = self[item_name][self.dep_spec][:]
2146        except KeyError:
2147            return {"result":None}    #not relevant
2148        if isinstance(dep_items,unicode):
2149            dep_items = [dep_items]
2150        actual_names = whole_block.keys()
2151        actual_names.extend(prov.keys())
2152        actual_names.extend(globals.keys())
2153        missing = [a for a in dep_items if a not in actual_names]
2154        if len(missing) > 0:
2155            alternates = map(lambda a:[self.get_alternates(a),a],missing)
2156            # compact way to get a list of alternative items which are
2157            # present
2158            have_check = [(filter_present(b[0],actual_names),
2159                                       b[1]) for b in alternates]
2160            have_check = list([a for a in have_check if len(a[0])==0])
2161            if len(have_check) > 0:
2162                have_check = [a[1] for a in have_check]
2163                return {"result":False,"bad_items":have_check}
2164        return {"result":True}
2165
2166    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
2167                                                                  globals={}):
2168        category = self[item_name].get(self.cat_spec)
2169        if category == None:
2170            print("No category found for %s" % item_name)
2171            return {"result":None}
2172        # print("Category {!r} for item {}".format(category, item_name))
2173        # we make a copy in the following as we will be removing stuff later!
2174        unique_i = self[category].get("_category_key.name",[])[:]
2175        if isinstance(unique_i,unicode):
2176            unique_i = [unique_i]
2177        if item_name not in unique_i:       #no need to verify
2178            return {"result":None}
2179        if isinstance(item_value,unicode):  #not looped
2180            return {"result":None}
2181        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
2182        # check that we can't optimize by not doing this check
2183        if self.optimize:
2184            if unique_i in self.done_keys:
2185                return {"result":None}
2186            else:
2187                self.done_keys.append(unique_i)
2188        val_list = []
2189        # get the matching data from any other data items
2190        unique_i.remove(item_name)
2191        other_data = []
2192        if len(unique_i) > 0:            # i.e. do have others to think about
2193           for other_name in unique_i:
2194           # we look for the value first in the provisional dict, then the main block
2195           # the logic being that anything in the provisional dict overrides the
2196           # main block
2197               if other_name in provisional_items:
2198                   other_data.append(provisional_items[other_name])
2199               elif other_name in whole_block:
2200                   other_data.append(whole_block[other_name])
2201               elif self[other_name].get(self.must_exist_spec)=="implicit":
2202                   other_data.append([item_name]*len(item_value))  #placeholder
2203               else:
2204                   return {"result":False,"bad_items":other_name}#missing data name
2205        # ok, so we go through all of our values
2206        # this works by comparing lists of strings to one other, and
2207        # so could be fooled if you think that '1.' and '1' are
2208        # identical
2209        for i in range(len(item_value)):
2210            #print("Value no. %d" % i, end=" ")
2211            this_entry = item_value[i]
2212            for j in range(len(other_data)):
2213                this_entry = " ".join([this_entry,other_data[j][i]])
2214            #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
2215            if this_entry in val_list:
2216                return {"result":False,"bad_values":this_entry}
2217            val_list.append(this_entry)
2218        return {"result":True}
2219
2220
2221    def validate_mandatory_category(self,whole_block):
2222        mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"]
2223        if len(mand_cats) == 0:
2224            return {"result":True}
2225        # print("Mandatory categories - {!r}".format(mand_cats)
2226        # find which categories each of our datanames belongs to
2227        all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
2228        missing = set(mand_cats) - set(all_cats)
2229        if len(missing) > 0:
2230            return {"result":False,"bad_items":repr(missing)}
2231        return {"result":True}
2232
2233    def check_mandatory_items(self,whole_block,default_scope='Item'):
2234        """Return an error if any mandatory items are missing"""
2235        if len(self.scopes_mandatory)== 0: return {"result":True}
2236        if default_scope == 'Datablock':
2237            return {"result":True}     #is a data file
2238        scope = whole_block.get('_definition.scope',default_scope)
2239        if '_dictionary.title' in whole_block:
2240           scope = 'Dictionary'
2241        missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block])
2242        if len(missing)==0:
2243            return {"result":True}
2244        else:
2245            return {"result":False,"bad_items":missing}
2246
2247    def check_prohibited_items(self,whole_block,default_scope='Item'):
2248        """Return an error if any prohibited items are present"""
2249        if len(self.scopes_naughty)== 0: return {"result":True}
2250        if default_scope == 'Datablock':
2251            return {"result":True}     #is a data file
2252        scope = whole_block.get('_definition.scope',default_scope)
2253        if '_dictionary.title' in whole_block:
2254           scope = 'Dictionary'
2255        present = list([a for a in self.scopes_naughty[scope] if a in whole_block])
2256        if len(present)==0:
2257            return {"result":True}
2258        else:
2259            return {"result":False,"bad_items":present}
2260
2261
2262    def run_item_validation(self,item_name,item_value):
2263        return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])}
2264
2265    def run_loop_validation(self,loop_names):
2266        return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])}
2267
2268    def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
2269        results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs])
2270        return {item_name:results}
2271
2272    def run_block_validation(self,whole_block,block_scope='Item'):
2273        results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs])
2274        # fix up the return values
2275        return {"whole_block":results}
2276
2277    def optimize_on(self):
2278        self.optimize = True
2279        self.done_keys = []
2280        self.done_children = []
2281        self.done_parents = []
2282
2283    def optimize_off(self):
2284        self.optimize = False
2285        self.done_keys = []
2286        self.done_children = []
2287        self.done_parents = []
2288
2289
2290
2291class ValidCifBlock(CifBlock):
2292    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
2293    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
2294    `CifBlock`.
2295
2296    ## Initialisation
2297
2298    * `dic` is a `CifDic` object to be used for validation.
2299
2300    """
2301    def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
2302        CifBlock.__init__(self,*args,**kwords)
2303        if dic and diclist:
2304            print("Warning: diclist argument ignored when initialising ValidCifBlock")
2305        if isinstance(dic,CifDic):
2306            self.fulldic = dic
2307        else:
2308            raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument")
2309        if len(diclist)==0 and not dic:
2310            raise ValidCifError( "At least one dictionary must be specified")
2311        if diclist and not dic:
2312            self.fulldic = merge_dic(diclist,mergemode)
2313        if not self.run_data_checks()[0]:
2314            raise ValidCifError( self.report())
2315
2316    def run_data_checks(self,verbose=False):
2317        self.v_result = {}
2318        self.fulldic.optimize_on()
2319        for dataname in self.keys():
2320            update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
2321            update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
2322        for loop_names in self.loops.values():
2323            update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
2324        # now run block-level checks
2325        update_value(self.v_result,self.fulldic.run_block_validation(self))
2326        # return false and list of baddies if anything didn't match
2327        self.fulldic.optimize_off()
2328        all_keys = list(self.v_result.keys()) #dictionary will change
2329        for test_key in all_keys:
2330            #print("%s: %r" % (test_key, self.v_result[test_key]))
2331            self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
2332            if len(self.v_result[test_key]) == 0:
2333                del self.v_result[test_key]
2334        isvalid = len(self.v_result)==0
2335        #if not isvalid:
2336        #    print("Baddies: {!r}".format(self.v_result))
2337        return isvalid,self.v_result
2338
2339    def single_item_check(self,item_name,item_value):
2340        #self.match_single_item(item_name)
2341        if item_name not in self.fulldic:
2342            result = {item_name:[]}
2343        else:
2344            result = self.fulldic.run_item_validation(item_name,item_value)
2345        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
2346        # if even one false one is found, this should trigger
2347        isvalid = (len(baddies) == 0)
2348        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
2349        return isvalid,baddies
2350
2351    def loop_item_check(self,loop_names):
2352        in_dic_names = list([a for a in loop_names if a in self.fulldic])
2353        if len(in_dic_names)==0:
2354            result = {loop_names[0]:[]}
2355        else:
2356            result = self.fulldic.run_loop_validation(in_dic_names)
2357        baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False])
2358        # if even one false one is found, this should trigger
2359        isvalid = (len(baddies) == 0)
2360        # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
2361        return isvalid,baddies
2362
2363    def global_item_check(self,item_name,item_value,provisional_items={}):
2364        if item_name not in self.fulldic:
2365            result = {item_name:[]}
2366        else:
2367            result = self.fulldic.run_global_validation(item_name,
2368               item_value,self,provisional_items = provisional_items)
2369        baddies = list([a for a in result[item_name] if a[1]["result"] is False])
2370        # if even one false one is found, this should trigger
2371        isvalid = (len(baddies) == 0)
2372        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
2373        return isvalid,baddies
2374
2375    def remove_global_item_check(self,item_name):
2376        if item_name not in self.fulldic:
2377            result = {item_name:[]}
2378        else:
2379            result = self.fulldic.run_remove_global_validation(item_name,self,False)
2380        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
2381        # if even one false one is found, this should trigger
2382        isvalid = (len(baddies) == 0)
2383        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
2384        return isvalid,baddies
2385
2386    def AddToLoop(self,dataname,loopdata):
2387        # single item checks
2388        paired_data = loopdata.items()
2389        for name,value in paired_data:
2390            valid,problems = self.single_item_check(name,value)
2391            self.report_if_invalid(valid,problems)
2392        # loop item checks; merge with current loop
2393        found = 0
2394        for aloop in self.block["loops"]:
2395            if dataname in aloop:
2396                loopnames = aloop.keys()
2397                for new_name in loopdata.keys():
2398                    if new_name not in loopnames: loopnames.append(new_name)
2399                valid,problems = self.looped_item_check(loopnames)
2400                self.report_if_invalid(valid,problems)
2401        prov_dict = loopdata.copy()
2402        for name,value in paired_data:
2403            del prov_dict[name]   # remove temporarily
2404            valid,problems = self.global_item_check(name,value,prov_dict)
2405            prov_dict[name] = value  # add back in
2406            self.report_if_invalid(valid,problems)
2407        CifBlock.AddToLoop(self,dataname,loopdata)
2408
2409    def AddCifItem(self,data):
2410        if isinstance(data[0],(unicode,str)):   # single item
2411            valid,problems = self.single_item_check(data[0],data[1])
2412            self.report_if_invalid(valid,problems,data[0])
2413            valid,problems = self.global_item_check(data[0],data[1])
2414            self.report_if_invalid(valid,problems,data[0])
2415        elif isinstance(data[0],tuple) or isinstance(data[0],list):
2416            paired_data = list(zip(data[0],data[1]))
2417            for name,value in paired_data:
2418                valid,problems = self.single_item_check(name,value)
2419                self.report_if_invalid(valid,problems,name)
2420            valid,problems = self.loop_item_check(data[0])
2421            self.report_if_invalid(valid,problems,data[0])
2422            prov_dict = {}            # for storing temporary items
2423            for name,value in paired_data: prov_dict[name]=value
2424            for name,value in paired_data:
2425                del prov_dict[name]   # remove temporarily
2426                valid,problems = self.global_item_check(name,value,prov_dict)
2427                prov_dict[name] = value  # add back in
2428                self.report_if_invalid(valid,problems,name)
2429        else:
2430            raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
2431        super(ValidCifBlock,self).AddCifItem(data)
2432
2433    def AddItem(self,key,value,**kwargs):
2434        """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
2435        valid,problems = self.single_item_check(key,value)
2436        self.report_if_invalid(valid,problems,key)
2437        valid,problems = self.global_item_check(key,value)
2438        self.report_if_invalid(valid,problems,key)
2439        super(ValidCifBlock,self).AddItem(key,value,**kwargs)
2440
2441    # utility function
2442    def report_if_invalid(self,valid,bad_list,data_name):
2443        if not valid:
2444            bad_tests = [a[0] for a in bad_list]
2445            error_string = ",".join(bad_tests)
2446            error_string = repr(data_name) + " fails following validity checks: "  + error_string
2447            raise ValidCifError( error_string)
2448
2449    def __delitem__(self,key):
2450        # we don't need to run single item checks; we do need to run loop and
2451        # global checks.
2452        if key in self:
2453            try:
2454                loop_items = self.GetLoop(key)
2455            except TypeError:
2456                loop_items = []
2457            if loop_items:             #need to check loop conformance
2458                loop_names = [a[0] for a in loop_items if a[0] != key]
2459                valid,problems = self.loop_item_check(loop_names)
2460                self.report_if_invalid(valid,problems)
2461            valid,problems = self.remove_global_item_check(key)
2462            self.report_if_invalid(valid,problems)
2463        self.RemoveCifItem(key)
2464
2465
2466    def report(self):
2467       outstr = StringIO()
2468       outstr.write( "Validation results\n")
2469       outstr.write( "------------------\n")
2470       print("%d invalid items found\n" % len(self.v_result))
2471       for item_name,val_func_list in self.v_result.items():
2472           outstr.write("%s fails following tests:\n" % item_name)
2473           for val_func in val_func_list:
2474               outstr.write("\t%s\n")
2475       return outstr.getvalue()
2476
2477
class ValidCifFile(CifFile):
    """A CIF file for which all datablocks are valid.  Argument `dic` to
    initialisation specifies a `CifDic` object to use for validation."""
    def __init__(self,dic=None,diclist=None,mergemode="replace",*args,**kwargs):
        """Create the file and wrap every block in a `ValidCifBlock`.

        * `dic`: dictionary object used for validation.  It takes precedence
          over `diclist` when both are given, as in `Validate`; previously
          that combination left `bigdic` unset.
        * `diclist`: list of dictionaries merged with `merge_dic` using
          `mergemode`; used only when `dic` is not given.
        * remaining positional and keyword arguments are passed to
          `CifFile.__init__`.

        Raises `ValidCifError` if neither dictionary argument is given and
        the object has no `bigdic` attribute already.
        """
        if not diclist and not dic and not hasattr(self,'bigdic'):
            raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object")
        if dic:
            self.bigdic = dic
        elif diclist:     #merge here for speed
            self.bigdic = merge_dic(diclist,mergemode)
        CifFile.__init__(self,*args,**kwargs)
        for blockname in self.keys():
            self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)

    def NewBlock(self,blockname,blockcontents,**kwargs):
        """Add a block through `CifFile.NewBlock`, then wrap it in a `ValidCifBlock`."""
        CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
        # dictionary[blockname] is now a CifBlock object.  We
        # turn it into a ValidCifBlock object
        self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                         data=self.dictionary[blockname])
2498
2499
class ValidationResult:
    """Represents a validation result.  It is initialised with the
    `(valid_result, no_matches)` pair returned by `Validate`; both are
    dictionaries keyed by block name."""
    def __init__(self,results):
        """results is return value of validate function"""
        self.valid_result, self.no_matches = results

    def report(self,use_html=False):
        """Return string with human-readable description of validation result"""
        return validate_report((self.valid_result, self.no_matches),use_html)

    def is_valid(self,block_name=None):
        """Return True for valid CIF file, otherwise False.

        If `block_name` is given only that block is considered.  A block is
        valid when its result equals `(True, {})`.  With no blocks at all
        the result is True.
        """
        if block_name is not None:
            block_names = [block_name]
        else:
            # keys() works on Python 2 and 3; iterkeys() is Python 2 only
            block_names = list(self.valid_result.keys())
        return all(self.valid_result[name] == (True,{}) for name in block_names)

    def has_no_match_items(self,block_name=None):
        """Return True if some items are not found in dictionary.

        If `block_name` is given only that block is considered.  With no
        blocks at all the result is False.
        """
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = list(self.no_matches.keys())
        return any(bool(self.no_matches[name]) for name in block_names)
2537
2538
2539
def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Check that `ciffile` conforms to a CIF dictionary.

    The dictionary is the `CifDic` object `dic` or, if `dic` is missing, the
    result of merging the `CifDic` objects in `diclist` according to
    `mergemode`.  `isdic` indicates that `ciffile` is itself a CIF
    dictionary, meaning that save frames should be accessed for validation
    and that mandatory_category should be interpreted differently for DDL2.

    Items that are not in the dictionary are deleted from each block before
    it is checked.  Returns `(valid_result, no_matches)`, two dictionaries
    keyed by block name: the output of `run_data_checks` and the list of
    unmatched datanames.  Progress messages are printed.
    """
    check_file = ciffile if isinstance(ciffile,CifFile) else CifFile(ciffile)
    fulldic = dic if dic else merge_dic(diclist,mergemode)
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = check_file.keys()[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            for parent in fulldic.get_all("_item_linked.parent_name"):
                existing = listify(check_file.get(parent,[]))
                combined = check_file.get_all(parent)
                combined.extend(existing)
                if len(combined)>0:
                    check_file[parent] = combined
                print("Added %s (len %d)" % (parent,len(check_file[parent])))
    # now run the validations
    for block in check_file.keys():
        if not isdic:
            block_scope = 'Datablock'
        elif block == top_level:
            block_scope = 'Dictionary'
        else:
            block_scope = 'Item'
        unmatched = [a for a in check_file[block].keys() if a not in fulldic]
        no_matches[block] = unmatched
        # remove non-matching items
        print("Not matched: " + repr(unmatched))
        for nogood in unmatched:
            del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches
2585
def validate_report(val_result,use_html=False):
    """Format the `(valid_result, no_matches)` pair from `Validate` as a report.

    For every block the report states whether it is valid, lists the
    datanames that were not found in the dictionary, and then groups the
    failures by validation test, laid out with `html_error_report` when
    `use_html` is true and with `error_report` otherwise.  In HTML output
    with more than 10 blocks, valid blocks without unmatched items get no
    heading.

    A test name that has no entry in the explanation table gets a generic
    explanation instead of raising KeyError.  Note that each failure
    dictionary in `val_result` has an `"item_name"` key added to it.

    Returns the report as a string.
    """
    valid_result,no_matches = val_result
    # table of test name -> test message; the same for every block
    info_table = {
        'validate_item_type':
            "The following data items had badly formed values",
        'validate_item_esd':
            "The following data items should not have esds appended",
        'validate_enum_range':
            "The following data items have values outside permitted range",
        'validate_item_enum':
            "The following data items have values outside permitted set",
        'validate_looping':
            "The following data items violate looping constraints",
        'validate_loop_membership':
            "The following looped data names are of different categories to the first looped data name",
        'validate_loop_key':
            "A required dataname for this category is missing from the loop\n containing the dataname",
        'validate_loop_key_ddlm':
            "A loop key is missing for the category containing the dataname",
        'validate_loop_references':
            "A dataname required by the item is missing from the loop",
        'validate_parent':
            "A parent dataname is missing or contains different values",
        'validate_child':
            "A child dataname contains different values to the parent",
        'validate_uniqueness':
            "One or more data items do not take unique values",
        'validate_dependents':
            "A dataname required by the item is missing from the data block",
        'validate_exclusion':
            "Both dataname and exclusive alternates or aliases are present in data block",
        'validate_mandatory_category':
            "A required category is missing from this block",
        'check_mandatory_items':
            "A required data attribute is missing from this block",
        'check_prohibited_items':
            "A prohibited data attribute is present in this block"}
    outstr = StringIO()
    if use_html:
        outstr.write("<h2>Validation results</h2>")
    else:
        outstr.write( "Validation results\n")
        outstr.write( "------------------\n")
    suppress_valid = len(valid_result) > 10   #don't clutter with valid messages
    if suppress_valid and use_html:
        outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>")
    for block in valid_result.keys():
        block_result = valid_result[block]
        if block_result[0]:
            out_line = "Block '%s' is VALID" % block
        else:
            out_line = "Block '%s' is INVALID" % block
        if use_html:
            # a valid block gets a heading only if valid blocks are not being
            # suppressed or it has unmatched items; invalid blocks always do
            if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]:
                outstr.write( "<h3>%s</h3><p>" % out_line)
        else:
            outstr.write( "\n %s\n" % out_line)
        if len(no_matches[block])!= 0:
            if use_html:
                outstr.write( "<p>The following items were not found in the dictionary")
                outstr.write(" (note that this does not invalidate the data block):</p>")
                outstr.write("<p><table>\n")
                for it in no_matches[block]:
                    outstr.write("<tr><td>%s</td></tr>" % it)
                outstr.write("</table>\n")
            else:
                outstr.write( "\n The following items were not found in the dictionary:\n")
                outstr.write("Note that this does not invalidate the data block\n")
                for it in no_matches[block]:
                    outstr.write("%s\n" % it)
        # now organise our results by type of error, not data item...
        error_type_dic = {}
        for error_item, error_list in block_result[1].items():
            for func_name,bad_result in error_list:
                bad_result.update({"item_name":error_item})
                error_type_dic.setdefault(func_name,[]).append(bad_result)
        for test_name,test_results in error_type_dic.items():
            # fall back to a generic message for tests missing from the table
            explanation = info_table.get(test_name,
                "The following data items failed test '%s'" % test_name)
            if use_html:
                outstr.write(html_error_report(test_name,explanation,test_results))
            else:
                outstr.write(error_report(test_name,explanation,test_results))
                outstr.write("\n\n")
    return outstr.getvalue()
2675
2676# A function to lay out a single error report.  We are passed
2677# the name of the error (one of our validation functions), the
2678# explanation to print out, and a dictionary with the error
# information.  We print no more than 50 characters of each bad value.
2680
def error_report(error_name,error_explanation,error_dics):
    """Lay out the failures of one validation test as plain text.

    `error_explanation` is printed as the heading.  `error_dics` is a
    non-empty list of dictionaries, each with an `"item_name"` entry and
    optionally `"bad_values"`, `"bad_items"`, `"child"` and `"parent"`.
    The column headings are chosen from the keys of the first dictionary;
    each row prints whichever of those keys its own dictionary has.  Each
    bad value is shown as its repr cut to 50 characters.  `error_name` is
    not used.
    """
    title = "\n\n " + error_explanation + ":\n\n"
    first = error_dics[0]
    header_cells = ["%-32s" % "Item name"]
    for key,label in (("bad_values","Bad value(s)"),
                      ("bad_items","Bad dataname(s)"),
                      ("child","Child"),
                      ("parent","Parent")):
        if key in first:
            header_cells.append("%-20s" % label)
    header = "".join(header_cells) + "\n"
    rows = []
    for error in error_dics:
        cells = ["\n%-32s" % error["item_name"]]
        if "bad_values" in error:
            # the list of shortened reprs is formatted as a single value
            cells.append("%-20s" % [repr(a)[:50] for a in error["bad_values"]])
        for key in ("bad_items","child","parent"):
            if key in error:
                cells.append("%-20s" % repr(error[key]))
        rows.append("".join(cells))
    return title + header + "".join(rows)
2706
2707#  This lays out an HTML error report
2708
def _html_escape(value):
    """Format `value` with %s and escape `&`, `<` and `>` for use in HTML text."""
    text = "%s" % (value,)
    return text.replace("&","&amp;").replace("<","&lt;").replace(">","&gt;")

def html_error_report(error_name,error_explanation,error_dics,annotate=[]):
    """Lay out the failures of one validation test as an HTML table.

    `error_explanation` becomes the heading.  `error_dics` is a non-empty
    list of dictionaries, each with an `"item_name"` entry and optionally
    `"bad_values"`, `"bad_items"`, `"child"` and `"parent"`.  The column
    headings are chosen from the keys of the first dictionary; each row
    shows whichever of those keys its own dictionary has.

    Datanames and values come from the file being validated, so they are
    HTML-escaped before being placed in the table.  `error_name` and
    `annotate` are not used.
    """
    retstring = "<h4>" + error_explanation + ":</h4>"
    retstring = retstring + "<table cellpadding=5><tr>"
    headstring = "<th>Item name</th>"
    bodystring = ""
    if "bad_values" in error_dics[0]:
        headstring += "<th>Bad value(s)</th>"
    if "bad_items" in error_dics[0]:
        headstring += "<th>Bad dataname(s)</th>"
    if "child" in error_dics[0]:
        headstring += "<th>Child</th>"
    if "parent" in error_dics[0]:
        headstring += "<th>Parent</th>"
    headstring +="</tr>\n"
    for error in error_dics:
        bodystring += "<tr><td><tt>%s</tt></td>" % _html_escape(error["item_name"])
        if "bad_values" in error:
            bodystring += "<td>%s</td>" % _html_escape(error["bad_values"])
        if "bad_items" in error:
            bodystring += "<td><tt>%s</tt></td>" % _html_escape(error["bad_items"])
        if "child" in error:
            bodystring += "<td><tt>%s</tt></td>" % _html_escape(error["child"])
        if "parent" in error:
            bodystring += "<td><tt>%s</tt></td>" % _html_escape(error["parent"])
        bodystring += "</tr>\n"
    return retstring + headstring + bodystring + "</table>\n"
2735
def run_data_checks(check_block,fulldic,block_scope='Item'):
    """Run every dictionary check in `fulldic` on `check_block`.

    Item and global checks are run for each key of the block, loop checks
    for each entry of `check_block.loops`, and finally the block-level
    checks with the given `block_scope`.  Returns `(isvalid, failures)`,
    where `failures` maps each name to the entries whose `"result"` equals
    False; names with no failures are left out.
    """
    collected = {}
    for key in check_block.keys():
        update_value(collected, fulldic.run_item_validation(key,check_block[key]))
        update_value(collected, fulldic.run_global_validation(key,check_block[key],check_block))
    for loopnames in check_block.loops.values():
        update_value(collected, fulldic.run_loop_validation(loopnames))
    update_value(collected,fulldic.run_block_validation(check_block,block_scope=block_scope))
    # keep only the failed tests; if even one is found the block is invalid
    failures = {}
    for test_key,outcomes in collected.items():
        failed = [a for a in outcomes if a[1]["result"]==False]
        if len(failed) > 0:
            failures[test_key] = failed
    return len(failures)==0,failures
2754
2755
def get_number_with_esd(numstring):
    """Split a CIF number string into `(value, esd)` floats.

    The number may carry a bracketed esd and an exponent introduced by
    `e`, `E`, `d` or `D`, for example `1.5(3)` or `2D1`.  The esd is scaled
    by the number of digits after the decimal point and by the exponent.
    `esd` is None when no esd is present.  `(None, None)` is returned for
    `?`, for `.`, and for strings that do not start with a number.  Only
    the start of the string is matched.
    """
    # raw string: '\?' and '\.' are invalid escapes in an ordinary string literal
    numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    our_match = re.match(numb_re,numstring)
    if not our_match:
        return None,None
    # group 2: mantissa, 5: digits after the decimal point, 7: bracketed esd,
    # 8: exponent, 9: question mark, 10: dot
    base_num,dad,esd,exp,q,dot = our_match.group(2,5,7,8,9,10)
    if dot or q: return None,None     #a dot or question mark
    if exp:          #has exponent
        exp = exp.replace("d","e")     # mop up old fashioned numbers
        exp = exp.replace("D","e")
        base_num = base_num + exp
    base_num = float(base_num)
    # work out esd, if present.
    if esd:
        esd = float(esd[1:-1])    # no brackets
        if dad:                   # decimal point + digits
            esd = esd * (10 ** (-1* len(dad)))
        if exp:
            esd = esd * (10 ** (float(exp[1:])))
    return base_num,esd
2780
def float_with_esd(inval):
    """Convert `inval` to a float, dropping a bracketed esd from unicode strings.

    For a unicode string everything from the first `(` onwards is ignored;
    any other value is passed to `float` unchanged.
    """
    if isinstance(inval,unicode):
        bracket = inval.find("(")
        if bracket >= 0:
            inval = inval[:bracket]
    return float(inval)
2786
2787
2788
def convert_type(definition):
    """Return a function converting values to the type given by `definition`.

    `definition` supplies `_type.container`, `_type.contents` and,
    optionally, `_type.dimension`.  None is returned for the `Multiple`
    container and an identity function for unrecognised containers.
    """
    #extract the actual required type information
    container = definition['_type.container']
    dimension = definition.get('_type.dimension',StarFile.StarList([]))
    structure = interpret_structure(definition['_type.contents'])
    if container == 'Multiple': #no idea
        return None
    if container == 'Single':   #a single value to convert
        return convert_single_value(structure)
    # NOTE(review): convert_list_values and convert_matrix_values appear only
    # as commented-out code in this part of the file - confirm they are
    # defined elsewhere, otherwise the next two branches raise NameError.
    if container == 'List':   #lots of the same value
        return convert_list_values(structure,dimension)
    if container in ('Array','Matrix'): #numpy array
        return convert_matrix_values(structure)
    return lambda a:a    #unable to convert
2804
def convert_single_value(type_spec):
    """Return the function that converts a single item of type `type_spec`.

    `Real` gives `float_with_esd`, the integer-like types give `int`,
    `Complex` gives `complex`, `Imag` gives a function building a purely
    imaginary number, and `Code`, `Name` and `Tag` give a lower-casing
    function.  Any other type gives an identity function.
    """
    integer_types = ('Count','Integer','Index','Binary','Hexadecimal','Octal')
    caseless_types = ('Code','Name','Tag')
    if type_spec == 'Real':
        converter = float_with_esd
    elif type_spec in integer_types:
        converter = int
    elif type_spec == 'Complex':
        converter = complex
    elif type_spec == 'Imag':
        def converter(a):
            return complex(0,a)
    elif type_spec in caseless_types:  #case-insensitive -> lowercase
        def converter(a):
            return a.lower()
    else:                              #can't do anything numeric
        def converter(a):
            return a
    return converter
2818
2819#def convert_list_values(structure,dimension):
2820#    """Convert the values according to the element
2821#       structure given in [[structure]]"""
2822#    if isinstance(structure,(unicode,str)):   #simple repetition
2823#        func_def =  "element_convert = convert_single_value('%s')" % structure
2824#    else:
2825#        func_def =       "def element_convert(element):\n"
2826#        func_def +=      "   final_val = []\n"
2827#        for pos_no in range(len(structure)):
2828#            func_def +=  "   final_val.append("
2829#            type_spec = structure[pos_no]
2830#            if type_spec == 'Real':
2831#                cf = "float_with_esd("
2832#            elif type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
2833#                cf = 'int('
2834#            elif type_spec == 'Complex':
2835#                cf = 'complex('
2836#            elif type_spec == 'Imag':
2837#                cf = 'complex(0,'
2838#            elif type_spec in ('Code','Name','Tag'):
2839#                cf = '('
2840#            else: cf = ''
2841#            func_def += cf
2842#            func_def += "element[%d]" % pos_no
2843#            if "(" in cf: func_def +=")"
2844#            if type_spec in ('Code','Name','Tag'):
2845#                func_def +=".lower()"
2846#            func_def +=")\n"  # close append
2847#        func_def +=      "   return final_val\n"
2848#    print(func_def)
2849#    exec(func_def, globals()) #(re)defines element_convert in global namespace
2850#    if len(dimension)> 0 and int(dimension[0]) != 1:
2851#        return lambda a: list(map(element_convert,a))
2852#    else: return element_convert
2853#
2854#def convert_matrix_values(valtype):
2855#    """Convert a dREL String or Float valued List structure to a numpy matrix structure"""
2856#    # first convert to numpy array, then let numpy do the work
2857#    try: import numpy
2858#    except:
2859#        return lambda a:a   #cannot do it
2860#    func_def =     "def matrix_convert(a):\n"
2861#    func_def +=    "    import numpy\n"
2862#    func_def +=    "    p = numpy.array(a)\n"
2863#    if valtype == 'Real':
2864#        func_def+= "    return p.astype('float')\n"
2865#    elif valtype == 'Integer':
2866#        func_def +="    return p.astype('int')\n"
2867#    elif valtype == 'Complex':
2868#        func_def +="    return p.astype('complex')\n"
2869#    else:
2870#        raise ValueError('Unknown matrix value type')
2871#    exec(func_def,globals())  #matrix convert is defined
2872#    return matrix_convert
2873
def interpret_structure(struc_spec):
    """Interpret a DDLm structure specification.

    `struc_spec` is scanned and parsed with the `TypeContentsParser`
    module, and the result of the parser's `input` rule is returned.
    """
    from . import TypeContentsParser as t
    scanner = t.TypeParserScanner(struc_spec)
    parser = t.TypeParser(scanner)
    return parser.input()
2879
2880
2881# A utility function to append to item values rather than replace them
def update_value(base_dict,new_items):
    """Merge `new_items` into `base_dict`, appending rather than replacing.

    For a key already in `base_dict` the new values are added to the
    existing list with `extend`; for a new key the list object from
    `new_items` itself is stored.  `base_dict` is changed in place.
    """
    for key,values in new_items.items():
        if key not in base_dict:
            base_dict[key] = values
        else:
            base_dict[key].extend(values)
2888
2889#Transpose the list of lists passed to us
def transpose(base_list):
    """Transpose a list of lists.

    The result has one list per column of the first row; entry `i` holds
    element `i` of every row, in row order.
    """
    width = len(base_list[0])
    return [[row[i] for row in base_list] for i in range(width)]
2900
2901# listify strings - used surprisingly often
def listify(item):
    """Wrap a single string in a list; return anything else unchanged."""
    return [item] if isinstance(item,(unicode,str)) else item
2905
2906# given a list of search items, return a list of items
2907# actually contained in the given data block
def filter_present(namelist,datablocknames):
    """Return the names from `namelist` that are in `datablocknames`, in their original order."""
    present = []
    for name in namelist:
        if name in datablocknames:
            present.append(name)
    return present
2910
2911# Make an item immutable, used if we want a list to be a key
def make_immutable(values):
    """Turn a list of StarList values into a list of tuples.

    Only the first element is inspected: if it is a `StarList`, every
    element is converted to a tuple; otherwise `values` is returned as is.
    """
    if isinstance(values[0],StarList):
        return [tuple(a) for a in values]
    return values
2918
2919# merge ddl dictionaries.  We should be passed filenames or CifFile
2920# objects
def merge_dic(diclist,mergemode="replace",ddlspec=None):
    """Merge a list of DDL dictionaries and return the result as a `CifDic`.

    Each entry of `diclist` is either a `CifFile` or a string, which is
    opened with `CifFile`; anything else raises TypeError.  The later
    dictionaries are merged into the first one, left to right, using
    `mergemode`.  `ddlspec` is not used.
    """
    dic_as_cif_list = []
    for dic in diclist:
        if isinstance(dic,CifFile):
            dic_as_cif_list.append(dic)
        elif isinstance(dic,(unicode,str)):
            dic_as_cif_list.append(CifFile(dic))
        else:
            raise TypeError("Require list of CifFile names/objects for dictionary merging")
    # we now merge left to right
    basedic = dic_as_cif_list[0]
    others = dic_as_cif_list[1:]
    if "on_this_dictionary" in basedic:   #DDL1 style only
        for dic in others:
            basedic.merge(dic,mode=mergemode,match_att=["_name"])
    elif len(basedic.keys()) == 1:                     #One block: DDL2/m style
        # the two block lookups below are kept from the original code;
        # their results are not used
        old_block = basedic[basedic.keys()[0]]
        for dic in others:
            new_block = dic[dic.keys()[0]]
            basedic.merge(dic,mode=mergemode,
                          single_block=[basedic.keys()[0],dic.keys()[0]],
                          match_att=["_item.name"],match_function=find_parent)
    return CifDic(basedic)
2942
def find_parent(ddl2_def):
    """Return the parent dataname of a DDL2 definition block.

    Returns None if the block has no `_item.name`.  A single unicode
    `_item.name` is returned directly.  Otherwise the parent is the one
    name in `_item.name` that is not listed in `_item_linked.child_name`.
    `CifError` is raised if either linked list is missing or if there is
    not exactly one candidate.
    """
    if "_item.name" not in ddl2_def:
        return None
    item_names = ddl2_def["_item.name"]
    if isinstance(item_names,unicode):
        return item_names
    if "_item_linked.child_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no child_names")
    if "_item_linked.parent_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no parent_names")
    children = ddl2_def["_item_linked.child_name"]
    candidates = [a for a in item_names if a not in children]
    if len(candidates) != 1:
        raise CifError("Unable to find single unique parent data item")
    return candidates[0]
2956
2957
def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF'):
    """Read a CIF file and return it as a `CifFile` object.

    * `filename` may be a URL, a file path on the local system, or any
      object with a `read` method.

    * `grammar` chooses the CIF grammar variant.  `1.0` is the original
      1992 grammar and `1.1` is identical except for the exclusion of
      square brackets as the first characters in undelimited datanames.
      `2.0` reads files in the CIF2.0 standard and `STAR2` reads files
      according to the STAR2 publication.  Autodetection tries `2.0`,
      `1.1` and `1.0` in that order, which always succeeds for
      properly-formed CIF2.0 files.  Only Unicode characters in the basic
      multilingual plane are recognised.
      NOTE(review): earlier documentation said autodetection is selected by
      `None`, but the default here is `'auto'` - confirm which value(s)
      `StarFile.ReadStar` accepts.

    * `scantype` can be `standard` or `flex`.  `standard` is pure Python
      parsing, slower by a factor of 10 or so.  `flex` tokenises the input
      using fast C routines but is not available for CIF2/STAR2 files.
      Under Jython, native Java regular expressions provide a speedup
      regardless of this argument (CIF2 is not yet supported there).

    * `scoping` is only relevant where nested save frames are expected
      (STAR2 only).  `instance` makes nested save frames invisible outside
      their hierarchy, allowing duplicate save frame names in separate
      hierarchies.  `dictionary` makes all save frames within a data block
      visible to each other, so all save frame names must be unique.

    * `standard`: the only recognised value is `CIF`, which enforces a
      maximum length of 75 characters for datanames and has no other
      effect.
    """
    target = CifFile(scoping=scoping,standard=standard)
    return StarFile.ReadStar(filename,prepared=target,grammar=grammar,scantype=scantype)
2989
class CifLoopBlock(StarFile.LoopBlock):
    """A `StarFile.LoopBlock` under a CIF-specific name; it adds no behaviour of its own."""
    def __init__(self, data=(), **kwargs):
        """Pass `data` and any keyword arguments straight to the parent initialiser."""
        super(CifLoopBlock, self).__init__(data, **kwargs)
2993
2994#No documentation flags
2995
Note: See TracBrowser for help on using the repository browser.