Changeset 1001
- Timestamp:
- Jul 10, 2012 4:20:00 PM (11 years ago)
- Location:
- specdomain/trunk/src/specdomain
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
specdomain/trunk/src/specdomain/macros/test-battery.mac
r1000 r1001 233 233 global MULTI_IMGS # useful 8-ID's imm fileformat; currently not used 234 234 235 #: twice the ratio of circle's circumference to its diameter235 #: twice the ratio of a circle's circumference to its diameter 236 236 constant TWO_PI 6.283 237 237 local _newspaper #: don't expect to see new ones -
specdomain/trunk/src/specdomain/sphinxcontrib/specmacrofileparser.py
r994 r1001 12 12 """ 13 13 Construct a SPEC macro source code file parser for 14 use by the specdomain for Sphinx. 14 use by the specdomain for Sphinx. This parser locates 15 code blocks in the SPEC macro source code file across multiple lines. 15 16 16 17 :copyright: Copyright 2012 by BCDA, Advanced Photon Source, Argonne National Laboratory … … 20 21 import os 21 22 import re 22 23 from pprint import pprint 23 24 24 25 # http://www.txt2re.com/index-python.php3 … … 36 37 extended_comment_marker = r'\"{3}' 37 38 extended_comment_match = r'(' + extended_comment_marker + r')' 38 39 40 # TODO: handle "#: " indicating a description of a variable on the preceding line 39 macro_name = r'[a-zA-Z_][\w_]*' 40 macro_name_match = r'(' + macro_name + r')' 41 arglist_match = r'(' + match_all + r')' 42 non_greedy_filler_match = r'(' + non_greedy_filler + r')' 43 variable_name_match = r'(@?' + macro_name + r'\[?\]?)' 41 44 42 45 class SpecMacrofileParser: … … 55 58 56 59 An additional step would be to parse for: 57 * def (done)58 * cdef (done)59 * rdef (done)60 * global (done)61 * local (done)62 * constant (done)60 * def 61 * cdef 62 * rdef 63 * global 64 * local 65 * constant 63 66 * array 64 67 * ... 65 68 ''' 66 67 # consider using: docutils.statemachine here 68 states = ( # assume SPEC def macros cannot be nested 69 'global', # the level that provides the SPEC command prompt 70 'extended comment', # inside a multiline extended comment 71 'def macro', # inside a multiline def macro definition 72 'rdef macro', # inside a multiline rdef macro definition 73 'cdef macro', # inside a multiline cdef macro definition 74 'parsed', # parsing of file is complete 75 ) 76 69 77 70 def __init__(self, macrofile): 78 '''79 Constructor80 '''81 71 self.buf = None 82 72 self.findings = [] … … 85 75 self.parse_macro_file() 86 76 87 def read(self, filename): 88 """ 89 load the SPEC macro source code file into an internal buffer 77 def read(self, macrofile): 78 """ 79 load the SPEC macro source code file into an internal buffer. 80 Also remember the start and end position of each line. 90 81 91 82 :param str filename: name (with optional path) of SPEC macro file 92 83 (The path is relative to the ``.rst`` document.) 93 84 """ 94 if not os.path.exists(filename): 95 raise RuntimeError, "file not found: " + filename 96 self.filename = filename 97 self.buf = open(filename, 'r').read() 85 if not os.path.exists(macrofile): 86 raise RuntimeError, "file not found: " + macrofile 87 self.filename = macrofile 88 buf = open(macrofile, 'r').readlines() 89 offset = 0 90 lines = [] 91 for linenumber, line in enumerate(buf): 92 end = offset+len(line) 93 lines.append([linenumber+1, offset, end]) 94 offset = end 95 self.buf = ''.join(buf) 96 self.line_positions = lines 97 98 def std_read(self, macrofile): 99 """ 100 load the SPEC macro source code file into an internal buffer 101 102 :param str filename: name (with optional path) of SPEC macro file 103 (The path is relative to the ``.rst`` document.) 104 """ 105 if not os.path.exists(macrofile): 106 raise RuntimeError, "file not found: " + macrofile 107 self.filename = macrofile 108 self.buf = open(macrofile, 'r').read() 98 109 99 110 def parse_macro_file(self): 100 """ 101 parse the internal buffer 102 """ 103 line_number = 0 104 self.state = 'global' 105 self.state_stack = [] 106 for line in self.buf.split('\n'): 107 108 line_number += 1 109 if self.state not in self.states: 110 # this quickly points out a programmer error 111 msg = "unexpected parser state: %s, line %s" % (self.state, line_number) 112 raise RuntimeError, msg 113 114 if self.state == 'global': 115 for thing in ( 116 self._is_function_macro, 117 self._is_def_macro, 118 self._is_cdef_macro, 119 self._is_lgc_variable, 120 self._is_one_line_extended_comment, 121 self._is_multiline_start_extended_comment 122 ): 123 if thing(line, line_number): 124 break 125 elif self.state == 'extended comment': 126 if not self._is_multiline_end_extended_comment(line, line_number): 127 # multiline extended comment continues 128 self.ec['text'].append(line) 129 continue 130 elif self.state == 'def macro': 131 pass 132 elif self.state == 'cdef macro': 133 pass 134 elif self.state == 'rdef macro': 135 pass 136 137 if len(self.state_stack) > 0: 138 fmt = "encountered EOF while parsing %s, line %d, in state %s, stack=%s" 139 msg = fmt % (self.filename, line_number, self.state, self.state_stack) 140 #raise RuntimeWarning, msg 141 print msg 142 143 self.state = 'parsed' 144 145 lgc_variable_sig_re = re.compile(string_start 146 + non_greedy_whitespace 147 + r'(local|global|constant)' 148 + r'((?:,?\s*@?[\w.eE+-]+\[?\]?)*)' 149 + non_greedy_whitespace 150 + r'#' + non_greedy_filler 151 + string_end, 152 re.VERBOSE) 153 154 def _is_lgc_variable(self, line, line_number): 155 ''' local, global, or constant variable declaration ''' 156 m = self._search(self.lgc_variable_sig_re, line) 157 if m is None: 158 return False 159 160 objtype, args = self.lgc_variable_sig_re.match(line).groups() 161 pos = args.find('#') 162 if pos > -1: 163 args = args[:pos] 164 m['objtype'] = objtype 165 m['start_line'] = m['end_line'] = line_number 166 del m['start'], m['end'] 167 if objtype == 'constant': 168 if not len(args.split()) == 2: 169 print "line_number, args: ", line_number, args 170 var, _ = args.split() 171 m['name'] = var.rstrip(',') 172 self.findings.append(dict(m)) 173 else: 174 # TODO: consider not indexing "global" inside a def 175 # TODO: consider not indexing "local" at global level 176 # or leave these decisions for later, including some kind of analyzer 177 for var in args.split(): 178 m['name'] = var.rstrip(',') 179 self.findings.append(dict(m)) 180 # TODO: to what is this local? (remember the def it belongs to) 181 return True 182 183 extended_comment_block_sig_re = re.compile(string_start 184 + non_greedy_whitespace 185 + extended_comment_marker 186 + r'(' + non_greedy_filler + r')' 187 + extended_comment_marker 188 + non_greedy_filler 189 + string_end, 190 re.IGNORECASE|re.DOTALL|re.MULTILINE) 191 192 def _is_one_line_extended_comment(self, line, line_number): 193 m = self._search(self.extended_comment_block_sig_re, line) 194 if m is None: 195 return False 196 line = m['line'] 197 del m['start'], m['end'] 198 m['objtype'] = 'extended comment' 199 m['start_line'] = m['end_line'] = line_number 200 m['text'] = m['text'].strip() 201 self.findings.append(dict(m)) 202 return True 203 204 extended_comment_start_sig_re = re.compile(string_start 205 + non_greedy_whitespace 206 + extended_comment_match, 207 re.IGNORECASE|re.VERBOSE) 208 209 def _is_multiline_start_extended_comment(self, line, line_number): 210 m = self._search(self.extended_comment_start_sig_re, line) 211 if m is None: 212 return False 213 line = m['line'] 214 text = m['line'][m['end']:] 215 del m['start'], m['end'] 216 m['objtype'] = 'extended comment' 217 m['start_line'] = line_number 218 self.ec = dict(m) # container for extended comment data 219 self.ec['text'] = [text] 220 self.state_stack.append(self.state) 221 self.state = 'extended comment' 222 return True 223 224 extended_comment_end_sig_re = re.compile(non_greedy_whitespace 225 + extended_comment_match 226 + non_greedy_whitespace 227 + r'#' + non_greedy_filler 228 + string_end, 229 re.IGNORECASE|re.VERBOSE) 230 231 def _is_multiline_end_extended_comment(self, line, line_number): 232 m = self._search(self.extended_comment_end_sig_re, line) 233 if m is None: 234 return False 235 text = m['line'][:m['start']] 236 self.ec['text'].append(text) 237 self.ec['text'] = '\n'.join(self.ec['text']) 238 self.ec['end_line'] = line_number 239 self.findings.append(dict(self.ec)) 240 self.state = self.state_stack.pop() 241 del self.ec 242 return True 111 self.findings = [] 112 self.findings.extend(self.find_extended_comments()) 113 self.findings.extend(self.find_def_macro()) 114 vd = self.find_variable_descriptions() 115 if len(vd) > 0: 116 self.findings.extend(vd) 117 self.findings.extend(self.find_variables()) 118 # TODO: decide the parent for each item, expect all def are at global scope 119 # TODO: decide which macros and variables should not be documented 120 121 extended_comment_block_sig_re = re.compile( 122 string_start 123 + non_greedy_whitespace 124 + extended_comment_marker 125 + r'(' + non_greedy_filler + r')' 126 + extended_comment_marker 127 + non_greedy_filler 128 + string_end, 129 re.IGNORECASE|re.DOTALL|re.MULTILINE) 130 131 def find_extended_comments(self): 132 """ 133 parse the internal buffer for triple-quoted strings, possibly multiline 134 """ 135 items = [] 136 for mo in self.extended_comment_block_sig_re.finditer(self.buf): 137 start = self.find_line_pos(mo.start(1)) 138 end = self.find_line_pos(mo.end(1)) 139 text = mo.group(1) 140 items.append({ 141 'start_line': start, 142 'end_line': end, 143 'objtype': 'extended comment', 144 'text': text, 145 'parent': None, 146 }) 147 return items 148 149 variable_description_re = re.compile( 150 string_start 151 + non_greedy_filler 152 + r'#:' 153 + non_greedy_whitespace 154 + r'(' + non_greedy_filler + r')' 155 + non_greedy_whitespace 156 + string_end, 157 re.IGNORECASE|re.DOTALL|re.MULTILINE) 158 159 def find_variable_descriptions(self): 160 """ 161 parse the internal buffer for variable descriptions that look like:: 162 163 #: two-theta, the scattering angle 164 global tth 165 """ 166 items = [] 167 for mo in self.variable_description_re.finditer(self.buf): 168 start = self.find_line_pos(mo.start(1)) 169 end = self.find_line_pos(mo.end(1)) 170 items.append({ 171 'start_line': start, 172 'end_line': end, 173 'objtype': 'variable description', 174 'text': mo.group(1), 175 'parent': None, 176 }) 177 return items 178 179 lgc_variable_sig_re = re.compile( 180 r'' 181 + string_start 182 + non_greedy_whitespace 183 + r'(local|global|constant)' # 1: object type 184 + non_greedy_whitespace 185 + r'(' + non_greedy_filler + r')' # 2: too complicated to parse all at once 186 + string_end 187 , 188 re.DOTALL 189 |re.MULTILINE 190 ) 191 192 variable_name_re = re.compile( 193 variable_name_match, 194 re.IGNORECASE|re.DOTALL|re.MULTILINE 195 ) 196 197 def find_variables(self): 198 """ 199 parse the internal buffer for local, global, and constant variable declarations 200 """ 201 items = [] 202 for mo in self.lgc_variable_sig_re.finditer(self.buf): 203 start = self.find_line_pos(mo.start(1)) 204 end = self.find_line_pos(mo.end(1)) 205 objtype = mo.group(1) 206 content = mo.group(2) 207 p = content.find('#') 208 if p >= 0: # strip off any comment 209 content = content[:p] 210 content = re.sub('[,;]', ' ', content) # replace , or ; with blank space 211 if content.find('[') >= 0: 212 content = re.sub('\s*?\[', '[', content) # remove blank space before [ 213 for var in self.variable_name_re.finditer(content): 214 name = var.group(1) 215 if len(name) > 0: 216 items.append({ 217 'start_line': start, 218 'end_line': end, 219 'objtype': objtype, 220 'name': name, 221 'parent': None, 222 'text': 'FIX in find_variables(self):', 223 }) 224 return items 243 225 244 226 spec_macro_declaration_match_re = re.compile( 245 r'^' # line start 246 + r'\s*?' # optional blank space 247 + r'(r?def)' # 0: def_type (rdef | def) 248 + r'\s*?' # optional blank space 249 + r'([a-zA-Z_][\w_]*)' # 1: macro_name 250 + r'(.*?)' # 2: optional arguments 251 + r'(#.*?)?' # 3: optional comment 252 + r'$' # line end 253 ) 254 255 def _is_def_macro(self, line, line_number): 256 m = self._search(self.spec_macro_declaration_match_re, line) 257 if m is None: 258 return False 259 self.ec = dict(m) 260 del self.ec['text'] 261 m = self.spec_macro_declaration_match_re.match(line) 262 macrotype, name, args, comment = m.groups() 263 self.ec['start_line'] = line_number 264 self.ec['end_line'] = line_number # TODO: consider the multiline definition later 265 self.ec['objtype'] = macrotype 266 self.ec['name'] = name 267 self.ec['args'] = args 268 self.ec['comment'] = comment 269 self.findings.append(dict(self.ec)) 270 del self.ec 271 return True 272 273 spec_cdef_declaration_match_re = re.compile( 274 r'^' # line start 275 + r'.*?' # optional any kind of preceding stuff, was \s*? (optional blank space) 276 + r'(cdef)' # 0: cdef 277 + r'\(' # opening parenthesis 278 + r'(.*?)' # 1: args (anything between the parentheses) 279 + r'\)' # closing parenthesis 280 + r'.*?' # optional any kind of stuff 281 + r'(#.*?)?' # 2: optional comment with content 282 + r'$' # line end 283 ) 284 285 def _is_cdef_macro(self, line, line_number): 286 m = self._search(self.spec_cdef_declaration_match_re, line) 287 if m is None: 288 return False 289 self.ec = dict(m) 290 del self.ec['text'] 291 m = self.spec_cdef_declaration_match_re.match(line) 292 macrotype, args, comment = m.groups() 293 name = args.split(',')[0].strip('"') 294 self.ec['start_line'] = line_number 295 self.ec['end_line'] = line_number # TODO: consider the multiline definition later 296 self.ec['objtype'] = macrotype 297 self.ec['name'] = name 298 self.ec['args'] = args 299 self.ec['comment'] = comment 300 self.findings.append(dict(self.ec)) 301 del self.ec 302 return True 303 304 spec_function_declaration_match_re = re.compile( 305 r'^' # line start 306 + r'\s*?' # optional blank space 307 + r'(r?def)' # 0: def_type (rdef | def) 308 + r'\s*?' # optional blank space 309 + r'([a-zA-Z_][\w_]*)' # 1: function_name 310 + r'\s*?' # optional blank space 311 + r'\(' # opening parenthesis 312 + r'(.*?)' # 2: args (anything between the parentheses) 313 + r'\)' # closing parenthesis 314 + r'\s*?' # optional blank space 315 + r'\'' # open macro content 316 + r'(.*?)' # 3: content, optional 317 + r'(#.*?)?' # 4: optional comment 318 + r'$' # line end 319 ) 320 321 def _is_function_macro(self, line, line_number): 322 m = self._search(self.spec_function_declaration_match_re, line) 323 if m is None: 324 return False 325 self.ec = dict(m) 326 del self.ec['text'] 327 m = self.spec_function_declaration_match_re.match(line) 328 macrotype, name, args, content, comment = m.groups() 329 self.ec['start_line'] = line_number 330 self.ec['end_line'] = line_number # TODO: consider the multiline definition later 331 self.ec['objtype'] = 'function ' + macrotype 332 self.ec['name'] = name 333 self.ec['args'] = args 334 self.ec['content'] = content 335 self.ec['comment'] = comment 336 self.findings.append(dict(self.ec)) 337 del self.ec 338 return True 339 340 def _search(self, regexp, line): 341 '''regular expression search of line, returns a match as a dictionary or None''' 342 m = regexp.search(line) 343 if m is None: 344 return None 345 # TODO: define a parent key somehow 346 d = { 347 'start': m.start(1), 348 'end': m.end(1), 349 'text': m.group(1), 350 'line': line, 351 'filename': self.filename, 352 } 353 return d 354 355 def __str__(self): 356 s = [] 357 for r in self.findings: 358 s.append( '' ) 359 t = '%s %s %d %d %s' % ('.. ' + '*'*20, 360 r['objtype'], 361 r['start_line'], 362 r['end_line'], 363 '*'*20) 364 s.append( t ) 365 s.append( '' ) 366 s.append( r['text'] ) 367 return '\n'.join(s) 227 string_start 228 + r'\s*?' # optional blank space 229 + r'(r?def)' # 1: def_type (rdef | def) 230 + non_greedy_whitespace 231 + macro_name_match # 2: macro_name 232 + non_greedy_filler_match # 3: optional arguments 233 + r'\'\{?' # start body section 234 + non_greedy_filler_match # 4: body 235 + r'\}?\'' # end body section 236 + r'(#.*?)?' # 5: optional comment 237 + string_end, 238 re.IGNORECASE|re.DOTALL|re.MULTILINE) 239 240 args_match = re.compile( 241 r'\(' 242 + arglist_match # 1: argument list 243 + r'\)', 244 re.DOTALL) 245 246 def find_def_macro(self): 247 """ 248 parse the internal buffer for def and rdef macro declarations 249 """ 250 items = [] 251 for mo in self.spec_macro_declaration_match_re.finditer(self.buf): 252 objtype = mo.group(1) 253 start = self.find_line_pos(mo.start(1)) 254 end = self.find_line_pos(mo.end(4)) 255 args = mo.group(3) 256 if len(args)>2: 257 m = self.args_match.search(args) 258 if m is not None: 259 objtype = 'function ' + objtype 260 args = m.group(1) 261 items.append({ 262 'start_line': start, 263 'end_line': end, 264 'objtype': objtype, 265 'name': mo.group(2), 266 'args': args, 267 'body': mo.group(4), 268 'comment': mo.group(5), 269 'parent': None, 270 }) 271 return items 272 273 def find_line_pos(self, pos): 274 """ 275 find the line number that includes *pos* 276 277 :param int pos: position in the file 278 """ 279 # straight search 280 # TODO: optimize using search by bisection 281 linenumber = None 282 for linenumber, start, end in self.line_positions: 283 if pos >= start and pos < end: 284 break 285 return linenumber 368 286 369 287 def ReST(self): 370 288 """create the ReStructured Text from what has been found""" 371 if not self.state == 'parsed':372 raise RuntimeWarning, "state = %s, should be 'parsed'" % self.filename289 # if not self.state == 'parsed': 290 # raise RuntimeWarning, "state = %s, should be 'parsed'" % self.filename 373 291 return self._simple_ReST_renderer() 374 292 375 293 def _simple_ReST_renderer(self): 376 294 """create a simple ReStructured Text rendition of the findings""" 377 if not self.state == 'parsed':378 raise RuntimeWarning, "state = %s, should be 'parsed'" % self.filename295 # if not self.state == 'parsed': 296 # raise RuntimeWarning, "state = %s, should be 'parsed'" % self.filename 379 297 380 298 declarations = [] # variables and constants … … 424 342 declarations.append(r) 425 343 426 s += report_table('Variable Declarations (%s)' % self.filename, declarations, (' start_line', 'objtype', 'name', 'line',))427 s += report_table('Macro Declarations (%s)' % self.filename, macros, (' start_line', 'name', 'line',))428 s += report_table('Function Macro Declarations (%s)' % self.filename, functions )344 s += report_table('Variable Declarations (%s)' % self.filename, declarations, ('objtype', 'name', 'start_line', )) 345 s += report_table('Macro Declarations (%s)' % self.filename, macros, ('objtype', 'name', 'start_line', 'end_line')) 346 s += report_table('Function Macro Declarations (%s)' % self.filename, functions, ('objtype', 'name', 'start_line', 'end_line', 'args')) 429 347 #s += report_table('Findings from .mac File', self.findings, ('start_line', 'objtype', 'line',)) 430 348 … … 432 350 433 351 434 def report_table(title, itemlist, col_keys = (' start_line', 'line',)):352 def report_table(title, itemlist, col_keys = ('objtype', 'start_line', 'end_line', )): 435 353 """ 436 354 return the itemlist as a reST table … … 495 413 p = SpecMacrofileParser(filename) 496 414 print p.ReST() 415 pprint (p.findings)
Note: See TracChangeset
for help on using the changeset viewer.