source: trunk/CifFile/yapps3_compiled_rt.py @ 469

Last change on this file since 469 was 469, checked in by toby, 11 years ago

rework phase import

  • Property svn:executable set to *
File size: 12.8 KB
Line 
1#
2# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
3# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
4#
5# This version of the Yapps 2 Runtime can be distributed under the
6# terms of the MIT open source license, either found in the LICENSE file
7# included with the Yapps distribution
8# <http://theory.stanford.edu/~amitp/yapps/> or at
9# <http://www.opensource.org/licenses/mit-license.php>
10#
11# Modified for PyCIFRW by JRH to allow external scanner
12#
13""" Detail of JRH modifications.
14
15The compiled module handles all token administration by itself, but
16does not deal with restrictions.  It also effectively removes the
17context-sensitivity of Yapps, as it ignores restrictions, but
18these restrictions turn out to be  unnecessary for CIF.
19
20Interestingly, the module scan function is never called directly
21from python.
22
23"""
24
25"""Run time libraries needed to run parsers generated by Yapps.
26
27This module defines parse-time exception classes, a scanner class, a
28base class for parsers produced by Yapps, and a context class that
29keeps track of the parse stack.
30
31"""
32
33# TODO: it should be possible to embed yappsrt into the generated
34# grammar to make a standalone module.
35
36import sys, re
37try:
38    import StarScan
39    have_star_scan = True
40except ImportError:
41    have_star_scan = False
42
class SyntaxError(Exception):
    """Exception raised when an unexpected token is encountered.

    Attributes:
      charpos : character offset of the offending token, or a negative
                value when the position is not known
      msg     : human-readable description of the problem
      context : the Context active when the error occurred, or None
    """

    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        # The base class is deliberately initialised without arguments;
        # all detail is carried by the attributes below.
        Exception.__init__(self)
        self.charpos, self.msg, self.context = charpos, msg, context

    def __str__(self):
        # Without a known position there is nothing useful to add.
        return ('SyntaxError' if self.charpos < 0
                else 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg))
54
class NoMoreTokens(Exception):
    """Exception raised when a token is requested but none is available."""
58
class Scanner:
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(i) method is used to retrieve the
    i-th token.  It takes a restrict set that limits the set of tokens
    it is allowed to return.  In context sensitive mode, this restrict
    set guides the scanner.  In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    Two back ends are available, selected by 'scantype': the pure
    Python regex scanner ("standard") and the external StarScan module
    ("flex").  The constructor binds self.scan and self.token to the
    matching pair of methods.
    """

    def __init__(self, patterns, ignore, input, scantype="standard"):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : [terminal,...]
          input : string
          scantype : "standard" for the Python scanner, "flex" for the
                     StarScan scanner (falls back to "standard", with a
                     warning, when StarScan could not be imported)

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.
        """
        self.tokens = []  # [(begin char pos, end char pos, token name, matched text), ...]
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.scantype = scantype
        self.first_line_number = 1
        if self.scantype == "flex" and have_star_scan:
            StarScan.prepare(input)
            self.scan = self.compiled_scan
            self.token = self.compiled_token
            # NOTE(review): a __del__ stored on the instance is only
            # honoured for old-style classes; confirm cleanup still runs
            # if this class is ever made new-style / run on Python 3.
            self.__del__ = StarScan.cleanup
        elif self.scantype == "flex":
            # Same text the former print statement produced on stdout.
            sys.stdout.write("Warning: using Python scanner\n")
            self.scantype = "standard"
        if self.scantype != "flex":
            self.scan = self.interp_scan
            self.token = self.interp_token

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def get_token_pos(self):
        """Get the current token position in the input text."""
        return len(self.tokens)

    def get_char_pos(self):
        """Get the current char position in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Get the start position of token i (default: the last token).

        Returns 0 when nothing has been consumed yet, or when no token
        has been recorded (e.g. only ignored text was scanned before an
        error), so that error reporting cannot itself fail here.
        """
        if self.pos == 0 or not self.tokens:
            return 0
        if i is None:
            i = -1
        return self.tokens[i][0]

    def get_line_number(self):
        """Get the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        return self.first_line_number + self.get_input_scanned().count('\n')

    def get_column_number(self):
        """Get the column number of the current position in the input text."""
        s = self.get_input_scanned()
        i = s.rfind('\n')  # may be -1, but that's okay in this case
        return len(s) - (i + 1)

    def get_input_scanned(self):
        """Get the portion of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Get the portion of the input that has not yet been tokenized."""
        return self.input[self.pos:]

    def interp_token(self, i, restrict=None):
        """Get the i'th token in the input.

        If i is one past the end, then scan for another token.

        Args:

        restrict : [token, ...] or None; if restrict is None, then any
        token is allowed.  You may call token(i) more than once.
        However, the restrict set may never be larger than what was
        passed in on the first call to token(i).

        Raises NoMoreTokens when token i is not available even after
        scanning, and NotImplementedError when 'restrict' contains an
        entry that was not in the set used when token i was scanned.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted.  This
            # invariant is needed to avoid ruining tokenization at
            # position i+1 and higher.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def compiled_token(self, i, restrict=0):
        """Get the i'th token from StarScan; 'restrict' is ignored.

        Raises NoMoreTokens when StarScan reports an IndexError.
        """
        try:
            return StarScan.token(i)
        except IndexError:
            raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        if self.scantype != "flex":
            # First field is the token's starting character position.
            template = '\n  (@%s)  %s  =  %s'
            recent = self.tokens[-10:]
        else:
            # The "~line" label is carried over from the original format;
            # what StarScan puts in t[0] is not visible from here.
            template = '\n  (~line %s)  %s  =  %s'
            recent = StarScan.last_ten()
        return ''.join([template % (t[0], t[2], repr(t[3])) for t in recent])

    def interp_scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions

        Raises SyntaxError when no accepted pattern matches at the
        current position.
        """
        # Prepare accepted pattern list
        if restrict:
            # only patterns in the 'restrict' parameter or in self.ignore
            # are accepted
            accepted_patterns = [(p_name, p_regexp)
                                 for p_name, p_regexp in self.patterns
                                 if p_name in restrict or p_name in self.ignore]
        else:
            # every pattern is good
            accepted_patterns = self.patterns
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in accepted_patterns:
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of ' + ', '.join(restrict)
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos + best_match, best_pat,
                         self.input[self.pos:self.pos + best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match

    def compiled_scan(self, restrict):
        """Fetch the next token from StarScan and record it.

        A false 'restrict' (None, 0, empty sequence) means any token is
        acceptable, matching interp_scan.  Otherwise SyntaxError is
        raised when the token's name (token[2]) is not in 'restrict'.
        """
        token = StarScan.scan()
        # Debug trace retained from the original implementation.
        sys.stdout.write("Calling compiled scan, got %s\n" % repr(token))
        if restrict and token[2] not in restrict:
            msg = "Trying to find one of " + ", ".join(restrict)
            raise SyntaxError(self.pos, msg)
        self.tokens.append(token)
        self.restrictions.append(restrict)
        return
249
class Parser:
    """Base class for Yapps-generated parsers.

    Holds the scanner that supplies tokens and the index of the next
    token to be consumed.
    """

    def __init__(self, scanner):
        self._pos = 0
        self._scanner = scanner

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        return self._scanner.token(self._pos, types)[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token

        Raises SyntaxError, positioned at the token's first field, when
        the token found is not of the requested type.
        """
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            # NOTE(review): the message ends in ' : ,' with nothing after
            # it -- looks like a list was meant to follow; confirm.
            raise SyntaxError(tok[0], 'Trying to find ' + type + ' : ,')
        self._pos += 1
        return tok[3]
272
class Context:
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule.  The
    contexts can be used for debugging.

    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Create a new context.

        Args:
        parent: Context object or None
        scanner: Scanner object
        tokenpos: integer (scanner token position)
        rule: string (name of the rule)
        args: tuple listing parameters to the rule

        """
        self.parent, self.scanner = parent, scanner
        self.tokenpos, self.rule, self.args = tokenpos, rule, args

    def __str__(self):
        """Return the chain of rule names from the outermost context
        down to this one, joined by ' > '."""
        prefix = ''
        if self.parent:
            prefix = str(self.parent) + ' > '
        return prefix + self.rule
303
304#
305#  Note that this sort of error printout is useless with the
306#  compiled scanner
307#
308   
def _line_around(text, p):
    """Return (line, column) for the line of 'text' containing offset 'p'.

    'line' is at most a window of 80 characters either side of p, cut
    at the nearest line breaks ('\\n' or '\\r') and shortened from the
    left for display; 'column' is the offset of p within 'line'.
    """
    # A negative offset (e.g. an unknown position of -1) would slice from
    # the end of the text; treat it as the start of the input instead.
    p = max(p, 0)

    # Restrict attention to a window around p
    start = max(p - 80, 0)
    text = text[start:p + 80]
    p = p - start

    # Strip to the left: cut just after the LAST line break before p,
    # whichever of '\n' or '\r' it is (so "\r\n" is handled correctly).
    i = max(text.rfind('\n', 0, p), text.rfind('\r', 0, p))
    if i >= 0:
        p = p - i - 1
        text = text[i + 1:]

    # Strip to the right: cut at the FIRST line break at or after p.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars, replacing them with a 3-char ellipsis
        text = "..." + text[10:]
        p = p - 7

    return text, p


def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p',
    along with a second line with a single caret (^) at position p

    Output goes to sys.stderr.  A negative 'p' is treated as 0.
    """
    line, column = _line_around(text, p)

    # Now print the string, along with an indicator.  The '>  ' prefix
    # (two spaces) matches what the former print statements emitted.
    sys.stderr.write('>  ' + line + '\n')
    sys.stderr.write('>  ' + ' ' * column + '^\n')
345   
def print_error(input, err, scanner):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages.

    Parameters:
      input : the text that was being parsed
      err : the error being reported; its msg, context and charpos
            attributes are read
      scanner : scanner whose current position supplies the reported
                line and column numbers

    Everything is written to sys.stderr.
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    sys.stderr.write('%d:%d: %s\n' % (line_number, column_number, err.msg))

    context = err.context
    if not context:
        # No parse stack available: point at the error position itself.
        print_line_with_pointer(input, err.charpos)

    # Walk outwards through the enclosing rules.
    while context:
        # TODO: add line number
        sys.stderr.write('while parsing %s%s:\n' % (context.rule, tuple(context.args)))
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent
363
def wrap_error_reporter(parser, rule):
    """Run parser rule 'rule' and report parse failures on sys.stderr.

    Returns whatever the rule method returns.  If the rule raises
    SyntaxError or NoMoreTokens, a diagnostic is written to sys.stderr
    and None is returned instead of propagating the exception.
    """
    try:
        return getattr(parser, rule)()
    except SyntaxError as e:
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        # Formatting the scanner with %s lists its most recent tokens.
        sys.stderr.write('%s\n' % (parser._scanner,))
Note: See TracBrowser for help on using the repository browser.