source: trunk/CifFile/yapps3_compiled_rt.py @ 4082

Last change on this file since 4082 was 4082, checked in by vondreele, 4 years ago
File size: 14.2 KB
Line 
1#
2# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
3# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
4#
5# This version of the Yapps 2 Runtime can be distributed under the
6# terms of the MIT open source license, either found in the LICENSE file
7# included with the Yapps distribution
8# <http://theory.stanford.edu/~amitp/yapps/> or at
9# <http://www.opensource.org/licenses/mit-license.php>
10#
11# Modified for PyCIFRW by JRH to allow external scanner
12#
13# To maximize python3/python2 compatibility
14from __future__ import print_function
15from __future__ import unicode_literals
16from __future__ import division
17from __future__ import absolute_import
18
19""" Detail of JRH modifications.
20
21The compiled module handles all token administration by itself, but
22does not deal with restrictions.  It also effectively removes the
23context-sensitivity of Yapps, as it ignores restrictions, but
24these restrictions turn out to be  unnecessary for CIF.
25
26Interestingly, the module scan function is never called directly
27from python.
28
29"""
30
31"""Run time libraries needed to run parsers generated by Yapps.
32
33This module defines parse-time exception classes, a scanner class, a
34base class for parsers produced by Yapps, and a context class that
35keeps track of the parse stack.
36
37"""
38
39# TODO: it should be possible to embed yappsrt into the generated
40# grammar to make a standalone module.
41
42import sys, re
43
44
# For normal installation this module is "CifFile.yapps3_compiled_rt"
# and StarScan is an extension module within the parent CifFile module.
#
# The outcome of this section is the module-level flag `have_star_scan`,
# which Scanner.__init__ consults before it uses the StarScan module.
if __name__.startswith('CifFile.'):
    try:
        # Relative import: StarScan is looked up in the same package.
        from . import StarScan
        have_star_scan = True
    except ImportError:
        # StarScan could not be imported; Scanner then falls back to the
        # pure-Python scanner (with a warning if "flex" was requested).
        have_star_scan = False
# Otherwise assume this is imported from the yapps3/yapps2.py script
# that is executed from Makefile to generate YappsStarParser sources.
else:
    # NOTE: these checks are assert statements, so they are skipped when
    # Python runs with -O.
    assert __name__ == 'yapps3_compiled_rt', "Unexpected module name."
    assert sys.argv[0].endswith('yapps2.py'), (
        "This should be reached only when running yapps2.py in Makefile.")
    have_star_scan = False
60
class SyntaxError(Exception):
    """Raised when the scanner or parser meets an unexpected token.

    Note that this class deliberately shadows the builtin of the same
    name inside this module.

    Attributes:
      charpos : character offset of the offending token, or a negative
                number when the position is unknown
      msg     : human-readable description of the problem
      context : parser Context in effect when the error occurred, or None
    """
    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        # The base class is initialised without arguments, so `args`
        # stays empty; all details live in the attributes below.
        super(SyntaxError, self).__init__()
        self.charpos, self.msg, self.context = charpos, msg, context

    def __str__(self):
        # The message is only shown when a position is known.
        if self.charpos >= 0:
            return 'SyntaxError@char%r(%s)' % (self.charpos, self.msg)
        return 'SyntaxError'
72
class NoMoreTokens(Exception):
    """Raised when a token is requested but the scanner has run out."""
76
class Scanner:
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(i) method is used to retrieve the
    i-th token.  It takes a restrict set that limits the set of tokens
    it is allowed to return.  In context sensitive mode, this restrict
    set guides the scanner.  In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    Two back ends are available.  The pure-Python one (interp_scan /
    interp_token) matches regular expressions against the input; the
    compiled one (compiled_scan / compiled_token) delegates to the
    StarScan extension module.  __init__ binds self.scan and self.token
    to whichever pair is in use.
    """

    def __init__(self, patterns, ignore, input, scantype="standard"):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : [terminal,...]
          input : string
          scantype : "flex" to request the StarScan extension scanner;
                     any other value selects the Python scanner

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.
        """
        self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...]
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.scantype = scantype
        self.first_line_number = 1
        if self.scantype == "flex" and have_star_scan:
            StarScan.prepare(input)
            self.scan = self.compiled_scan
            self.token = self.compiled_token
            # NOTE(review): Python only guarantees implicit invocation of
            # special methods that are defined on the type, so on Python 3
            # this per-instance __del__ is not called automatically and
            # StarScan.cleanup may never run.  Left as-is because moving it
            # to the class would change when the StarScan state is cleaned up.
            self.__del__ = StarScan.cleanup
        elif self.scantype == "flex":
            print("WARNING: using Python scanner although C scanner requested")
            self.scantype = "standard"
        if self.scantype != "flex":
            self.scan = self.interp_scan
            self.token = self.interp_token

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def get_token_pos(self):
        """Get the current token position in the input text."""
        return len(self.tokens)

    def get_char_pos(self):
        """Get the current char position in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Get the start position of token i (default: the last token).

        Returns 0 when nothing has been scanned yet.  If token i does not
        exist (for instance when only ignored text was consumed before an
        error), the current char position is returned instead of raising
        IndexError, so that error reporting cannot mask the real error.
        """
        if self.pos == 0:
            return 0
        if i is None:
            i = -1
        try:
            return self.tokens[i][0]
        except IndexError:
            return self.pos

    def get_line_number(self):
        """Get the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        return self.first_line_number + self.get_input_scanned().count('\n')

    def get_column_number(self):
        """Get the column number of the current position in the input text."""
        s = self.get_input_scanned()
        i = s.rfind('\n') # may be -1, but that's okay in this case
        return len(s) - (i+1)

    def get_input_scanned(self):
        """Get the portion of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Get the portion of the input that has not yet been tokenized."""
        return self.input[self.pos:]

    def interp_token(self, i, restrict=None):
        """Get the i'th token in the input.

        If i is one past the end, then scan for another token.

        Args:

        restrict : [token, ...] or None; if restrict is None, then any
        token is allowed.  You may call token(i) more than once.
        However, the restrict set may never be larger than what was
        passed in on the first call to token(i).

        Raises NoMoreTokens if token i is still unavailable after
        scanning, and NotImplementedError if restrict contains a token
        that was not allowed when token i was first scanned.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted.  This
            # invariant is needed to avoid ruining tokenization at
            # position i+1 and higher.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def compiled_token(self,i,restrict=0):
        """Get the i'th token from the StarScan extension scanner.

        The restrict argument is accepted for interface compatibility
        with interp_token but is ignored.  An IndexError from StarScan
        is translated into NoMoreTokens.
        """
        try:
            return StarScan.token(i)
        except IndexError:
            raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        output = ''
        if self.scantype != "flex":
            for t in self.tokens[-10:]:
                output = '%s\n  (@%s%s  =  %s' % (output,t[0],t[2],repr(t[3]))
        else:
            out_tokens = StarScan.last_ten()
            for t in out_tokens:
                output = '%s\n  (~line %s%s  =  %s' % (output,t[0],t[2],repr(t[3]))
        return output

    def interp_scan(self, restrict):
        """Scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions.

        Tokens named in self.ignore are consumed silently.  Raises
        SyntaxError when no accepted pattern matches at the current
        position.
        """
        # Prepare accepted pattern list
        if restrict:
            # only patterns in the 'restrict' parameter or in self.ignore
            # are accepted
            accepted_patterns = [(p_name, p_regexp)
                                 for p_name, p_regexp in self.patterns
                                 if p_name in restrict or p_name in self.ignore]
        else:
            # every pattern is good
            accepted_patterns = self.patterns
        # Keep looking for a token, ignoring any in self.ignore
        while True:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in accepted_patterns:
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos+best_match, best_pat,
                         self.input[self.pos:self.pos+best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match

    def compiled_scan(self,restrict):
        """Fetch the next token from the StarScan extension scanner.

        The token is appended to self.tokens and restrict to
        self.restrictions.  As in interp_scan, an empty or None restrict
        means any token is acceptable; otherwise SyntaxError is raised
        when the token name (element 2 of the token) is not in restrict.
        """
        token = StarScan.scan()
        print("Calling compiled scan, got %s" % repr(token))
        # Only enforce the restriction when one was actually given: an
        # empty restriction must not reject every token, and None is not
        # a container.
        if restrict and token[2] not in restrict:
            msg = "Trying to find one of " + ", ".join(restrict)
            raise SyntaxError(self.pos,msg)
        self.tokens.append(token)
        self.restrictions.append(restrict)
        return
267
268class Parser:
269    """Base class for Yapps-generated parsers.
270
271    """
272   
273    def __init__(self, scanner):
274        self._scanner = scanner
275        self._pos = 0
276       
277    def _peek(self, *types):
278        """Returns the token type for lookahead; if there are any args
279        then the list of args is the set of token types to allow"""
280        tok = self._scanner.token(self._pos, types)
281        return tok[2]
282       
283    def _scan(self, type):
284        """Returns the matched text, and moves to the next token"""
285        tok = self._scanner.token(self._pos, [type])
286        if tok[2] != type:
287            raise SyntaxError(tok[0], 'Trying to find '+type+' :'+ ' ,')
288        self._pos = 1 + self._pos
289        return tok[3]
290
class Context:
    """One frame of the parser's rule call stack.

    Each rule invocation creates a Context whose parent is the Context
    of the calling rule, giving a linked chain that is useful when
    reporting errors.
    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Record a rule invocation.

        Args:
        parent: Context object of the calling rule, or None
        scanner: Scanner object
        tokenpos: integer (scanner token position)
        rule: string (name of the rule)
        args: tuple listing parameters to the rule

        """
        self.args = args
        self.rule = rule
        self.tokenpos = tokenpos
        self.scanner = scanner
        self.parent = parent

    def __str__(self):
        # Render the chain outermost-first, e.g. "start > expr > term".
        prefix = str(self.parent) + ' > ' if self.parent else ''
        return prefix + self.rule
321
322#
323#  Note that this sort of error printout is useless with the
324#  compiled scanner
325#
326   
def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p',
    along with a second line with a single caret (^) at position p.

    Both lines are written to sys.stderr, each prefixed with '> '.
    Lines may be terminated by '\\n', '\\r' or '\\r\\n'.  A negative p
    is treated as position 0.
    """

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # A negative position (e.g. an unknown charpos of -1) would otherwise
    # be interpreted as an index from the end of the text by the slicing
    # and find() calls below.
    p = max(p, 0)

    # Now try printing part of the line
    window_start = max(p - 80, 0)
    text = text[window_start:p + 80]
    p = p - window_start

    # Strip to the left: cut after the LAST line break before p, whether
    # that is a '\n' or a '\r' (for '\r\n' the '\n' is the later one).
    before = text[:p]
    i = max(before.rfind('\n'), before.rfind('\r'))
    if i >= 0:
        p = p - i - 1
        text = text[i + 1:]

    # Strip to the right: cut at the FIRST line break at or after p.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars and put a 3-char ellipsis in front, so the
        # pointer moves 7 columns to the left.
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    print('> ', text, file=sys.stderr)
    print('> ', ' ' * p + '^', file=sys.stderr)
363   
def print_error(input, err, scanner):
    """Write a human-readable report for a SyntaxError to sys.stderr.

    The report starts with 'line:column: message', using the scanner's
    current position.  If the error carries no parser context, the
    offending input line is shown with a pointer at err.charpos;
    otherwise every rule on the context chain is listed, innermost
    first, each with the input line where that context's token starts.
    """
    # NOTE: this function assumes 80 columns :-(
    location = (scanner.get_line_number(), scanner.get_column_number(), err.msg)
    print('%d:%d: %s' % location, file=sys.stderr)

    frame = err.context
    if not frame:
        print_line_with_pointer(input, err.charpos)
        return

    # Walk outwards through the chain of parser contexts.
    while frame:
        # TODO: add line number
        heading = 'while parsing %s%s:' % (frame.rule, tuple(frame.args))
        print(heading, file=sys.stderr)
        token_start = frame.scanner.get_prev_char_pos(frame.tokenpos)
        print_line_with_pointer(input, token_start)
        frame = frame.parent
381
def wrap_error_reporter(parser, rule):
    """Run the named rule method of parser, reporting parse failures.

    Returns whatever the rule returns.  A SyntaxError or NoMoreTokens
    raised by the rule is reported on sys.stderr and swallowed, in which
    case None is returned.
    """
    try:
        result = getattr(parser, rule)()
    except SyntaxError as err:
        print_error(parser._scanner.input, err, parser._scanner)
    except NoMoreTokens:
        print('Could not complete parsing; stopped around here:', file=sys.stderr)
        print(parser._scanner, file=sys.stderr)
    else:
        return result
Note: See TracBrowser for help on using the repository browser.