Removing trailing whitespace in lex.py.
shoeffner committed Nov 18, 2018
1 parent d472650 commit 9ddc011
Showing 1 changed file with 40 additions and 44 deletions.
84 changes: 40 additions & 44 deletions src/ccg2xml/lex.py
@@ -11,19 +11,19 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#
# See the file COPYING for a complete copy of the LGPL.
#
#
#
# This module automatically constructs a lexical analysis module from regular
# expression rules defined in a user-defined module. The idea is essentially the same
# as that used in John Aycock's Spark framework, but the implementation works
@@ -102,9 +102,9 @@ def t_NUMBER(t):
name of the rule without the leading t_
t.value = The value of the lexeme.
t.lineno = The value of the line number where the token was encountered
For example, the t_NUMBER() rule above might be called with the following:
t.type = 'NUMBER'
t.value = '42'
t.lineno = 3
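
To make the docstring's example concrete, a minimal rule module for this lexer could look like the following sketch. It is purely illustrative and not part of this commit; the token names, patterns, and the t_COMMENT rule are invented for the example.

# Hypothetical rule module (illustration only, not part of lex.py).
tokens = ('NUMBER', 'PLUS', 'COMMENT')

t_PLUS   = r'\+'        # simple tokens may be given as plain regex strings
t_ignore = ' \t'        # characters skipped between tokens

def t_NUMBER(t):
    r'\d+'
    # The lexer fills in t.type ('NUMBER'), t.value (the matched text) and
    # t.lineno before calling this function; the rule may rewrite t.value.
    t.value = int(t.value)
    return t

def t_COMMENT(t):
    r'\#.*'
    # Returning nothing discards the token and scanning simply continues.
    pass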
@@ -182,7 +182,7 @@ def t_newline(t):
above, parsing modules can safely import 'plex' without having
to import the rule file or any additional imformation about the
scanner you have defined.
"""
"""

# -----------------------------------------------------------------------------

@@ -241,11 +241,11 @@ def __copy__(self):
c.lexerrorf = self.lexerrorf
c.lextokens = self.lextokens
c.lexignore = self.lexignore
c.debug = self.debug
c.debug = self.debug
c.lineno = self.lineno
c.optimize = self.optimize
c.token = c.realtoken
return c
return c

# ------------------------------------------------------------
# input() - Push a new string into the lexer
@@ -257,7 +257,7 @@ def input(self,s):
self.lexpos = 0
self.lexlen = len(s)
self.token = self.realtoken

# Change the token routine to point to realtoken()
global token
if token == self.errtoken:
@@ -268,7 +268,7 @@ def input(self,s):
# ------------------------------------------------------------
def errtoken(self):
raise RuntimeError, "No input string given with input()"

# ------------------------------------------------------------
# token() - Return the next token from the Lexer
#
@@ -282,7 +282,7 @@ def realtoken(self):
lexlen = self.lexlen
lexignore = self.lexignore
lexdata = self.lexdata

while lexpos < lexlen:
# This code provides some short-circuit code for whitespace, tabs, and other ignored characters
if lexdata[lexpos] in lexignore:
@@ -302,15 +302,15 @@ def realtoken(self):
if not func:
self.lexpos = lexpos
return tok

# If token is processed by a function, call it
self.lexpos = lexpos
newtok = func(tok)
self.lineno = tok.lineno # Update line number

# Every function must return a token, if nothing, we just move to next token
if not newtok: continue

# Verify type of the token. If not in the token map, raise an error
if not self.optimize:
if not self.lextokens.has_key(newtok.type):
@@ -345,7 +345,7 @@ def realtoken(self):
self.lexpos = lexpos + 1
return None


# -----------------------------------------------------------------------------
# validate_file()
#
@@ -405,7 +405,7 @@ def _read_lextab(lexer, fdict, module):
lexer.lexignore = lextab._lexignore
if lextab._lexerrorf:
lexer.lexerrorf = fdict[lextab._lexerrorf]

# -----------------------------------------------------------------------------
# lex(module)
#
@@ -420,7 +420,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab"):
lexer.debug = debug
lexer.optimize = optimize
global token,input

if module:
# User supplied a module object.
if isinstance(module, types.ModuleType):
@@ -432,7 +432,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab"):
ldict[i] = v
else:
raise ValueError,"Expected a module or instance"

else:
# No module given. We might be able to get information from the caller.
try:
@@ -446,14 +446,14 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab"):
if optimize and lextab:
try:
_read_lextab(lexer,ldict, lextab)
if not lexer.lexignore: lexer.lexignore = ""
if not lexer.lexignore: lexer.lexignore = ""
token = lexer.token
input = lexer.input
return lexer

except ImportError:
pass

# Get the tokens map
if (module and isinstance(module,types.InstanceType)):
tokens = getattr(module,"tokens",None)
@@ -462,7 +462,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab"):
tokens = ldict["tokens"]
except KeyError:
tokens = None

if not tokens:
raise SyntaxError,"lex: module does not define 'tokens'"
if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
@@ -477,7 +477,7 @@ def is_identifier(s):
for c in s:
if not (c.isalnum() or c == '_'): return 0
return 1

for n in tokens:
if not is_identifier(n):
print "lex: Bad token name '%s'" % n
@@ -487,14 +487,14 @@ def is_identifier(s):
lexer.lextokens[n] = None
else:
for n in tokens: lexer.lextokens[n] = None


if debug:
print "lex: tokens = '%s'" % lexer.lextokens.keys()

# Get a list of symbols with the t_ prefix
tsymbols = [f for f in ldict.keys() if f[:2] == 't_']

# Now build up a list of functions and a list of strings
fsymbols = [ ]
ssymbols = [ ]
@@ -506,20 +506,20 @@ def is_identifier(s):
else:
print "lex: %s not defined as a function or string" % f
error = 1

# Sort the functions by line number
fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))

# Sort the strings by regular expression length
ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))

# Check for non-empty symbols
if len(fsymbols) == 0 and len(ssymbols) == 0:
raise SyntaxError,"lex: no rules of the form t_rulename are defined."

# Add all of the rules defined with actions first
for f in fsymbols:

line = f.func_code.co_firstlineno
file = f.func_code.co_filename
files[file] = None
@@ -546,7 +546,7 @@ def is_identifier(s):
print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__)
error = 1
continue

if f.__name__ == 't_error':
lexer.lexerrorf = f
continue
@@ -565,7 +565,7 @@ def is_identifier(s):

# Okay. The regular expression seemed okay. Let's append it to the master regular
# expression we're building

if (regex): regex += "|"
regex += "(?P<%s>%s)" % (f.__name__,f.__doc__)
else:
@@ -577,13 +577,13 @@ def is_identifier(s):
if name == 't_ignore':
lexer.lexignore = r
continue

if not optimize:
if name == 't_error':
raise SyntaxError,"lex: Rule 't_error' must be defined as a function"
error = 1
continue

if not lexer.lextokens.has_key(name[2:]):
print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:])
error = 1
@@ -596,7 +596,7 @@ def is_identifier(s):
continue
if debug:
print "lex: Adding rule %s -> '%s'" % (name,r)

if regex: regex += "|"
regex += "(?P<%s>%s)" % (name,r)

@@ -622,7 +622,7 @@ def is_identifier(s):

# If a lextab was specified, we create a file containing the precomputed
# regular expression and index table

if lextab and optimize:
lt = open(lextab+".py","w")
lt.write("# %s.py. This file automatically created by PLY. Don't edit.\n" % lextab)
@@ -637,7 +637,7 @@ def is_identifier(s):
lt.write(" (None,%s),\n" % repr(t[1]))
else:
lt.write(" None,\n")

lt.write("]\n");
lt.write("_lextokens = %s\n" % repr(lexer.lextokens))
lt.write("_lexignore = %s\n" % repr(lexer.lexignore))
@@ -646,7 +646,7 @@ def is_identifier(s):
else:
lt.write("_lexerrorf = None\n")
lt.close()

except re.error,e:
print "lex: Fatal error. Unable to compile regular expression rules. %s" % e
error = 1
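
Putting the two optimize branches together, a caller that wants the precomputed table might do something like the sketch below (hypothetical, assuming the tokens list and t_* rules are defined in the calling module). The first run writes lextab.py as above; later runs import it through _read_lextab() and return early, skipping the regular-expression construction:

# Hypothetical optimized invocation (not part of this commit).
import lex
lexer = lex.lex(optimize=1, lextab="lextab")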
@@ -656,11 +656,11 @@ def is_identifier(s):
print "lex: Warning. no t_error rule is defined."

if not lexer.lexignore: lexer.lexignore = ""

# Create global versions of the token() and input() functions
token = lexer.token
input = lexer.input

return lexer

# -----------------------------------------------------------------------------
@@ -689,12 +689,8 @@ def runmain(lexer=None,data=None):
_token = lexer.token
else:
_token = token

while 1:
tok = _token()
if not tok: break
print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno)



