tokenize
/usr/lib/python2.1/tokenize.py

Tokenization help for Python programs.
 
This module exports a function called 'tokenize()' that breaks a stream of
text into Python tokens.  It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF) and a "token-eater"
function which is called once for each token found.  The latter function is
passed the token type, a string containing the token, the starting and
ending (row, column) coordinates of the token, and the original line.  It is
designed to match the working of the Python tokenizer exactly, except that
it produces COMMENT tokens for comments and gives type OP for all operators.
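
A minimal sketch of this protocol (the file name is illustrative):

    import tokenize

    def eat(type, token, (srow, scol), (erow, ecol), line):
        # Called once per token; tok_name maps the numeric type
        # back to its symbolic name ('NAME', 'OP', 'COMMENT', ...).
        print tokenize.tok_name[type], repr(token), (srow, scol), (erow, ecol)

    f = open('example.py')               # any Python source file
    tokenize.tokenize(f.readline, eat)   # f.readline supplies the lines
    f.close()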

 
Modules
            
re
string

 
Classes
            
exceptions.Exception
    StopTokenizing
    TokenError

 
class StopTokenizing(exceptions.Exception)
    __getitem__(...) from exceptions.Exception
    __init__(...) from exceptions.Exception
    __str__(...) from exceptions.Exception

 
class TokenError(exceptions.Exception)
    __getitem__(...) from exceptions.Exception
    __init__(...) from exceptions.Exception
    __str__(...) from exceptions.Exception
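
Both are ordinary exception classes. tokenize() catches StopTokenizing
internally, so a token-eater may raise it to end tokenization early;
TokenError signals input that ends in the middle of a token, such as an
unterminated triple-quoted string. A sketch, with illustrative inputs:

    import tokenize
    from StringIO import StringIO

    def stop_after_first(type, token, start, end, line):
        print tokenize.tok_name[type], repr(token)
        raise tokenize.StopTokenizing    # swallowed by tokenize()

    tokenize.tokenize(StringIO('x = 1\n').readline, stop_after_first)

    try:
        tokenize.tokenize(StringIO('"""never closed\n').readline)
    except tokenize.TokenError, msg:
        print 'TokenError:', msg         # EOF in multi-line string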

 
Functions
            
any(*choices)
group(*choices)
maybe(*choices)
printtoken(type, token, (srow, scol), (erow, ecol), line)
tokenize(readline, tokeneater=<function printtoken>)
tokenize_loop(readline, tokeneater)
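
group(), any() and maybe() are the string combinators used to build the
regular expressions listed under Data: group() joins its arguments as
alternatives in parentheses, any() appends '*', and maybe() appends '?'.
For example:

    import tokenize

    print tokenize.group('a', 'b')       # (a|b)
    print tokenize.any('0', '1')         # (0|1)*
    print tokenize.maybe(r'[eE][-+]?')   # ([eE][-+]?)?

Pointfloat under Data, for instance, is group(r'\d+\.\d*', r'\.\d+')
followed by maybe(Exponent).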

 
Data
AMPER = 19
AMPEREQUAL = 42
BACKQUOTE = 25
Bracket = '[][(){}]'
CIRCUMFLEX = 33
CIRCUMFLEXEQUAL = 44
COLON = 11
COMMA = 12
COMMENT = 50
Comment = r'#[^\r\n]*'
ContStr = '([uU]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*(\'|\\\\\\r?\\n)|[uU]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))'
DEDENT = 6
DOT = 23
DOUBLESTAR = 36
DOUBLESTAREQUAL = 47
Decnumber = r'[1-9]\d*[lL]?'
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
ENDMARKER = 0
EQEQUAL = 28
EQUAL = 22
ERRORTOKEN = 49
Expfloat = r'[1-9]\d*[eE][-+]?\d+'
Exponent = r'[eE][-+]?\d+'
Floatnumber = r'((\d+\.\d*|\.\d+)([eE][-+]?\d+)?|[1-9]\d*[eE][-+]?\d+)'
Funny = r'((\*\*=?|>>=?|<<=?|<>|!=|[+\-*/%&|^=<>]=?|~)|[][(){}]|(\r?\n|[:;.,`]))'
GREATER = 21
GREATEREQUAL = 31
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
INDENT = 5
Ignore = r'[ \f\t]*(\\\r?\n[ \f\t]*)*(#[^\r\n]*)?'
Imagnumber = r'(0[jJ]|[1-9]\d*[jJ]|((\d+\.\d*|\.\d+)([eE][-+]?\d+)?|[1-9]\d*[eE][-+]?\d+)[jJ])'
Intnumber = r'(0[xX][\da-fA-F]*[lL]?|0[0-7]*[lL]?|[1-9]\d*[lL]?)'
LBRACE = 26
LEFTSHIFT = 34
LEFTSHIFTEQUAL = 45
LESS = 20
LESSEQUAL = 30
LPAR = 7
LSQB = 9
MINEQUAL = 38
MINUS = 15
NAME = 1
NEWLINE = 4
NL = 51
NOTEQUAL = 29
NT_OFFSET = 256
NUMBER = 2
N_TOKENS = 52
Name = r'[a-zA-Z_]\w*'
Number = r'((0[jJ]|[1-9]\d*[jJ]|((\d+\.\d*|\.\d+)([eE][-+]?...[xX][\da-fA-F]*[lL]?|0[0-7]*[lL]?|[1-9]\d*[lL]?))'
OP = 48
Octnumber = '0[0-7]*[lL]?'
Operator = r'(\*\*=?|>>=?|<<=?|<>|!=|[+\-*/%&|^=<>]=?|~)'
PERCENT = 24
PERCENTEQUAL = 41
PLUS = 14
PLUSEQUAL = 37
PlainToken = r'(((0[jJ]|[1-9]\d*[jJ]|((\d+\.\d*|\.\d+)([eE][-+]...?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*")|[a-zA-Z_]\w*)'
Pointfloat = r'(\d+\.\d*|\.\d+)([eE][-+]?\d+)?'
PseudoExtras = '(\\\\\\r?\\n|#[^\\r\\n]*|([uU]?[rR]?\'\'\'|[uU]?[rR]?"""))'
PseudoToken = '[ \\f\\t]*((\\\\\\r?\\n|#[^\\r\\n]*|([uU]?[rR]?\'\'\'|[uU]?...n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))|[a-zA-Z_]\\w*)'
RBRACE = 27
RIGHTSHIFT = 35
RIGHTSHIFTEQUAL = 46
RPAR = 8
RSQB = 10
SEMI = 13
SLASH = 17
SLASHEQUAL = 40
STAR = 16
STAREQUAL = 39
STRING = 3
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
Special = r'(\r?\n|[:;.,`])'
String = '([uU]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*\'|[uU]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*")'
TILDE = 32
Token = r'[ \f\t]*(\\\r?\n[ \f\t]*)*(#[^\r\n]*)?(((0[jJ]|[...?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*")|[a-zA-Z_]\w*)'
Triple = '([uU]?[rR]?\'\'\'|[uU]?[rR]?""")'
VBAR = 18
VBAREQUAL = 43
Whitespace = r'[ \f\t]*'
__all__ = ['AMPER', 'AMPEREQUAL', 'BACKQUOTE', 'CIRCUMFLEX', 'CIRCUMFLEXEQUAL', 'COLON', 'COMMA', 'DEDENT', 'DOT', 'DOUBLESTAR', 'DOUBLESTAREQUAL', 'ENDMARKER', 'EQEQUAL', 'EQUAL', 'ERRORTOKEN', 'GREATER', 'GREATEREQUAL', 'INDENT', 'ISEOF', 'ISNONTERMINAL', ...]
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
__file__ = '/usr/lib/python2.1/tokenize.pyc'
__name__ = 'tokenize'
double3prog = <SRE_Pattern object>
endprogs = {'"': <SRE_Pattern object>, '"""': <SRE_Pattern object>, "'": <SRE_Pattern object>, "'''": <SRE_Pattern object>, 'R': None, 'R"""': <SRE_Pattern object>, "R'''": <SRE_Pattern object>, 'U': None, 'U"""': <SRE_Pattern object>, "U'''": <SRE_Pattern object>, ...}
pseudoprog = <SRE_Pattern object>
single3prog = <SRE_Pattern object>
tabsize = 8
tok_name = {0: 'ENDMARKER', 1: 'NAME', 2: 'NUMBER', 3: 'STRING', 4: 'NEWLINE', 5: 'INDENT', 6: 'DEDENT', 7: 'LPAR', 8: 'RPAR', 9: 'LSQB', ...}
tokenprog = <SRE_Pattern object>
x = 'tok_name'
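
The integer codes mirror the standard token module; COMMENT and NL are
added on top of the imported set, which is why the docstring can promise
COMMENT tokens that the plain tokenizer never yields. tok_name inverts
the mapping, which is handy inside a token-eater:

    import tokenize

    print tokenize.tok_name[tokenize.NAME]     # NAME
    print tokenize.tok_name[tokenize.COMMENT]  # COMMENT
    print tokenize.tok_name[tokenize.NL]       # NL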

 
Author
Ka-Ping Yee <ping@lfw.org>

 
Credits
GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro