", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
try:
if len(symbols)==len("".join(symbols)):
return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
else:
return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
except:
warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
SyntaxWarning, stacklevel=2)
# last resort, just use MatchFirst
return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
def dictOf( key, value ):
"""Helper to easily and clearly define a dictionary by specifying the respective patterns
for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
in the proper order. The key pattern can include delimiting markers or punctuation,
as long as they are suppressed, thereby leaving the significant key text. The value
pattern can include named results, so that the Dict results can include named token
fields.
"""
return Dict( ZeroOrMore( Group ( key + value ) ) )
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
r"""Helper to easily define string ranges for use in Word construction. Borrows
syntax from regexp '[]' string range definitions::
srange("[0-9]") -> "0123456789"
srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
The input string must be enclosed in []'s, and the returned string is the expanded
character set joined into a single string.
The values enclosed in the []'s may be::
a single character
an escaped character with a leading backslash (such as \- or \])
an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
an escaped octal character with a leading '\0' (\041, which is a '!' character)
a range of any of the above, separated by a dash ('a-z', etc.)
any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
"""
try:
return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
except:
return ""
def matchOnlyAtCol(n):
"""Helper method for defining parse actions that require matching at a specific
column in the input text.
"""
def verifyCol(strg,locn,toks):
if col(locn,strg) != n:
raise ParseException(strg,locn,"matched token not at column %d" % n)
return verifyCol
def replaceWith(replStr):
"""Helper method for common parse actions that simply return a literal value. Especially
useful when used with transformString().
"""
def _replFunc(*args):
return [replStr]
return _replFunc
def removeQuotes(s,l,t):
"""Helper parse action for removing quotation marks from parsed quoted strings.
To use, add this parse action to quoted string using::
quotedString.setParseAction( removeQuotes )
"""
return t[0][1:-1]
def upcaseTokens(s,l,t):
"""Helper parse action to convert tokens to upper case."""
return [ tt.upper() for tt in map(_ustr,t) ]
def downcaseTokens(s,l,t):
"""Helper parse action to convert tokens to lower case."""
return [ tt.lower() for tt in map(_ustr,t) ]
def keepOriginalText(s,startLoc,t):
"""Helper parse action to preserve original parsed text,
overriding any nested parse actions."""
try:
endloc = getTokensEndLoc()
except ParseException:
raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
del t[:]
t += ParseResults(s[startLoc:endloc])
return t
def getTokensEndLoc():
"""Method to be called from within a parse action to determine the end
location of the parsed tokens."""
import inspect
fstack = inspect.stack()
try:
# search up the stack (through intervening argument normalizers) for correct calling routine
for f in fstack[2:]:
if f[3] == "_parseNoCache":
endloc = f[0].f_locals["loc"]
return endloc
else:
raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
finally:
del fstack
def _makeTags(tagStr, xml):
"""Internal helper to construct opening and closing tag expressions, given a tag name"""
if isinstance(tagStr,basestring):
resname = tagStr
tagStr = Keyword(tagStr, caseless=not xml)
else:
resname = tagStr.name
tagAttrName = Word(alphas,alphanums+"_-:")
if (xml):
tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
openTag = Suppress("<") + tagStr + \
Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
else:
printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
openTag = Suppress("<") + tagStr + \
Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
Optional( Suppress("=") + tagAttrValue ) ))) + \
Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
closeTag = Combine(_L("") + tagStr + ">")
openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("%s>" % tagStr)
return openTag, closeTag
def makeHTMLTags(tagStr):
"""Helper to construct opening and closing tag expressions for HTML, given a tag name"""
return _makeTags( tagStr, False )
def makeXMLTags(tagStr):
"""Helper to construct opening and closing tag expressions for XML, given a tag name"""
return _makeTags( tagStr, True )
def withAttribute(*args,**attrDict):
"""Helper to create a validating parse action to be used with start tags created
with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
with a required attribute value, to avoid false matches on common tags such as
or .
Call withAttribute with a series of attribute names and values. Specify the list
of filter attributes names and values as:
- keyword arguments, as in (class="Customer",align="right"), or
- a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
For attribute names with a namespace prefix, you must use the second form. Attribute
names are matched insensitive to upper/lower case.
To verify that the attribute exists, but without specifying a value, pass
withAttribute.ANY_VALUE as the value.
"""
if args:
attrs = args[:]
else:
attrs = attrDict.items()
attrs = [(k,v) for k,v in attrs]
def pa(s,l,tokens):
for attrName,attrValue in attrs:
if attrName not in tokens:
raise ParseException(s,l,"no matching attribute " + attrName)
if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
(attrName, tokens[attrName], attrValue))
return pa
withAttribute.ANY_VALUE = object()
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
"""Helper method for constructing grammars of expressions made up of
operators working in a precedence hierarchy. Operators may be unary or
binary, left- or right-associative. Parse actions can also be attached
to operator expressions.
Parameters:
- baseExpr - expression representing the most basic element for the nested
- opList - list of tuples, one for each operator precedence level in the
expression grammar; each tuple is of the form
(opExpr, numTerms, rightLeftAssoc, parseAction), where:
- opExpr is the pyparsing expression for the operator;
may also be a string, which will be converted to a Literal;
if numTerms is 3, opExpr is a tuple of two expressions, for the
two operators separating the 3 terms
- numTerms is the number of terms for this operator (must
be 1, 2, or 3)
- rightLeftAssoc is the indicator whether the operator is
right or left associative, using the pyparsing-defined
constants opAssoc.RIGHT and opAssoc.LEFT.
- parseAction is the parse action to be associated with
expressions matching this operator expression (the
parse action tuple member may be omitted)
"""
ret = Forward()
lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
for i,operDef in enumerate(opList):
opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
if arity == 3:
if opExpr is None or len(opExpr) != 2:
raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
opExpr1, opExpr2 = opExpr
thisExpr = Forward()#.setName("expr%d" % i)
if rightLeftAssoc == opAssoc.LEFT:
if arity == 1:
matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
elif arity == 2:
if opExpr is not None:
matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
else:
matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
elif arity == 3:
matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
else:
raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
elif rightLeftAssoc == opAssoc.RIGHT:
if arity == 1:
# try to avoid LR with this extra test
if not isinstance(opExpr, Optional):
opExpr = Optional(opExpr)
matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
elif arity == 2:
if opExpr is not None:
matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
else:
matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
elif arity == 3:
matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
else:
raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
else:
raise ValueError("operator must indicate right or left associativity")
if pa:
matchExpr.setParseAction( pa )
thisExpr << ( matchExpr | lastExpr )
lastExpr = thisExpr
ret << lastExpr
return ret
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
"""Helper method for defining nested lists enclosed in opening and closing
delimiters ("(" and ")" are the default).
Parameters:
- opener - opening character for a nested list (default="("); can also be a pyparsing expression
- closer - closing character for a nested list (default=")"); can also be a pyparsing expression
- content - expression for items within the nested lists (default=None)
- ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
If an expression is not provided for the content argument, the nested
expression will capture all whitespace-delimited content between delimiters
as a list of separate values.
Use the ignoreExpr argument to define expressions that may contain
opening or closing characters that should not be treated as opening
or closing characters for nesting, such as quotedString or a comment
expression. Specify multiple expressions using an Or or MatchFirst.
The default is quotedString, but if no expressions are to be ignored,
then pass None for this argument.
"""
if opener == closer:
raise ValueError("opening and closing strings cannot be the same")
if content is None:
if isinstance(opener,basestring) and isinstance(closer,basestring):
if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr +
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
else:
raise ValueError("opening and closing arguments must be strings if no content expression is given")
ret = Forward()
if ignoreExpr is not None:
ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
else:
ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
"""Helper method for defining space-delimited indentation blocks, such as
those used to define block statements in Python source code.
Parameters:
- blockStatementExpr - expression defining syntax of statement that
is repeated within the indented block
- indentStack - list created by caller to manage indentation stack
(multiple statementWithIndentedBlock expressions within a single grammar
should share a common indentStack)
- indent - boolean indicating whether block must be indented beyond the
the current level; set to False for block of left-most statements
(default=True)
A valid block must contain at least one blockStatement.
"""
def checkPeerIndent(s,l,t):
if l >= len(s): return
curCol = col(l,s)
if curCol != indentStack[-1]:
if curCol > indentStack[-1]:
raise ParseFatalException(s,l,"illegal nesting")
raise ParseException(s,l,"not a peer entry")
def checkSubIndent(s,l,t):
curCol = col(l,s)
if curCol > indentStack[-1]:
indentStack.append( curCol )
else:
raise ParseException(s,l,"not a subentry")
def checkUnindent(s,l,t):
if l >= len(s): return
curCol = col(l,s)
if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
raise ParseException(s,l,"not an unindent")
indentStack.pop()
NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
INDENT = Empty() + Empty().setParseAction(checkSubIndent)
PEER = Empty().setParseAction(checkPeerIndent)
UNDENT = Empty().setParseAction(checkUnindent)
if indent:
smExpr = Group( Optional(NL) +
FollowedBy(blockStatementExpr) +
INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
else:
smExpr = Group( Optional(NL) +
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
blockStatementExpr.ignore("\\" + LineEnd())
return smExpr
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
htmlComment = Regex(r"")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist))
print ("tokens = " + str(tokens))
print ("tokens.columns = " + str(tokens.columns))
print ("tokens.tables = " + str(tokens.tables))
print (tokens.asXML("SQL",True))
except ParseBaseException,err:
print (teststring + "->")
print (err.line)
print (" "*(err.column-1) + "^")
print (err)
print()
selectToken = CaselessLiteral( "select" )
fromToken = CaselessLiteral( "from" )
ident = Word( alphas, alphanums + "_$" )
columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
simpleSQL = ( selectToken + \
( '*' | columnNameList ).setResultsName( "columns" ) + \
fromToken + \
tableNameList.setResultsName( "tables" ) )
test( "SELECT * from XYZZY, ABC" )
test( "select * from SYS.XYZZY" )
test( "Select A from Sys.dual" )
test( "Select AA,BB,CC from Sys.dual" )
test( "Select A, B, C from Sys.dual" )
test( "Select A, B, C from Sys.dual" )
test( "Xelect A, B, C from Sys.dual" )
test( "Select A, B, C frox Sys.dual" )
test( "Select" )
test( "Select ^^^ frox Sys.dual" )
test( "Select A, B, C from Sys.dual, Table2 " )
|