Files
iTop/core/oql/oql-lexer.plex

366 lines
8.7 KiB
Plaintext

<?php
// Copyright (C) 2010 Combodo SARL
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; version 3 of the License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* OQL syntax analyzer, to be used prior to run the lexical analyzer
*
* @author Erwan Taloc <erwan.taloc@combodo.com>
* @author Romain Quetiez <romain.quetiez@combodo.com>
* @author Denis Flaven <denis.flaven@combodo.com>
* @license http://www.opensource.org/licenses/gpl-3.0.html LGPL
*/
// Notes (from the source file: oql-lexer.plex) - Romain
//
// The strval rule is a little bit cryptic.
// This is due to both a bug in the lexer generator and the complexity of our need
// The rule means: either a quoted string with ", or a quoted string with '
// literal " (resp. ') must be escaped by a \
// \ must be escaped by an additional \
//
// Here are the issues and limitation found in the lexer generator:
// * Matching simple quotes is an issue, because regexp are not correctly escaped (and the ESC code is escaped itself)
// Workaround: insert '.chr(39).' which will be a real ' in the end
// * Matching an alternate regexp is an issue because you must specify "|^...."
// and the regexp parser will not accept that syntax
// Workaround: insert '.chr(94).' which will be a real ^
//
// Let's analyze an overview of the regexp, we have
// 1) The strval rule in the lexer definition
// /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
// 2) Becomes the php expression in the lexer
// (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
// $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
//
// To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
//
//
// Now, let's explain how the regexp has been designed.
// Here is a simplified version, dealing with simple quotes, and based on the assumption that the lexer generator has been fixed!
// The strval rule in the lexer definition
// /'([^\\']*(\\')*(\\\\)*)*'/
// This means anything containing \\ or \' or any other char but a standalone ' or \
// This means ' or \ could not be found without a preceding \
//
class OQLLexerRaw
{
protected $data; // input string
public $token; // token id
public $value; // token string representation
protected $line; // current line
protected $count; // current column
function __construct($data)
{
$this->data = $data;
$this->count = 0;
$this->line = 1;
}
/*!lex2php
%input $this->data
%counter $this->count
%token $this->token
%value $this->value
%line $this->line
%matchlongest 1
whitespace = /[ \t\n\r]+/
select = "SELECT"
from = "FROM"
as_alias = "AS"
where = "WHERE"
join = "JOIN"
on = "ON"
coma = ","
par_open = "("
par_close = ")"
math_div = "/"
math_mult = "*"
math_plus = "+"
math_minus = "-"
log_and = "AND"
log_or = "OR"
regexp = "REGEXP"
eq = "="
not_eq = "!="
gt = ">"
lt = "<"
ge = ">="
le = "<="
like = "LIKE"
not_like = "NOT LIKE"
in = "IN"
not_in = "NOT IN"
interval = "INTERVAL"
f_if = "IF"
f_elt = "ELT"
f_coalesce = "COALESCE"
f_isnull = "ISNULL"
f_concat = "CONCAT"
f_substr = "SUBSTR"
f_trim = "TRIM"
f_date = "DATE"
f_date_format = "DATE_FORMAT"
f_current_date = "CURRENT_DATE"
f_now = "NOW"
f_time = "TIME"
f_to_days = "TO_DAYS"
f_from_days = "FROM_DAYS"
f_year = "YEAR"
f_month = "MONTH"
f_day = "DAY"
f_hour = "HOUR"
f_minute = "MINUTE"
f_second = "SECOND"
f_date_add = "DATE_ADD"
f_date_sub = "DATE_SUB"
f_round = "ROUND"
f_floor = "FLOOR"
f_inet_aton = "INET_ATON"
f_inet_ntoa = "INET_NTOA"
numval = /[0-9]+|0x[0-9a-fA-F]+/
strval = /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
name = /([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/
varname = /:([_a-zA-Z][_a-zA-Z0-9]*->[_a-zA-Z][_a-zA-Z0-9]*|[_a-zA-Z][_a-zA-Z0-9]*)/
dot = "."
*/
/*!lex2php
whitespace {
return false;
}
select {
$this->token = OQLParser::SELECT;
}
from {
$this->token = OQLParser::FROM;
}
as_alias {
$this->token = OQLParser::AS_ALIAS;
}
where {
$this->token = OQLParser::WHERE;
}
join {
$this->token = OQLParser::JOIN;
}
on {
$this->token = OQLParser::ON;
}
math_div {
$this->token = OQLParser::MATH_DIV;
}
math_mult {
$this->token = OQLParser::MATH_MULT;
}
math_plus {
$this->token = OQLParser::MATH_PLUS;
}
math_minus {
$this->token = OQLParser::MATH_MINUS;
}
log_and {
$this->token = OQLParser::LOG_AND;
}
log_or {
$this->token = OQLParser::LOG_OR;
}
coma {
$this->token = OQLParser::COMA;
}
par_open {
$this->token = OQLParser::PAR_OPEN;
}
par_close {
$this->token = OQLParser::PAR_CLOSE;
}
regexp {
$this->token = OQLParser::REGEXP;
}
eq {
$this->token = OQLParser::EQ;
}
not_eq {
$this->token = OQLParser::NOT_EQ;
}
gt {
$this->token = OQLParser::GT;
}
lt {
$this->token = OQLParser::LT;
}
ge {
$this->token = OQLParser::GE;
}
le {
$this->token = OQLParser::LE;
}
like {
$this->token = OQLParser::LIKE;
}
not_like {
$this->token = OQLParser::NOT_LIKE;
}
in {
$this->token = OQLParser::IN;
}
not_in {
$this->token = OQLParser::NOT_IN;
}
interval {
$this->token = OQLParser::INTERVAL;
}
f_if {
$this->token = OQLParser::F_IF;
}
f_elt {
$this->token = OQLParser::F_ELT;
}
f_coalesce {
$this->token = OQLParser::F_COALESCE;
}
f_isnull {
$this->token = OQLParser::F_ISNULL;
}
f_concat {
$this->token = OQLParser::F_CONCAT;
}
f_substr {
$this->token = OQLParser::F_SUBSTR;
}
f_trim {
$this->token = OQLParser::F_TRIM;
}
f_date {
$this->token = OQLParser::F_DATE;
}
f_date_format {
$this->token = OQLParser::F_DATE_FORMAT;
}
f_current_date {
$this->token = OQLParser::F_CURRENT_DATE;
}
f_now {
$this->token = OQLParser::F_NOW;
}
f_time {
$this->token = OQLParser::F_TIME;
}
f_to_days {
$this->token = OQLParser::F_TO_DAYS;
}
f_from_days {
$this->token = OQLParser::F_FROM_DAYS;
}
f_year {
$this->token = OQLParser::F_YEAR;
}
f_month {
$this->token = OQLParser::F_MONTH;
}
f_day {
$this->token = OQLParser::F_DAY;
}
f_hour {
$this->token = OQLParser::F_HOUR;
}
f_minute {
$this->token = OQLParser::F_MINUTE;
}
f_second {
$this->token = OQLParser::F_SECOND;
}
f_date_add {
$this->token = OQLParser::F_DATE_ADD;
}
f_date_sub {
$this->token = OQLParser::F_DATE_SUB;
}
f_round {
$this->token = OQLParser::F_ROUND;
}
f_floor {
$this->token = OQLParser::F_FLOOR;
}
f_inet_aton {
$this->token = OQLParser::F_INET_ATON;
}
f_inet_ntoa {
$this->token = OQLParser::F_INET_NTOA;
}
numval {
$this->token = OQLParser::NUMVAL;
}
strval {
$this->token = OQLParser::STRVAL;
}
name {
$this->token = OQLParser::NAME;
}
varname {
$this->token = OQLParser::VARNAME;
}
dot {
$this->token = OQLParser::DOT;
}
*/
}
define('UNEXPECTED_INPUT_AT_LINE', 'Unexpected input at line');
class OQLLexerException extends OQLException
{
public function __construct($sInput, $iLine, $iCol, $sUnexpected)
{
parent::__construct("Syntax error", $sInput, $iLine, $iCol, $sUnexpected);
}
}
class OQLLexer extends OQLLexerRaw
{
public function getTokenPos()
{
return max(0, $this->count - strlen($this->value));
}
function yylex()
{
try
{
return parent::yylex();
}
catch (Exception $e)
{
$sMessage = $e->getMessage();
if (substr($sMessage, 0, strlen(UNEXPECTED_INPUT_AT_LINE)) == UNEXPECTED_INPUT_AT_LINE)
{
$sLineAndChar = substr($sMessage, strlen(UNEXPECTED_INPUT_AT_LINE));
if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
{
$iLine = $aMatches[1];
$sUnexpected = $aMatches[2];
throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
}
}
// Default: forward the exception
throw $e;
}
}
}
?>