Integrated the lexer/parser build tools (Lexer=0.4.0, Parser=0.1.7)

SVN:trunk[3590]
This commit is contained in:
Romain Quetiez
2015-06-10 13:23:03 +00:00
parent d9fcd83370
commit 7a5bbd0613
30 changed files with 17020 additions and 96 deletions

View File

@@ -1,4 +0,0 @@
#!/bin/bash
# Run the PEAR LexerGenerator command-line tool on the lexer definition oql-lexer.plex.
php /usr/share/php/PHP/LexerGenerator/cli.php oql-lexer.plex
# Run the PEAR ParserGenerator command-line tool on the grammar file oql-parser.y.
php /usr/share/php/PHP/ParserGenerator/cli.php oql-parser.y

View File

@@ -1,3 +0,0 @@
rem Run the PEAR LexerGenerator command-line tool on the lexer definition oql-lexer.plex.
c:\itop\php-5.2.3\php.exe -q "C:\itop\PHP-5.2.3\PEAR\PHP\LexerGenerator\cli.php" oql-lexer.plex
rem Run the PEAR ParserGenerator command-line tool on the grammar file oql-parser.y.
c:\itop\php-5.2.3\php.exe -q "C:\itop\PHP-5.2.3\PEAR\PHP\ParserGenerator\cli.php" oql-parser.y
rem Keep the console window open so the tool output can be read.
pause

View File

@@ -0,0 +1,937 @@
<?php
/* Driver template for the PHP_ParserGenerator parser generator. (PHP port of LEMON)
*/
/**
 * Stores both the string representation of a token and any useful
 * meta-data associated with the token.
 *
 * Meta-data is stored as an array and is reachable through array syntax
 * on the token itself ($token['key'], $token[] = ...).
 */
class ParseyyToken implements ArrayAccess
{
    /**
     * String representation of the token.
     * @var string
     */
    public $string = '';
    /**
     * Meta-data attached to the token.
     * @var array
     */
    public $metadata = array();

    /**
     * Build a token from a string (plus optional meta-data) or copy another token.
     *
     * @param mixed $s token text, or a token to copy (string and meta-data)
     * @param mixed $m meta-data array, or a token whose meta-data is reused;
     *                 any other value leaves the meta-data empty
     */
    public function __construct($s, $m = array())
    {
        if ($s instanceof ParseyyToken) {
            $this->string = $s->string;
            $this->metadata = $s->metadata;
        } else {
            $this->string = (string) $s;
            if ($m instanceof ParseyyToken) {
                $this->metadata = $m->metadata;
            } elseif (is_array($m)) {
                $this->metadata = $m;
            }
        }
    }

    /**
     * @return string the token text
     */
    public function __toString()
    {
        return $this->string;
    }

    /**
     * @param mixed $offset meta-data key
     * @return bool true when the key is set to a non-null value
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->metadata[$offset]);
    }

    /**
     * @param mixed $offset meta-data key
     * @return mixed the stored value, or null when the key is absent
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        // Guard the lookup so a missing key yields null without a notice/warning.
        return isset($this->metadata[$offset]) ? $this->metadata[$offset] : null;
    }

    /**
     * Store a meta-data value.
     *
     * With a null offset ($token[] = $value) a list-like array, or a token
     * whose meta-data is list-like, is merged into the meta-data; any other
     * value is appended under the next numeric index. Null and other empty
     * values are ignored.
     *
     * @param mixed $offset meta-data key, or null to append/merge
     * @param mixed $value  value, array of values, or token
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if ($offset === null) {
            $list = ($value instanceof ParseyyToken) ? $value->metadata : $value;
            // Only real arrays may be merged: a plain string also answers
            // isset($value[0]) and would otherwise be handed to array_merge().
            if (is_array($list) && isset($list[0])) {
                $this->metadata = array_merge($this->metadata, $list);
                return;
            }
            $offset = count($this->metadata);
        }
        if ($value === null) {
            return;
        }
        if ($value instanceof ParseyyToken) {
            if ($value->metadata) {
                $this->metadata[$offset] = $value->metadata;
            }
        } elseif ($value) {
            $this->metadata[$offset] = $value;
        }
    }

    /**
     * @param mixed $offset meta-data key to remove
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->metadata[$offset]);
    }
}
/**
 * A single element of the parser's stack.
 *
 * Each entry carries:
 *
 *   + the state number for the parser at this level of the stack;
 *   + the "major" token, i.e. the code number of the symbol stored at
 *     this level of the stack;
 *   + the "minor" token, i.e. the semantic value used by the action
 *     routines in the grammar.
 */
class ParseyyStackEntry
{
    /**
     * The state-number.
     */
    public $stateno;

    /**
     * The major token value: the code number for the token at this stack level.
     */
    public $major;

    /**
     * The user-supplied minor token value: the value of the token.
     */
    public $minor;
}
// code external to the class is included here
%%
// declare_class is output here
%%
{
/* First off, code is included which follows the "include_class" declaration
** in the input file. */
%%
/* Next is all token values, as class constants
*/
/*
** These constants (all generated automatically by the parser generator)
** specify the various kinds of tokens (terminals) that the parser
** understands.
**
** Each symbol here is a terminal symbol in the grammar.
*/
%%
/* Next are the tables used to determine what action to take based on the
** current state and lookahead token. These tables are used to implement
** functions that take a state number and lookahead value and return an
** action integer.
**
** Suppose the action integer is N. Then the action is determined as
** follows
**
** 0 <= N < self::YYNSTATE Shift N. That is,
** push the lookahead
** token onto the stack
** and goto state N.
**
** self::YYNSTATE <= N < self::YYNSTATE+self::YYNRULE Reduce by rule N-YYNSTATE.
**
** N == self::YYNSTATE+self::YYNRULE A syntax error has occurred.
**
** N == self::YYNSTATE+self::YYNRULE+1 The parser accepts its
** input. (and concludes parsing)
**
** N == self::YYNSTATE+self::YYNRULE+2 No such action. Denotes unused
** slots in the yy_action[] table.
**
** The action table is constructed as a single large static array $yy_action.
** Given state S and lookahead X, the action is computed as
**
** self::$yy_action[self::$yy_shift_ofst[S] + X ]
**
** If the index value self::$yy_shift_ofst[S]+X is out of range or if the value
** self::$yy_lookahead[self::$yy_shift_ofst[S]+X] is not equal to X or if
** self::$yy_shift_ofst[S] is equal to self::YY_SHIFT_USE_DFLT, it means that
** the action is not in the table and that self::$yy_default[S] should be used instead.
**
** The formula above is for computing the action when the lookahead is
** a terminal symbol. If the lookahead is a non-terminal (as occurs after
** a reduce action) then the static $yy_reduce_ofst array is used in place of
** the static $yy_shift_ofst array and self::YY_REDUCE_USE_DFLT is used in place of
** self::YY_SHIFT_USE_DFLT.
**
** The following are the tables generated in this section:
**
** self::$yy_action A single table containing all actions.
** self::$yy_lookahead A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** self::$yy_shift_ofst For each state, the offset into self::$yy_action for
** shifting terminals.
** self::$yy_reduce_ofst For each state, the offset into self::$yy_action for
** shifting non-terminals after a reduce.
** self::$yy_default Default action for each state.
*/
%%
/* The next thing included is series of defines which control
** various aspects of the generated parser.
** self::YYNOCODE is a number which corresponds
** to no legal terminal or nonterminal number. This
** number is used to fill in empty slots of the hash
** table.
** self::YYFALLBACK If defined, this indicates that one or more tokens
** have fall-back values which should be used if the
** original value of the token will not parse.
** self::YYSTACKDEPTH is the maximum depth of the parser's stack.
** self::YYNSTATE the combined number of states.
** self::YYNRULE the number of rules in the grammar
** self::YYERRORSYMBOL is the code number of the error symbol. If not
** defined, then do no error processing.
*/
%%
/** The next table maps tokens into fallback tokens. If a construct
* like the following:
*
* %fallback ID X Y Z.
*
* appears in the grammar, then ID becomes a fallback token for X, Y,
* and Z. Whenever one of the tokens X, Y, or Z is input to the parser
* but it does not parse, the type of the token is changed to ID and
* the parse is retried before an error is thrown.
*/
static public $yyFallback = array(
%%
);
/**
 * Switch parser tracing on or off.
 *
 * Tracing is on only when both a stream and a prompt are supplied; when
 * either one is empty the other is stored as 0, which disables tracing.
 *
 * @param resource $TraceFILE    stream that receives the trace output
 * @param string   $zTracePrompt prefix written at the start of every trace line
 */
static function Trace($TraceFILE, $zTracePrompt)
{
    $haveStream = (bool) $TraceFILE;
    // A stream without a prompt is discarded ...
    self::$yyTraceFILE = ($haveStream && !$zTracePrompt) ? 0 : $TraceFILE;
    // ... and a prompt without a stream is discarded as well.
    self::$yyTracePrompt = $haveStream ? $zTracePrompt : 0;
}
/**
 * Send trace output to the php://output stream, with an empty prompt.
 */
static function PrintTrace()
{
    self::$yyTracePrompt = '';
    self::$yyTraceFILE = fopen('php://output', 'w');
}
/**
* @var resource|0
*/
static public $yyTraceFILE;
/**
* String to prepend to debug output
* @var string|0
*/
static public $yyTracePrompt;
/**
* @var int
*/
public $yyidx = -1; /* Index of top element in stack */
/**
* @var int
*/
public $yyerrcnt; /* Shifts left before out of the error */
/**
* @var array
*/
public $yystack = array(); /* The parser's stack */
/**
* For tracing shifts, the names of all terminals and nonterminals
* are required. The following table supplies these names
* @var array
*/
static public $yyTokenName = array(
%%
);
/**
* For tracing reduce actions, the names of all rules are required.
* @var array
*/
static public $yyRuleName = array(
%%
);
/**
 * Return the symbolic name associated with a token value.
 *
 * @param int $tokenType token code
 * @return string 'End of Input' for integer 0, the entry of $yyTokenName for
 *                a code inside the table, "Unknown" otherwise
 */
function tokenName($tokenType)
{
    if ($tokenType === 0) {
        return 'End of Input';
    }
    $inTable = $tokenType > 0 && $tokenType < count(self::$yyTokenName);
    return $inTable ? self::$yyTokenName[$tokenType] : "Unknown";
}
/**
* Delete the value associated with a symbol. The symbol can be either
* a terminal or a nonterminal.
*
* The switch body is filled in by the generator at the insertion marker
* below; when nothing is inserted for a symbol, the default branch does
* nothing.
* @param int the symbol code
* @param mixed the symbol's value
*/
static function yy_destructor($yymajor, $yypminor)
{
switch ($yymajor) {
/* Here is inserted the actions which take place when a
** terminal or non-terminal is destroyed. This can happen
** when the symbol is popped from the stack during a
** reduce or during error processing or when a parser is
** being destroyed before it is finished parsing.
**
** Note: during a reduce, the only symbols destroyed are those
** which appear on the RHS of the rule, but which are not used
** inside the action code.
*/
%%
default: break; /* If no destructor action specified: do nothing */
}
}
/**
 * Pop the parser's stack once.
 *
 * The destructor routine is called for the popped symbol.
 *
 * @return int|null the major token number of the popped symbol, or null
 *                  when the stack was already empty
 */
function yy_pop_parser_stack()
{
    if (!count($this->yystack)) {
        // Nothing to pop. Re-synchronise the index with the empty stack so
        // that callers looping on "$this->yyidx >= 0" always terminate.
        $this->yyidx = -1;
        return null;
    }
    $yytos = array_pop($this->yystack);
    if (self::$yyTraceFILE && $this->yyidx >= 0) {
        fwrite(self::$yyTraceFILE,
            self::$yyTracePrompt . 'Popping ' . self::$yyTokenName[$yytos->major] .
                "\n");
    }
    $yymajor = $yytos->major;
    self::yy_destructor($yymajor, $yytos->minor);
    $this->yyidx--;
    return $yymajor;
}
/**
 * Deallocate and destroy a parser. Destructors are called for all
 * stack elements before shutting the parser down, and an open trace
 * stream is closed.
 */
function __destruct()
{
    // Also stop on an empty stack: the pop routine cannot make progress then.
    while ($this->yyidx >= 0 && count($this->yystack)) {
        $this->yy_pop_parser_stack();
    }
    if (is_resource(self::$yyTraceFILE)) {
        fclose(self::$yyTraceFILE);
        // The handle is static, hence shared by all instances: switch tracing
        // off so that no other instance writes to the closed stream.
        self::$yyTraceFILE = 0;
    }
}
/**
 * Based on the current state and parser stack, get a list of all
 * possible lookahead tokens.
 *
 * The reductions triggered by $token are simulated on the live stack;
 * the stack and its index are saved first and restored on every exit.
 *
 * @param int $token the lookahead token code
 * @return array token codes collected from the states visited
 */
function yy_get_expected_tokens($token)
{
    $state = $this->yystack[$this->yyidx]->stateno;
    $expected = self::$yyExpectedTokens[$state];
    if (in_array($token, self::$yyExpectedTokens[$state], true)) {
        return $expected;
    }
    // snapshot of the real stack, restored before returning
    $stack = $this->yystack;
    $yyidx = $this->yyidx;
    do {
        $yyact = $this->yy_find_shift_action($token);
        if ($yyact >= self::YYNSTATE && $yyact < self::YYNSTATE + self::YYNRULE) {
            // reduce action
            $done = 0;
            do {
                if ($done++ == 100) {
                    $this->yyidx = $yyidx;
                    $this->yystack = $stack;
                    // too much recursion prevents proper detection
                    // so give up
                    return array_unique($expected);
                }
                $yyruleno = $yyact - self::YYNSTATE;
                $this->yyidx -= self::$yyRuleInfo[$yyruleno]['rhs'];
                $nextstate = $this->yy_find_reduce_action(
                    $this->yystack[$this->yyidx]->stateno,
                    self::$yyRuleInfo[$yyruleno]['lhs']);
                if (isset(self::$yyExpectedTokens[$nextstate])) {
                    // Both operands are lists: the "+" union would discard every
                    // entry whose numeric key already exists, so merge instead.
                    $expected = array_merge($expected, self::$yyExpectedTokens[$nextstate]);
                    if (in_array($token,
                          self::$yyExpectedTokens[$nextstate], true)) {
                        $this->yyidx = $yyidx;
                        $this->yystack = $stack;
                        return array_unique($expected);
                    }
                }
                if ($nextstate < self::YYNSTATE) {
                    // we need to shift a non-terminal
                    $this->yyidx++;
                    $x = new ParseyyStackEntry;
                    $x->stateno = $nextstate;
                    $x->major = self::$yyRuleInfo[$yyruleno]['lhs'];
                    $this->yystack[$this->yyidx] = $x;
                    continue 2;
                } elseif ($nextstate == self::YYNSTATE + self::YYNRULE + 1) {
                    $this->yyidx = $yyidx;
                    $this->yystack = $stack;
                    // the last token was just ignored, we can't accept
                    // by ignoring input, this is in essence ignoring a
                    // syntax error!
                    return array_unique($expected);
                } elseif ($nextstate === self::YY_NO_ACTION) {
                    $this->yyidx = $yyidx;
                    $this->yystack = $stack;
                    // input accepted, but not shifted (I guess)
                    return array_unique($expected);
                } else {
                    $yyact = $nextstate;
                }
            } while (true);
        }
        break;
    } while (true);
    // The simulation may have moved the index and overwritten stack slots:
    // put the real stack back, as yy_is_expected_token() does.
    $this->yyidx = $yyidx;
    $this->yystack = $stack;
    return array_unique($expected);
}
/**
* Based on the parser state and current parser stack, determine whether
* the lookahead token is possible.
*
* The parser will convert the token value to an error token if not. This
* catches some unusual edge cases where the parser would fail.
*
* The reductions triggered by the token are simulated on the live stack;
* the stack and its index are saved first and restored on every exit
* taken after the simulation has started.
* @param int the lookahead token code
* @return bool
*/
function yy_is_expected_token($token)
{
if ($token === 0) {
return true; // 0 is not part of this
}
$state = $this->yystack[$this->yyidx]->stateno;
if (in_array($token, self::$yyExpectedTokens[$state], true)) {
return true;
}
// snapshot of the real stack, restored before returning
$stack = $this->yystack;
$yyidx = $this->yyidx;
do {
$yyact = $this->yy_find_shift_action($token);
if ($yyact >= self::YYNSTATE && $yyact < self::YYNSTATE + self::YYNRULE) {
// reduce action
$done = 0;
do {
if ($done++ == 100) {
$this->yyidx = $yyidx;
$this->yystack = $stack;
// too much recursion prevents proper detection
// so give up
return true;
}
$yyruleno = $yyact - self::YYNSTATE;
// drop the right-hand side of the rule by moving the index only
$this->yyidx -= self::$yyRuleInfo[$yyruleno]['rhs'];
$nextstate = $this->yy_find_reduce_action(
$this->yystack[$this->yyidx]->stateno,
self::$yyRuleInfo[$yyruleno]['lhs']);
if (isset(self::$yyExpectedTokens[$nextstate]) &&
in_array($token, self::$yyExpectedTokens[$nextstate], true)) {
$this->yyidx = $yyidx;
$this->yystack = $stack;
return true;
}
if ($nextstate < self::YYNSTATE) {
// we need to shift a non-terminal
$this->yyidx++;
$x = new ParseyyStackEntry;
$x->stateno = $nextstate;
$x->major = self::$yyRuleInfo[$yyruleno]['lhs'];
$this->yystack[$this->yyidx] = $x;
// restart the outer loop: look up the action for the token again
continue 2;
} elseif ($nextstate == self::YYNSTATE + self::YYNRULE + 1) {
$this->yyidx = $yyidx;
$this->yystack = $stack;
if (!$token) {
// end of input: this is valid
return true;
}
// the last token was just ignored, we can't accept
// by ignoring input, this is in essence ignoring a
// syntax error!
return false;
} elseif ($nextstate === self::YY_NO_ACTION) {
$this->yyidx = $yyidx;
$this->yystack = $stack;
// input accepted, but not shifted (I guess)
return true;
} else {
// any other value is used as the next action of the inner loop
$yyact = $nextstate;
}
} while (true);
}
break;
} while (true);
$this->yyidx = $yyidx;
$this->yystack = $stack;
return true;
}
/**
 * Look up the action for the state on top of the stack and the terminal
 * look-ahead token $iLookAhead.
 *
 * The state's default action is returned when the state has no shift
 * offset or when the look-ahead has no slot in the action table and no
 * usable fallback token. self::YY_NO_ACTION is returned when the
 * look-ahead is self::YYNOCODE and the state does have a shift offset.
 *
 * @param int $iLookAhead the look-ahead token
 * @return int the action number
 */
function yy_find_shift_action($iLookAhead)
{
    $stateno = $this->yystack[$this->yyidx]->stateno;

    // A state without an offset is treated exactly like the "use default" marker.
    $offset = isset(self::$yy_shift_ofst[$stateno])
        ? self::$yy_shift_ofst[$stateno]
        : self::YY_SHIFT_USE_DFLT;
    if ($offset === self::YY_SHIFT_USE_DFLT) {
        return self::$yy_default[$stateno];
    }
    if ($iLookAhead == self::YYNOCODE) {
        return self::YY_NO_ACTION;
    }

    $slot = $offset + $iLookAhead;
    $hit = $slot >= 0
        && $slot < self::YY_SZ_ACTTAB
        && self::$yy_lookahead[$slot] == $iLookAhead;
    if ($hit) {
        return self::$yy_action[$slot];
    }

    // No slot for this token: retry with its fallback token, if it has one.
    $fallbackCount = count(self::$yyFallback);
    if ($fallbackCount && $iLookAhead < $fallbackCount) {
        $iFallback = self::$yyFallback[$iLookAhead];
        if ($iFallback != 0) {
            if (self::$yyTraceFILE) {
                fwrite(self::$yyTraceFILE, self::$yyTracePrompt . "FALLBACK " .
                    self::$yyTokenName[$iLookAhead] . " => " .
                    self::$yyTokenName[$iFallback] . "\n");
            }
            return $this->yy_find_shift_action($iFallback);
        }
    }
    return self::$yy_default[$stateno];
}
/**
 * Look up the action for state $stateno and the non-terminal
 * look-ahead token $iLookAhead.
 *
 * The state's default action is returned when the state has no reduce
 * offset or when the look-ahead has no slot in the action table.
 * self::YY_NO_ACTION is returned when the look-ahead is self::YYNOCODE
 * and the state does have a reduce offset.
 *
 * @param int $stateno    current state number
 * @param int $iLookAhead the look-ahead token
 * @return int the action number
 */
function yy_find_reduce_action($stateno, $iLookAhead)
{
    $useDefault = !isset(self::$yy_reduce_ofst[$stateno])
        || self::$yy_reduce_ofst[$stateno] == self::YY_REDUCE_USE_DFLT;
    if ($useDefault) {
        return self::$yy_default[$stateno];
    }
    if ($iLookAhead == self::YYNOCODE) {
        return self::YY_NO_ACTION;
    }

    $slot = self::$yy_reduce_ofst[$stateno] + $iLookAhead;
    $hit = $slot >= 0
        && $slot < self::YY_SZ_ACTTAB
        && self::$yy_lookahead[$slot] == $iLookAhead;

    return $hit ? self::$yy_action[$slot] : self::$yy_default[$stateno];
}
/**
 * Perform a shift action: push a new entry onto the parser's stack.
 *
 * When the push would exceed self::YYSTACKDEPTH the whole stack is
 * unwound, the stack-overflow code is run and nothing is pushed.
 *
 * @param int   $yyNewState the new state to shift in
 * @param int   $yyMajor    the major token to shift in
 * @param mixed $yypMinor   the minor token to shift in
 */
function yy_shift($yyNewState, $yyMajor, $yypMinor)
{
    if (++$this->yyidx >= self::YYSTACKDEPTH) {
        $this->yyidx--;
        if (self::$yyTraceFILE) {
            fprintf(self::$yyTraceFILE, "%sStack Overflow!\n", self::$yyTracePrompt);
        }
        while ($this->yyidx >= 0) {
            $this->yy_pop_parser_stack();
        }
/* Here code is inserted which will execute if the parser
** stack ever overflows */
%%
        return;
    }

    $entry = new ParseyyStackEntry;
    $entry->stateno = $yyNewState;
    $entry->major = $yyMajor;
    $entry->minor = $yypMinor;
    $this->yystack[] = $entry;

    if (!self::$yyTraceFILE || $this->yyidx <= 0) {
        return;
    }
    fprintf(self::$yyTraceFILE, "%sShift %d\n", self::$yyTracePrompt,
        $yyNewState);
    // List the symbols on the stack; the entry at index 0 is left out.
    $line = self::$yyTracePrompt . 'Stack:';
    for ($depth = 1; $depth <= $this->yyidx; $depth++) {
        $line .= ' ' . self::$yyTokenName[$this->yystack[$depth]->major];
    }
    fwrite(self::$yyTraceFILE, $line . "\n");
}
/**
* The following table contains information about every rule that
* is used during the reduce.
*
* <pre>
* array(
* array(
* int $lhs; Symbol on the left-hand side of the rule
* int $nrhs; Number of right-hand side symbols in the rule
* ),...
* );
* </pre>
*/
static public $yyRuleInfo = array(
%%
);
/**
* The following table contains a mapping of reduce action to method name
* that handles the reduction.
*
* If a rule is not set, it has no handler.
*/
static public $yyReduceMap = array(
%%
);
/* Beginning here are the reduction cases. A typical example
** follows:
** #line <lineno> <grammarfile>
** function yy_r0($yymsp){ ... } // User supplied code
** #line <lineno> <thisfile>
*/
%%
/**
* placeholder for the left hand side in a reduce operation.
*
* For a parser with a rule like this:
* <pre>
* rule(A) ::= B. { A = 1; }
* </pre>
*
* The parser will translate to something like:
*
* <code>
* function yy_r0(){$this->_retvalue = 1;}
* </code>
*/
private $_retvalue;
/**
 * Perform a reduce action and the shift that must immediately
 * follow the reduce.
 *
 * For a rule such as:
 *
 * <pre>
 * A ::= B blah C. { dosomething(); }
 * </pre>
 *
 * This function will first call the action, if any, ("dosomething();" in our
 * example), and then it will pop three states from the stack,
 * one for each entry on the right-hand side of the expression
 * (B, blah, and C in our example rule), and then push the result of the action
 * back on to the stack with the resulting state reduced to (as described in the .out
 * file)
 *
 * @param int $yyruleno number of the rule by which to reduce
 */
function yy_reduce($yyruleno)
{
    if (self::$yyTraceFILE && $yyruleno >= 0
          && $yyruleno < count(self::$yyRuleName)) {
        fprintf(self::$yyTraceFILE, "%sReduce (%d) [%s].\n",
            self::$yyTracePrompt, $yyruleno,
            self::$yyRuleName[$yyruleno]);
    }
    // A rule without a handler reduces to a null value.
    $this->_retvalue = $yy_lefthand_side = null;
    if (array_key_exists($yyruleno, self::$yyReduceMap)) {
        // call the action; it leaves its result in $this->_retvalue
        $this->{'yy_r' . self::$yyReduceMap[$yyruleno]}();
        $yy_lefthand_side = $this->_retvalue;
    }
    $yygoto = self::$yyRuleInfo[$yyruleno]['lhs'];   /* the LHS symbol of the rule */
    $yysize = self::$yyRuleInfo[$yyruleno]['rhs'];   /* amount to pop the stack */
    $this->yyidx -= $yysize;
    for ($i = $yysize; $i; $i--) {
        // pop all of the right-hand side parameters
        array_pop($this->yystack);
    }
    $yyact = $this->yy_find_reduce_action($this->yystack[$this->yyidx]->stateno, $yygoto);
    if ($yyact < self::YYNSTATE) {
        /* If we are not debugging and the reduce action popped at least
        ** one element off the stack, then we can push the new element back
        ** onto the stack here, and skip the stack overflow test in yy_shift().
        ** That gives a significant speed improvement. */
        if (!self::$yyTraceFILE && $yysize) {
            $this->yyidx++;
            $x = new ParseyyStackEntry;
            $x->stateno = $yyact;
            $x->major = $yygoto;
            $x->minor = $yy_lefthand_side;
            $this->yystack[$this->yyidx] = $x;
        } else {
            $this->yy_shift($yyact, $yygoto, $yy_lefthand_side);
        }
    } elseif ($yyact == self::YYNSTATE + self::YYNRULE + 1) {
        $this->yy_accept();
    }
}
/**
 * Executed when the parse fails: the stack is unwound completely, then
 * the %parse_fail code inserted below is run.
 */
function yy_parse_failed()
{
    if (self::$yyTraceFILE) {
        fwrite(self::$yyTraceFILE, self::$yyTracePrompt . "Fail!\n");
    }
    while ($this->yyidx >= 0) {
        $this->yy_pop_parser_stack();
    }
/* Here code is inserted which will be executed whenever the
** parser fails */
%%
}
/**
* The following code executes when a syntax error first occurs.
*
* The body consists only of the %syntax_error code, which the generator
* inserts at the marker below.
* @param int The major type of the error token
* @param mixed The minor type of the error token
*/
function yy_syntax_error($yymajor, $TOKEN)
{
%%
}
/**
 * Executed when the parser accepts its input: the stack is unwound
 * completely, then the %parse_accept code inserted below is run.
 */
function yy_accept()
{
    if (self::$yyTraceFILE) {
        fwrite(self::$yyTraceFILE, self::$yyTracePrompt . "Accept!\n");
    }
    while ($this->yyidx >= 0) {
        // $stack is kept: the code inserted below runs in this scope.
        $stack = $this->yy_pop_parser_stack();
    }
/* Here code is inserted which will be executed whenever the
** parser accepts */
%%
}
/**
* The main parser program.
*
* The first argument is the major token number. The second is
* the token value string as scanned from the input.
*
* Each call feeds one token to the parser. The stack is (re)initialised
* with a state-0 entry when it is empty. The loop then looks up the action
* for the token and shifts, reduces, handles a syntax error or accepts,
* until the token has been consumed or the stack has been emptied.
*
* @param int $yymajor the token number (0 means end of input)
* @param mixed $yytokenvalue the token value
*
* @return void
*/
function doParse($yymajor, $yytokenvalue)
{
// $yyact; /* The parser action. */
// $yyendofinput; /* True if we are at the end of input */
$yyerrorhit = 0; /* True if yymajor has invoked an error */
/* (re)initialize the parser, if necessary */
if ($this->yyidx === null || $this->yyidx < 0) {
/* if ($yymajor == 0) return; // not sure why this was here... */
$this->yyidx = 0;
$this->yyerrcnt = -1;
$x = new ParseyyStackEntry;
$x->stateno = 0;
$x->major = 0;
$this->yystack = array();
array_push($this->yystack, $x);
}
$yyendofinput = ($yymajor==0);
if (self::$yyTraceFILE) {
fprintf(
self::$yyTraceFILE,
"%sInput %s\n",
self::$yyTracePrompt,
self::$yyTokenName[$yymajor]
);
}
do {
$yyact = $this->yy_find_shift_action($yymajor);
// Tokens numbered below the error symbol are checked against the
// simulated parse; an impossible token overrides the table lookup.
if ($yymajor < self::YYERRORSYMBOL
&& !$this->yy_is_expected_token($yymajor)
) {
// force a syntax error
$yyact = self::YY_ERROR_ACTION;
}
if ($yyact < self::YYNSTATE) {
// shift: the token is consumed
$this->yy_shift($yyact, $yymajor, $yytokenvalue);
$this->yyerrcnt--;
if ($yyendofinput && $this->yyidx >= 0) {
// keep looping on the end-of-input token
$yymajor = 0;
} else {
$yymajor = self::YYNOCODE;
}
} elseif ($yyact < self::YYNSTATE + self::YYNRULE) {
// reduce: the token is kept and looked up again on the next pass
$this->yy_reduce($yyact - self::YYNSTATE);
} elseif ($yyact == self::YY_ERROR_ACTION) {
if (self::$yyTraceFILE) {
fprintf(
self::$yyTraceFILE,
"%sSyntax Error!\n",
self::$yyTracePrompt
);
}
if (self::YYERRORSYMBOL) {
/* A syntax error has occurred.
** The response to an error depends upon whether or not the
** grammar defines an error token "ERROR".
**
** This is what we do if the grammar does define ERROR:
**
** * Call the %syntax_error function.
**
** * Begin popping the stack until we enter a state where
** it is legal to shift the error symbol, then shift
** the error symbol.
**
** * Set the error count to three.
**
** * Begin accepting and shifting new tokens. No new error
** processing will occur until three tokens have been
** shifted successfully.
**
*/
if ($this->yyerrcnt < 0) {
$this->yy_syntax_error($yymajor, $yytokenvalue);
}
$yymx = $this->yystack[$this->yyidx]->major;
if ($yymx == self::YYERRORSYMBOL || $yyerrorhit ) {
// already recovering from an error: discard the input token
if (self::$yyTraceFILE) {
fprintf(
self::$yyTraceFILE,
"%sDiscard input token %s\n",
self::$yyTracePrompt,
self::$yyTokenName[$yymajor]
);
}
$this->yy_destructor($yymajor, $yytokenvalue);
$yymajor = self::YYNOCODE;
} else {
// pop until the error symbol can be shifted or the stack is empty
while ($this->yyidx >= 0
&& $yymx != self::YYERRORSYMBOL
&& ($yyact = $this->yy_find_shift_action(self::YYERRORSYMBOL)) >= self::YYNSTATE
) {
$this->yy_pop_parser_stack();
}
if ($this->yyidx < 0 || $yymajor==0) {
$this->yy_destructor($yymajor, $yytokenvalue);
$this->yy_parse_failed();
$yymajor = self::YYNOCODE;
} elseif ($yymx != self::YYERRORSYMBOL) {
$u2 = 0;
$this->yy_shift($yyact, self::YYERRORSYMBOL, $u2);
}
}
$this->yyerrcnt = 3;
$yyerrorhit = 1;
} else {
/* YYERRORSYMBOL is not defined */
/* This is what we do if the grammar does not define ERROR:
**
** * Report an error message, and throw away the input token.
**
** * If the input token is $, then fail the parse.
**
** As before, subsequent error messages are suppressed until
** three input tokens have been successfully shifted.
*/
if ($this->yyerrcnt <= 0) {
$this->yy_syntax_error($yymajor, $yytokenvalue);
}
$this->yyerrcnt = 3;
$this->yy_destructor($yymajor, $yytokenvalue);
if ($yyendofinput) {
$this->yy_parse_failed();
}
$yymajor = self::YYNOCODE;
}
} else {
// any remaining action value is treated as "accept"
$this->yy_accept();
$yymajor = self::YYNOCODE;
}
} while ($yymajor != self::YYNOCODE && $this->yyidx >= 0);
}
}

View File

@@ -0,0 +1,332 @@
<?php
/**
* PHP_LexerGenerator, a php 5 lexer generator.
*
* This lexer generator translates a file in a format similar to
* re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_LexerGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category php
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: LexerGenerator.php 294970 2010-02-12 03:46:38Z clockwerx $
* @since File available since Release 0.1.0
*/
/**
* The Lexer generation parser
*/
require_once 'PHP/LexerGenerator/Parser.php';
/**
* Hand-written lexer for lex2php format files
*/
require_once 'PHP/LexerGenerator/Lexer.php';
/**
* The basic home class for the lexer generator. A lexer scans text and
* organizes it into tokens for usage by a parser.
*
* Sample Usage:
* <code>
* require_once 'PHP/LexerGenerator.php';
* $lex = new PHP_LexerGenerator('/path/to/lexerfile.plex');
* </code>
*
* A file named "/path/to/lexerfile.php" will be created.
*
* File format consists of a PHP file containing specially
* formatted comments like so:
*
* <code>
* /*!lex2php
* {@*}
* </code>
*
* All lexer definition files must contain at least two lex2php comment blocks:
* - 1 regex declaration block
* - 1 or more rule declaration blocks
*
* The first lex2php comment is the regex declaration block and must contain
* several processor instruction as well as defining a name for all
* regular expressions. Processor instructions start with
* a "%" symbol and must be:
*
* - %counter
* - %input
* - %token
* - %value
* - %line
*
* input and counter should define the class variables used to hold the lexer input
* and the index into the input. token and value should define the class
* variables used to store the token number and its textual value. Finally, line
* should define the class variable used to hold the current line number
* of scanning.
*
* For example:
* <code>
* /*!lex2php
* %counter {$this->N}
* %input {$this->data}
* %token {$this->token}
* %value {$this->value}
* %line {$this->linenumber}
* {@*}
* </code>
*
* Patterns consist of an identifier containing only letters or underscores, and
* a descriptive match pattern.
*
* Descriptive match patterns may either be regular expressions (regexes) or
* quoted literal strings. Here are some examples:
*
* <pre>
* pattern = "quoted literal"
* ANOTHER = /[a-zA-Z_]+/
* COMPLEX = @<([a-zA-Z_]+)( +(([a-zA-Z_]+)=((["\'])([^\6]*)\6))+){0,1}>[^<]*</\1>@
* </pre>
*
* Quoted strings must escape the \ and " characters as \\ and \" respectively.
*
* Regex patterns must be in Perl-compatible regular expression format (preg).
* special characters (like \t \n or \x3H) can only be used in regexes, all
* \ will be escaped in literal strings.
*
* Sub-patterns may be defined and back-references (like \1) may be used. Any sub-
* patterns detected will be passed to the token handler in the variable
* $yysubmatches.
*
* In addition, lookahead expressions, and once-only expressions are allowed.
* Lookbehind expressions are impossible (scanning always occurs from the
* current position forward), and recursion (?R) can't work and is not allowed.
*
* <code>
* /*!lex2php
* %counter {$this->N}
* %input {$this->data}
* %token {$this->token}
* %value {$this->value}
* %line {$this->linenumber}
* alpha = /[a-zA-Z]/
* alphaplus = /[a-zA-Z]+/
* number = /[0-9]/
* numerals = /[0-9]+/
* whitespace = /[ \t\n]+/
* blah = "$\""
* blahblah = /a\$/
* GAMEEND = @(?:1\-0|0\-1|1/2\-1/2)@
* PAWNMOVE = /P?[a-h]([2-7]|[18]\=(Q|R|B|N))|P?[a-h]x[a-h]([2-7]|[18]\=(Q|R|B|N))/
* {@*}
* </code>
*
* All regexes must be delimited. Any legal preg delimiter can be used (as in @ or / in
* the example above)
*
* Rule lex2php blocks each define a lexer state. You can optionally name the state
* with the %statename processor instruction. State names can be used to transfer to
* a new lexer state with the yybegin() method
*
* <code>
* /*!lex2php
* %statename INITIAL
* blah {
* $this->yybegin(self::INBLAH);
* // note - $this->yybegin(2) would also work
* }
* {@*}
* /*!lex2php
* %statename INBLAH
* ANYTHING {
* $this->yybegin(self::INITIAL);
* // note - $this->yybegin(1) would also work
* }
* {@*}
* </code>
*
* You can maintain a parser state stack simply by using yypushstate() and
* yypopstate() instead of yybegin():
*
* <code>
* /*!lex2php
* %statename INITIAL
* blah {
* $this->yypushstate(self::INBLAH);
* }
* {@*}
* /*!lex2php
* %statename INBLAH
* ANYTHING {
* $this->yypopstate();
* // now INBLAH doesn't care where it was called from
* }
* {@*}
* </code>
*
* Code blocks can choose to skip the current token and cycle to the next token by
* returning "false"
*
* <code>
* /*!lex2php
* WHITESPACE {
* return false;
* }
* {@*}
* </code>
*
* If you wish to re-process the current token in a new state, simply return true.
* If you forget to change lexer state, this will cause an unterminated loop,
* so be careful!
*
* <code>
* /*!lex2php
* "(" {
* $this->yypushstate(self::INPARAMS);
* return true;
* }
* {@*}
* </code>
*
* Lastly, if you wish to cycle to the next matching rule, return any value other than
* true, false or null:
*
* <code>
* /*!lex2php
* "{@" ALPHA {
* if ($this->value == '{@internal') {
* return 'more';
* }
* ...
* }
* "{@internal" {
* ...
* }
* {@*}
* </code>
*
* Note that this procedure is exceptionally inefficient, and it would be far better
* to take advantage of PHP_LexerGenerator's top-down precedence and instead code:
*
* <code>
* /*!lex2php
* "{@internal" {
* ...
* }
* "{@" ALPHA {
* ...
* }
* {@*}
* </code>
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version @package_version@
* @since Class available since Release 0.1.0
* @example TestLexer.plex Example lexer source
* @example TestLexer.php Example lexer generated php code
* @example usage.php Example usage of PHP_LexerGenerator
* @example Lexer.plex File_ChessPGN lexer source (complex)
* @example Lexer.php File_ChessPGN lexer generated php code
*/
class PHP_LexerGenerator
{
    /**
     * Plex file lexer.
     * @var PHP_LexerGenerator_Lexer
     */
    private $_lex;
    /**
     * Plex file parser.
     * @var PHP_LexerGenerator_Parser
     */
    private $_parser;
    /**
     * Path to the output PHP file.
     * @var string
     */
    private $_outfile;
    /**
     * Debug flag. When set, Parser trace information is generated.
     * @var boolean
     */
    public $debug = false;

    /**
     * Create a lexer generator and optionally generate a lexer file.
     *
     * Nothing is generated when $lexerfile is empty; call create() later.
     *
     * @param string Optional plex file {@see PHP_LexerGenerator::create}.
     * @param string Optional output file {@see PHP_LexerGenerator::create}.
     */
    function __construct($lexerfile = '', $outfile = '')
    {
        if ($lexerfile) {
            $this->create($lexerfile, $outfile);
        }
    }

    /**
     * Create a lexer file from its skeleton plex file.
     *
     * The plex file is read completely, tokenized by PHP_LexerGenerator_Lexer
     * and every token is fed to PHP_LexerGenerator_Parser, which is handed the
     * output path.
     *
     * @param string Path to the plex file.
     * @param string Optional path to output file. Default is lexerfile with
     *               extension of ".php".
     * @throws Exception if the plex file cannot be read
     */
    function create($lexerfile, $outfile = '')
    {
        // Read the input first so that an unreadable plex file is reported
        // before the parser opens (and truncates) the output file.
        $source = file_get_contents($lexerfile);
        if ($source === false) {
            throw new Exception('unable to read lexer file "' . $lexerfile . '"');
        }
        $this->_lex = new PHP_LexerGenerator_Lexer($source);
        $this->_outfile = $outfile ? $outfile : self::_defaultOutfile($lexerfile);
        $this->_parser = new PHP_LexerGenerator_Parser($this->_outfile, $this->_lex);
        if ($this->debug) {
            $this->_parser->PrintTrace();
        }
        while ($this->_lex->advance($this->_parser)) {
            $this->_parser->doParse($this->_lex->token, $this->_lex->value);
        }
        // token 0 signals the end of input to the parser
        $this->_parser->doParse(0, 0);
    }

    /**
     * Derive the default output path: the plex path with its extension
     * replaced by ".php" (or ".php" appended when there is no extension).
     *
     * @param string Path to the plex file.
     * @return string
     */
    private static function _defaultOutfile($lexerfile)
    {
        $info = pathinfo($lexerfile);
        $stem = $info['basename'];
        if (isset($info['extension'])) {
            // drop the extension together with the dot in front of it
            $stem = substr($stem, 0, -(strlen($info['extension']) + 1));
        }
        return $info['dirname'] . DIRECTORY_SEPARATOR . $stem . '.php';
    }
}
//$a = new PHP_LexerGenerator('/development/File_ChessPGN/ChessPGN/Lexer.plex');
?>

View File

@@ -0,0 +1,55 @@
<?php
/**
* PHP_LexerGenerator, a php 5 lexer generator.
*
* Exception classes for the lexer generator
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_LexerGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category php
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
*/
require_once 'PEAR/Exception.php';
/**
 * Exception type for the lexer generator package.
 *
 * Declares no members of its own; it only gives the package a distinct
 * exception class derived from PEAR_Exception.
 *
 * @package PHP_LexerGenerator
 * @author Gregory Beaver <cellog@php.net>
 * @copyright 2006 Gregory Beaver
 * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
 * @version @package_version@
 * @since File available since Release 0.1.0
 */
class PHP_LexerGenerator_Exception extends PEAR_Exception {}
?>

View File

@@ -0,0 +1,533 @@
<?php
/**
* PHP_LexerGenerator, a php 5 lexer generator.
*
* This lexer generator takes a file in a format similar to
* re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
*
* PHP version 5
*
* LICENSE: This source file is subject to version 3.01 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
*
* @category php
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version CVS: $Id: Lexer.php 246683 2007-11-22 04:43:52Z instance $
* @since File available since Release 0.1.0
*/
require_once 'PHP/LexerGenerator/Parser.php';
/**
 * Token scanner for plex files.
 *
 * This scanner splits a plex source into plain PHP code and comments
 * beginning with "/*!lex2php", and returns the components of those comments
 * (processing instructions, patterns, strings, action code, and regexes)
 * one token per call to {@link advance()}.
 *
 * The scanner is a small state machine: the current state name is the
 * suffix of the private lexXxx() method that advance() dispatches to.
 *
 * @package    PHP_LexerGenerator
 * @author     Gregory Beaver <cellog@php.net>
 * @copyright  2006 Gregory Beaver
 * @license    http://www.php.net/license/3_01.txt  PHP License 3.01
 * @version    @package_version@
 * @since      Class available since Release 0.1.0
 */
class PHP_LexerGenerator_Lexer
{
    /**
     * Line that opens a lexer-definition comment (newline included)
     * @var string
     */
    private static $commentStart = "/*!lex2php\n";
    /**
     * The input, with CRLF line endings normalized to LF
     * @var string
     */
    private $data;
    /**
     * Byte offset of the next unread character in the input
     * @var int
     */
    private $N;
    /**
     * Current scanning state; advance() calls the method "lex" . $state
     * @var string
     */
    private $state;
    /**
     * Current line number in input
     * @var int
     */
    public $line;
    /**
     * Number of scanning errors detected
     * @var int
     */
    public $errors = 0;
    /**
     * integer identifier of the current token
     * @var int
     */
    public $token;
    /**
     * string content of current token
     * @var string
     */
    public $value;

    const CODE = PHP_LexerGenerator_Parser::CODE;
    const COMMENTEND = PHP_LexerGenerator_Parser::COMMENTEND;
    const COMMENTSTART = PHP_LexerGenerator_Parser::COMMENTSTART;
    const PATTERN = PHP_LexerGenerator_Parser::PATTERN;
    const PHPCODE = PHP_LexerGenerator_Parser::PHPCODE;
    const PI = PHP_LexerGenerator_Parser::PI;
    const QUOTE = PHP_LexerGenerator_Parser::QUOTE;
    const SINGLEQUOTE = PHP_LexerGenerator_Parser::SINGLEQUOTE;
    const SUBPATTERN = PHP_LexerGenerator_Parser::SUBPATTERN;

    /**
     * prepare scanning
     * @param string the input
     */
    function __construct($data)
    {
        $this->data = str_replace("\r\n", "\n", $data);
        $this->N = 0;
        $this->line = 1;
        $this->state = 'Start';
        $this->errors = 0;
    }

    /**
     * Output an error message and count it in {@link $errors}
     * @param string
     */
    private function error($msg)
    {
        echo 'Error on line ' . $this->line . ': ' . $msg;
        $this->errors++;
    }

    /**
     * Bounds-safe character access
     *
     * @param int absolute offset into the input
     * @return string the character at that offset, or '' when the offset
     *                lies outside the input
     */
    private function charAt($pos)
    {
        return ($pos >= 0 && $pos < strlen($this->data)) ? $this->data[$pos] : '';
    }

    /**
     * Shared body of the two top-level states.
     *
     * Returns everything up to the next lexer comment as PHPCODE, or the
     * comment opener itself as COMMENTSTART, in which case the scanner
     * switches to the given state.
     *
     * @param string state to enter once a lexer comment has been opened
     * @return boolean
     */
    private function scanTopLevel($commentState)
    {
        $length = strlen($this->data);
        if ($this->N >= $length) {
            return false;
        }
        $marker = self::$commentStart;
        $at = strpos($this->data, $marker, $this->N);
        if ($at === false) {
            // no further lexer comment: the remainder is plain PHP code
            $this->value = substr($this->data, $this->N);
            $this->N = $length;
            $this->token = self::PHPCODE;
            return true;
        }
        if ($at > $this->N) {
            // PHP code in front of the next lexer comment
            $this->value = substr($this->data, $this->N, $at - $this->N);
            $this->N = $at;
            $this->token = self::PHPCODE;
            return true;
        }
        $this->value = $marker;
        $this->N += strlen($marker);
        $this->token = self::COMMENTSTART;
        $this->state = $commentState;
        return true;
    }

    /**
     * Initial scanning state lexer
     * @return boolean
     */
    private function lexStart()
    {
        return $this->scanTopLevel('Declare');
    }

    /**
     * lexer for top-level scanning state after the initial declaration comment
     * @return boolean
     */
    private function lexStartNonDeclare()
    {
        return $this->scanTopLevel('Rule');
    }

    /**
     * lexer for declaration comment state
     * @return boolean
     */
    private function lexDeclare()
    {
        $length = strlen($this->data);
        while (true) {
            $this->skipWhitespaceEol();
            if ($this->charAt($this->N) !== '/' || $this->charAt($this->N + 1) !== '/') {
                break;
            }
            // Skip single-line comment (the newline is consumed on the next pass)
            while ($this->N < $length && $this->data[$this->N] != "\n") {
                ++$this->N;
            }
        }
        if ($this->charAt($this->N) === '*' && $this->charAt($this->N + 1) === '/') {
            $this->state = 'StartNonDeclare';
            $this->value = '*/';
            $this->N += 2;
            $this->token = self::COMMENTEND;
            return true;
        }
        if (preg_match('/\G%([a-z]+)/', $this->data, $found, 0, $this->N)) {
            $this->value = $found[1];
            $this->N += strlen($found[1]) + 1; // + 1 for the leading "%"
            $this->state = 'DeclarePI';
            $this->token = self::PI;
            return true;
        }
        if (preg_match('/\G[a-zA-Z_][a-zA-Z0-9_]*/', $this->data, $found, 0, $this->N)) {
            $this->value = $found[0];
            $this->token = self::PATTERN;
            $this->N += strlen($found[0]);
            $this->state = 'DeclareEquals';
            return true;
        }
        $this->error('expecting declaration of sub-patterns');
        return false;
    }

    /**
     * Shared body of the two processing-instruction states.
     *
     * At the end of the line the scanner returns to the given state and
     * continues there; otherwise the argument of the instruction is returned
     * either as a code block or as the rest of the line (SUBPATTERN).
     *
     * @param string state to resume at the end of the line (Declare or Rule)
     * @return boolean
     */
    private function scanInstructionArgument($resumeState)
    {
        $this->skipWhitespace();
        $char = $this->charAt($this->N);
        if ($char === "\n") {
            $this->N++;
            $this->state = $resumeState;
            $this->line++;
            return $this->{'lex' . $resumeState}();
        }
        if ($char === '{') {
            return $this->lexCode();
        }
        if (!preg_match("/\G[^\n]+/", $this->data, $found, 0, $this->N)) {
            $this->error('Unexpected end of file');
            return false;
        }
        $this->value = $found[0];
        $this->N += strlen($this->value);
        $this->token = self::SUBPATTERN;
        return true;
    }

    /**
     * lexer for processor instructions within declaration comment
     * @return boolean
     */
    private function lexDeclarePI()
    {
        return $this->scanInstructionArgument('Declare');
    }

    /**
     * lexer for processor instructions inside rule comments
     * @return boolean
     */
    private function lexDeclarePIRule()
    {
        return $this->scanInstructionArgument('Rule');
    }

    /**
     * lexer for the state representing scanning between a pattern and the "=" sign
     * @return boolean
     */
    private function lexDeclareEquals()
    {
        $this->skipWhitespace();
        if ($this->N >= strlen($this->data)) {
            $this->error('unexpected end of input, expecting "=" for sub-pattern declaration');
            return false;
        }
        if ($this->data[$this->N] != '=') {
            $this->error('expecting "=" for sub-pattern declaration');
            return false;
        }
        $this->N++;
        $this->state = 'DeclareRightside';
        $this->skipWhitespace();
        if ($this->N >= strlen($this->data)) {
            $this->error('unexpected end of file, expecting right side of sub-pattern declaration');
            return false;
        }
        return $this->lexDeclareRightside();
    }

    /**
     * lexer for the right side of a pattern, detects quotes or regexes
     *
     * A regex is delimited by whatever character it starts with; the
     * delimiter may be escaped with a backslash inside the pattern.
     *
     * @return boolean
     */
    private function lexDeclareRightside()
    {
        $first = $this->charAt($this->N);
        if ($first === "\n") {
            // end of this declaration, go back to scanning declarations
            $this->state = 'Declare';
            $this->N++;
            $this->line++;
            return $this->lexDeclare();
        }
        if ($first === '"') {
            return $this->lexQuote();
        }
        if ($first === '\'') {
            return $this->lexQuote('\'');
        }
        $this->skipWhitespace();
        if ($this->N >= strlen($this->data)) {
            $this->error('unexpected end of file, expecting right side of sub-pattern declaration');
            return false;
        }
        // match a pattern
        $delimiter = $this->data[$this->N];
        $end = $this->N + 1;
        $pass = 0;
        do {
            if ($pass++) {
                // previous hit was an escaped delimiter, search behind it
                $end++;
            }
            $end = strpos($this->data, $delimiter, $end);
        } while ($end !== false && ($this->data[$end - 1] == '\\'
            && $this->data[$end - 2] != '\\'));
        if ($end === false) {
            $this->error('Unterminated regex pattern (started with "' . $delimiter . '"');
            return false;
        }
        if (substr_count($this->data, "\n", $this->N, $end - $this->N)) {
            $this->error('Regex pattern extends over multiple lines');
            return false;
        }
        $this->value = substr($this->data, $this->N + 1, $end - $this->N - 1);
        // unescape the regex marker
        // we will re-escape when creating the final regex
        $this->value = str_replace('\\' . $delimiter, $delimiter, $this->value);
        $this->N = $end + 1;
        $this->token = self::SUBPATTERN;
        return true;
    }

    /**
     * lexer for quoted literals
     *
     * @param string the quote character that opens and closes the literal
     * @return boolean
     */
    private function lexQuote($quote = '"')
    {
        $length = strlen($this->data);
        $close = $this->N + 1;
        $pass = 0;
        do {
            if ($pass++) {
                // previous hit was an escaped quote, search behind it
                $close++;
            }
            $close = strpos($this->data, $quote, $close);
        } while ($close !== false && $close < $length &&
            ($this->data[$close - 1] == '\\' && $this->data[$close - 2] != '\\'));
        if ($close === false) {
            $this->error('unterminated quote');
            return false;
        }
        if (substr_count($this->data, "\n", $this->N, $close - $this->N)) {
            $this->error('quote extends over multiple lines');
            return false;
        }
        $this->value = substr($this->data, $this->N + 1, $close - $this->N - 1);
        $this->value = str_replace('\\' . $quote, $quote, $this->value);
        $this->value = str_replace('\\\\', '\\', $this->value);
        $this->N = $close + 1;
        $this->token = ($quote == '\'') ? self::SINGLEQUOTE : self::QUOTE;
        return true;
    }

    /**
     * lexer for rules
     * @return boolean
     */
    private function lexRule()
    {
        $length = strlen($this->data);
        // skip whitespace, newlines and single-line comments
        while ($this->N < $length) {
            $char = $this->data[$this->N];
            if ($char === '/' && $this->charAt($this->N + 1) === '/') {
                $newline = strpos($this->data, "\n", $this->N);
                if ($newline === false) {
                    // comment runs to the end of the input
                    $this->N = $length;
                } else {
                    $this->N = $newline + 1;
                    $this->line++;
                }
            } elseif ($char === ' ' || $char === "\t" || $char === "\n") {
                if ($char === "\n") {
                    $this->line++;
                }
                $this->N++;
            } else {
                break;
            }
        }
        if ($this->N >= $length) {
            $this->error('unexpected end of input, expecting rule declaration');
            return false;
        }
        if ($this->data[$this->N] == '*' && $this->charAt($this->N + 1) === '/') {
            $this->state = 'StartNonDeclare';
            $this->value = '*/';
            $this->N += 2;
            $this->token = self::COMMENTEND;
            return true;
        }
        if ($this->data[$this->N] == '\'') {
            return $this->lexQuote('\'');
        }
        if (preg_match('/\G%([a-zA-Z_]+)/', $this->data, $found, 0, $this->N)) {
            $this->value = $found[1];
            $this->N += strlen($found[1]) + 1; // + 1 for the leading "%"
            $this->state = 'DeclarePIRule';
            $this->token = self::PI;
            return true;
        }
        if ($this->data[$this->N] == "{") {
            return $this->lexCode();
        }
        if ($this->data[$this->N] == '"') {
            return $this->lexQuote();
        }
        if (preg_match('/\G[a-zA-Z_][a-zA-Z0-9_]*/', $this->data, $found, 0, $this->N)) {
            $this->value = $found[0];
            $this->N += strlen($found[0]);
            $this->token = self::SUBPATTERN;
            return true;
        }
        $this->error('expecting token rule (quotes or sub-patterns)');
        return false;
    }

    /**
     * lexer for php code blocks
     *
     * Expects the current character to be the opening brace and scans to
     * the matching closing brace, ignoring braces inside single-line
     * comments and quoted strings. The token value excludes both braces.
     *
     * @return boolean
     */
    private function lexCode()
    {
        $length = strlen($this->data);
        $cp = $this->N + 1;
        for ($level = 1; $cp < $length && ($level > 1 || $this->data[$cp] != '}'); $cp++) {
            $char = $this->data[$cp];
            if ($char == '{') {
                $level++;
            } elseif ($char == '}') {
                $level--;
            } elseif ($char == '/' && $this->charAt($cp + 1) === '/') {
                // Skip C++ style comments
                $cp += 2;
                $newline = strpos($this->data, "\n", $cp);
                if ($newline === false) {
                    $cp = $length;
                    break;
                }
                $cp = $newline;
            } elseif ($char == "'" || $char == '"') {
                // String and character literals
                $startchar = $char;
                $prevc = 0;
                for ($cp++; $cp < $length && ($this->data[$cp] != $startchar || $prevc === '\\'); $cp++) {
                    if ($prevc === '\\') {
                        // the escaped character cannot itself start an escape
                        $prevc = 0;
                    } else {
                        $prevc = $this->data[$cp];
                    }
                }
            }
        }
        if ($cp >= $length) {
            // error() already counts the failure in $errors
            $this->error("PHP code starting on this line is not terminated before the end of the file.");
            return false;
        }
        $this->value = substr($this->data, $this->N + 1, $cp - $this->N - 1);
        $this->token = self::CODE;
        $this->N = $cp + 1;
        return true;
    }

    /**
     * Skip whitespace characters (spaces and tabs only)
     */
    private function skipWhitespace()
    {
        $length = strlen($this->data);
        while ($this->N < $length
            && ($this->data[$this->N] == ' ' || $this->data[$this->N] == "\t")
        ) {
            $this->N++;
        }
    }

    /**
     * Skip whitespace and EOL characters, counting the lines passed
     */
    private function skipWhitespaceEol()
    {
        $length = strlen($this->data);
        while ($this->N < $length) {
            $char = $this->data[$this->N];
            if ($char == "\n") {
                ++$this->line;
            } elseif ($char != ' ' && $char != "\t") {
                break;
            }
            $this->N++;
        }
    }

    /**
     * Primary scanner
     *
     * In addition to lexing, this properly increments the line number of lexing.
     * This calls the proper sub-lexer based on the current scanning state.
     * On success the token is available in {@link $token} and {@link $value}.
     *
     * @param mixed $parser not used by the scanner
     * @return boolean true if a token was scanned, false at end of input
     *                 or after a scanning error
     */
    public function advance($parser)
    {
        if ($this->N >= strlen($this->data)) {
            return false;
        }
        if ($this->{'lex' . $this->state}()) {
            $this->line += substr_count($this->value, "\n");
            return true;
        }
        return false;
    }
}
?>

View File

@@ -0,0 +1,492 @@
State 0:
start ::= * lexfile
lexfile ::= * declare rules
lexfile ::= * declare PHPCODE rules
lexfile ::= * PHPCODE declare rules
lexfile ::= * PHPCODE declare PHPCODE rules
declare ::= * COMMENTSTART declarations COMMENTEND
PHPCODE shift 17
COMMENTSTART shift 8
start accept
lexfile shift 52
declare shift 6
State 1:
rules ::= reset_rules * rule COMMENTEND
rules ::= reset_rules * PI SUBPATTERN rule COMMENTEND
rules ::= reset_rules * rule COMMENTEND PHPCODE
rules ::= reset_rules * PI SUBPATTERN rule COMMENTEND PHPCODE
rule ::= * rule_subpattern CODE
rule ::= * rule rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
PI shift 29
SUBPATTERN shift 50
QUOTE shift 51
rule shift 12
rule_subpattern shift 18
State 2:
rules ::= COMMENTSTART * rule COMMENTEND
rules ::= COMMENTSTART * PI SUBPATTERN rule COMMENTEND
rules ::= COMMENTSTART * rule COMMENTEND PHPCODE
rules ::= COMMENTSTART * PI SUBPATTERN rule COMMENTEND PHPCODE
rule ::= * rule_subpattern CODE
rule ::= * rule rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
PI shift 34
SUBPATTERN shift 50
QUOTE shift 51
rule shift 11
rule_subpattern shift 18
State 3:
rules ::= COMMENTSTART PI SUBPATTERN * rule COMMENTEND
rules ::= COMMENTSTART PI SUBPATTERN * rule COMMENTEND PHPCODE
rule ::= * rule_subpattern CODE
rule ::= * rule rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
SUBPATTERN shift 50
QUOTE shift 51
rule shift 10
rule_subpattern shift 18
State 4:
lexfile ::= PHPCODE declare * rules
lexfile ::= PHPCODE declare * PHPCODE rules
rules ::= * COMMENTSTART rule COMMENTEND
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND
rules ::= * COMMENTSTART rule COMMENTEND PHPCODE
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND PHPCODE
rules ::= * reset_rules rule COMMENTEND
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND
rules ::= * reset_rules rule COMMENTEND PHPCODE
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND PHPCODE
reset_rules ::= * rules COMMENTSTART
PHPCODE shift 7
COMMENTSTART shift 2
rules shift 25
reset_rules shift 1
State 5:
rules ::= reset_rules PI SUBPATTERN * rule COMMENTEND
rules ::= reset_rules PI SUBPATTERN * rule COMMENTEND PHPCODE
rule ::= * rule_subpattern CODE
rule ::= * rule rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
SUBPATTERN shift 50
QUOTE shift 51
rule shift 13
rule_subpattern shift 18
State 6:
lexfile ::= declare * rules
lexfile ::= declare * PHPCODE rules
rules ::= * COMMENTSTART rule COMMENTEND
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND
rules ::= * COMMENTSTART rule COMMENTEND PHPCODE
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND PHPCODE
rules ::= * reset_rules rule COMMENTEND
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND
rules ::= * reset_rules rule COMMENTEND PHPCODE
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND PHPCODE
reset_rules ::= * rules COMMENTSTART
PHPCODE shift 9
COMMENTSTART shift 2
rules shift 33
reset_rules shift 1
State 7:
lexfile ::= PHPCODE declare PHPCODE * rules
rules ::= * COMMENTSTART rule COMMENTEND
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND
rules ::= * COMMENTSTART rule COMMENTEND PHPCODE
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND PHPCODE
rules ::= * reset_rules rule COMMENTEND
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND
rules ::= * reset_rules rule COMMENTEND PHPCODE
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND PHPCODE
reset_rules ::= * rules COMMENTSTART
COMMENTSTART shift 2
rules shift 27
reset_rules shift 1
State 8:
declare ::= COMMENTSTART * declarations COMMENTEND
declarations ::= * processing_instructions pattern_declarations
processing_instructions ::= * PI SUBPATTERN
processing_instructions ::= * PI CODE
processing_instructions ::= * processing_instructions PI SUBPATTERN
processing_instructions ::= * processing_instructions PI CODE
PI shift 23
declarations shift 28
processing_instructions shift 14
State 9:
lexfile ::= declare PHPCODE * rules
rules ::= * COMMENTSTART rule COMMENTEND
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND
rules ::= * COMMENTSTART rule COMMENTEND PHPCODE
rules ::= * COMMENTSTART PI SUBPATTERN rule COMMENTEND PHPCODE
rules ::= * reset_rules rule COMMENTEND
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND
rules ::= * reset_rules rule COMMENTEND PHPCODE
rules ::= * reset_rules PI SUBPATTERN rule COMMENTEND PHPCODE
reset_rules ::= * rules COMMENTSTART
COMMENTSTART shift 2
rules shift 31
reset_rules shift 1
State 10:
rules ::= COMMENTSTART PI SUBPATTERN rule * COMMENTEND
rules ::= COMMENTSTART PI SUBPATTERN rule * COMMENTEND PHPCODE
rule ::= rule * rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
COMMENTEND shift 30
SUBPATTERN shift 50
QUOTE shift 51
rule_subpattern shift 19
State 11:
rules ::= COMMENTSTART rule * COMMENTEND
rules ::= COMMENTSTART rule * COMMENTEND PHPCODE
rule ::= rule * rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
COMMENTEND shift 32
SUBPATTERN shift 50
QUOTE shift 51
rule_subpattern shift 19
State 12:
rules ::= reset_rules rule * COMMENTEND
rules ::= reset_rules rule * COMMENTEND PHPCODE
rule ::= rule * rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
COMMENTEND shift 35
SUBPATTERN shift 50
QUOTE shift 51
rule_subpattern shift 19
State 13:
rules ::= reset_rules PI SUBPATTERN rule * COMMENTEND
rules ::= reset_rules PI SUBPATTERN rule * COMMENTEND PHPCODE
rule ::= rule * rule_subpattern CODE
rule_subpattern ::= * QUOTE
rule_subpattern ::= * SUBPATTERN
rule_subpattern ::= * rule_subpattern QUOTE
rule_subpattern ::= * rule_subpattern SUBPATTERN
COMMENTEND shift 24
SUBPATTERN shift 50
QUOTE shift 51
rule_subpattern shift 19
State 14:
declarations ::= processing_instructions * pattern_declarations
processing_instructions ::= processing_instructions * PI SUBPATTERN
processing_instructions ::= processing_instructions * PI CODE
pattern_declarations ::= * PATTERN subpattern
pattern_declarations ::= * pattern_declarations PATTERN subpattern
PI shift 20
PATTERN shift 16
pattern_declarations shift 26
State 15:
pattern_declarations ::= pattern_declarations PATTERN * subpattern
subpattern ::= * QUOTE
subpattern ::= * SUBPATTERN
subpattern ::= * subpattern QUOTE
subpattern ::= * subpattern SUBPATTERN
SUBPATTERN shift 36
QUOTE shift 37
subpattern shift 21
State 16:
pattern_declarations ::= PATTERN * subpattern
subpattern ::= * QUOTE
subpattern ::= * SUBPATTERN
subpattern ::= * subpattern QUOTE
subpattern ::= * subpattern SUBPATTERN
SUBPATTERN shift 36
QUOTE shift 37
subpattern shift 22
State 17:
lexfile ::= PHPCODE * declare rules
lexfile ::= PHPCODE * declare PHPCODE rules
declare ::= * COMMENTSTART declarations COMMENTEND
COMMENTSTART shift 8
declare shift 4
State 18:
rule ::= rule_subpattern * CODE
rule_subpattern ::= rule_subpattern * QUOTE
rule_subpattern ::= rule_subpattern * SUBPATTERN
SUBPATTERN shift 54
CODE shift 47
QUOTE shift 53
State 19:
rule ::= rule rule_subpattern * CODE
rule_subpattern ::= rule_subpattern * QUOTE
rule_subpattern ::= rule_subpattern * SUBPATTERN
SUBPATTERN shift 54
CODE shift 45
QUOTE shift 53
State 20:
processing_instructions ::= processing_instructions PI * SUBPATTERN
processing_instructions ::= processing_instructions PI * CODE
SUBPATTERN shift 44
CODE shift 40
State 21:
(12) pattern_declarations ::= pattern_declarations PATTERN subpattern *
subpattern ::= subpattern * QUOTE
subpattern ::= subpattern * SUBPATTERN
SUBPATTERN shift 38
QUOTE shift 41
{default} reduce 12
State 22:
(11) pattern_declarations ::= PATTERN subpattern *
subpattern ::= subpattern * QUOTE
subpattern ::= subpattern * SUBPATTERN
SUBPATTERN shift 38
QUOTE shift 41
{default} reduce 11
State 23:
processing_instructions ::= PI * SUBPATTERN
processing_instructions ::= PI * CODE
SUBPATTERN shift 42
CODE shift 39
State 24:
(18) rules ::= reset_rules PI SUBPATTERN rule COMMENTEND *
rules ::= reset_rules PI SUBPATTERN rule COMMENTEND * PHPCODE
PHPCODE shift 48
{default} reduce 18
State 25:
(3) lexfile ::= PHPCODE declare rules *
reset_rules ::= rules * COMMENTSTART
COMMENTSTART shift 43
{default} reduce 3
State 26:
(6) declarations ::= processing_instructions pattern_declarations *
pattern_declarations ::= pattern_declarations * PATTERN subpattern
PATTERN shift 15
{default} reduce 6
State 27:
(4) lexfile ::= PHPCODE declare PHPCODE rules *
reset_rules ::= rules * COMMENTSTART
COMMENTSTART shift 43
{default} reduce 4
State 28:
declare ::= COMMENTSTART declarations * COMMENTEND
COMMENTEND shift 55
State 29:
rules ::= reset_rules PI * SUBPATTERN rule COMMENTEND
rules ::= reset_rules PI * SUBPATTERN rule COMMENTEND PHPCODE
SUBPATTERN shift 5
State 30:
(14) rules ::= COMMENTSTART PI SUBPATTERN rule COMMENTEND *
rules ::= COMMENTSTART PI SUBPATTERN rule COMMENTEND * PHPCODE
PHPCODE shift 46
{default} reduce 14
State 31:
(2) lexfile ::= declare PHPCODE rules *
reset_rules ::= rules * COMMENTSTART
COMMENTSTART shift 43
{default} reduce 2
State 32:
(13) rules ::= COMMENTSTART rule COMMENTEND *
rules ::= COMMENTSTART rule COMMENTEND * PHPCODE
PHPCODE shift 56
{default} reduce 13
State 33:
(1) lexfile ::= declare rules *
reset_rules ::= rules * COMMENTSTART
COMMENTSTART shift 43
{default} reduce 1
State 34:
rules ::= COMMENTSTART PI * SUBPATTERN rule COMMENTEND
rules ::= COMMENTSTART PI * SUBPATTERN rule COMMENTEND PHPCODE
SUBPATTERN shift 3
State 35:
(17) rules ::= reset_rules rule COMMENTEND *
rules ::= reset_rules rule COMMENTEND * PHPCODE
PHPCODE shift 49
{default} reduce 17
State 36:
(29) subpattern ::= SUBPATTERN *
{default} reduce 29
State 37:
(28) subpattern ::= QUOTE *
{default} reduce 28
State 38:
(31) subpattern ::= subpattern SUBPATTERN *
{default} reduce 31
State 39:
(8) processing_instructions ::= PI CODE *
{default} reduce 8
State 40:
(10) processing_instructions ::= processing_instructions PI CODE *
{default} reduce 10
State 41:
(30) subpattern ::= subpattern QUOTE *
{default} reduce 30
State 42:
(7) processing_instructions ::= PI SUBPATTERN *
{default} reduce 7
State 43:
(21) reset_rules ::= rules COMMENTSTART *
{default} reduce 21
State 44:
(9) processing_instructions ::= processing_instructions PI SUBPATTERN *
{default} reduce 9
State 45:
(23) rule ::= rule rule_subpattern CODE *
{default} reduce 23
State 46:
(16) rules ::= COMMENTSTART PI SUBPATTERN rule COMMENTEND PHPCODE *
{default} reduce 16
State 47:
(22) rule ::= rule_subpattern CODE *
{default} reduce 22
State 48:
(20) rules ::= reset_rules PI SUBPATTERN rule COMMENTEND PHPCODE *
{default} reduce 20
State 49:
(19) rules ::= reset_rules rule COMMENTEND PHPCODE *
{default} reduce 19
State 50:
(25) rule_subpattern ::= SUBPATTERN *
{default} reduce 25
State 51:
(24) rule_subpattern ::= QUOTE *
{default} reduce 24
State 52:
(0) start ::= lexfile *
{default} reduce 0
State 53:
(26) rule_subpattern ::= rule_subpattern QUOTE *
{default} reduce 26
State 54:
(27) rule_subpattern ::= rule_subpattern SUBPATTERN *
{default} reduce 27
State 55:
(5) declare ::= COMMENTSTART declarations COMMENTEND *
{default} reduce 5
State 56:
(15) rules ::= COMMENTSTART rule COMMENTEND PHPCODE *
{default} reduce 15

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,795 @@
%name PHP_LexerGenerator_Parser
%declare_class {class PHP_LexerGenerator_Parser}
%include {
/* ?><?php {//*/
/**
* PHP_LexerGenerator, a php 5 lexer generator.
*
* This lexer generator takes a file in a format similar to
* re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_LexerGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category php
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: Parser.y 246683 2007-11-22 04:43:52Z instance $
* @since File available since Release 0.1.0
*/
/**
* For regular expression validation
*/
require_once 'PHP/LexerGenerator/Regex/Lexer.php';
require_once 'PHP/LexerGenerator/Regex/Parser.php';
require_once 'PHP/LexerGenerator/Exception.php';
/**
* Token parser for plex files.
*
* This parser converts tokens pulled from {@link PHP_LexerGenerator_Lexer}
* into abstract patterns and rules, then creates the output file
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version @package_version@
* @since Class available since Release 0.1.0
*/
}
%syntax_error {
    /* Report the position and the offending token, then dump the parser stack. */
    echo "Syntax Error on line " . $this->lex->line . ": token '" .
        $this->lex->value . "' while parsing rule:";
    foreach ($this->yystack as $entry) {
        echo $this->tokenName($entry->major) . ' ';
    }
    /* Start from an empty list so implode() below always receives an array, */
    /* even when no token is acceptable in the current state. */
    $expect = array();
    foreach ($this->yy_get_expected_tokens($yymajor) as $token) {
        $expect[] = self::$yyTokenName[$token];
    }
    throw new Exception('Unexpected ' . $this->tokenName($yymajor) . '(' . $TOKEN
        . '), expected one of: ' . implode(',', $expect));
}
%include_class {
/* NOTE(review): not referenced in the visible part of this file; presumably the declared sub-patterns, confirm. */
private $patterns;
/* Handle of the generated lexer file; opened for writing by the constructor and written with fwrite. */
private $out;
/* Plex file lexer handed to the constructor; its line and value are read when a syntax error is reported. */
private $lex;
/* Text of the PHP expression for the input string, spliced into the generated lexer code. */
private $input;
/* Text of the PHP expression for the current input offset, spliced into the generated lexer code. */
private $counter;
/* Text of the PHP expression that receives the matched token index in the generated lexer code. */
private $token;
/* Text of the PHP expression that receives the matched text in the generated lexer code. */
private $value;
/* Text of the PHP expression for the line counter, spliced into the generated lexer code. */
private $line;
/* NOTE(review): not referenced in the visible part of this file; presumably selects longest-match generation, confirm. */
private $matchlongest;
/* Regex lexer created by the constructor with an empty input. */
private $_regexLexer;
/* Regex parser created by the constructor around the regex lexer. */
private $_regexParser;
/* NOTE(review): usage not visible here; confirm meaning against the rest of the file. */
private $_patternIndex = 0;
/* NOTE(review): usage not visible here; confirm meaning against the rest of the file. */
private $_outRuleIndex = 1;
/* NOTE(review): usage not visible here; presumably a case-insensitivity switch, confirm. */
private $caseinsensitive;
/* Regex modifier text appended after the closing delimiter of every generated pattern. */
private $patternFlags;
/* NOTE(review): usage not visible here; presumably a unicode switch, confirm. */
private $unicode;
/* Maps the integers 1 to 9 to the token constants of this parser. */
public $transTable = array(
1 => self::PHPCODE,
2 => self::COMMENTSTART,
3 => self::COMMENTEND,
4 => self::QUOTE,
5 => self::SINGLEQUOTE,
6 => self::PATTERN,
7 => self::CODE,
8 => self::SUBPATTERN,
9 => self::PI,
);
/**
 * Open the output file and wire up the helper objects used for
 * validating regular expressions.
 *
 * @param string $outfile path of the lexer file to generate (opened 'wb')
 * @param object $lex     lexer feeding this parser; its line/value are
 *                        read when reporting errors
 * @throws Exception when the output file cannot be opened
 */
function __construct($outfile, $lex)
{
    $this->out = fopen($outfile, 'wb');
    if ($this->out === false) {
        $message = 'unable to open lexer output file "' . $outfile . '"';
        throw new Exception($message);
    }
    $regexLexer = new PHP_LexerGenerator_Regex_Lexer('');
    $this->lex = $lex;
    $this->_regexLexer = $regexLexer;
    $this->_regexParser = new PHP_LexerGenerator_Regex_Parser($regexLexer);
}
/**
 * Write the body of a generated yylexN() method that tries every rule and
 * keeps the longest match.
 *
 * Everything inside the single-quoted strings is emitted verbatim into the
 * generated lexer; the configured %counter/%input/%token/%value/%line
 * expressions are spliced in by concatenation.
 *
 * Fixes relative to the previous template:
 *  - the yymore loop condition was "$r !== null || !$r", which is true for
 *    every value of $r; it now matches doFirstMatch();
 *  - the yymore rule callback is now passed $yysubmatches, as the generated
 *    yy_rN_M($yy_subpatterns) signature requires;
 *  - array_slice() now uses the configured %token expression instead of a
 *    hard-coded "$this->token".
 *
 * NOTE(review): the yymore branch still feeds already-delimited patterns
 * back into preg_match and assigns (not adds to) the line counter; it looks
 * unfinished upstream -- confirm before relying on yymore with %matchlongest.
 *
 * @param array  $rules     rule records; each provides a 'pattern' entry
 * @param mixed  $statename not used by this method
 * @param int    $ruleindex index N used in the yy_rN_M callback names
 * @return void
 */
function doLongestMatch($rules, $statename, $ruleindex)
{
    // Prologue: end-of-input guard, then open the literal pattern array.
    fwrite($this->out, '
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
do {
$rules = array(');
    // One anchored (\G) regular expression per rule.
    foreach ($rules as $rule) {
        fwrite($this->out, '
\'/\G' . $rule['pattern'] . '/' . $this->patternFlags . ' \',');
    }
    // Matching loop plus dispatch on the rule callback's return value.
    fwrite($this->out, '
);
$match = false;
foreach ($rules as $index => $rule) {
if (preg_match($rule, substr(' . $this->input . ', ' .
        $this->counter . '), $yymatches)) {
if ($match) {
if (strlen($yymatches[0]) > strlen($match[0][0])) {
$match = array($yymatches, $index); // matches, token
}
} else {
$match = array($yymatches, $index);
}
}
}
if (!$match) {
throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
}
' . $this->token . ' = $match[1];
' . $this->value . ' = $match[0][0];
$yysubmatches = $match[0];
array_shift($yysubmatches);
if (!$yysubmatches) {
$yysubmatches = array();
}
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
if ($r === null) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
// accept this token
return true;
} elseif ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {');
    // yymore handling: retry with the remaining rules until a callback
    // returns null or a boolean.
    fwrite($this->out, '
$yy_yymore_patterns = array_slice($rules, ' . $this->token . ', true);
// yymore is needed
do {
if (!isset($yy_yymore_patterns[' . $this->token . '])) {
throw new Exception(\'cannot do yymore for the last token\');
}
$match = false;
foreach ($yy_yymore_patterns[' . $this->token . '] as $index => $rule) {
if (preg_match(\'/\' . $rule . \'/' . $this->patternFlags . '\',
' . $this->input . ', $yymatches, null, ' . $this->counter . ')) {
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
if ($match) {
if (strlen($yymatches[0]) > strlen($match[0][0])) {
$match = array($yymatches, $index); // matches, token
}
} else {
$match = array($yymatches, $index);
}
}
}
if (!$match) {
throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
}
' . $this->token . ' = $match[1];
' . $this->value . ' = $match[0][0];
$yysubmatches = $match[0];
array_shift($yysubmatches);
if (!$yysubmatches) {
$yysubmatches = array();
}
' . $this->line . ' = substr_count(' . $this->value . ', "\n");
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
} while ($r !== null && !is_bool($r));
if ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} else {
// accept
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
return true;
}
}
} while (true);
');
}
/**
 * Write the body of a generated yylexN() method that uses one combined
 * regular expression and takes the first alternative that matches.
 *
 * Each rule is wrapped in its own capture group prefixed with \G and the
 * groups are joined with "|". The emitted $tokenMap maps the capture-group
 * number of each rule to the number of sub-patterns that rule contains, so
 * the generated code can slice out the sub-matches for the rule callback.
 *
 * Fix relative to the previous template: the emitted "Unexpected input at
 * line" message now has the trailing space that doLongestMatch() emits, so
 * the line number is no longer glued to the word "line".
 *
 * NOTE(review): the emitted yymore branch assigns (rather than adds to) the
 * line counter; that looks unintended but is left as upstream -- confirm.
 *
 * @param array  $rules     rule records providing 'pattern' and 'subpatterns'
 * @param mixed  $statename state name, only used in the empty-match message
 * @param int    $ruleindex index N used in the yy_rN_M callback names
 * @return void
 */
function doFirstMatch($rules, $statename, $ruleindex)
{
    $patterns = array();
    $pattern = '/';
    $ruleMap = array();
    $tokenindex = array();
    $actualindex = 1;
    $i = 0;
    foreach ($rules as $rule) {
        // rule position => number of its outer capture group
        $ruleMap[$i++] = $actualindex;
        $tokenindex[$actualindex] = $rule['subpatterns'];
        // skip past this rule's own group and its nested groups
        $actualindex += $rule['subpatterns'] + 1;
        $patterns[] = '\G(' . $rule['pattern'] . ')';
    }
    // Re-index tokencount from zero.
    $tokencount = array_values($tokenindex);
    $tokenindex = var_export($tokenindex, true);
    $tokenindex = explode("\n", $tokenindex);
    // indent for prettiness
    $tokenindex = implode("\n ", $tokenindex);
    $pattern .= implode('|', $patterns);
    $pattern .= '/' . $this->patternFlags;
    fwrite($this->out, '
$tokenMap = ' . $tokenindex . ';
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
');
    fwrite($this->out, '$yy_global_pattern = \'' .
        $pattern . '\';' . "\n");
    fwrite($this->out, '
do {
if (preg_match($yy_global_pattern,' . $this->input . ', $yymatches, null, ' .
        $this->counter .
        ')) {
$yysubmatches = $yymatches;
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
if (!count($yymatches)) {
throw new Exception(\'Error: lexing failed because a rule matched\' .
\' an empty string. Input "\' . substr(' . $this->input . ',
' . $this->counter . ', 5) . \'... state ' . $statename . '\');
}
next($yymatches); // skip global match
' . $this->token . ' = key($yymatches); // token number
if ($tokenMap[' . $this->token . ']) {
// extract sub-patterns for passing to lex function
$yysubmatches = array_slice($yysubmatches, ' . $this->token . ' + 1,
$tokenMap[' . $this->token . ']);
} else {
$yysubmatches = array();
}
' . $this->value . ' = current($yymatches); // token value
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
if ($r === null) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
// accept this token
return true;
} elseif ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {');
    fwrite($this->out, '
$yy_yymore_patterns = array(' . "\n");
    // For every rule emit the alternation of the rules that FOLLOW it,
    // together with the running count of sub-patterns consumed so far.
    $extra = 0;
    for($i = 0; count($patterns); $i++) {
        unset($patterns[$i]);
        $extra += $tokencount[0];
        array_shift($tokencount);
        fwrite($this->out, ' ' . $ruleMap[$i] . ' => array(' . $extra . ', "' .
            implode('|', $patterns) . "\"),\n");
    }
    fwrite($this->out, ' );' . "\n");
    fwrite($this->out, '
// yymore is needed
do {
if (!strlen($yy_yymore_patterns[' . $this->token . '][1])) {
throw new Exception(\'cannot do yymore for the last token\');
}
$yysubmatches = array();
if (preg_match(\'/\' . $yy_yymore_patterns[' . $this->token . '][1] . \'/' . $this->patternFlags . '\',
' . $this->input . ', $yymatches, null, ' . $this->counter .')) {
$yysubmatches = $yymatches;
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
next($yymatches); // skip global match
' . $this->token . ' += key($yymatches) + $yy_yymore_patterns[' . $this->token . '][0]; // token number
' . $this->value . ' = current($yymatches); // token value
' . $this->line . ' = substr_count(' . $this->value . ', "\n");
if ($tokenMap[' . $this->token . ']) {
// extract sub-patterns for passing to lex function
$yysubmatches = array_slice($yysubmatches, ' . $this->token . ' + 1,
$tokenMap[' . $this->token . ']);
} else {
$yysubmatches = array();
}
}
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
} while ($r !== null && !is_bool($r));
if ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {
// accept
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
return true;
}
}
} else {
throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
}
break;
} while (true);
');
}
/**
 * Turn a pattern fragment into one that matches regardless of letter case
 * by replacing every ASCII letter x with the character class [xX].
 *
 * The input is lower-cased first, so 'Ab' becomes '[aA][bB]'. All other
 * bytes are copied through unchanged.
 *
 * The previous implementation used preg_replace() with the /e modifier,
 * which is deprecated as of PHP 5.5 and removed in PHP 7. strtr() with a
 * replacement map makes a single pass and never rescans its own output,
 * so the letters inside the inserted classes are not expanded again.
 *
 * @param string $string pattern fragment (already regex-quoted by callers)
 * @return string fragment with each letter expanded to a two-letter class
 */
function makeCaseInsensitve($string)
{
    static $classes = null;
    if ($classes === null) {
        $classes = array();
        foreach (range('a', 'z') as $letter) {
            $classes[$letter] = '[' . $letter . strtoupper($letter) . ']';
        }
    }
    return strtr(strtolower($string), $classes);
}
/**
 * Emit one lexer state: the yylexN() method, an optional class constant
 * naming the state, and one yy_rN_M() callback per rule.
 *
 * N is the current value of $this->_outRuleIndex, which is incremented at
 * the end so the next rule section gets the next number. M is the rule's
 * own key in longest-match mode, or the number of the rule's outer capture
 * group in first-match mode.
 *
 * @param array  $rules     rule records ('pattern', 'code', 'subpatterns')
 * @param string $statename name from %statename, or an empty value
 * @return void
 */
function outputRules($rules, $statename)
{
    $stateIndex = $this->_outRuleIndex;
    if (!$statename) {
        // unnamed states are identified by their number only
        $statename = $stateIndex;
    }
    fwrite($this->out, "\nfunction yylex" . $stateIndex . "()\n{");
    $ruleMap = array();
    if ($this->matchlongest) {
        foreach (array_keys($rules) as $key) {
            $ruleMap[$key] = $key;
        }
        $this->doLongestMatch($rules, $statename, $stateIndex);
    } else {
        $groupIndex = 1;
        $position = 0;
        foreach ($rules as $rule) {
            $ruleMap[$position] = $groupIndex;
            $position++;
            // step over this rule's group and the groups nested in it
            $groupIndex += $rule['subpatterns'] + 1;
        }
        $this->doFirstMatch($rules, $statename, $stateIndex);
    }
    fwrite($this->out, "\n} // end function\n");
    if (is_string($statename)) {
        fwrite($this->out, "\nconst " . $statename . ' = ' . $stateIndex . ";\n");
    }
    foreach ($rules as $key => $rule) {
        $header = ' function yy_r' . $stateIndex . '_' . $ruleMap[$key]
            . '($yy_subpatterns)' . "\n{\n";
        fwrite($this->out, $header . $rule['code'] . " }\n");
    }
    // for next set of rules
    $this->_outRuleIndex = $stateIndex + 1;
}
/**
 * Print an error message prefixed with the current line of the plex lexer.
 *
 * @param string $msg text to print after the line prefix
 * @return void
 */
function error($msg)
{
    $currentLine = $this->lex->line;
    echo 'Error on line ' . $currentLine . ': ', $msg;
}
/**
 * Run a regular expression through the regex lexer/parser pair and return
 * the parser's result.
 *
 * @param string $pattern regular expression source to check
 * @param bool   $update  forwarded to the regex parser's reset()
 * @return mixed the regex parser's $result after a successful parse
 * @throws PHP_LexerGenerator_Exception when the regex parser rejects the
 *         pattern; the original message is printed through error() first
 */
function _validatePattern($pattern, $update = false)
{
    $regexLexer = $this->_regexLexer;
    $regexParser = $this->_regexParser;
    $regexLexer->reset($pattern, $this->lex->line);
    $regexParser->reset($this->_patternIndex, $update);
    try {
        while ($regexLexer->yylex()) {
            $regexParser->doParse($regexLexer->token, $regexLexer->value);
        }
        // signal end of input to the parser
        $regexParser->doParse(0, 0);
    } catch (PHP_LexerGenerator_Exception $failure) {
        $this->error($failure->getMessage());
        throw new PHP_LexerGenerator_Exception('Invalid pattern "' . $pattern . '"');
    }
    return $regexParser->result;
}
}
start ::= lexfile.
lexfile ::= declare rules(B). {
// File layout: declaration block followed directly by rule sections.
// First emit the state-machine members every generated lexer gets
// (current state, state stack, yylex dispatcher, push/pop/begin helpers),
// then one lexer state per rule section plus any trailing PHP code.
fwrite($this->out, '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
');
foreach (B as $rule) {
$this->outputRules($rule['rules'], $rule['statename']);
if ($rule['code']) {
fwrite($this->out, $rule['code']);
}
}
}
lexfile ::= declare(D) PHPCODE(B) rules(C). {
// File layout: declaration block, literal PHP code, then rule sections.
// The state-machine members are written first, the literal PHP code is
// copied through after them, and the rule sections are emitted last.
fwrite($this->out, '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
');
if (strlen(B)) {
fwrite($this->out, B);
}
foreach (C as $rule) {
$this->outputRules($rule['rules'], $rule['statename']);
if ($rule['code']) {
fwrite($this->out, $rule['code']);
}
}
}
lexfile ::= PHPCODE(B) declare(D) rules(C). {
// File layout: literal PHP code, declaration block, then rule sections.
// The leading PHP code is copied through before the state-machine
// members, and the rule sections are emitted last.
if (strlen(B)) {
fwrite($this->out, B);
}
fwrite($this->out, '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
');
foreach (C as $rule) {
$this->outputRules($rule['rules'], $rule['statename']);
if ($rule['code']) {
fwrite($this->out, $rule['code']);
}
}
}
lexfile ::= PHPCODE(A) declare(D) PHPCODE(B) rules(C). {
// File layout: PHP code, declaration block, more PHP code, rule sections.
// Output order: leading PHP code, state-machine members, the second
// chunk of PHP code, then one lexer state per rule section.
if (strlen(A)) {
fwrite($this->out, A);
}
fwrite($this->out, '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
');
if (strlen(B)) {
fwrite($this->out, B);
}
foreach (C as $rule) {
$this->outputRules($rule['rules'], $rule['statename']);
if ($rule['code']) {
fwrite($this->out, $rule['code']);
}
}
}
declare(A) ::= COMMENTSTART declarations(B) COMMENTEND. {
// The declaration comment is complete: pass its content up, remember the
// named patterns so that rule sections can look them up, and restart the
// sub-pattern counter at 1.
A = B;
$this->patterns = B['patterns'];
$this->_patternIndex = 1;
}
declarations(A) ::= processing_instructions(B) pattern_declarations(C). {
    // Mandatory processing instructions: every one of these has to appear
    // in the declaration block.
    $expected = array(
        'counter' => true,
        'input' => true,
        'token' => true,
        'value' => true,
        'line' => true,
    );
    foreach (B as $pi) {
        if (isset($expected[$pi['pi']])) {
            unset($expected[$pi['pi']]);
        }
    }
    // Check only after all instructions were seen. Previously this test sat
    // inside the loop, so it never ran when mandatory instructions were
    // simply missing and it fired wrongly when an optional instruction
    // appeared before the mandatory ones.
    if (count($expected)) {
        throw new Exception('Processing Instructions "' .
            implode(', ', array_keys($expected)) . '" must be defined');
    }
    // Full set of recognised instructions; each one is stored in the
    // property of the same name.
    $expected = array(
        'caseinsensitive' => true,
        'counter' => true,
        'input' => true,
        'token' => true,
        'value' => true,
        'line' => true,
        'matchlongest' => true,
        'unicode' => true,
    );
    foreach (B as $pi) {
        if (isset($expected[$pi['pi']])) {
            $this->{$pi['pi']} = $pi['definition'];
            if ($pi['pi'] == 'matchlongest') {
                // presence alone switches the mode on, whatever was written
                $this->matchlongest = true;
            }
            continue;
        }
        // unknown instructions are reported but do not abort the parse
        $this->error('Unknown processing instruction %' . $pi['pi'] .
            ', should be one of "' . implode(', ', array_keys($expected)) . '"');
    }
    // preg modifiers appended to every generated pattern
    $this->patternFlags = ($this->caseinsensitive ? 'i' : '')
        . ($this->unicode ? 'u' : '');
    A = array('patterns' => C, 'pis' => B);
    $this->_patternIndex = 1;
}
processing_instructions(A) ::= PI(B) SUBPATTERN(C). {
// first instruction: start the list with one name/definition record
A = array(array('pi' => B, 'definition' => C));
}
processing_instructions(A) ::= PI(B) CODE(C). {
// same as above, the definition is given as a code block
A = array(array('pi' => B, 'definition' => C));
}
processing_instructions(A) ::= processing_instructions(P) PI(B) SUBPATTERN(C). {
// further instruction: append to the list built so far
A = P;
A[] = array('pi' => B, 'definition' => C);
}
processing_instructions(A) ::= processing_instructions(P) PI(B) CODE(C). {
// further instruction with a code-block definition
A = P;
A[] = array('pi' => B, 'definition' => C);
}
pattern_declarations(A) ::= PATTERN(B) subpattern(C). {
    // first named pattern: start the name => regex map
    A = array(B => C);
    // reset internal indicator of where we are in a pattern
    $this->_patternIndex = 0;
}
pattern_declarations(A) ::= pattern_declarations(B) PATTERN(C) subpattern(D). {
    A = B;
    if (isset(A[C])) {
        // Every subpattern action yields a plain string, so the new
        // definition is concatenated directly (it was read as ->string
        // before, which is not available on a string).
        throw new Exception('Pattern "' . C . '" is already defined as "' .
            A[C] . '", cannot redefine as "' . D . '"');
    }
    A[C] = D;
    // reset internal indicator of where we are in a pattern declaration
    $this->_patternIndex = 0;
}
rules(A) ::= COMMENTSTART rule(B) COMMENTEND. {
// first rule section, unnamed state, no trailing PHP code
A = array(array('rules' => B, 'code' => '', 'statename' => ''));
}
rules(A) ::= COMMENTSTART PI(P) SUBPATTERN(S) rule(B) COMMENTEND. {
// first rule section with a %statename instruction
if (P != 'statename') {
throw new Exception('Error: only %statename processing instruction ' .
'is allowed in rule sections (found ' . P . ').');
}
A = array(array('rules' => B, 'code' => '', 'statename' => S));
}
rules(A) ::= COMMENTSTART rule(B) COMMENTEND PHPCODE(C). {
// first rule section followed by literal PHP code
A = array(array('rules' => B, 'code' => C, 'statename' => ''));
}
rules(A) ::= COMMENTSTART PI(P) SUBPATTERN(S) rule(B) COMMENTEND PHPCODE(C). {
// first rule section with %statename and trailing PHP code
if (P != 'statename') {
throw new Exception('Error: only %statename processing instruction ' .
'is allowed in rule sections (found ' . P . ').');
}
A = array(array('rules' => B, 'code' => C, 'statename' => S));
// NOTE(review): only some of these productions reset _patternIndex here;
// reset_rules also resets it, so this may be redundant -- confirm.
$this->_patternIndex = 1;
}
rules(A) ::= reset_rules(R) rule(B) COMMENTEND. {
// additional rule section: append to the sections collected so far
A = R;
A[] = array('rules' => B, 'code' => '', 'statename' => '');
$this->_patternIndex = 1;
}
rules(A) ::= reset_rules(R) PI(P) SUBPATTERN(S) rule(B) COMMENTEND. {
// additional rule section with a %statename instruction
if (P != 'statename') {
throw new Exception('Error: only %statename processing instruction ' .
'is allowed in rule sections (found ' . P . ').');
}
A = R;
A[] = array('rules' => B, 'code' => '', 'statename' => S);
}
rules(A) ::= reset_rules(R) rule(B) COMMENTEND PHPCODE(C). {
// additional rule section followed by literal PHP code
A = R;
A[] = array('rules' => B, 'code' => C, 'statename' => '');
}
rules(A) ::= reset_rules(R) PI(P) SUBPATTERN(S) rule(B) COMMENTEND PHPCODE(C). {
// additional rule section with %statename and trailing PHP code
if (P != 'statename') {
throw new Exception('Error: only %statename processing instruction ' .
'is allowed in rule sections (found ' . P . ').');
}
A = R;
A[] = array('rules' => B, 'code' => C, 'statename' => S);
}
reset_rules(A) ::= rules(R) COMMENTSTART. {
// another rule comment is opening: keep the sections collected so far
// and restart the sub-pattern counter for the new section
A = R;
$this->_patternIndex = 1;
}
rule(A) ::= rule_subpattern(B) CODE(C). {
// rule_subpattern yields a pair: [0] the regex source, [1] a readable
// name used in messages. The regex is validated, the sub-pattern counter
// is advanced past this rule's groups, and the rule record is started.
$name = B[1];
B = B[0];
B = $this->_validatePattern(B);
$this->_patternIndex += B['subpatterns'] + 1;
// a rule that matches the empty string would never consume input
if (@preg_match('/' . str_replace('/', '\\/', B['pattern']) . '/', '')) {
$this->error('Rule "' . $name . '" can match the empty string, this will break lexing');
}
A = array(array('pattern' => str_replace('/', '\\/', B->string), 'code' => C, 'subpatterns' => B['subpatterns']));
}
rule(A) ::= rule(R) rule_subpattern(B) CODE(C).{
// same processing as the first rule, appended to the list built so far
A = R;
$name = B[1];
B = B[0];
B = $this->_validatePattern(B);
$this->_patternIndex += B['subpatterns'] + 1;
if (@preg_match('/' . str_replace('/', '\\/', B['pattern']) . '/', '')) {
$this->error('Rule "' . $name . '" can match the empty string, this will break lexing');
}
A[] = array('pattern' => str_replace('/', '\\/', B->string), 'code' => C, 'subpatterns' => B['subpatterns']);
}
rule_subpattern(A) ::= QUOTE(B). {
// double-quoted literal: regex-quote it; result is [regex, display name]
A = array(preg_quote(B, '/'), B);
}
rule_subpattern(A) ::= SINGLEQUOTE(B). {
// single-quoted literal: regex-quote it and expand letters to [xX]
A = array($this->makeCaseInsensitve(preg_quote(B, '/')), B);
}
rule_subpattern(A) ::= SUBPATTERN(B). {
// reference to a named pattern from the declaration block
if (!isset($this->patterns[B])) {
$this->error('Undefined pattern "' . B . '" used in rules');
throw new Exception('Undefined pattern "' . B . '" used in rules');
}
A = array($this->patterns[B], B);
}
rule_subpattern(A) ::= rule_subpattern(B) QUOTE(C). {
// concatenation: regex parts are joined, names are joined with a space
A = array(B[0] . preg_quote(C, '/'), B[1] . ' ' . C);
}
rule_subpattern(A) ::= rule_subpattern(B) SINGLEQUOTE(C). {
// concatenation with a case-insensitive literal
A = array(B[0] . $this->makeCaseInsensitve(preg_quote(C, '/')), B[1] . ' ' . C);
}
rule_subpattern(A) ::= rule_subpattern(B) SUBPATTERN(C). {
// concatenation with a named pattern
if (!isset($this->patterns[C])) {
$this->error('Undefined pattern "' . C . '" used in rules');
throw new Exception('Undefined pattern "' . C . '" used in rules');
}
A = array(B[0] . $this->patterns[C], B[1] . ' ' . C);
}
subpattern(A) ::= QUOTE(B). {
// double-quoted literal inside a pattern declaration: regex-quote it
A = preg_quote(B, '/');
}
subpattern(A) ::= SINGLEQUOTE(B). {
// single-quoted literal: regex-quote it and expand letters to [xX]
A = $this->makeCaseInsensitve(preg_quote(B, '/'));
}
subpattern(A) ::= SUBPATTERN(B). {
// increment internal sub-pattern counter
// adjust back-references in pattern based on previous pattern
$test = $this->_validatePattern(B, true);
$this->_patternIndex += $test['subpatterns'];
A = $test['pattern'];
}
subpattern(A) ::= subpattern(B) QUOTE(C). {
// append a quoted literal to the pattern built so far
A = B . preg_quote(C, '/');
}
subpattern(A) ::= subpattern(B) SINGLEQUOTE(C). {
// append a case-insensitive literal to the pattern built so far
A = B . $this->makeCaseInsensitve(preg_quote(C, '/'));
}
subpattern(A) ::= subpattern(B) SUBPATTERN(C). {
// increment internal sub-pattern counter
// adjust back-references in pattern based on previous pattern
$test = $this->_validatePattern(C, true);
$this->_patternIndex += $test['subpatterns'];
A = B . $test['pattern'];
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,285 @@
<?php
require_once 'PHP/LexerGenerator/Regex/Parser.php';
class PHP_LexerGenerator_Regex_Lexer
{
const MATCHSTART = PHP_LexerGenerator_Regex_Parser::MATCHSTART;
const MATCHEND = PHP_LexerGenerator_Regex_Parser::MATCHEND;
const CONTROLCHAR = PHP_LexerGenerator_Regex_Parser::CONTROLCHAR;
const OPENCHARCLASS = PHP_LexerGenerator_Regex_Parser::OPENCHARCLASS;
const FULLSTOP = PHP_LexerGenerator_Regex_Parser::FULLSTOP;
const TEXT = PHP_LexerGenerator_Regex_Parser::TEXT;
const BACKREFERENCE = PHP_LexerGenerator_Regex_Parser::BACKREFERENCE;
const OPENASSERTION = PHP_LexerGenerator_Regex_Parser::OPENASSERTION;
const COULDBEBACKREF = PHP_LexerGenerator_Regex_Parser::COULDBEBACKREF;
const NEGATE = PHP_LexerGenerator_Regex_Parser::NEGATE;
const HYPHEN = PHP_LexerGenerator_Regex_Parser::HYPHEN;
const CLOSECHARCLASS = PHP_LexerGenerator_Regex_Parser::CLOSECHARCLASS;
const BAR = PHP_LexerGenerator_Regex_Parser::BAR;
const MULTIPLIER = PHP_LexerGenerator_Regex_Parser::MULTIPLIER;
const INTERNALOPTIONS = PHP_LexerGenerator_Regex_Parser::INTERNALOPTIONS;
const COLON = PHP_LexerGenerator_Regex_Parser::COLON;
const OPENPAREN = PHP_LexerGenerator_Regex_Parser::OPENPAREN;
const CLOSEPAREN = PHP_LexerGenerator_Regex_Parser::CLOSEPAREN;
const PATTERNNAME = PHP_LexerGenerator_Regex_Parser::PATTERNNAME;
const POSITIVELOOKBEHIND = PHP_LexerGenerator_Regex_Parser::POSITIVELOOKBEHIND;
const NEGATIVELOOKBEHIND = PHP_LexerGenerator_Regex_Parser::NEGATIVELOOKBEHIND;
const POSITIVELOOKAHEAD = PHP_LexerGenerator_Regex_Parser::POSITIVELOOKAHEAD;
const NEGATIVELOOKAHEAD = PHP_LexerGenerator_Regex_Parser::NEGATIVELOOKAHEAD;
const ONCEONLY = PHP_LexerGenerator_Regex_Parser::ONCEONLY;
const COMMENT = PHP_LexerGenerator_Regex_Parser::COMMENT;
const RECUR = PHP_LexerGenerator_Regex_Parser::RECUR;
const ESCAPEDBACKSLASH = PHP_LexerGenerator_Regex_Parser::ESCAPEDBACKSLASH;
private $input;
private $N;
public $token;
public $value;
public $line;
/**
 * Start lexing the given regular expression from its first byte.
 *
 * @param string $data regular expression source to tokenize
 */
function __construct($data)
{
    // read position first, then the text it indexes into
    $this->N = 0;
    $this->input = $data;
}
/**
 * Re-use this lexer for another regular expression.
 *
 * @param string $data regular expression source to tokenize
 * @param int    $line line number, passed in from the parent parser
 * @return void
 */
function reset($data, $line)
{
    $this->line = $line;
    $this->N = 0;
    $this->input = $data;
    // always restart in the INITIAL lexer state
    $this->yybegin(self::INITIAL);
}
/*!lex2php
%input {$this->input}
%counter {$this->N}
%token {$this->token}
%value {$this->value}
%line {$this->line}
NONESCAPE = /[^[\\^$.|()?*+{}]+/
NONESCAPECHARCLASS = /[^\-\\]/
ESCAPEDTHING = /\\[][{}*.^$|?()+]/
ESCAPEDCHARCLASSTHING = /\\[]\.\-\^]/
MULTIPLIER = /\*\?|\+\?|[*?+]|\{[0-9]+\}|\{[0-9]+,\}|\{[0-9]+,[0-9]+\}/
STRINGCHAR = /\\[frnt]|\\x[0-9a-fA-F][0-9a-fA-F]?|\\[0-7][0-7][0-7]|\\x\{[0-9a-fA-F]+\}/
CONTROLCHAR = /\\[abBGcedDsSwW0C]|\\c\\/
COULDBEBACKREF = /\\[0-9][0-9]/
CHARCLASSCONTROLCHAR = /\\[bacedDsSwW0C]|\\c\\|\\x\{[0-9a-fA-F]+\}|\\[0-7][0-7][0-7]|\\x[0-9a-fA-F][0-9a-fA-F]?/
SUBJECTEND = /\\[zZ]/
BACKREF = /\\[1-9]/
UNICODESTUFF = /\\p\{\^?..?\}|\\P\{..?\}|\\X/
PROPERTYCODES = /C[cfnos]?|L[lmotu]?|M[cen]?|N[dlo]?|P[cdefios]?|S[ckmo]?|Z[lps]?/
SIMPLEPROPERTYCODES = /[CLMNPSZ]/
INTERNALOPTIONS = /[imsxUX]+-[imsxUX]+|[imsxUX]+|-[imsxUX]+/
ANYTHING = /./
PATTERNNAME = /[^>]+/
COMMENT = /#[^)]+/
HYPHEN = /-(?!])/
*/
/*!lex2php
%statename INITIAL
"\\\\" {
$this->token = self::ESCAPEDBACKSLASH;
}
NONESCAPE {
$this->token = self::TEXT;
}
ESCAPEDTHING {
$this->token = self::CONTROLCHAR;
}
"[" {
$this->token = self::OPENCHARCLASS;
$this->yybegin(self::CHARACTERCLASSSTART);
}
"|" {
$this->token = self::BAR;
}
STRINGCHAR {
$this->token = self::TEXT;
}
COULDBEBACKREF {
$this->token = self::COULDBEBACKREF;
}
CONTROLCHAR {
$this->token = self::CONTROLCHAR;
}
"^" {
$this->token = self::MATCHSTART;
}
"\\A" {
$this->token = self::MATCHSTART;
}
")" {
$this->token = self::CLOSEPAREN;
$this->yybegin(self::INITIAL);
}
"$" {
$this->token = self::MATCHEND;
}
MULTIPLIER {
$this->token = self::MULTIPLIER;
}
SUBJECTEND {
$this->token = self::MATCHEND;
}
"(?" {
$this->token = self::OPENASSERTION;
$this->yybegin(self::ASSERTION);
}
"(" {
$this->token = self::OPENPAREN;
}
"." {
$this->token = self::FULLSTOP;
}
BACKREF {
$this->token = self::BACKREFERENCE;
}
UNICODESTUFF {
$this->token = self::CONTROLCHAR;
}
"\\p{" PROPERTYCODES "}" {
$this->token = self::CONTROLCHAR;
}
"\\p{^" PROPERTYCODES "}" {
$this->token = self::CONTROLCHAR;
}
"\\p" SIMPLEPROPERTYCODES {
$this->token = self::CONTROLCHAR;
}
"\\" {
return false;
}
*/
/*!lex2php
%statename CHARACTERCLASSSTART
"^" {
$this->token = self::NEGATE;
}
"]" {
$this->yybegin(self::CHARACTERCLASS);
$this->token = self::TEXT;
}
ANYTHING {
$this->yybegin(self::CHARACTERCLASS);
return true;
}
*/
/*!lex2php
%statename CHARACTERCLASS
"\\\\" {
$this->token = self::ESCAPEDBACKSLASH;
}
"]" {
$this->yybegin(self::INITIAL);
$this->token = self::CLOSECHARCLASS;
}
STRINGCHAR {
$this->token = self::TEXT;
}
CHARCLASSCONTROLCHAR {
$this->token = self::TEXT;
}
COULDBEBACKREF {
$this->token = self::COULDBEBACKREF;
}
BACKREF {
$this->token = self::BACKREFERENCE;
}
ESCAPEDCHARCLASSTHING {
$this->token = self::TEXT;
}
HYPHEN {
$this->token = self::HYPHEN;
$this->yybegin(self::RANGE);
}
NONESCAPECHARCLASS {
$this->token = self::TEXT;
}
"\\" {
return false; // ignore escaping of normal text
}
ANYTHING {
$this->token = self::TEXT;
}
*/
/*!lex2php
%statename RANGE
"\\\\" {
$this->token = self::ESCAPEDBACKSLASH;
}
"\\]" {
$this->token = self::TEXT;
$this->yybegin(self::CHARACTERCLASS);
}
CHARCLASSCONTROLCHAR {
$this->token = self::TEXT;
$this->yybegin(self::CHARACTERCLASS);
}
COULDBEBACKREF {
$this->token = self::COULDBEBACKREF;
}
BACKREF {
$this->token = self::BACKREFERENCE;
}
NONESCAPECHARCLASS {
$this->token = self::TEXT;
$this->yybegin(self::CHARACTERCLASS);
}
"\\" {
return false; // ignore escaping of normal text
}
*/
/*!lex2php
%statename ASSERTION
INTERNALOPTIONS {
$this->token = self::INTERNALOPTIONS;
}
":" {
$this->token = self::COLON;
$this->yybegin(self::INITIAL);
}
")" {
$this->token = self::CLOSEPAREN;
$this->yybegin(self::INITIAL);
}
"P<" PATTERNNAME ">" {
$this->token = self::PATTERNNAME;
$this->yybegin(self::INITIAL);
}
"<=" {
$this->token = self::POSITIVELOOKBEHIND;
$this->yybegin(self::INITIAL);
}
"<!" {
$this->token = self::NEGATIVELOOKBEHIND;
$this->yybegin(self::INITIAL);
}
"=" {
$this->token = self::POSITIVELOOKAHEAD;
$this->yybegin(self::INITIAL);
}
"!" {
$this->token = self::NEGATIVELOOKAHEAD;
$this->yybegin(self::INITIAL);
}
">" {
$this->token = self::ONCEONLY;
$this->yybegin(self::INITIAL);
}
"(?" {
$this->token = self::OPENASSERTION;
}
COMMENT {
$this->token = self::COMMENT;
$this->yybegin(self::INITIAL);
}
"R" {
$this->token = self::RECUR;
}
ANYTHING {
$this->yybegin(self::INITIAL);
return true;
}
*/
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,477 @@
%name PHP_LexerGenerator_Regex_
%include {
require_once 'PHP/LexerGenerator/Exception.php';
}
%declare_class {class PHP_LexerGenerator_Regex_Parser}
%syntax_error {
/* ?><?php */
    // we need to add auto-escaping of all stuff that needs it for result.
    // and then validate the original regex only
    echo "Syntax Error on line " . $this->_lex->line . ": token '" .
        $this->_lex->value . "' while parsing rule:";
    foreach ($this->yystack as $entry) {
        echo $this->tokenName($entry->major) . ' ';
    }
    // Start from an empty list so implode() below never receives an
    // undefined variable when no token is expected.
    $expect = array();
    foreach ($this->yy_get_expected_tokens($yymajor) as $token) {
        $expect[] = self::$yyTokenName[$token];
    }
    // NOTE(review): $TOKEN is presumably supplied by the generated driver
    // as the value of the rejected token -- confirm against the template.
    throw new Exception('Unexpected ' . $this->tokenName($yymajor) . '(' . $TOKEN
        . '), expected one of: ' . implode(',', $expect));
}
%include_class {
private $_lex;
private $_subpatterns;
private $_updatePattern;
private $_patternIndex;
public $result;
/**
 * Bind the parser to its lexer and start with an empty result.
 *
 * @param object $lex regex lexer; its line/value are read when a syntax
 *                    error is reported
 */
function __construct($lex)
{
    $this->_lex = $lex;
    // no capture groups seen yet; numbering of groups starts at 1
    $this->_subpatterns = 0;
    $this->_patternIndex = 1;
    $this->result = new PHP_LexerGenerator_ParseryyToken('');
}
/**
 * Prepare the parser for another regular expression.
 *
 * @param int  $patternIndex  offset added to back-reference numbers
 * @param bool $updatePattern whether the 'pattern' metadata should also
 *                            carry the shifted back-reference numbers
 * @return void
 */
function reset($patternIndex, $updatePattern = false)
{
    $this->result = new PHP_LexerGenerator_ParseryyToken('');
    $this->_subpatterns = 0;
    $this->_patternIndex = $patternIndex;
    $this->_updatePattern = $updatePattern;
}
}
%left OPENPAREN OPENASSERTION BAR.
%right MULTIPLIER.
start ::= pattern(B). {
// Whole expression parsed: escape double quotes in the string form,
// record how many sub-patterns were counted, reset the counter and
// publish the token as the parse result.
B->string = str_replace('"', '\\"', B->string);
// metadata is copied out, extended and written back as a whole array
$x = B->metadata;
$x['subpatterns'] = $this->_subpatterns;
B->metadata = $x;
$this->_subpatterns = 0;
$this->result = B;
}
pattern ::= MATCHSTART(B) basic_pattern MATCHEND(C). {
    // Anchors are rejected: the lexer generator adds its own anchoring.
    throw new PHP_LexerGenerator_Exception('Cannot include start match "' .
        B . '" or end match "' . C . '"');
}
pattern ::= MATCHSTART(B) basic_pattern. {
    // The start anchor is now bound on the right-hand side so that the
    // message can actually show which anchor was found.
    throw new PHP_LexerGenerator_Exception('Cannot include start match "' .
        B . '"');
}
pattern ::= basic_pattern MATCHEND(C). {
    throw new PHP_LexerGenerator_Exception('Cannot include end match "' . C . '"');
}
pattern(A) ::= basic_pattern(B). {A = B;}
pattern(A) ::= pattern(B) BAR pattern(C). {
    // alternation: join both the string form and the 'pattern' metadata
    A = new PHP_LexerGenerator_ParseryyToken(B->string . '|' . C->string, array(
        'pattern' => B['pattern'] . '|' . C['pattern']));
}
basic_pattern(A) ::= basic_text(B). {A = B;}
basic_pattern(A) ::= character_class(B). {A = B;}
basic_pattern ::= assertion.
basic_pattern(A) ::= grouping(B). {A = B;}
basic_pattern(A) ::= lookahead(B). {A = B;}
basic_pattern ::= lookbehind.
basic_pattern(A) ::= subpattern(B). {A = B;}
basic_pattern(A) ::= onceonly(B). {A = B;}
basic_pattern(A) ::= comment(B). {A = B;}
basic_pattern(A) ::= recur(B). {A = B;}
basic_pattern(A) ::= conditional(B). {A = B;}
basic_pattern(A) ::= basic_pattern(P) basic_text(B). {
// Concatenation: every two-element production below builds a new token
// whose string form and 'pattern' metadata are the left part followed by
// the right part.
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern(A) ::= basic_pattern(P) character_class(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern ::= basic_pattern assertion.
basic_pattern(A) ::= basic_pattern(P) grouping(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern(A) ::= basic_pattern(P) lookahead(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern ::= basic_pattern lookbehind.
basic_pattern(A) ::= basic_pattern(P) subpattern(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern(A) ::= basic_pattern(P) onceonly(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern(A) ::= basic_pattern(P) comment(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern(A) ::= basic_pattern(P) recur(B). {
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
basic_pattern(A) ::= basic_pattern(P) conditional(B). {
// NOTE(review): the assertion and lookbehind productions above have no
// action, unlike their siblings -- confirm whether that is intended.
A = new PHP_LexerGenerator_ParseryyToken(P->string . B->string, array(
'pattern' => P['pattern'] . B['pattern']));
}
character_class(A) ::= OPENCHARCLASS character_class_contents(B) CLOSECHARCLASS. {
// plain class: wrap the collected contents in [ ]
A = new PHP_LexerGenerator_ParseryyToken('[' . B->string . ']', array(
'pattern' => '[' . B['pattern'] . ']'));
}
character_class(A) ::= OPENCHARCLASS NEGATE character_class_contents(B) CLOSECHARCLASS. {
// negated class: wrap the collected contents in [^ ]
A = new PHP_LexerGenerator_ParseryyToken('[^' . B->string . ']', array(
'pattern' => '[^' . B['pattern'] . ']'));
}
character_class(A) ::= OPENCHARCLASS character_class_contents(B) CLOSECHARCLASS MULTIPLIER(M). {
// plain class followed by a repetition operator
A = new PHP_LexerGenerator_ParseryyToken('[' . B->string . ']' . M, array(
'pattern' => '[' . B['pattern'] . ']' . M));
}
character_class(A) ::= OPENCHARCLASS NEGATE character_class_contents(B) CLOSECHARCLASS MULTIPLIER(M). {
// negated class followed by a repetition operator
A = new PHP_LexerGenerator_ParseryyToken('[^' . B->string . ']' . M, array(
'pattern' => '[^' . B['pattern'] . ']' . M));
}
character_class_contents(A) ::= TEXT(B). {
// First element of a character class: literal text is used unchanged for both
// the string form and the pattern metadata entry.
A = new PHP_LexerGenerator_ParseryyToken(B, array(
'pattern' => B));
}
character_class_contents(A) ::= ESCAPEDBACKSLASH(B). {
// The string form gets two additional backslash characters in front of the
// token text, while the pattern entry keeps the token text as it is.
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . B, array(
'pattern' => B));
}
character_class_contents(A) ::= ESCAPEDBACKSLASH(B) HYPHEN TEXT(C). {
// Range that starts at an escaped backslash; only the string form is re-escaped.
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . B . '-' . C, array(
'pattern' => B . '-' . C));
}
character_class_contents(A) ::= TEXT(B) HYPHEN TEXT(C). {
// Range between two literal characters, emitted unchanged.
A = new PHP_LexerGenerator_ParseryyToken(B . '-' . C, array(
'pattern' => B . '-' . C));
}
character_class_contents(A) ::= TEXT(B) HYPHEN ESCAPEDBACKSLASH(C). {
// Range that ends at an escaped backslash; only the string form is re-escaped.
A = new PHP_LexerGenerator_ParseryyToken(B . '-\\\\' . C, array(
'pattern' => B . '-' . C));
}
character_class_contents(A) ::= BACKREFERENCE(B). {
// The number after the leading character must not exceed the count of
// sub-patterns recorded so far in _subpatterns.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('Back-reference refers to non-existent ' .
'sub-pattern ' . substr(B, 1));
}
// drop the leading character so that only the reference number remains
B = substr(B, 1);
// adjust back-reference for containing ()
// The string form always adds _patternIndex to the number; the pattern entry
// does so only when _updatePattern is set.
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . (B + $this->_patternIndex), array(
'pattern' => '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
character_class_contents(A) ::= COULDBEBACKREF(B). {
// Same handling as a back-reference, but an out-of-range number is reported
// with a hint to use the octal notation instead.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception(B . ' will be interpreted as an invalid' .
' back-reference, use "\\0' . substr(B, 1) . ' for octal');
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . (B + $this->_patternIndex), array(
'pattern' => '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
character_class_contents(A) ::= character_class_contents(D) CONTROLCHAR(B). {
// Recursive rules: each one appends a further element to the contents
// collected so far. A control character gets one extra backslash in the
// string form only.
A = new PHP_LexerGenerator_ParseryyToken(D->string . '\\' . B, array(
'pattern' => D['pattern'] . B));
}
character_class_contents(A) ::= character_class_contents(D) ESCAPEDBACKSLASH(B). {
// An escaped backslash gets two extra backslashes in the string form only.
A = new PHP_LexerGenerator_ParseryyToken(D->string . '\\\\' . B, array(
'pattern' => D['pattern'] . B));
}
character_class_contents(A) ::= character_class_contents(D) TEXT(B). {
// Literal text is appended unchanged to both forms.
A = new PHP_LexerGenerator_ParseryyToken(D->string . B, array(
'pattern' => D['pattern'] . B));
}
character_class_contents(A) ::= character_class_contents(D) ESCAPEDBACKSLASH(B) HYPHEN CONTROLCHAR(C). {
// Range from an escaped backslash to a control character.
A = new PHP_LexerGenerator_ParseryyToken(D->string . '\\\\' . B . '-\\' . C, array(
'pattern' => D['pattern'] . B . '-' . C));
}
character_class_contents(A) ::= character_class_contents(D) ESCAPEDBACKSLASH(B) HYPHEN TEXT(C). {
// Range from an escaped backslash to a literal character.
A = new PHP_LexerGenerator_ParseryyToken(D->string . '\\\\' . B . '-' . C, array(
'pattern' => D['pattern'] . B . '-' . C));
}
character_class_contents(A) ::= character_class_contents(D) TEXT(B) HYPHEN ESCAPEDBACKSLASH(C). {
// Range from a literal character to an escaped backslash.
A = new PHP_LexerGenerator_ParseryyToken(D->string . B . '-\\\\' . C, array(
'pattern' => D['pattern'] . B . '-' . C));
}
character_class_contents(A) ::= character_class_contents(D) TEXT(B) HYPHEN TEXT(C). {
// Range between two literal characters.
A = new PHP_LexerGenerator_ParseryyToken(D->string . B . '-' . C, array(
'pattern' => D['pattern'] . B . '-' . C));
}
character_class_contents(A) ::= character_class_contents(P) BACKREFERENCE(B). {
// Reject a reference number larger than the sub-pattern count in _subpatterns.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('Back-reference refers to non-existent ' .
'sub-pattern ' . substr(B, 1));
}
// keep only the reference number; it is shifted by _patternIndex in the string
// form, and in the pattern entry only when _updatePattern is set
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken(P->string . '\\\\' . (B + $this->_patternIndex), array(
'pattern' => P['pattern'] . '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
character_class_contents(A) ::= character_class_contents(P) COULDBEBACKREF(B). {
// Same as the back-reference case, with the octal hint in the error message.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception(B . ' will be interpreted as an invalid' .
' back-reference, use "\\0' . substr(B, 1) . ' for octal');
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken(P->string . '\\\\' . (B + $this->_patternIndex), array(
'pattern' => P['pattern'] . '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
basic_text(A) ::= TEXT(B). {
// Base rules for a run of text outside of character classes. Literal text is
// copied unchanged into both the string form and the pattern metadata entry.
A = new PHP_LexerGenerator_ParseryyToken(B, array(
'pattern' => B));
}
basic_text(A) ::= TEXT(B) MULTIPLIER(M). {
// Literal text with a multiplier appended verbatim.
A = new PHP_LexerGenerator_ParseryyToken(B . M, array(
'pattern' => B . M));
}
basic_text(A) ::= FULLSTOP(B). {
// The full stop token is passed through unchanged.
A = new PHP_LexerGenerator_ParseryyToken(B, array(
'pattern' => B));
}
basic_text(A) ::= FULLSTOP(B) MULTIPLIER(M). {
// Full stop with a multiplier appended verbatim.
A = new PHP_LexerGenerator_ParseryyToken(B . M, array(
'pattern' => B . M));
}
basic_text(A) ::= CONTROLCHAR(B). {
// A control character gets one extra backslash in the string form only.
A = new PHP_LexerGenerator_ParseryyToken('\\' . B, array(
'pattern' => B));
}
basic_text(A) ::= CONTROLCHAR(B) MULTIPLIER(M). {
// Control character with multiplier; the extra backslash is again limited to
// the string form.
A = new PHP_LexerGenerator_ParseryyToken('\\' . B . M, array(
'pattern' => B . M));
}
basic_text(A) ::= ESCAPEDBACKSLASH(B). {
// An escaped backslash gets two extra backslashes in the string form only.
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . B, array(
'pattern' => B));
}
basic_text(A) ::= ESCAPEDBACKSLASH(B) MULTIPLIER(M). {
// Escaped backslash with a multiplier appended verbatim.
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . B . M, array(
'pattern' => B . M));
}
basic_text(A) ::= BACKREFERENCE(B). {
// A back-reference may only point at a sub-pattern that has already been
// counted in _subpatterns.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('Back-reference refers to non-existent ' .
'sub-pattern ' . substr(B, 1));
}
// drop the leading character so that only the reference number remains
B = substr(B, 1);
// adjust back-reference for containing ()
// The string form always adds _patternIndex; the pattern entry adds it only
// when _updatePattern is set.
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . (B + $this->_patternIndex), array(
'pattern' => '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
basic_text(A) ::= BACKREFERENCE(B) MULTIPLIER(M). {
// Back-reference followed by a multiplier, which is appended verbatim.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('Back-reference refers to non-existent ' .
'sub-pattern ' . substr(B, 1));
}
B = substr(B, 1);
// adjust back-reference for containing ()
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . (B + $this->_patternIndex) . M, array(
'pattern' => '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B) . M));
}
basic_text(A) ::= COULDBEBACKREF(B). {
// Ambiguous escape: treated like a back-reference, but an out-of-range number
// is reported with a hint to use the octal notation instead.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception(B . ' will be interpreted as an invalid' .
' back-reference, use "\\0' . substr(B, 1) . ' for octal');
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . (B + $this->_patternIndex), array(
'pattern' => '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
basic_text(A) ::= COULDBEBACKREF(B) MULTIPLIER(M). {
// Ambiguous escape followed by a multiplier, which is appended verbatim.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception(B . ' will be interpreted as an invalid' .
' back-reference, use "\\0' . substr(B, 1) . ' for octal');
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken('\\\\' . (B + $this->_patternIndex) . M, array(
'pattern' => '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B) . M));
}
basic_text(A) ::= basic_text(T) TEXT(B). {
// Recursive rules: each one appends a further token to the text collected so
// far, applying the same escaping as the matching base rule.
A = new PHP_LexerGenerator_ParseryyToken(T->string . B, array(
'pattern' => T['pattern'] . B));
}
basic_text(A) ::= basic_text(T) TEXT(B) MULTIPLIER(M). {
// Literal text plus multiplier.
A = new PHP_LexerGenerator_ParseryyToken(T->string . B . M, array(
'pattern' => T['pattern'] . B . M));
}
basic_text(A) ::= basic_text(T) FULLSTOP(B). {
// Full stop, passed through unchanged.
A = new PHP_LexerGenerator_ParseryyToken(T->string . B, array(
'pattern' => T['pattern'] . B));
}
basic_text(A) ::= basic_text(T) FULLSTOP(B) MULTIPLIER(M). {
// Full stop plus multiplier.
A = new PHP_LexerGenerator_ParseryyToken(T->string . B . M, array(
'pattern' => T['pattern'] . B . M));
}
basic_text(A) ::= basic_text(T) CONTROLCHAR(B). {
// Control character: one extra backslash in the string form only.
A = new PHP_LexerGenerator_ParseryyToken(T->string . '\\' . B, array(
'pattern' => T['pattern'] . B));
}
basic_text(A) ::= basic_text(T) CONTROLCHAR(B) MULTIPLIER(M). {
// Control character plus multiplier.
A = new PHP_LexerGenerator_ParseryyToken(T->string . '\\' . B . M, array(
'pattern' => T['pattern'] . B . M));
}
basic_text(A) ::= basic_text(T) ESCAPEDBACKSLASH(B). {
// Escaped backslash: two extra backslashes in the string form only.
A = new PHP_LexerGenerator_ParseryyToken(T->string . '\\\\' . B, array(
'pattern' => T['pattern'] . B));
}
basic_text(A) ::= basic_text(T) ESCAPEDBACKSLASH(B) MULTIPLIER(M). {
// Escaped backslash plus multiplier.
A = new PHP_LexerGenerator_ParseryyToken(T->string . '\\\\' . B . M, array(
'pattern' => T['pattern'] . B . M));
}
basic_text(A) ::= basic_text(P) BACKREFERENCE(B). {
// Back-reference appended to existing text. The number must not exceed the
// sub-pattern count held in _subpatterns.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('Back-reference refers to non-existent ' .
'sub-pattern ' . substr(B, 1));
}
// keep only the reference number; it is shifted by _patternIndex in the string
// form, and in the pattern entry only when _updatePattern is set
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken(P->string . '\\\\' . (B + $this->_patternIndex), array(
'pattern' => P['pattern'] . '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
basic_text(A) ::= basic_text(P) BACKREFERENCE(B) MULTIPLIER(M). {
// Back-reference plus multiplier appended to existing text.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('Back-reference refers to non-existent ' .
'sub-pattern ' . substr(B, 1));
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken(P->string . '\\\\' . (B + $this->_patternIndex) . M, array(
'pattern' => P['pattern'] . '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B) . M));
}
basic_text(A) ::= basic_text(P) COULDBEBACKREF(B). {
// Ambiguous escape appended to existing text; an out-of-range number is
// reported with the octal hint.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception(B . ' will be interpreted as an invalid' .
' back-reference, use "\\0' . substr(B, 1) . ' for octal');
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken(P->string . '\\\\' . (B + $this->_patternIndex), array(
'pattern' => P['pattern'] . '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B)));
}
basic_text(A) ::= basic_text(P) COULDBEBACKREF(B) MULTIPLIER(M). {
// Ambiguous escape plus multiplier appended to existing text.
if (((int) substr(B, 1)) > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception(B . ' will be interpreted as an invalid' .
' back-reference, use "\\0' . substr(B, 1) . ' for octal');
}
B = substr(B, 1);
A = new PHP_LexerGenerator_ParseryyToken(P->string . '\\\\' . (B + $this->_patternIndex) . M, array(
'pattern' => P['pattern'] . '\\' . ($this->_updatePattern ? (B + $this->_patternIndex) : B) . M));
}
assertion ::= OPENASSERTION(B) INTERNALOPTIONS(C) CLOSEPAREN(D). {
// Inline option settings are rejected unconditionally; the offending source
// text is rebuilt from the three tokens for the error message.
throw new PHP_LexerGenerator_Exception('Error: cannot set preg options directly with "' .
B . C . D . '"');
}
assertion ::= OPENASSERTION(B) INTERNALOPTIONS(C) COLON(D) pattern(E) CLOSEPAREN(F). {
// Option settings scoped to a group are rejected as well; the message
// includes the pattern entry of the enclosed pattern.
throw new PHP_LexerGenerator_Exception('Error: cannot set preg options directly with "' .
B . C . D . E['pattern'] . F . '"');
}
grouping(A) ::= OPENASSERTION COLON pattern(B) CLOSEPAREN. {
// Group introduced by question mark and colon: the enclosed pattern is
// wrapped again in that syntax for both the string form and the pattern entry.
// The sub-pattern counter is not touched here.
A = new PHP_LexerGenerator_ParseryyToken('(?:' . B->string . ')', array(
'pattern' => '(?:' . B['pattern'] . ')'));
}
grouping(A) ::= OPENASSERTION COLON pattern(B) CLOSEPAREN MULTIPLIER(M). {
// Same group followed by a multiplier, which is appended verbatim.
A = new PHP_LexerGenerator_ParseryyToken('(?:' . B->string . ')' . M, array(
'pattern' => '(?:' . B['pattern'] . ')' . M));
}
conditional(A) ::= OPENASSERTION OPENPAREN TEXT(T) CLOSEPAREN pattern(B) CLOSEPAREN MULTIPLIER(M). {
// Conditional on a numbered sub-pattern, followed by a multiplier.
if (T != 'R') {
// The condition has to be a complete positive decimal number. The pattern is
// anchored at both ends so that text which merely contains a digit somewhere
// is rejected instead of being compared numerically below.
if (!preg_match('/^[1-9][0-9]*\z/', T)) {
throw new PHP_LexerGenerator_Exception('Invalid sub-pattern conditional: "(?(' . T . ')"');
}
// the referenced sub-pattern must already have been counted
if (T > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('sub-pattern conditional . "' . T . '" refers to non-existent sub-pattern');
}
} else {
// recursion checks are not supported
throw new PHP_LexerGenerator_Exception('Recursive conditional (?(' . T . ')" cannot work in this lexer');
}
A = new PHP_LexerGenerator_ParseryyToken('(?(' . T . ')' . B->string . ')' . M, array(
'pattern' => '(?(' . T . ')' . B['pattern'] . ')' . M));
}
conditional(A) ::= OPENASSERTION OPENPAREN TEXT(T) CLOSEPAREN pattern(B) CLOSEPAREN. {
// Conditional on a numbered sub-pattern without a multiplier.
if (T != 'R') {
// Anchored at both ends for the same reason as in the rule with a multiplier:
// only a complete positive decimal number is a valid condition.
if (!preg_match('/^[1-9][0-9]*\z/', T)) {
throw new PHP_LexerGenerator_Exception('Invalid sub-pattern conditional: "(?(' . T . ')"');
}
// the referenced sub-pattern must already have been counted
if (T > $this->_subpatterns) {
throw new PHP_LexerGenerator_Exception('sub-pattern conditional . "' . T . '" refers to non-existent sub-pattern');
}
} else {
// recursion checks are not supported
throw new PHP_LexerGenerator_Exception('Recursive conditional (?(' . T . ')" cannot work in this lexer');
}
A = new PHP_LexerGenerator_ParseryyToken('(?(' . T . ')' . B->string . ')', array(
'pattern' => '(?(' . T . ')' . B['pattern'] . ')'));
}
conditional(A) ::= OPENASSERTION lookahead(B) pattern(C) CLOSEPAREN. {
// Conditional whose condition is a look-ahead: the look-ahead and the pattern
// are concatenated inside a group opened with a question mark. Both the string
// form and the pattern metadata entry of each operand are read.
A = new PHP_LexerGenerator_ParseryyToken('(?' . B->string . C->string . ')', array(
'pattern' => '(?' . B['pattern'] . C['pattern'] . ')'));
}
conditional(A) ::= OPENASSERTION lookahead(B) pattern(C) CLOSEPAREN MULTIPLIER(M). {
// Same conditional followed by a multiplier, which is appended verbatim.
A = new PHP_LexerGenerator_ParseryyToken('(?' . B->string . C->string . ')' . M, array(
'pattern' => '(?' . B['pattern'] . C['pattern'] . ')' . M));
}
conditional ::= OPENASSERTION lookbehind pattern(B) CLOSEPAREN. {
// A conditional based on a look-behind is always rejected. The message shows
// the positive look-behind syntax regardless of which variant was parsed.
throw new PHP_LexerGenerator_Exception('Look-behind assertions cannot be used: "(?<=' .
B['pattern'] . ')');
}
conditional ::= OPENASSERTION lookbehind pattern(B) CLOSEPAREN MULTIPLIER. {
// Same rejection when a multiplier follows.
throw new PHP_LexerGenerator_Exception('Look-behind assertions cannot be used: "(?<=' .
B['pattern'] . ')');
}
lookahead(A) ::= OPENASSERTION POSITIVELOOKAHEAD pattern(B) CLOSEPAREN. {
// Positive look-ahead: the enclosed pattern is wrapped in the look-ahead
// syntax for both the string form and the pattern metadata entry. The entry
// has to be stored under the exact key pattern, without trailing blank,
// because the conditional rules read that key from the look-ahead token.
A = new PHP_LexerGenerator_ParseryyToken('(?=' . B->string . ')', array(
'pattern' => '(?=' . B['pattern'] . ')'));
}
lookahead(A) ::= OPENASSERTION NEGATIVELOOKAHEAD pattern(B) CLOSEPAREN. {
// Negative look-ahead: same wrapping with an exclamation mark.
A = new PHP_LexerGenerator_ParseryyToken('(?!' . B->string . ')', array(
'pattern' => '(?!' . B['pattern'] . ')'));
}
lookbehind ::= OPENASSERTION POSITIVELOOKBEHIND pattern(B) CLOSEPAREN. {
// Look-behind assertions are rejected unconditionally: reducing this rule
// always throws, quoting the pattern entry of the enclosed pattern.
throw new PHP_LexerGenerator_Exception('Look-behind assertions cannot be used: "(?<=' .
B['pattern'] . ')');
}
lookbehind ::= OPENASSERTION NEGATIVELOOKBEHIND pattern(B) CLOSEPAREN. {
// Negative look-behind is rejected in the same way.
throw new PHP_LexerGenerator_Exception('Look-behind assertions cannot be used: "(?<!' .
B['pattern'] . ')');
}
subpattern ::= OPENASSERTION PATTERNNAME(B) pattern CLOSEPAREN. {
// Named sub-patterns are rejected unconditionally.
// NOTE(review): PATTERNNAME is a terminal, and other rules in this grammar
// concatenate terminals directly as plain strings. The array-style lookup
// below may therefore not yield the intended text - confirm against the lexer.
throw new PHP_LexerGenerator_Exception('Cannot use named sub-patterns: "(' .
B['pattern'] . ')');
}
subpattern ::= OPENASSERTION PATTERNNAME(B) pattern CLOSEPAREN MULTIPLIER. {
// Same rejection when a multiplier follows; the same review note applies.
throw new PHP_LexerGenerator_Exception('Cannot use named sub-patterns: "(' .
B['pattern'] . ')');
}
subpattern(A) ::= OPENPAREN pattern(B) CLOSEPAREN. {
// Sub-pattern in plain parentheses: it is counted in _subpatterns, the counter
// that the back-reference and conditional rules compare against.
$this->_subpatterns++;
A = new PHP_LexerGenerator_ParseryyToken('(' . B->string . ')', array(
'pattern' => '(' . B['pattern'] . ')'));
}
subpattern(A) ::= OPENPAREN pattern(B) CLOSEPAREN MULTIPLIER(M). {
// Same sub-pattern followed by a multiplier; it is counted as well.
$this->_subpatterns++;
A = new PHP_LexerGenerator_ParseryyToken('(' . B->string . ')' . M, array(
'pattern' => '(' . B['pattern'] . ')' . M));
}
onceonly(A) ::= OPENASSERTION ONCEONLY pattern(B) CLOSEPAREN. {
// Once-only group: the enclosed pattern is wrapped in the question mark and
// greater-than syntax for both the string form and the pattern entry.
A = new PHP_LexerGenerator_ParseryyToken('(?>' . B->string . ')', array(
'pattern' => '(?>' . B['pattern'] . ')'));
}
comment(A) ::= OPENASSERTION COMMENT(B) CLOSEPAREN. {
// Inline comment: re-emitted between plain parentheses.
// NOTE(review): COMMENT is a terminal, and other rules concatenate terminals
// directly as plain strings, whereas this action reads the string property and
// the pattern entry from it - confirm the value type the lexer delivers.
A = new PHP_LexerGenerator_ParseryyToken('(' . B->string . ')', array(
'pattern' => '(' . B['pattern'] . ')'));
}
recur ::= OPENASSERTION RECUR CLOSEPAREN. {
// Recursion is rejected unconditionally. Unlike the other rejecting rules this
// one throws the generic Exception class.
throw new Exception('(?R) cannot work in this lexer');
}

View File

@@ -0,0 +1,4 @@
<?php
require_once 'PHP/LexerGenerator.php';
// The lexer definition file is the only argument. Refuse to run without it
// instead of handing an undefined value to the generator.
if (!isset($_SERVER['argv'][1])) {
fwrite(STDERR, 'Usage: php ' . basename(__FILE__) . " <lexer-file>\n");
exit(1);
}
$a = new PHP_LexerGenerator($_SERVER['argv'][1]);
?>

View File

@@ -0,0 +1,806 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* There are a few PHP-specific changes to the lemon parser generator.
*
* - %extra_argument is removed, as class constructor can be used to
* pass in extra information
* - %token_type and company are irrelevant in PHP, and so are removed
* - %declare_class is added to define the parser class name and any
* implements/extends information
* - %include_class is added to allow insertion of extra class information
* such as constants, a class constructor, etc.
*
* Other changes make the parser more robust, and also make reporting
* syntax errors simpler. Detection of expected tokens eliminates some
* problematic edge cases where an unexpected token could cause the parser
* to simply accept input.
*
* Otherwise, the file format is identical to the Lemon parser generator
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: ParserGenerator.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**#@+
* Basic components of the parser generator
*/
require_once 'PHP/ParserGenerator/Action.php';
require_once 'PHP/ParserGenerator/ActionTable.php';
require_once 'PHP/ParserGenerator/Config.php';
require_once 'PHP/ParserGenerator/Data.php';
require_once 'PHP/ParserGenerator/Symbol.php';
require_once 'PHP/ParserGenerator/Rule.php';
require_once 'PHP/ParserGenerator/Parser.php';
require_once 'PHP/ParserGenerator/PropagationLink.php';
require_once 'PHP/ParserGenerator/State.php';
/**#@-*/
/**
* The basic home class for the parser generator
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
* @example Lempar.php
* @example examples/Parser.y Sample parser file format (PHP_LexerGenerator's parser)
* @example examples/Parser.php Sample parser file format PHP code (PHP_LexerGenerator's parser)
*/
class PHP_ParserGenerator
{
/**
* Set this to 1 to turn on debugging of Lemon's parsing of
* grammar files.
*/
const DEBUG = 0;
const MAXRHS = 1000;
const OPT_FLAG = 1, OPT_INT = 2, OPT_DBL = 3, OPT_STR = 4,
OPT_FFLAG = 5, OPT_FINT = 6, OPT_FDBL = 7, OPT_FSTR = 8;
public $azDefine = array();
private static $_options = array(
'b' => array(
'type' => self::OPT_FLAG,
'arg' => 'basisflag',
'message' => 'Print only the basis in report.'
),
'c' => array(
'type' => self::OPT_FLAG,
'arg' => 'compress',
'message' => 'Don\'t compress the action table.'
),
'D' => array(
'type' => self::OPT_FSTR,
'arg' => 'handleDOption',
'message' => 'Define an %ifdef macro.'
),
'g' => array(
'type' => self::OPT_FLAG,
'arg' => 'rpflag',
'message' => 'Print grammar without actions.'
),
'm' => array(
'type' => self::OPT_FLAG,
'arg' => 'mhflag',
'message' => 'Output a makeheaders compatible file'
),
'q' => array(
'type' => self::OPT_FLAG,
'arg' => 'quiet',
'message' => '(Quiet) Don\'t print the report file.'
),
's' => array(
'type' => self::OPT_FLAG,
'arg' => 'statistics',
'message' => 'Print parser stats to standard output.'
),
'x' => array(
'type' => self::OPT_FLAG,
'arg' => 'version',
'message' => 'Print the version number.'
),
'T' => array(
'type' => self::OPT_STR,
'arg' => 'parser_template',
'message' => 'Use different parser template file.'
)
);
private $_basisflag = 0;
private $_compress = 0;
private $_rpflag = 0;
private $_mhflag = 0;
private $_quiet = 0;
private $_statistics = 0;
private $_version = 0;
private $_size;
private $_parser_template = "";
/**
* Process a flag command line argument such as "-q", "+q" or "-Dname".
*
* The option letter is the second character of the argument. A plain flag
* (OPT_FLAG) is stored as 1 when introduced by "-" and as 0 when introduced
* by "+". A callback flag (OPT_FFLAG) receives that same value, and a
* callback string option (OPT_FSTR) receives the text following the letter.
*
* The option table names its targets without the leading underscore that
* the properties and private methods of this class carry, so the
* underscore-prefixed member is used whenever it exists.
*
* @param int $i index of the argument to process
* @param array $argv the complete argument vector
*
* @return int always 0; problems are reported through exceptions
* @throws Exception if the option is unknown or requires a value
*/
function handleflags($i, $argv)
{
$arg = $argv[$i];
// a bare "-" or "+" carries no option letter at all
if (!isset($arg[1]) || !isset(self::$_options[$arg[1]])) {
throw new Exception('Command line syntax error: undefined option "' . $arg . '"');
}
$option = self::$_options[$arg[1]];
$target = $option['arg'];
// "-x" switches the flag on, "+x" switches it off
$v = ($arg[0] == '-') ? 1 : 0;
if ($option['type'] == self::OPT_FLAG) {
if (property_exists($this, '_' . $target)) {
$target = '_' . $target;
}
$this->$target = $v;
} elseif ($option['type'] == self::OPT_FFLAG || $option['type'] == self::OPT_FSTR) {
if (method_exists($this, '_' . $target)) {
$target = '_' . $target;
}
if ($option['type'] == self::OPT_FFLAG) {
$this->$target($v);
} else {
// everything after the dash and the option letter is the value
$this->$target((string) substr($arg, 2));
}
} else {
throw new Exception('Command line syntax error: missing argument on switch: "' . $arg . '"');
}
return 0;
}
/**
* Process a command line switch of the form "name=value".
*
* The value is converted according to the option type (double, int or
* string) and is then either assigned to a property or passed to a
* callback method, depending on whether the type is one of the OPT_F*
* variants.
*
* The option table names its targets without the leading underscore that
* the properties and private methods of this class carry, so the
* underscore-prefixed member is used whenever it exists.
*
* @param int $i index of the argument to process
* @param array $argv the complete argument vector
*
* @return int always 0; problems are reported through exceptions
* @throws Exception if the argument has no "=", names an unknown option,
* or names a flag that takes no value
*/
function handleswitch($i, $argv)
{
$cp = strstr($argv[$i], '=');
if ($cp === false) {
throw new Exception('INTERNAL ERROR: handleswitch passed bad argument, no "=" in arg');
}
// the text in front of the first "=" is the option label
$label = substr($argv[$i], 0, strlen($argv[$i]) - strlen($cp));
if (!isset(self::$_options[$label])) {
throw new Exception('Command line syntax error: undefined option "' . $label .
$cp . '"');
}
// an empty value ("name=") is kept as an empty string
$value = (string) substr($cp, 1);
$option = self::$_options[$label];
$type = $option['type'];
if ($type == self::OPT_FLAG || $type == self::OPT_FFLAG) {
throw new Exception('Command line syntax error: option requires an argument "' .
$label . '=' . $value . '"');
}
if ($type == self::OPT_DBL || $type == self::OPT_FDBL) {
$value = (double) $value;
} elseif ($type == self::OPT_INT || $type == self::OPT_FINT) {
$value = (int) $value;
} elseif ($type != self::OPT_STR && $type != self::OPT_FSTR) {
// unknown option type: nothing to store
return 0;
}
$target = $option['arg'];
if ($type == self::OPT_FDBL || $type == self::OPT_FINT || $type == self::OPT_FSTR) {
if (method_exists($this, '_' . $target)) {
$target = '_' . $target;
}
$this->$target($value);
} else {
if (property_exists($this, '_' . $target)) {
$target = '_' . $target;
}
$this->$target = $value;
}
return 0;
}
/**
* Walk the argument vector and apply every option found in it.
*
* Arguments starting with "+" or "-" go to handleflags(); other arguments
* containing "=" go to handleswitch(). Index 0 is skipped. If a handler
* throws, the option summary and the message are printed and the process
* exits with status 1.
*
* @param array $a Arguments
*
* @return int always 0
*/
function OptInit($a)
{
if (!is_array($a) || !count($a) || !self::$_options) {
return 0;
}
$total = count($a);
try {
$i = 1;
while ($i < $total) {
$lead = $a[$i][0];
if ($lead == '+' || $lead == '-') {
$this->handleflags($i, $a);
} elseif (strstr($a[$i], '=')) {
$this->handleswitch($i, $a);
}
$i++;
}
} catch (Exception $failure) {
$this->OptPrint();
echo $failure->getMessage()."\n";
exit(1);
}
return 0;
}
/**
* Return the index of the N-th non-switch argument. Return -1
* if N is out of range.
*
* An argument counts as a switch when it starts with "-" or "+" or
* contains "=". Once "--" has been seen in the given vector, every
* following argument counts as a non-switch argument. Index 0 is skipped.
*
* @param int $n zero-based position among the non-switch arguments
* @param array $a the argument vector to inspect
*
* @return int
*/
private function _argindex($n, $a)
{
if (!is_array($a) || !count($a)) {
return -1;
}
$dashdash = false;
$total = count($a);
for ($i = 1; $i < $total; $i++) {
$arg = $a[$i];
// an empty argument has no first character and is never a switch
$isSwitch = $arg !== ''
&& ($arg[0] == '-' || $arg[0] == '+' || strstr($arg, '=') !== false);
if ($dashdash || !$isSwitch) {
if ($n == 0) {
return $i;
}
$n--;
}
// look at the vector that was passed in, not at the process arguments
if ($arg == '--') {
$dashdash = true;
}
}
return -1;
}
/**
* Fetch the $i-th non-option argument from an argument vector.
*
* The position is resolved through _argindex().
*
* @param int $i zero-based position among the non-option arguments
* @param array $a the value of $argv
*
* @return 0|string the argument, or 0 when there is no such argument
*/
private function _optArg($i, $a)
{
$position = $this->_argindex($i, $a);
return ($position == -1) ? 0 : $a[$position];
}
/**
* Count the non-option arguments of an argument vector.
*
* An argument is an option when it starts with "-" or "+" or contains "=".
* After "--" has been seen, every following argument is counted. Index 0
* is skipped.
*
* @param array $a
*
* @return int number of arguments
*/
function OptNArgs($a)
{
if (!is_array($a) || !count($a)) {
return 0;
}
$found = 0;
$afterDashes = 0;
for ($pos = 1; $pos < count($a); $pos++) {
$plain = $a[$pos][0] != '-' && $a[$pos][0] != '+' && !strchr($a[$pos], '=');
if ($afterDashes || $plain) {
$found++;
}
if ($a[$pos] == "--") {
$afterDashes = 1;
}
}
return $found;
}
/**
* Print the list of command-line options with their descriptions.
*
* A first pass determines the widest label, including the value placeholder
* of options that take a value; a second pass prints one padded line per
* option. Options of an unknown type are skipped.
*
* @return void
*/
function OptPrint()
{
// value placeholder shown after "label=" for each value-taking option type
$placeholders = array(
self::OPT_INT => '<integer>',
self::OPT_FINT => '<integer>',
self::OPT_DBL => '<real>',
self::OPT_FDBL => '<real>',
self::OPT_STR => '<string>',
self::OPT_FSTR => '<string>',
);
$widest = 0;
foreach (self::$_options as $label => $info) {
$width = strlen($label) + 1;
if (isset($placeholders[$info['type']])) {
$width += strlen($placeholders[$info['type']]);
}
$widest = max($widest, $width);
}
foreach (self::$_options as $label => $info) {
$type = $info['type'];
if ($type == self::OPT_FLAG || $type == self::OPT_FFLAG) {
echo " -$label" . str_repeat(' ', $widest - strlen($label));
echo " $info[message]\n";
} elseif (isset($placeholders[$type])) {
$hint = $placeholders[$type];
echo " $label=$hint" . str_repeat(' ', $widest - strlen($label) - strlen($hint));
echo " $info[message]\n";
}
}
}
/**
* This routine is called with the argument to each -D command-line option.
* Add the macro defined to the azDefine array.
*
* Only the macro name is recorded: for an argument of the form
* "NAME=value" everything from the first "=" onwards is dropped.
*
* @param string $z the text following "-D"
*
* @return void
*/
private function _handleDOption($z)
{
$cut = strpos($z, '=');
if ($cut !== false) {
// keep the name in front of the "=", not the value behind it
$z = (string) substr($z, 0, $cut);
}
$this->azDefine[] = $z;
}
/**************** From the file "main.c" ************************************/
/*
** Main program file for the LEMON parser generator.
*/
/**
* The main program. Parse the command line and do it...
*
* Reads the command line from $_SERVER['argv'], parses the single grammar
* file named there and, unless a grammar reprint was requested, runs the
* table construction steps on the PHP_ParserGenerator_Data object and
* writes the report and the parser source.
*
* Every path through this method ends in exit(); the final return
* statement is never reached.
*
* @return int Number of error and conflicts
*/
function main()
{
$lem = new PHP_ParserGenerator_Data;
$this->OptInit($_SERVER['argv']);
// version request: print the banner and stop
if ($this->_version) {
echo "Lemon version 1.0/PHP_ParserGenerator port version @package_version@\n";
exit(0);
}
if ($this->OptNArgs($_SERVER['argv']) != 1) {
echo "Exactly one filename argument is required.\n";
exit(1);
}
$lem->errorcnt = 0;
$lem->parser_template = $this->_parser_template;
/* Initialize the machine */
$lem->argv0 = $_SERVER['argv'][0];
$lem->filename = $this->_optArg(0, $_SERVER['argv']);
// strip the extension, if any, to obtain the base name
$a = pathinfo($lem->filename);
if (isset($a['extension'])) {
$ext = '.' . $a['extension'];
$lem->filenosuffix = substr($lem->filename, 0, strlen($lem->filename) - strlen($ext));
} else {
$lem->filenosuffix = $lem->filename;
}
$lem->basisflag = $this->_basisflag;
$lem->has_fallback = 0;
$lem->nconflict = 0;
// the integer 0 is used as the initial value of all these fields
$lem->name = $lem->include_code = $lem->include_classcode = $lem->arg = $lem->tokentype = $lem->start = 0;
$lem->vartype = 0;
$lem->stacksize = 0;
$lem->error = $lem->overflow = $lem->failure = $lem->accept = $lem->tokendest = $lem->tokenprefix = $lem->outname = $lem->extracode = 0;
$lem->vardest = 0;
$lem->tablesize = 0;
// two symbols are created before the grammar is read: "$" and "error"
// NOTE(review): "$" is presumably the end-of-input marker - confirm in Symbol
PHP_ParserGenerator_Symbol::Symbol_new("$");
$lem->errsym = PHP_ParserGenerator_Symbol::Symbol_new("error");
/* Parse the input file */
$parser = new PHP_ParserGenerator_Parser($this);
$parser->Parse($lem);
if ($lem->errorcnt) {
exit($lem->errorcnt);
}
if ($lem->rule === 0) {
printf("Empty grammar.\n");
exit(1);
}
/* Count and index the symbols of the grammar */
// the count is taken before "{default}" is added, which is why the loops
// below run up to and including nsymbol
$lem->nsymbol = PHP_ParserGenerator_Symbol::Symbol_count();
PHP_ParserGenerator_Symbol::Symbol_new("{default}");
$lem->symbols = PHP_ParserGenerator_Symbol::Symbol_arrayof();
for ($i = 0; $i <= $lem->nsymbol; $i++) {
$lem->symbols[$i]->index = $i;
}
// sort, then renumber so that index matches the sorted position
usort($lem->symbols, array('PHP_ParserGenerator_Symbol', 'sortSymbols'));
for ($i = 0; $i <= $lem->nsymbol; $i++) {
$lem->symbols[$i]->index = $i;
}
// find the first lower-case symbol
// (the empty loop body advances past every symbol whose first character
// sorts at or before "Z"; scanning starts at index 1)
for ($i = 1; ord($lem->symbols[$i]->name[0]) <= ord('Z'); $i++);
$lem->nterminal = $i;
/* Generate a reprint of the grammar, if requested on the command line */
if ($this->_rpflag) {
$this->Reprint();
} else {
/* Initialize the size for all follow and first sets */
$this->SetSize($lem->nterminal);
/* Find the precedence for every production rule (that has one) */
$lem->FindRulePrecedences();
/* Compute the lambda-nonterminals and the first-sets for every
** nonterminal */
$lem->FindFirstSets();
/* Compute all LR(0) states. Also record follow-set propagation
** links so that the follow-set can be computed later */
$lem->nstate = 0;
$lem->FindStates();
$lem->sorted = PHP_ParserGenerator_State::State_arrayof();
/* Tie up loose ends on the propagation links */
$lem->FindLinks();
/* Compute the follow set of every reducible configuration */
$lem->FindFollowSets();
/* Compute the action tables */
$lem->FindActions();
/* Compress the action tables */
// compression runs only while the compress flag still has its initial value 0
if ($this->_compress===0) {
$lem->CompressTables();
}
/* Reorder and renumber the states so that states with fewer choices
** occur at the end. */
$lem->ResortStates();
/* Generate a report of the parser generated. (the "y.output" file) */
if (!$this->_quiet) {
$lem->ReportOutput();
}
/* Generate the source code for the parser */
$lem->ReportTable($this->_mhflag);
/* Produce a header file for use by the scanner. (This step is
** omitted if the "-m" option is used because makeheaders will
** generate the file for us.) */
//if (!$this->_mhflag) {
// $this->ReportHeader();
//}
}
if ($this->_statistics) {
printf(
"Parser statistics: %d terminals, %d nonterminals, %d rules\n",
$lem->nterminal,
$lem->nsymbol - $lem->nterminal,
$lem->nrule
);
printf(
" %d states, %d parser table entries, %d conflicts\n",
$lem->nstate,
$lem->tablesize,
$lem->nconflict
);
}
if ($lem->nconflict) {
printf("%d parsing conflicts.\n", $lem->nconflict);
}
// the exit status is the sum of errors and conflicts; the return below is
// unreachable because exit() ends the process first
exit($lem->errorcnt + $lem->nconflict);
return ($lem->errorcnt + $lem->nconflict);
}
/**
* Record the set size as one more than the given number.
*
* @param int $n number to derive the size from
*
* @access public
* @return void
*/
function SetSize($n)
{
$size = $n + 1;
$this->_size = $size;
}
/**
* Merge two sorted linked lists into one sorted list.
*
* The integer 0 marks both an empty list and the end of a list. Nodes are
* relinked in place, so the "next" fields of the nodes in both input lists
* are changed. When the comparison does not report the node of $a as
* smaller, the node of $b is taken first.
*
* @param mixed $a A sorted, 0-terminated linked list. (May be 0).
* @param mixed $b A sorted, 0-terminated linked list. (May be 0).
* @param function $cmp Comparison callback; a negative result means the
* first argument sorts first.
* @param string $offset Name of the property holding the "next" link.
*
* @return mixed The head of a sorted list containing the elements of both
* $a and $b.
*/
static function merge($a, $b, $cmp, $offset)
{
// an empty side means the other list already is the result
if ($a === 0) {
return $b;
}
if ($b === 0) {
return $a;
}
// choose the head of the merged list
if (call_user_func($cmp, $a, $b) < 0) {
$head = $a;
$a = $a->$offset;
} else {
$head = $b;
$b = $b->$offset;
}
$tail = $head;
// append the smaller front node until one side runs out
while ($a && $b) {
if (call_user_func($cmp, $a, $b) < 0) {
$picked = $a;
$a = $a->$offset;
} else {
$picked = $b;
$b = $b->$offset;
}
$tail->$offset = $picked;
$tail = $picked;
}
// hook up whatever is left over
$tail->$offset = ($a !== 0) ? $a : $b;
return $head;
}
#define LISTSIZE 30
/**
* Sort a singly-linked list with a bottom-up merge sort.
*
* Nodes are detached one at a time and merged into an array of 30 buckets;
* a non-empty bucket is merged into the node being carried and cleared
* before the next bucket is tried. At the end all buckets are merged
* together. The integer 0 marks an empty list and the end of a list.
*
* Side effects:
* The "next" pointers for elements in list are changed.
*
* @param mixed $list Head of a singly-linked list of objects.
* @param string $next Name of the property holding the "next" link.
* @param function $cmp A comparison function.
*
* @return mixed The head of a sorted list containing the elements
* originally in list.
*/
static function msort($list, $next, $cmp)
{
// lists with zero or one element are already sorted
if ($list === 0 || $list->$next === 0) {
return $list;
}
$buckets = array_fill(0, 30, 0);
while ($list) {
// detach the head node from the remaining input
$carry = $list;
$list = $carry->$next;
$carry->$next = 0;
$slot = 0;
while ($slot < 29 && $buckets[$slot] !== 0) {
$carry = self::merge($carry, $buckets[$slot], $cmp, $next);
$buckets[$slot] = 0;
$slot++;
}
$buckets[$slot] = $carry;
}
// fold all remaining buckets into a single list
$sorted = 0;
foreach ($buckets as $bucket) {
if ($bucket !== 0) {
$sorted = self::merge($sorted, $bucket, $cmp, $next);
}
}
return $sorted;
}
/**
* Find a place to break $msg between offsets $min and $max.
*
* Offsets from $min up to $max (and the end of the message) are scanned and
* the last suitable position wins: a blank breaks at the blank itself, a
* hyphen breaks right after the hyphen, provided the hyphen sits more than
* one position before $max. Without any such character $min is returned.
* If $min is at or past the end of the message, the message length is
* returned.
*
* @param string $msg text to break
* @param int $min lowest acceptable offset
* @param int $max highest offset that is examined
*
* @return int offset at which to break
*/
static function findbreak($msg, $min, $max)
{
$length = strlen($msg);
if ($min >= $length) {
return $length;
}
$spot = $min;
$i = $min;
while ($i <= $max && $i < $length) {
$char = $msg[$i];
if ($char === '-' && $i < $max - 1) {
$spot = $i + 1;
}
if ($char === ' ') {
$spot = $i;
}
$i++;
}
return $spot;
}
/**
 * Print an error message on standard output, wrapped to 79 columns.
 *
 * Every output line starts with the file name (right-aligned in at least
 * 20 columns) and, when $lineno is positive, the line number. Arguments
 * after $format are substituted into it as by vsprintf().
 *
 * @param string  $filename File the message refers to
 * @param integer $lineno   Line number, left out of the prefix when <= 0
 * @param string  $format   printf()-style format string
 *
 * @return void
 */
static function ErrorMsg($filename, $lineno, $format)
{
    /* Prepare a prefix to be prepended to every output line */
    if ($lineno > 0) {
        $prefix = sprintf("%20s:%d: ", $filename, $lineno);
    } else {
        $prefix = sprintf("%20s: ", $filename);
    }
    // Keep at least one usable column so that a very long file name cannot
    // leave a zero or negative width and stall the wrapping loop.
    $availablewidth = max(1, 79 - strlen($prefix));
    /* Generate the error message from the arguments following $format */
    $errmsg = vsprintf($format, array_slice(func_get_args(), 3));
    /* Remove trailing line breaks, then flatten the rest to blanks */
    $errmsg = rtrim($errmsg, "\r\n");
    $errmsg = str_replace(
        array("\r", "\n", "\t"),
        array(' ', ' ', ' '),
        $errmsg
    );
    /* Print the error message, one wrapped line per iteration */
    while (strlen($errmsg)) {
        $length = strlen($errmsg);
        $end = $restart = self::findbreak($errmsg, 0, $availablewidth);
        if ($length <= $availablewidth) {
            // The remainder fits on this line: print all of it.
            $end = $restart = $length;
        }
        // The next line starts after the blanks at the break point.
        while (isset($errmsg[$restart]) && $errmsg[$restart] == ' ') {
            $restart++;
        }
        if ($restart < 1) {
            // No break point was found and nothing was skipped: cut the
            // text at the line width so that the loop always advances.
            $end = $restart = min($length, $availablewidth);
        }
        printf("%s%.{$end}s\n", $prefix, $errmsg);
        $errmsg = (string) substr($errmsg, $restart);
    }
}
/**
* Duplicate the input file without comments and without actions
* on rules
*
* @return void
*/
function Reprint()
{
    printf("// Reprint of input file \"%s\".\n// Symbols:\n", $this->filename);
    // Column width: the longest symbol name, but never less than 10.
    $maxlen = 10;
    for ($i = 0; $i < $this->nsymbol; $i++) {
        $sp = $this->symbols[$i];
        $len = strlen($sp->name);
        if ($len > $maxlen ) {
            $maxlen = $len;
        }
    }
    // Both values must be whole numbers: "/" yields a float in PHP, and a
    // fractional $skip makes the inner loop visit non-integer offsets, which
    // repeats some symbols and puts others in the wrong column.
    $ncolumns = max(1, (int) floor(76 / ($maxlen + 5)));
    // Rows needed to show every symbol in $ncolumns columns (rounded up).
    $skip = (int) floor(($this->nsymbol + $ncolumns - 1) / $ncolumns);
    for ($i = 0; $i < $skip; $i++) {
        print "//";
        // Row $i shows symbols $i, $i + $skip, $i + 2 * $skip, ...
        for ($j = $i; $j < $this->nsymbol; $j += $skip) {
            $sp = $this->symbols[$j];
            //assert( sp->index==j );
            printf(" %3d %-{$maxlen}.{$maxlen}s", $j, $sp->name);
        }
        print "\n";
    }
    // One line per rule. Aliases and action code are deliberately left out.
    for ($rp = $this->rule; $rp; $rp = $rp->next) {
        printf("%s", $rp->lhs->name);
        print " ::=";
        for ($i = 0; $i < $rp->nrhs; $i++) {
            $sp = $rp->rhs[$i];
            printf(" %s", $sp->name);
            if ($sp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                // $sp->name was printed above; the remaining alternatives
                // follow, separated by "|".
                for ($j = 1; $j < $sp->nsubsym; $j++) {
                    printf("|%s", $sp->subsym[$j]->name);
                }
            }
        }
        print ".";
        if ($rp->precsym) {
            printf(" [%s]", $rp->precsym->name);
        }
        print "\n";
    }
}
}

View File

@@ -0,0 +1,257 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: Action.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* Every shift or reduce operation is stored as one of the following objects.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_Action
{
    const SHIFT = 1,
          ACCEPT = 2,
          REDUCE = 3,
          ERROR = 4,
          /**
           * Was a reduce, but part of a conflict
           */
          CONFLICT = 5,
          /**
           * Was a shift. Precedence resolved conflict
           */
          SH_RESOLVED = 6,
          /**
           * Was a reduce. Precedence resolved conflict
           */
          RD_RESOLVED = 7,
          /**
           * Deleted by compression
           * @see PHP_ParserGenerator::CompressTables()
           */
          NOT_USED = 8;
    /**
     * The look-ahead symbol that triggers this action
     * @var PHP_ParserGenerator_Symbol
     */
    public $sp;
    /**
     * This defines the kind of action, and must be one
     * of the class constants.
     *
     * - {@link PHP_ParserGenerator_Action::SHIFT}
     * - {@link PHP_ParserGenerator_Action::ACCEPT}
     * - {@link PHP_ParserGenerator_Action::REDUCE}
     * - {@link PHP_ParserGenerator_Action::ERROR}
     * - {@link PHP_ParserGenerator_Action::CONFLICT}
     * - {@link PHP_ParserGenerator_Action::SH_RESOLVED}
     * - {@link PHP_ParserGenerator_Action::RD_RESOLVED}
     * - {@link PHP_ParserGenerator_Action::NOT_USED}
     * @var int
     */
    public $type;
    /**
     * The new state, if this is a shift,
     * the parser rule, if this is a reduce.
     *
     * @var PHP_ParserGenerator_State|PHP_ParserGenerator_Rule
     */
    public $x;
    /**
     * The next action for this state.
     * @var PHP_ParserGenerator_Action
     */
    public $next;

    /**
     * Compare two actions
     *
     * This is used by {@link Action_sort()} to compare actions. Actions are
     * ordered by the index of their look-ahead symbol, then by type, then by
     * the index of {@link $x}. Two actions with the same symbol and the same
     * type may only be REDUCE, RD_RESOLVED or CONFLICT actions; any other
     * type is reported with an exception.
     *
     * @param PHP_ParserGenerator_Action $ap1 first action
     * @param PHP_ParserGenerator_Action $ap2 second action
     *
     * @return int negative, zero or positive, as usual for a comparison
     * @throws Exception for a shift to two different states on one symbol,
     *                   or for an equal pair of any non-reduce type
     */
    static function actioncmp(PHP_ParserGenerator_Action $ap1, PHP_ParserGenerator_Action $ap2)
    {
        $rc = $ap1->sp->index - $ap2->sp->index;
        if ($rc === 0) {
            $rc = $ap1->type - $ap2->type;
        }
        if ($rc === 0) {
            // Same symbol and same type from here on.
            if ($ap1->type == self::SHIFT) {
                if ($ap1->x->statenum != $ap2->x->statenum) {
                    throw new Exception('Shift conflict: ' . $ap1->sp->name .
                        ' shifts both to state ' . $ap1->x->statenum . ' (rule ' .
                        $ap1->x->cfp->rp->lhs->name . ' on line ' .
                        $ap1->x->cfp->rp->ruleline . ') and to state ' .
                        $ap2->x->statenum . ' (rule ' .
                        $ap2->x->cfp->rp->lhs->name . ' on line ' .
                        $ap2->x->cfp->rp->ruleline . ')');
                }
            }
            if ($ap1->type != self::REDUCE
                && $ap1->type != self::RD_RESOLVED
                && $ap1->type != self::CONFLICT
            ) {
                throw new Exception('action has not been processed: ' .
                    $ap1->sp->name . ' on line ' . $ap1->x->cfp->rp->ruleline .
                    ', rule ' . $ap1->x->cfp->rp->lhs->name);
            }
            if ($ap2->type != self::REDUCE
                && $ap2->type != self::RD_RESOLVED
                && $ap2->type != self::CONFLICT
            ) {
                throw new Exception('action has not been processed: ' .
                    $ap2->sp->name . ' on line ' . $ap2->x->cfp->rp->ruleline .
                    ', rule ' . $ap2->x->cfp->rp->lhs->name);
            }
            // Both are reduce-like: order them by rule index.
            $rc = $ap1->x->index - $ap2->x->index;
        }
        return $rc;
    }

    /**
     * Echo a one-line description of this action.
     *
     * REDUCE actions also name the rule, SHIFT actions the target state and
     * the rule of its first configuration.
     *
     * @param bool $processed not used
     *
     * @return void
     */
    function display($processed = false)
    {
        // Every action type has a label, so no type triggers an undefined
        // array key while being displayed.
        $map = array(
            self::SHIFT => 'SHIFT',
            self::ACCEPT => 'ACCEPT',
            self::REDUCE => 'REDUCE',
            self::ERROR => 'ERROR',
            self::CONFLICT => 'CONFLICT',
            self::SH_RESOLVED => 'SH_RESOLVED',
            self::RD_RESOLVED => 'RD_RESOLVED',
            self::NOT_USED => 'NOT_USED'
        );
        $label = isset($map[$this->type]) ? $map[$this->type] : '';
        echo $label . ' for ' . $this->sp->name;
        if ($this->type == self::REDUCE) {
            echo ' - rule ' . $this->x->lhs->name . "\n";
        } elseif ($this->type == self::SHIFT) {
            echo ' - state ' . $this->x->statenum . ', basis ' . $this->x->cfp->rp->lhs->name . "\n";
        } else {
            echo "\n";
        }
    }

    /**
     * create linked list of PHP_ParserGenerator_Actions
     *
     * The new action is put in front of the list and $app is updated to
     * point at it. The action is also echoed through {@link display()}.
     *
     * @param PHP_ParserGenerator_Action|null                    $app  head of the list, updated in place
     * @param int                                                $type one of the class constants from PHP_ParserGenerator_Action
     * @param PHP_ParserGenerator_Symbol                         $sp   the look-ahead symbol
     * @param PHP_ParserGenerator_State|PHP_ParserGenerator_Rule $arg  stored in {@link $x}
     *
     * @return void
     */
    static function Action_add(&$app, $type, PHP_ParserGenerator_Symbol $sp, $arg)
    {
        $new = new PHP_ParserGenerator_Action;
        $new->next = $app;
        $app = $new;
        $new->type = $type;
        $new->sp = $sp;
        $new->x = $arg;
        echo ' Adding ';
        $new->display();
    }

    /**
     * Sort parser actions
     *
     * @param PHP_ParserGenerator_Action $ap a parser action
     *
     * @see PHP_ParserGenerator_Data::FindActions()
     *
     * @return PHP_ParserGenerator_Action
     */
    static function Action_sort(PHP_ParserGenerator_Action $ap)
    {
        $ap = PHP_ParserGenerator::msort($ap, 'next', array('PHP_ParserGenerator_Action', 'actioncmp'));
        return $ap;
    }

    /**
     * Print an action to the given file descriptor. Return 0 if
     * nothing was actually printed, 1 otherwise.
     *
     * SH_RESOLVED, RD_RESOLVED and NOT_USED actions print nothing.
     *
     * @param resource $fp     File descriptor to print on, STDOUT if empty
     * @param integer  $indent Minimum width of the symbol name column
     *
     * @see PHP_ParserGenerator_Data::ReportOutput()
     *
     * @return int
     */
    function PrintAction($fp, $indent)
    {
        if (!$fp) {
            $fp = STDOUT;
        }
        $result = 1;
        // "{$indent}" rather than "${indent}": same output, but the latter
        // form of string interpolation is deprecated as of PHP 8.2.
        switch ($this->type)
        {
        case self::SHIFT:
            fprintf($fp, "%{$indent}s shift %d", $this->sp->name, $this->x->statenum);
            break;
        case self::REDUCE:
            fprintf($fp, "%{$indent}s reduce %d", $this->sp->name, $this->x->index);
            break;
        case self::ACCEPT:
            fprintf($fp, "%{$indent}s accept", $this->sp->name);
            break;
        case self::ERROR:
            fprintf($fp, "%{$indent}s error", $this->sp->name);
            break;
        case self::CONFLICT:
            fprintf($fp, "%{$indent}s reduce %-3d ** Parsing conflict **", $this->sp->name, $this->x->index);
            break;
        case self::SH_RESOLVED:
        case self::RD_RESOLVED:
        case self::NOT_USED:
            $result = 0;
            break;
        }
        return $result;
    }
}
?>

View File

@@ -0,0 +1,299 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: ActionTable.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* The state of the yy_action table under construction is an instance of
* the following structure
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_ActionTable
{
    /**
     * How many slots of {@link $aAction} are in use
     * @var int
     */
    public $nAction = 0;
    /**
     * The $yy_action table while it is being built.
     *
     * Every slot has this shape:
     * <code>
     * array(
     *     'lookahead' => -1, // Value of the lookahead token (symbol index)
     *     'action' => -1     // Action to take on the given lookahead (action index)
     * );
     * </code>
     * A slot whose lookahead is negative is free.
     * @see PHP_ParserGenerator_Data::compute_action()
     * @var array
     */
    public $aAction = array(
        array(
            'lookahead' => -1,
            'action' => -1
        )
    );
    /**
     * The transaction set currently being collected.
     *
     * @see $aAction the entries have the format described there
     * @var array
     */
    public $aLookahead = array(
        array(
            'lookahead' => 0,
            'action' => 0
        )
    );
    /**
     * The smallest (minimum) value of any lookahead token in {@link $aLookahead}
     *
     * The lowest non-terminal is always introduced earlier in the parser file,
     * and is therefore a more significant token.
     * @var int
     */
    public $mnLookahead = 0;
    /**
     * The action that belongs to the smallest lookahead token.
     * @see $mnLookahead
     * @var int
     */
    public $mnAction = 0;
    /**
     * The largest (maximum) value of any lookahead token in {@link $aLookahead}
     * @var int
     */
    public $mxLookahead = 0;
    /**
     * The number of slots used in {@link $aLookahead}.
     *
     * This is the same as count($aLookahead), but there was no pressing reason
     * to change this when porting from C.
     * @see $mnLookahead
     * @var int
     */
    public $nLookahead = 0;

    /**
     * Append one lookahead/action pair to the current transaction set
     *
     * The first pair of a set discards whatever {@link $aLookahead} held
     * before and initialises the minimum and maximum lookahead.
     *
     * @param int $lookahead
     * @param int $action
     *
     * @return void
     */
    function acttab_action($lookahead, $action)
    {
        $first = ($this->nLookahead === 0);
        if ($first) {
            $this->aLookahead = array();
        }
        if ($first || $this->mxLookahead < $lookahead) {
            $this->mxLookahead = $lookahead;
        }
        if ($first || $this->mnLookahead > $lookahead) {
            // the action of the smallest lookahead is tracked with it
            $this->mnLookahead = $lookahead;
            $this->mnAction = $action;
        }
        $this->aLookahead[$this->nLookahead++] = array(
            'lookahead' => $lookahead,
            'action' => $action);
    }

    /**
     * Place the transaction set collected by acttab_action() into the action
     * table, then empty the set so that a new one can be collected.
     *
     * The table is searched for the first position where the set either
     * fits into free slots or repeats a set stored earlier; if there is no
     * such position the set is appended.
     *
     * @return int The offset that has to be added to a lookahead to obtain
     * the index of its action in $yy_action. It is used when the $yy_ofst
     * tables (reduce and shift) are generated
     * @throws Exception when no pair has been added since the last insert
     */
    function acttab_insert()
    {
        if ($this->nLookahead <= 0) {
            throw new Exception('nLookahead is not set up?');
        }
        /* $i is the index in $this->aAction[] at which the entry for
        ** $this->mnLookahead would be stored. Leaving the loop without a
        ** hit means that the set is appended.
        */
        $limit = $this->nAction + $this->mnLookahead;
        for ($i = 0; $i < $limit; $i++) {
            $this->_padAction($i);
            $head = $this->aAction[$i]['lookahead'];
            if ($head < 0) {
                if ($this->_fitsFreeSlots($i)) {
                    break; /* Fits in empty slots */
                }
            } elseif ($head == $this->mnLookahead) {
                if ($this->_repeatsPriorSet($i)) {
                    break; /* Same as a prior transaction set */
                }
            }
        }
        /* Store the transaction set at index $i. */
        for ($j = 0; $j < $this->nLookahead; $j++) {
            $this->_padLookahead($j);
            $k = $this->aLookahead[$j]['lookahead'] - $this->mnLookahead + $i;
            $this->aAction[$k] = $this->aLookahead[$j];
            if ($k >= $this->nAction) {
                $this->nAction = $k + 1;
            }
        }
        $this->nLookahead = 0;
        $this->aLookahead = array();
        return $i - $this->mnLookahead;
    }

    /**
     * Make sure slot $index of {@link $aAction} exists; a new slot is free.
     *
     * @param int $index
     *
     * @return void
     */
    private function _padAction($index)
    {
        if (!isset($this->aAction[$index])) {
            $this->aAction[$index] = array(
                'lookahead' => -1,
                'action' => -1,
            );
        }
    }

    /**
     * Make sure entry $index of {@link $aLookahead} exists.
     *
     * @param int $index
     *
     * @return void
     */
    private function _padLookahead($index)
    {
        if (!isset($this->aLookahead[$index])) {
            $this->aLookahead[$index] = array(
                'lookahead' => 0,
                'action' => 0,
            );
        }
    }

    /**
     * Can the current set be stored with its smallest lookahead at slot $i,
     * using free slots only?
     *
     * @param int $i candidate slot, known to be free
     *
     * @return bool
     */
    private function _fitsFreeSlots($i)
    {
        // every slot the set needs has to be inside the table and free
        for ($j = 0; $j < $this->nLookahead; $j++) {
            $this->_padLookahead($j);
            $k = $this->aLookahead[$j]['lookahead'] - $this->mnLookahead + $i;
            if ($k < 0) {
                return false;
            }
            $this->_padAction($k);
            if ($this->aAction[$k]['lookahead'] >= 0) {
                return false;
            }
        }
        // no stored entry may look as if it belonged to a set placed at $i
        for ($j = 0; $j < $this->nAction; $j++) {
            $this->_padAction($j);
            if ($this->aAction[$j]['lookahead'] == $j + $this->mnLookahead - $i) {
                return false;
            }
        }
        return true;
    }

    /**
     * Is the current set identical to a set that already starts at slot $i?
     *
     * @param int $i candidate slot, known to hold the smallest lookahead
     *
     * @return bool
     */
    private function _repeatsPriorSet($i)
    {
        if ($this->aAction[$i]['action'] != $this->mnAction) {
            return false;
        }
        // each pair of the set has to be stored at its position already
        for ($j = 0; $j < $this->nLookahead; $j++) {
            $k = $this->aLookahead[$j]['lookahead'] - $this->mnLookahead + $i;
            if ($k < 0 || $k >= $this->nAction) {
                return false;
            }
            $this->_padAction($k);
            if ($this->aLookahead[$j]['lookahead'] != $this->aAction[$k]['lookahead']) {
                return false;
            }
            if ($this->aLookahead[$j]['action'] != $this->aAction[$k]['action']) {
                return false;
            }
        }
        // ... and the stored set must not contain anything beyond that
        $n = 0;
        for ($j = 0; $j < $this->nAction; $j++) {
            $this->_padAction($j);
            $stored = $this->aAction[$j]['lookahead'];
            if ($stored >= 0 && $stored == $j + $this->mnLookahead - $i) {
                $n++;
            }
        }
        return $n == $this->nLookahead;
    }
}
?>

View File

@@ -0,0 +1,574 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: Config.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* A configuration is a production rule of the grammar together with
* a mark (dot) showing how much of that rule has been processed so far.
*
* Configurations also contain a follow-set which is a list of terminal
* symbols which are allowed to immediately follow the end of the rule.
* Every configuration is recorded as an instance of the following class.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_Config
{
    const COMPLETE = 1;
    const INCOMPLETE = 2;
    /**
     * The parser rule upon which the configuration is based.
     *
     * A parser rule is something like:
     * <pre>
     * blah ::= FOO bar.
     * </pre>
     * @var PHP_ParserGenerator_Rule
     */
    public $rp;
    /**
     * The parse point.
     *
     * This is the index into the right-hand side of a rule that is
     * represented by this configuration. In other words, possible
     * dots for this rule:
     *
     * <pre>
     * blah ::= FOO bar.
     * </pre>
     *
     * are (represented by "[here]"):
     *
     * <pre>
     * blah ::= [here] FOO bar.
     * blah ::= FOO [here] bar.
     * blah ::= FOO bar [here].
     * </pre>
     * @var int
     */
    public $dot;
    /**
     * Follow-set for this configuration only
     *
     * This is the list of terminals and non-terminals that
     * can follow this configuration.
     * @var array
     */
    public $fws;
    /**
     * Follow-set forward propagation links.
     * @var PHP_ParserGenerator_PropagationLink
     */
    public $fplp;
    /**
     * Follow-set backwards propagation links
     * @var PHP_ParserGenerator_PropagationLink
     */
    public $bplp;
    /**
     * State that contains this configuration
     * @var PHP_ParserGenerator_State
     */
    public $stp;
    /**
     * Status during followset and shift computations.
     *
     * One of PHP_ParserGenerator_Config::COMPLETE or
     * PHP_ParserGenerator_Config::INCOMPLETE.
     * @var int
     */
    public $status;
    /**
     * Next configuration in the state.
     *
     * The list builders in this class store 0 here for the last element.
     * @var PHP_ParserGenerator_Config|int
     */
    public $next;
    /**
     * The next basis configuration, linked the same way as {@link $next}.
     * @var PHP_ParserGenerator_Config|int
     */
    public $bp;
    /**
     * Top of the list of configurations for the current state.
     * @var PHP_ParserGenerator_Config
     */
    static public $current;
    /**
     * Last on the list of configurations for the current state.
     *
     * This is kept as a PHP reference to the link that the next new
     * configuration has to be stored in: {@link $current} for an empty
     * list, otherwise the {@link $next} property of the last element.
     * @var PHP_ParserGenerator_Config
     */
    static public $currentend;
    /**
     * Top of the list of basis configurations for the current state.
     * @var PHP_ParserGenerator_Config
     */
    static public $basis;
    /**
     * Last on the list of basis configurations for the current state.
     *
     * A reference, used like {@link $currentend} but for {@link $bp} links.
     * @var PHP_ParserGenerator_Config
     */
    static public $basisend;
    /**
     * Associative array representation of the linked list of configurations
     * found in {@link $current}
     *
     * Keys are the values of {@link confighash()}; each value is the first
     * node of a chain of entries that share that hash.
     *
     * @var array
     */
    static public $x4a = array();

    /**
     * Return a pointer to a new configuration
     * @return PHP_ParserGenerator_Config
     */
    private static function newconfig()
    {
        return new PHP_ParserGenerator_Config;
    }

    /**
     * Display the current configuration for the .out file
     *
     * Writes $cfp and every configuration linked behind it to php://output.
     *
     * @param PHP_ParserGenerator_Config $cfp
     * @see PHP_ParserGenerator_Data::ReportOutput()
     */
    static function Configshow(PHP_ParserGenerator_Config $cfp)
    {
        $fp = fopen('php://output', 'w');
        while ($cfp) {
            if ($cfp->dot == $cfp->rp->nrhs) {
                // dot at the end of the rule: show the rule number
                $buf = sprintf('(%d)', $cfp->rp->index);
                fprintf($fp, ' %5s ', $buf);
            } else {
                fwrite($fp,' ');
            }
            $cfp->ConfigPrint($fp);
            fwrite($fp, "\n");
            $cfp = $cfp->next;
        }
        fwrite($fp, "\n");
        fclose($fp);
    }

    /**
     * Initialize the configuration list builder for a new state.
     */
    static function Configlist_init()
    {
        self::$current = 0;
        // the end markers alias the (empty) list heads
        self::$currentend = &self::$current;
        self::$basis = 0;
        self::$basisend = &self::$basis;
        self::$x4a = array();
    }

    /**
     * Empty both configuration lists and remove all data from the table.
     *
     * Pass each data to the function $f as it is removed if
     * $f is a valid callback.
     * @param callback|null
     * @see Configtable_clear()
     */
    static function Configtable_reset($f)
    {
        self::$current = 0;
        self::$currentend = &self::$current;
        self::$basis = 0;
        self::$basisend = &self::$basis;
        // Hand the callback on instead of dropping it; anything that is not
        // callable is treated like "no callback", as before.
        self::Configtable_clear(is_callable($f) ? $f : 0);
    }

    /**
     * Remove all data from the associative array representation
     * of configurations.
     *
     * Pass each data to the function $f as it is removed if
     * $f is a valid callback.
     * @param callback|null
     */
    static function Configtable_clear($f)
    {
        if (!count(self::$x4a)) {
            return;
        }
        if ($f && is_callable($f)) {
            // The table is keyed by hash value, not by 0..n-1, and entries
            // with equal hashes are chained through "next": visit every
            // bucket and walk its whole chain.
            foreach (self::$x4a as $np) {
                for (; $np; $np = $np->next) {
                    call_user_func($f, $np->data);
                }
            }
        }
        self::$x4a = array();
    }

    /**
     * Reset the configuration list builder for a new state.
     * @see Configtable_clear()
     */
    static function Configlist_reset()
    {
        self::Configtable_clear(0);
    }

    /**
     * Add another configuration to the configuration list for this parser state.
     *
     * An existing configuration with the same rule and dot is returned
     * as it is; nothing is added in that case.
     * @param PHP_ParserGenerator_Rule the rule
     * @param int Index into the right-hand side of the rule where the dot goes
     * @return PHP_ParserGenerator_Config
     */
    static function Configlist_add($rp, $dot)
    {
        $model = new PHP_ParserGenerator_Config;
        $model->rp = $rp;
        $model->dot = $dot;
        $cfp = self::Configtable_find($model);
        if ($cfp === 0) {
            $cfp = self::newconfig();
            $cfp->rp = $rp;
            $cfp->dot = $dot;
            $cfp->fws = array();
            $cfp->stp = 0;
            $cfp->fplp = $cfp->bplp = 0;
            $cfp->next = 0;
            $cfp->bp = 0;
            // Write through the reference to link the new element behind
            // the current tail, then re-point the reference at its own link.
            self::$currentend = $cfp;
            self::$currentend = &$cfp->next;
            self::Configtable_insert($cfp);
        }
        return $cfp;
    }

    /**
     * Add a basis configuration to the configuration list for this parser state.
     *
     * Basis configurations are the root for a configuration. This method also
     * inserts the configuration into the regular list of configurations for this
     * reason.
     * @param PHP_ParserGenerator_Rule the rule
     * @param int Index into the right-hand side of the rule where the dot goes
     * @return PHP_ParserGenerator_Config
     */
    static function Configlist_addbasis($rp, $dot)
    {
        $model = new PHP_ParserGenerator_Config;
        $model->rp = $rp;
        $model->dot = $dot;
        $cfp = self::Configtable_find($model);
        if ($cfp === 0) {
            $cfp = self::newconfig();
            $cfp->rp = $rp;
            $cfp->dot = $dot;
            $cfp->fws = array();
            $cfp->stp = 0;
            $cfp->fplp = $cfp->bplp = 0;
            $cfp->next = 0;
            $cfp->bp = 0;
            // append to the regular list ...
            self::$currentend = $cfp;
            self::$currentend = &$cfp->next;
            // ... and to the basis list
            self::$basisend = $cfp;
            self::$basisend = &$cfp->bp;
            self::Configtable_insert($cfp);
        }
        return $cfp;
    }

    /**
     * Compute the closure of the configuration list.
     *
     * This calculates all of the possible continuations of
     * each configuration, ensuring that each state accounts
     * for every configuration that could arrive at that state.
     *
     * @param PHP_ParserGenerator_Data $lemp used for error reporting and for
     *                                       the error symbol
     */
    static function Configlist_closure(PHP_ParserGenerator_Data $lemp)
    {
        // Configurations added inside the loop are appended to the list and
        // are therefore visited by this same loop.
        for ($cfp = self::$current; $cfp; $cfp = $cfp->next) {
            $rp = $cfp->rp;
            $dot = $cfp->dot;
            if ($dot >= $rp->nrhs) {
                continue;
            }
            $sp = $rp->rhs[$dot];
            if ($sp->type == PHP_ParserGenerator_Symbol::NONTERMINAL) {
                if ($sp->rule === 0 && $sp !== $lemp->errsym) {
                    PHP_ParserGenerator::ErrorMsg($lemp->filename, $rp->line,
                        "Nonterminal \"%s\" has no rules.", $sp->name);
                    $lemp->errorcnt++;
                }
                for ($newrp = $sp->rule; $newrp; $newrp = $newrp->nextlhs) {
                    $newcfp = self::Configlist_add($newrp, 0);
                    // Collect what can follow the nonterminal from the
                    // symbols behind it in the current rule.
                    for ($i = $dot + 1; $i < $rp->nrhs; $i++) {
                        $xsp = $rp->rhs[$i];
                        if ($xsp->type == PHP_ParserGenerator_Symbol::TERMINAL) {
                            $newcfp->fws[$xsp->index] = 1;
                            break;
                        } elseif ($xsp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                            for ($k = 0; $k < $xsp->nsubsym; $k++) {
                                $newcfp->fws[$xsp->subsym[$k]->index] = 1;
                            }
                            break;
                        } else {
                            $a = array_diff_key($xsp->firstset, $newcfp->fws);
                            $newcfp->fws += $a;
                            if ($xsp->lambda === false) {
                                break;
                            }
                        }
                    }
                    // The loop ran to the end of the rule without a break:
                    // link the follow-set of $cfp forward to $newcfp.
                    if ($i == $rp->nrhs) {
                        PHP_ParserGenerator_PropagationLink::Plink_add($cfp->fplp, $newcfp);
                    }
                }
            }
        }
    }

    /**
     * Sort the configuration list
     * @uses Configcmp()
     */
    static function Configlist_sort()
    {
        $a = 0;
        self::$current = PHP_ParserGenerator::msort(self::$current,'next', array('PHP_ParserGenerator_Config', 'Configcmp'));
        // Re-bind the end marker to a scratch variable first, so that
        // clearing it does not write 0 into the link it referred to.
        self::$currentend = &$a;
        self::$currentend = 0;
    }

    /**
     * Sort the basis configuration list
     *
     * The elements of {@link $current} are sorted along their {@link $bp}
     * links and the result becomes {@link $basis}.
     * @uses Configcmp
     */
    static function Configlist_sortbasis()
    {
        $a = 0;
        self::$basis = PHP_ParserGenerator::msort(self::$current,'bp', array('PHP_ParserGenerator_Config', 'Configcmp'));
        // see Configlist_sort() for the re-binding
        self::$basisend = &$a;
        self::$basisend = 0;
    }

    /**
     * Return a pointer to the head of the configuration list and
     * reset the list
     * @see $current
     * @return PHP_ParserGenerator_Config
     */
    static function Configlist_return()
    {
        $old = self::$current;
        self::$current = 0;
        self::$currentend = &self::$current;
        return $old;
    }

    /**
     * Return a pointer to the head of the basis list and
     * reset the list
     * @see $basis
     * @return PHP_ParserGenerator_Config
     */
    static function Configlist_basis()
    {
        $old = self::$basis;
        self::$basis = 0;
        self::$basisend = &self::$basis;
        return $old;
    }

    /**
     * Free all elements of the given configuration list
     *
     * The follow-set of every element is emptied.
     * @param PHP_ParserGenerator_Config
     * @throws Exception if an element still has propagation links
     */
    static function Configlist_eat($cfp)
    {
        for (; $cfp; $cfp = $nextcfp) {
            $nextcfp = $cfp->next;
            if ($cfp->fplp !=0) {
                throw new Exception('fplp of configuration non-zero?');
            }
            if ($cfp->bplp !=0) {
                throw new Exception('bplp of configuration non-zero?');
            }
            if ($cfp->fws) {
                $cfp->fws = array();
            }
        }
    }

    /**
     * Compare two configurations for sorting purposes.
     *
     * Configurations based on higher precedence rules
     * (those earlier in the file) are chosen first. Two
     * configurations that are the same rule are sorted by
     * dot (see {@link $dot}), and those configurations
     * with a dot closer to the left-hand side are chosen first.
     * @param PHP_ParserGenerator_Config $a
     * @param PHP_ParserGenerator_Config $b
     * @return int negative, zero or positive
     */
    static function Configcmp($a, $b)
    {
        $x = $a->rp->index - $b->rp->index;
        if (!$x) {
            $x = $a->dot - $b->dot;
        }
        return $x;
    }

    /**
     * Print out information on this configuration.
     *
     * The rule is written with " *" at the position of the dot.
     *
     * @param resource $fp
     * @see PHP_ParserGenerator_Data::ReportOutput()
     */
    function ConfigPrint($fp)
    {
        $rp = $this->rp;
        fprintf($fp, "%s ::=", $rp->lhs->name);
        // one extra round, so that a dot behind the last symbol is shown too
        for ($i = 0; $i <= $rp->nrhs; $i++) {
            if ($i === $this->dot) {
                fwrite($fp,' *');
            }
            if ($i === $rp->nrhs) {
                break;
            }
            $sp = $rp->rhs[$i];
            fprintf($fp,' %s', $sp->name);
            if ($sp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                for ($j = 1; $j < $sp->nsubsym; $j++) {
                    fprintf($fp, '|%s', $sp->subsym[$j]->name);
                }
            }
        }
    }

    /**
     * Hash a configuration for the associative array {@link $x4a}
     *
     * @return int
     */
    private static function confighash(PHP_ParserGenerator_Config $a)
    {
        return $a->rp->index * 37 + $a->dot;
    }

    /**
     * Insert a new record into the array. Return 1 if successful.
     * Prior data with the same key is NOT overwritten; 0 is returned then.
     *
     * @return int
     */
    static function Configtable_insert(PHP_ParserGenerator_Config $data)
    {
        $h = self::confighash($data);
        if (isset(self::$x4a[$h])) {
            $np = self::$x4a[$h];
        } else {
            $np = 0;
        }
        while ($np) {
            if (self::Configcmp($np->data, $data) == 0) {
                /* An existing entry with the same key is found. */
                /* Fail because overwrite is not allowed. */
                return 0;
            }
            $np = $np->next;
        }
        /* Insert the new data at the front of its chain */
        $np = new PHP_ParserGenerator_StateNode;
        $np->data = $data;
        if (isset(self::$x4a[$h])) {
            self::$x4a[$h]->from = $np->next;
            $np->next = self::$x4a[$h];
        }
        $np->from = $np;
        self::$x4a[$h] = $np;
        return 1;
    }

    /**
     * Return a pointer to data assigned to the given key. Return 0
     * if no such key.
     * @return PHP_ParserGenerator_Config|0
     */
    static function Configtable_find(PHP_ParserGenerator_Config $key)
    {
        $h = self::confighash($key);
        if (!isset(self::$x4a[$h])) {
            return 0;
        }
        $np = self::$x4a[$h];
        while ($np) {
            if (self::Configcmp($np->data, $key) == 0) {
                break;
            }
            $np = $np->next;
        }
        return $np ? $np->data : 0;
    }
}
?>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,851 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE: This source file is subject to version 3.01 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version CVS: $Id: Parser.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* The grammar parser for lemon grammar files.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_Parser
{
const INITIALIZE = 1;
const WAITING_FOR_DECL_OR_RULE = 2;
const WAITING_FOR_DECL_KEYWORD = 3;
const WAITING_FOR_DECL_ARG = 4;
const WAITING_FOR_PRECEDENCE_SYMBOL = 5;
const WAITING_FOR_ARROW = 6;
const IN_RHS = 7;
const LHS_ALIAS_1 = 8;
const LHS_ALIAS_2 = 9;
const LHS_ALIAS_3 = 10;
const RHS_ALIAS_1 = 11;
const RHS_ALIAS_2 = 12;
const PRECEDENCE_MARK_1 = 13;
const PRECEDENCE_MARK_2 = 14;
const RESYNC_AFTER_RULE_ERROR = 15;
const RESYNC_AFTER_DECL_ERROR = 16;
const WAITING_FOR_DESTRUCTOR_SYMBOL = 17;
const WAITING_FOR_DATATYPE_SYMBOL = 18;
const WAITING_FOR_FALLBACK_ID = 19;
/**
* Name of the input file
*
* @var string
*/
public $filename;
/**
* Linenumber at which current token starts
* @var int
*/
public $tokenlineno;
/**
* Number of parsing errors so far
* @var int
*/
public $errorcnt;
/**
* Index of current token within the input string
* @var int
*/
public $tokenstart;
/**
* Global state vector
* @var PHP_ParserGenerator_Data
*/
public $gp;
/**
* Parser state (one of the class constants for this class)
*
* - PHP_ParserGenerator_Parser::INITIALIZE,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_OR_RULE,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_KEYWORD,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_ARG,
* - PHP_ParserGenerator_Parser::WAITING_FOR_PRECEDENCE_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_ARROW,
* - PHP_ParserGenerator_Parser::IN_RHS,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_1,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_2,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_3,
* - PHP_ParserGenerator_Parser::RHS_ALIAS_1,
* - PHP_ParserGenerator_Parser::RHS_ALIAS_2,
* - PHP_ParserGenerator_Parser::PRECEDENCE_MARK_1,
* - PHP_ParserGenerator_Parser::PRECEDENCE_MARK_2,
* - PHP_ParserGenerator_Parser::RESYNC_AFTER_RULE_ERROR,
* - PHP_ParserGenerator_Parser::RESYNC_AFTER_DECL_ERROR,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DESTRUCTOR_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DATATYPE_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_FALLBACK_ID
* @var int
*/
public $state;
/**
* The fallback token
* @var PHP_ParserGenerator_Symbol
*/
public $fallback;
/**
* Left-hand side of the current rule
* @var PHP_ParserGenerator_Symbol
*/
public $lhs;
/**
* Alias for the LHS
* @var string
*/
public $lhsalias;
/**
* Number of right-hand side symbols seen
* @var int
*/
public $nrhs;
/**
* Right-hand side symbols
* @var array array of {@link PHP_ParserGenerator_Symbol} objects
*/
public $rhs = array();
/**
* Aliases for each RHS symbol name (or NULL)
* @var array array of strings
*/
public $alias = array();
/**
* Previous rule parsed
* @var PHP_ParserGenerator_Rule
*/
public $prevrule;
/**
* Keyword of a declaration
*
* This is one of the %keyword keywords in the grammar file
* @var string
*/
public $declkeyword;
/**
* Where the declaration argument should be put
*
* This is assigned as a reference to an internal variable
* @var mixed
*/
public $declargslot = array();
/**
* Where the declaration linenumber is put
*
* This is assigned as a reference to an internal variable
* @var mixed
*/
public $decllnslot;
/*enum e_assoc*/
public $declassoc; /* Assign this association to decl arguments */
public $preccounter; /* Assign this precedence to decl arguments */
/**
* @var PHP_ParserGenerator_Rule
*/
public $firstrule; /* Pointer to first rule in the grammar */
/**
* @var PHP_ParserGenerator_Rule
*/
public $lastrule; /* Pointer to the most recently parsed rule */
/**
 * The generator that owns this grammar parser
 * @var PHP_ParserGenerator
 */
private $lemon;
/**
 * Scratch slot that declaration argument/line-number references are
 * bound to when a declaration keyword has no dedicated target.
 *
 * parseonetoken() resets it to 0 for every token.  It is declared here
 * so that it is not created as a dynamic property at run time.
 * @var mixed
 */
public $a = 0;
/**
 * Create a grammar-file parser bound to the given generator.
 *
 * @param PHP_ParserGenerator $lem the owning generator
 */
function __construct(PHP_ParserGenerator $lem)
{
    $this->lemon = $lem;
}
/**
 * Run the preprocessor over the input file text.
 *
 * Looks for "%ifdef", "%ifndef" and "%endif" directives that start a
 * line.  "%ifdef"/"%ifndef" lines are removed (their newline is kept),
 * "%endif" lines are overwritten with spaces, and the text of an
 * excluded region is overwritten with spaces so that line numbers are
 * preserved.  Directives may be nested.
 *
 * A macro counts as defined when its name is a key of the generator's
 * $azDefine array.
 * NOTE(review): this assumes $azDefine is readable from here and keyed
 * by macro name - confirm against PHP_ParserGenerator.
 *
 * @param string &$z grammar text, modified in place
 * @return void
 * @throws Exception when a conditional region is never closed
 */
private function preprocess_input(&$z)
{
    $lineno       = 1; // 1-based, used only for the error message below
    $exclude      = 0; // nesting depth of the region being excluded (always an int)
    $start        = 0; // offset at which the excluded region begins
    $start_lineno = 0;
    for ($i = 0; $i < strlen($z); $i++) {
        if ($z[$i] == "\n") {
            $lineno++;
        }
        // Directives are only recognised at the very start of a line.
        if ($z[$i] != '%' || ($i > 0 && $z[$i - 1] != "\n")) {
            continue;
        }
        // A directive keyword must be followed by white space or end of input.
        $isEndif  = substr($z, $i, 6) === '%endif'
            && trim((string) substr($z, $i + 6, 1)) === '';
        $isIfdef  = substr($z, $i, 6) === '%ifdef'
            && trim((string) substr($z, $i + 6, 1)) === '';
        $isIfndef = substr($z, $i, 7) === '%ifndef'
            && trim((string) substr($z, $i + 7, 1)) === '';
        if ($isEndif) {
            if ($exclude) {
                $exclude--;
                if ($exclude === 0) {
                    // Blank out the excluded text, keeping its newlines.
                    for ($j = $start; $j < $i; $j++) {
                        if ($z[$j] != "\n") {
                            $z[$j] = ' ';
                        }
                    }
                }
            }
            // Blank out the %endif line itself.
            for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++) {
                $z[$j] = ' ';
            }
        } elseif ($isIfdef || $isIfndef) {
            if ($exclude) {
                // Nested conditional inside an excluded region.
                $exclude++;
            } else {
                // The macro name is the first word after the keyword.
                $j    = $i + ($isIfndef ? 7 : 6);
                $name = preg_match('/\G\s*(\S+)/', $z, $m, 0, $j) ? $m[1] : '';
                $defined = isset($this->lemon->azDefine[$name]);
                // %ifdef excludes when undefined, %ifndef when defined.
                $exclude = ($defined === $isIfndef) ? 1 : 0;
                if ($exclude) {
                    $start = $i;
                    $start_lineno = $lineno;
                }
            }
            // Remove the directive line (but not its newline).
            $eol = strpos($z, "\n", $i);
            if ($eol === false) {
                $z = substr($z, 0, $i);
            } else {
                $z = substr($z, 0, $i) . substr($z, $eol);
                // The newline now sits at $i; step back so the next
                // iteration visits it and counts the line.
                $i--;
            }
        }
    }
    if ($exclude) {
        throw new Exception("unterminated %ifdef starting on line $start_lineno\n");
    }
}
/**
 * In spite of its name, this function is really a scanner.
 *
 * It reads in the entire input file (all at once), runs the %ifdef
 * preprocessor over it, then tokenizes it.  Each token is passed to
 * {@link parseonetoken()}, which builds the grammar data structures.
 * On return $gp->rule holds the first rule and $gp->errorcnt the number
 * of errors counted by this parser.
 *
 * Token shapes handed to parseonetoken():
 * - string literal: the opening quote plus the contents, without the
 *   closing quote
 * - code block: the opening brace plus the contents, without the
 *   closing brace
 * - identifier, "::=", "|NAME" / "/NAME", or a single other character
 *
 * @param PHP_ParserGenerator_Data $gp global state vector; its filename
 *                                     property names the grammar file
 * @return void
 */
function Parse(PHP_ParserGenerator_Data $gp)
{
    // Characters that may appear in an identifier.
    $identChars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
    $this->gp       = $gp;
    $this->filename = $gp->filename;
    $this->errorcnt = 0;
    $this->state    = self::INITIALIZE;
    /* Begin by reading the input file */
    $filebuf = file_get_contents($this->filename);
    if (!$filebuf) {
        PHP_ParserGenerator::ErrorMsg($this->filename, 0, "Can't open this file for reading.");
        $gp->errorcnt++;
        return;
    }
    if (filesize($this->filename) != strlen($filebuf)) {
        PHP_ParserGenerator::ErrorMsg($this->filename, 0, "Can't read in all %d bytes of this file.",
            filesize($this->filename));
        $gp->errorcnt++;
        return;
    }
    /* Make an initial pass through the file to handle %ifdef and %ifndef */
    $this->preprocess_input($filebuf);
    /* Now scan the text of the input file */
    $lineno = 1;
    $len    = strlen($filebuf);
    for ($cp = 0; $cp < $len; $cp++) {
        $c = $filebuf[$cp];
        if ($c == "\n") {
            $lineno++; /* Keep track of the line number */
        }
        if (trim($c) === '') {
            continue; /* Skip all white space */
        }
        $next = ($cp + 1 < $len) ? $filebuf[$cp + 1] : '';
        if ($c == '/' && $next == '/') {
            /* Skip C++ style comments */
            $eol = strpos($filebuf, "\n", $cp + 2);
            if ($eol === false) {
                break;
            }
            // Land on the newline; it is counted here because the loop
            // increment steps over it.
            $lineno++;
            $cp = $eol;
            continue;
        }
        if ($c == '/' && $next == '*') {
            /* Skip C style comments */
            $end = strpos($filebuf, '*/', $cp + 2);
            if ($end === false) {
                // Unterminated comment: the rest of the file is comment text.
                break;
            }
            $lineno += substr_count(substr($filebuf, $cp + 2, $end - $cp - 2), "\n");
            $cp = $end + 1; // on the closing "/", the loop increment moves past it
            continue;
        }
        $this->tokenstart  = $cp;     /* Mark the beginning of the token */
        $this->tokenlineno = $lineno; /* Linenumber on which token begins */
        if ($c == '"') { /* String literals */
            $bodyStart = $cp + 1;
            $close     = strpos($filebuf, '"', $bodyStart);
            if ($close === false) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "String starting on this line is not terminated before the end of the file.");
                $this->errorcnt++;
                $nextcp = $cp = $len;
            } else {
                $cp     = $close;
                $nextcp = $cp + 1;
            }
            $lineno += substr_count(substr($filebuf, $bodyStart, $cp - $bodyStart), "\n");
        } elseif ($c == '{') { /* A block of C code */
            $cp++;
            if ($cp < $len && $filebuf[$cp] == '}') {
                // Give an empty block a one-space body so that the code
                // handed to parseonetoken() is never empty.
                $filebuf = substr($filebuf, 0, $cp) . ' ' . substr($filebuf, $cp);
                $len     = strlen($filebuf);
            }
            for ($level = 1; $cp < $len && ($level > 1 || $filebuf[$cp] != '}'); $cp++) {
                $ch   = $filebuf[$cp];
                $peek = ($cp + 1 < $len) ? $filebuf[$cp + 1] : '';
                if ($ch == "\n") {
                    $lineno++;
                } elseif ($ch == '{') {
                    $level++;
                } elseif ($ch == '}') {
                    $level--;
                } elseif ($ch == '/' && $peek == '*') {
                    /* Skip comments */
                    $end = strpos($filebuf, '*/', $cp + 2);
                    if ($end === false) {
                        $cp = $len;
                        break;
                    }
                    $lineno += substr_count(substr($filebuf, $cp + 2, $end - $cp - 2), "\n");
                    // Stop on the closing "/" so that the loop increment
                    // resumes with the character right after the comment.
                    $cp = $end + 1;
                } elseif ($ch == '/' && $peek == '/') {
                    /* Skip C++ style comments too */
                    $eol = strpos($filebuf, "\n", $cp + 2);
                    if ($eol === false) {
                        $cp = $len;
                        break;
                    }
                    $lineno++;
                    $cp = $eol;
                } elseif ($ch == "'" || $ch == '"') {
                    /* String and character literals */
                    $quote = $ch;
                    $prevc = 0;
                    for ($cp++; $cp < $len && ($filebuf[$cp] != $quote || $prevc === '\\'); $cp++) {
                        if ($filebuf[$cp] == "\n") {
                            $lineno++;
                        }
                        // A backslash escapes only the character after it.
                        $prevc = ($prevc === '\\') ? 0 : $filebuf[$cp];
                    }
                }
            }
            if ($cp >= $len) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "PHP code starting on this line is not terminated before the end of the file.");
                $this->errorcnt++;
                $nextcp = $cp;
            } else {
                $nextcp = $cp + 1;
            }
        } elseif (preg_match('/[a-zA-Z0-9]/', $c)) {
            /* Identifiers */
            $cp    += strspn($filebuf, $identChars, $cp);
            $nextcp = $cp;
        } elseif (substr($filebuf, $cp, 3) === '::=') {
            /* The operator "::=" */
            $cp    += 3;
            $nextcp = $cp;
        } elseif (($c == '/' || $c == '|') && $next !== '' && preg_match('/[a-zA-Z]/', $next)) {
            /* An alternative of a multi-terminal: "|NAME" or "/NAME" */
            $cp += 2;
            if ($cp < $len) {
                // Only consume identifier characters directly following.
                $cp += strspn($filebuf, $identChars, $cp);
            }
            $nextcp = $cp;
        } else {
            /* All other (one character) operators */
            $cp++;
            $nextcp = $cp;
        }
        $this->parseonetoken(substr($filebuf, $this->tokenstart,
            $cp - $this->tokenstart)); /* Parse the token */
        $cp = $nextcp - 1;
    }
    $gp->rule     = $this->firstrule;
    $gp->errorcnt = $this->errorcnt;
}
/**
 * Parse a single token.
 *
 * This is one step of the grammar-file state machine: depending on
 * {@link $state} the token starts or continues a rule or a
 * %declaration, and the state is advanced accordingly.  Problems are
 * reported through PHP_ParserGenerator::ErrorMsg() and counted in
 * {@link $errorcnt}; after an error the machine resynchronises on the
 * next "." or "%" token.
 *
 * @param string $token token text as produced by {@link Parse()}
 * @return void
 */
function parseonetoken($token)
{
    $x = $token;
    $this->a = 0; // for referencing in WAITING_FOR_DECL_KEYWORD
    if (PHP_ParserGenerator::DEBUG) {
        printf("%s:%d: Token=[%s] state=%d\n",
            $this->filename, $this->tokenlineno, $token, $this->state);
    }
    switch ($this->state) {
    case self::INITIALIZE:
        $this->prevrule = 0;
        $this->preccounter = 0;
        $this->firstrule = $this->lastrule = 0;
        $this->gp->nrule = 0;
        /* Fall thru to next case */
    case self::WAITING_FOR_DECL_OR_RULE:
        if ($x[0] == '%') {
            $this->state = self::WAITING_FOR_DECL_KEYWORD;
        } elseif (preg_match('/[a-z]/', $x[0])) {
            $this->lhs = PHP_ParserGenerator_Symbol::Symbol_new($x);
            $this->nrhs = 0;
            $this->lhsalias = 0;
            $this->state = self::WAITING_FOR_ARROW;
        } elseif ($x[0] == '{') {
            // A new rule starts with code === 0 (integer); anything else
            // means a code fragment has already been attached.
            if ($this->prevrule === 0) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "There is no prior rule upon which to attach the code " .
                    "fragment which begins on this line.");
                $this->errorcnt++;
            } elseif ($this->prevrule->code !== 0) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "Code fragment beginning on this line is not the first " .
                    "to follow the previous rule.");
                $this->errorcnt++;
            } else {
                $this->prevrule->line = $this->tokenlineno;
                $this->prevrule->code = substr($x, 1);
            }
        } elseif ($x[0] == '[') {
            $this->state = self::PRECEDENCE_MARK_1;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Token \"%s\" should be either \"%%\" or a nonterminal name.",
                $x);
            $this->errorcnt++;
        }
        break;
    case self::PRECEDENCE_MARK_1:
        if (!preg_match('/[A-Z]/', $x[0])) {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "The precedence symbol must be a terminal.");
            $this->errorcnt++;
        } elseif ($this->prevrule === 0) {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "There is no prior rule to assign precedence \"[%s]\".", $x);
            $this->errorcnt++;
        } elseif ($this->prevrule->precsym !== 0) {
            // precsym is the integer 0 until a symbol object is assigned.
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Precedence mark on this line is not the first to follow the previous rule.");
            $this->errorcnt++;
        } else {
            $this->prevrule->precsym = PHP_ParserGenerator_Symbol::Symbol_new($x);
        }
        $this->state = self::PRECEDENCE_MARK_2;
        break;
    case self::PRECEDENCE_MARK_2:
        if ($x[0] != ']') {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Missing \"]\" on precedence mark.");
            $this->errorcnt++;
        }
        $this->state = self::WAITING_FOR_DECL_OR_RULE;
        break;
    case self::WAITING_FOR_ARROW:
        if (substr($x, 0, 3) === '::=') {
            $this->state = self::IN_RHS;
        } elseif ($x[0] == '(') {
            $this->state = self::LHS_ALIAS_1;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Expected to see a \":\" following the LHS symbol \"%s\".",
                $this->lhs->name);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::LHS_ALIAS_1:
        if (preg_match('/[A-Za-z]/', $x[0])) {
            $this->lhsalias = $x;
            $this->state = self::LHS_ALIAS_2;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "\"%s\" is not a valid alias for the LHS \"%s\"\n",
                $x, $this->lhs->name);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::LHS_ALIAS_2:
        if ($x[0] == ')') {
            $this->state = self::LHS_ALIAS_3;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Missing \")\" following LHS alias name \"%s\".", $this->lhsalias);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::LHS_ALIAS_3:
        if ($x == '::=') {
            $this->state = self::IN_RHS;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Missing \"::=\" following: \"%s(%s)\".",
                $this->lhs->name, $this->lhsalias);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::IN_RHS:
        if ($x[0] == '.') {
            /* End of the rule: build the rule object and link it in */
            $rp = new PHP_ParserGenerator_Rule;
            $rp->ruleline = $this->tokenlineno;
            for ($i = 0; $i < $this->nrhs; $i++) {
                $rp->rhs[$i] = $this->rhs[$i];
                $rp->rhsalias[$i] = $this->alias[$i];
            }
            if (count(array_unique($rp->rhsalias)) != count($rp->rhsalias)) {
                // Some alias value repeats; report each repeated alias
                // name (symbols without an alias hold 0 and are skipped).
                $used = array();
                foreach ($rp->rhsalias as $i => $symbol) {
                    if (!is_string($symbol)) {
                        continue;
                    }
                    if (isset($used[$symbol])) {
                        PHP_ParserGenerator::ErrorMsg($this->filename,
                            $this->tokenlineno,
                            "RHS symbol \"%s\" used multiple times.",
                            $symbol);
                        $this->errorcnt++;
                    } else {
                        $used[$symbol] = $i;
                    }
                }
            }
            $rp->lhs = $this->lhs;
            $rp->lhsalias = $this->lhsalias;
            $rp->nrhs = $this->nrhs;
            $rp->code = 0;
            $rp->precsym = 0;
            $rp->index = $this->gp->nrule++;
            $rp->nextlhs = $rp->lhs->rule;
            $rp->lhs->rule = $rp;
            $rp->next = 0;
            if ($this->firstrule === 0) {
                $this->firstrule = $this->lastrule = $rp;
            } else {
                $this->lastrule->next = $rp;
                $this->lastrule = $rp;
            }
            $this->prevrule = $rp;
            $this->state = self::WAITING_FOR_DECL_OR_RULE;
        } elseif (preg_match('/[a-zA-Z]/', $x[0])) {
            if ($this->nrhs >= PHP_ParserGenerator::MAXRHS) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "Too many symbols on RHS or rule beginning at \"%s\".",
                    $x);
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            } else {
                if (isset($this->rhs[$this->nrhs - 1])) {
                    $msp = $this->rhs[$this->nrhs - 1];
                    if ($msp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                        $inf = array_reduce($msp->subsym,
                            array($this, '_printmulti'), '');
                        PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                            'WARNING: symbol ' . $x . ' will not' .
                            ' be part of previous multiterminal %s',
                            substr($inf, 0, strlen($inf) - 1)
                        );
                    }
                }
                $this->rhs[$this->nrhs] = PHP_ParserGenerator_Symbol::Symbol_new($x);
                $this->alias[$this->nrhs] = 0;
                $this->nrhs++;
            }
        } elseif (($x[0] == '|' || $x[0] == '/') && $this->nrhs > 0 && isset($x[1])) {
            /* "|NAME" or "/NAME": add an alternative to the previous symbol.
            ** A lone "|" or "/" carries no name and is reported below as an
            ** illegal character. */
            $msp = $this->rhs[$this->nrhs - 1];
            if ($msp->type != PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                // First alternative: wrap the previous symbol in a new
                // multi-terminal that takes over its name.
                $origsp = $msp;
                $msp = new PHP_ParserGenerator_Symbol;
                $msp->type = PHP_ParserGenerator_Symbol::MULTITERMINAL;
                $msp->nsubsym = 1;
                $msp->subsym = array($origsp);
                $msp->name = $origsp->name;
                $this->rhs[$this->nrhs - 1] = $msp;
            }
            $msp->nsubsym++;
            $msp->subsym[$msp->nsubsym - 1] = PHP_ParserGenerator_Symbol::Symbol_new(substr($x, 1));
            if (preg_match('/[a-z]/', $x[1]) ||
                  preg_match('/[a-z]/', $msp->subsym[0]->name[0])) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "Cannot form a compound containing a non-terminal");
                $this->errorcnt++;
            }
        } elseif ($x[0] == '(' && $this->nrhs > 0) {
            $this->state = self::RHS_ALIAS_1;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Illegal character on RHS of rule: \"%s\".", $x);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::RHS_ALIAS_1:
        if (preg_match('/[A-Za-z]/', $x[0])) {
            $this->alias[$this->nrhs - 1] = $x;
            $this->state = self::RHS_ALIAS_2;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
                $x, $this->rhs[$this->nrhs - 1]->name);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::RHS_ALIAS_2:
        if ($x[0] == ')') {
            $this->state = self::IN_RHS;
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Missing \")\" following RHS alias name \"%s\".",
                $this->alias[$this->nrhs - 1]);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_RULE_ERROR;
        }
        break;
    case self::WAITING_FOR_DECL_KEYWORD:
        if (preg_match('/[A-Za-z]/', $x[0])) {
            // Declarations that store one argument: keyword => (property
            // of $this->gp receiving the argument, property receiving the
            // line number, or null when no line number is recorded).
            $argSlots = array(
                'name'               => array('name', null),
                'include'            => array('include_code', 'includeln'),
                'include_class'      => array('include_classcode', 'include_classln'),
                'declare_class'      => array('declare_classcode', 'declare_classln'),
                'code'               => array('extracode', 'extracodeln'),
                'token_destructor'   => array('tokendest', 'tokendestln'),
                'default_destructor' => array('vardest', 'vardestln'),
                'token_prefix'       => array('tokenprefix', null),
                'syntax_error'       => array('error', 'errorln'),
                'parse_accept'       => array('accept', 'acceptln'),
                'parse_failure'      => array('failure', 'failureln'),
                'stack_overflow'     => array('overflow', 'overflowln'),
                'token_type'         => array('tokentype', null),
                'default_type'       => array('vartype', null),
                'stack_size'         => array('stacksize', null),
                'start_symbol'       => array('start', null),
            );
            // Precedence declarations: keyword => associativity.
            $assocs = array(
                'left'     => PHP_ParserGenerator_Symbol::LEFT,
                'right'    => PHP_ParserGenerator_Symbol::RIGHT,
                'nonassoc' => PHP_ParserGenerator_Symbol::NONE,
            );
            $this->declkeyword = $x;
            // Default both slots to the scratch property; the branches
            // below rebind them where the keyword has a real target.
            $this->declargslot = &$this->a;
            $this->decllnslot = &$this->a;
            $this->state = self::WAITING_FOR_DECL_ARG;
            if (isset($argSlots[$x])) {
                list($argProp, $lnProp) = $argSlots[$x];
                $this->declargslot = &$this->gp->$argProp;
                if ($lnProp !== null) {
                    $this->decllnslot = &$this->gp->$lnProp;
                }
            } elseif (isset($assocs[$x])) {
                $this->preccounter++;
                $this->declassoc = $assocs[$x];
                $this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
            } elseif ('destructor' == $x) {
                $this->state = self::WAITING_FOR_DESTRUCTOR_SYMBOL;
            } elseif ('type' == $x) {
                $this->state = self::WAITING_FOR_DATATYPE_SYMBOL;
            } elseif ('fallback' == $x) {
                $this->fallback = 0;
                $this->state = self::WAITING_FOR_FALLBACK_ID;
            } else {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "Unknown declaration keyword: \"%%%s\".", $x);
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_DECL_ERROR;
            }
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Illegal declaration keyword: \"%s\".", $x);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_DECL_ERROR;
        }
        break;
    case self::WAITING_FOR_DESTRUCTOR_SYMBOL:
        if (!preg_match('/[A-Za-z]/', $x[0])) {
            // The message is used as a format string (the other calls rely
            // on %s and %%), so the literal percent sign is doubled.
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Symbol name missing after %%destructor keyword");
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_DECL_ERROR;
        } else {
            $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
            $this->declargslot = &$sp->destructor;
            $this->decllnslot = &$sp->destructorln;
            $this->state = self::WAITING_FOR_DECL_ARG;
        }
        break;
    case self::WAITING_FOR_DATATYPE_SYMBOL:
        if (!preg_match('/[A-Za-z]/', $x[0])) {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Symbol name missing after %%type keyword");
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_DECL_ERROR;
        } else {
            $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
            $this->declargslot = &$sp->datatype;
            $this->state = self::WAITING_FOR_DECL_ARG;
        }
        break;
    case self::WAITING_FOR_PRECEDENCE_SYMBOL:
        if ($x[0] == '.') {
            $this->state = self::WAITING_FOR_DECL_OR_RULE;
        } elseif (preg_match('/[A-Z]/', $x[0])) {
            $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
            if ($sp->prec >= 0) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "Symbol \"%s\" has already been given a precedence.", $x);
                $this->errorcnt++;
            } else {
                $sp->prec = $this->preccounter;
                $sp->assoc = $this->declassoc;
            }
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Can't assign a precedence to \"%s\".", $x);
            $this->errorcnt++;
        }
        break;
    case self::WAITING_FOR_DECL_ARG:
        if (preg_match('/[A-Za-z0-9{"]/', $x[0])) {
            // NOTE(review): loose comparison kept on purpose - the initial
            // values of the target properties are not visible here, and the
            // result for non-numeric strings differs between PHP 7 and 8.
            if ($this->declargslot != 0) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "The argument \"%s\" to declaration \"%%%s\" is not the first.",
                    $x[0] == '"' ? substr($x, 1) : $x, $this->declkeyword);
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_DECL_ERROR;
            } else {
                // Strip the opening quote or brace left on the token.
                $this->declargslot = ($x[0] == '"' || $x[0] == '{') ? substr($x, 1) : $x;
                $this->a = 1;
                if (!$this->decllnslot) {
                    $this->decllnslot = $this->tokenlineno;
                }
                $this->state = self::WAITING_FOR_DECL_OR_RULE;
            }
        } else {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "Illegal argument to %%%s: %s", $this->declkeyword, $x);
            $this->errorcnt++;
            $this->state = self::RESYNC_AFTER_DECL_ERROR;
        }
        break;
    case self::WAITING_FOR_FALLBACK_ID:
        if ($x[0] == '.') {
            $this->state = self::WAITING_FOR_DECL_OR_RULE;
        } elseif (!preg_match('/[A-Z]/', $x[0])) {
            PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                "%%fallback argument \"%s\" should be a token", $x);
            $this->errorcnt++;
        } else {
            $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
            if ($this->fallback === 0) {
                // The first token after %fallback is the fallback itself.
                $this->fallback = $sp;
            } elseif (is_object($sp->fallback)) {
                PHP_ParserGenerator::ErrorMsg($this->filename, $this->tokenlineno,
                    "More than one fallback assigned to token %s", $x);
                $this->errorcnt++;
            } else {
                $sp->fallback = $this->fallback;
                $this->gp->has_fallback = 1;
            }
        }
        break;
    case self::RESYNC_AFTER_RULE_ERROR:
        /* Fall thru: rule errors resynchronise like declaration errors */
    case self::RESYNC_AFTER_DECL_ERROR:
        if ($x[0] == '.') {
            $this->state = self::WAITING_FOR_DECL_OR_RULE;
        }
        if ($x[0] == '%') {
            $this->state = self::WAITING_FOR_DECL_KEYWORD;
        }
        break;
    }
}
/**
 * array_reduce() callback that builds a "NAME|NAME|" listing of the
 * alternatives of a multi-terminal symbol.
 *
 * @param string $a text accumulated so far (a false-like value counts
 *                  as an empty string)
 * @param object $b symbol whose name property is appended
 * @return string the accumulated text followed by the name and "|"
 */
private function _printmulti($a, $b)
{
    $soFar = $a ? $a : '';
    return $soFar . ($b->name . '|');
}
}

View File

@@ -0,0 +1,126 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: PropagationLink.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* A followset propagation link indicates that the contents of one
* configuration followset should be propagated to another whenever
* the first changes.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_PropagationLink
{
    /**
     * The configuration that defines this propagation link
     *
     * @var PHP_ParserGenerator_Config
     */
    public $cfp;
    /**
     * The next propagation link, or 0 at the end of the list
     *
     * @var PHP_ParserGenerator_PropagationLink|0
     */
    public $next = 0;

    /**
     * Add a propagation link to the current list
     *
     * A new link for $cfp is prepended to the list; on return $plpp
     * refers to that new link.
     *
     * @param PHP_ParserGenerator_PropagationLink|0 &$plpp head of an
     *        existing list, or 0 for an empty list
     * @param PHP_ParserGenerator_Config $cfp configuration to link
     *
     * @return void
     */
    static function Plink_add(&$plpp, PHP_ParserGenerator_Config $cfp)
    {
        $new = new PHP_ParserGenerator_PropagationLink;
        $new->next = $plpp;
        $plpp = $new;
        $new->cfp = $cfp;
    }

    /**
     * Transfer every propagation link on the list "from" to the list "to"
     *
     * The links themselves are moved (not cloned), each one being
     * prepended to "to", so they end up in reverse order in front of
     * the links "to" already had.  Both lists may be empty (0), which
     * is why neither parameter carries a class type declaration.
     *
     * @param PHP_ParserGenerator_PropagationLink|0 &$to   head of the
     *        receiving list, updated in place
     * @param PHP_ParserGenerator_PropagationLink|0 $from head of the
     *        list to move
     *
     * @return void
     */
    static function Plink_copy(&$to, $from)
    {
        while ($from) {
            $nextpl = $from->next;
            $from->next = $to;
            $to = $from;
            $from = $nextpl;
        }
    }

    /**
     * Delete every propagation link on the list
     *
     * Each link's "next" reference is reset to 0 so the links no longer
     * refer to one another.
     *
     * @param PHP_ParserGenerator_PropagationLink|0 $plp head of the list
     *
     * @return void
     */
    static function Plink_delete($plp)
    {
        while ($plp) {
            $nextpl = $plp->next;
            $plp->next = 0;
            $plp = $nextpl;
        }
    }
}

View File

@@ -0,0 +1,144 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: Rule.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* Each production rule in the grammar is stored in this class
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_Rule
{
/**
* Left-hand side of the rule
*
* The grammar parser stores a single symbol here.
* @var PHP_ParserGenerator_Symbol
*/
public $lhs;
/**
* Alias for the LHS
*
* The grammar parser stores the alias name here, or 0 when the
* left-hand side has no alias; the declared default is an empty array.
* @var string|int|array
*/
public $lhsalias = array();
/**
* Line number for the rule
*
* The grammar parser records the line of the "." that ends the rule.
* @var int
*/
public $ruleline;
/**
* Number of right-hand side symbols
* @var int
*/
public $nrhs;
/**
* The right-hand side symbols
*
* Has no declared default; it becomes an array once the first
* symbol is stored.
* @var array an array of {@link PHP_ParserGenerator_Symbol} objects
*/
public $rhs;
/**
* Aliases for each right-hand side symbol.
*
* In this rule:
* <pre>
* foo ::= BAR(A) baz(B).
* </pre>
*
* The right-hand side aliases are A for BAR, and B for baz.
* The grammar parser stores 0 for a symbol that has no alias.
* @var array aliases are indexed by the right-hand side symbol index.
*/
public $rhsalias = array();
/**
* Line number at which code begins
* @var int
*/
public $line;
/**
* The code executed when this rule is reduced
*
* <pre>
* foo(R) ::= BAR(A) baz(B). {R = A + B;}
* </pre>
*
* In the rule above, the code is "R = A + B;"
* The grammar parser stores 0 until a code fragment is attached.
* @var string|0
*/
public $code;
/**
* Precedence symbol for this rule
*
* The grammar parser stores 0 until a precedence mark is attached.
* @var PHP_ParserGenerator_Symbol|0
*/
public $precsym;
/**
* An index number for this rule
*
* Used in both naming of reduce functions and determining which rule code
* to use for reduce actions
* @var int
*/
public $index;
/**
* True if this rule is ever reduced
* @var boolean
*/
public $canReduce;
/**
* Next rule with the same left-hand side
* @var PHP_ParserGenerator_Rule|0
*/
public $nextlhs;
/**
* Next rule in the global list
* @var PHP_ParserGenerator_Rule|0
*/
public $next;
}

View File

@@ -0,0 +1,283 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: State.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* The structure used to represent a state in the associative array
* for a PHP_ParserGenerator_Config.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_StateNode
{
/**
 * The configuration used as lookup key for this entry
 * (assigned by PHP_ParserGenerator_State::State_insert()).
 * @var PHP_ParserGenerator_Config
 */
public $key;
/**
 * The state stored under {@link $key}
 * (assigned by PHP_ParserGenerator_State::State_insert()).
 * @var PHP_ParserGenerator_State
 */
public $data;
/**
 * Back link of the hash-bucket chain; 0 until the node is inserted.
 *
 * State_insert() maintains it, but its own comments note that the link is
 * never read.
 * @var PHP_ParserGenerator_StateNode|0
 */
public $from = 0;
/**
 * Next node in the same hash bucket, or 0 at the end of the chain.
 * @var PHP_ParserGenerator_StateNode|0
 */
public $next = 0;
}
/**
* Each state of the generated parser's finite state machine
* is encoded as an instance of this class
*
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_State
{
    /**
     * Basis configurations of this state
     * @var PHP_ParserGenerator_Config
     */
    public $bp;
    /**
     * Every configuration belonging to this state
     * @var PHP_ParserGenerator_Config
     */
    public $cfp;
    /**
     * Sequential number of this state
     *
     * @var int
     */
    public $statenum;
    /**
     * Head of the linked list of actions for this state
     * @var PHP_ParserGenerator_Action
     */
    public $ap;
    /**
     * How many terminal (token) actions this state has
     *
     * @var int
     */
    public $nTknAct;
    /**
     * How many non-terminal actions this state has
     *
     * @var int
     */
    public $nNtAct;
    /**
     * Offset into the $yy_action table for terminal tokens
     *
     * @var int
     */
    public $iTknOfst;
    /**
     * Offset into the $yy_action table for non-terminals
     *
     * @var int
     */
    public $iNtOfst;
    /**
     * Default action
     *
     * @var int
     */
    public $iDflt;
    /**
     * Hash table of registered states.
     *
     * Keys are the integer hashes produced by statehash(); each value is the
     * head PHP_ParserGenerator_StateNode of that bucket's chain.
     *
     * @var array
     */
    public static $x3a = array();
    /**
     * Every PHP_ParserGenerator_StateNode created by State_insert(), in
     * insertion order (the node wraps the state in its "data" field).
     *
     * @var array
     */
    public static $states = array();

    /**
     * Ordering callback used when re-sorting states.
     *
     * States with more non-terminal actions come first; ties are broken by
     * the larger number of token actions.
     *
     * @param object $a state-like object exposing nNtAct and nTknAct
     * @param object $b state-like object exposing nNtAct and nTknAct
     * @return int negative, zero or positive, as expected by usort()
     */
    static function stateResortCompare($a, $b)
    {
        $byNonTerminals = $b->nNtAct - $a->nNtAct;
        if ($byNonTerminals !== 0) {
            return $byNonTerminals;
        }
        return $b->nTknAct - $a->nTknAct;
    }

    /**
     * Compare two states through their basis configuration chains.
     *
     * The chains are walked in lock step (following "bp"); the first
     * difference in rule index, then in dot position, decides. When one
     * chain is a prefix of the other, the longer chain is the greater one.
     *
     * @param PHP_ParserGenerator_Config|0 $a
     * @param PHP_ParserGenerator_Config|0 $b
     * @return int
     */
    static function statecmp($a, $b)
    {
        $rc = 0;
        while ($rc == 0 && $a && $b) {
            $rc = $a->rp->index - $b->rp->index;
            if ($rc === 0) {
                $rc = $a->dot - $b->dot;
            }
            $a = $a->bp;
            $b = $b->bp;
        }
        if ($rc != 0) {
            return $rc;
        }
        // Equal so far: whichever chain still has entries left is the larger.
        if ($b) {
            return -1;
        }
        if ($a) {
            return 1;
        }
        return $rc;
    }

    /**
     * Compute the hash of a state from its configuration chain.
     *
     * @param PHP_ParserGenerator_Config $a first configuration of the chain
     * @return int
     */
    private static function statehash(PHP_ParserGenerator_Config $a)
    {
        $hash = 0;
        for ($cfg = $a; $cfg; $cfg = $cfg->bp) {
            $hash = $hash * 571 + $cfg->rp->index * 37 + $cfg->dot;
        }
        return (int) $hash;
    }

    /**
     * Look up the state registered under the given configuration.
     *
     * @param PHP_ParserGenerator_Config $key
     * @return PHP_ParserGenerator_State|0 the stored state, or 0 when the
     *                                     key is unknown
     */
    static function State_find(PHP_ParserGenerator_Config $key)
    {
        if (!count(self::$x3a)) {
            return 0;
        }
        $bucket = self::statehash($key);
        if (!isset(self::$x3a[$bucket])) {
            return 0;
        }
        for ($node = self::$x3a[$bucket]; $node; $node = $node->next) {
            if (self::statecmp($node->key, $key) == 0) {
                return $node->data;
            }
        }
        return 0;
    }

    /**
     * Register a state under a configuration key.
     *
     * An entry that already exists for an equal key is left untouched.
     *
     * @param PHP_ParserGenerator_State  $state
     * @param PHP_ParserGenerator_Config $key
     * @return int 1 when the state was stored, 0 when the key was already
     *             present
     */
    static function State_insert(PHP_ParserGenerator_State $state, PHP_ParserGenerator_Config $key)
    {
        $bucket = self::statehash($key);
        $head = isset(self::$x3a[$bucket]) ? self::$x3a[$bucket] : 0;
        for ($node = $head; $node; $node = $node->next) {
            if (self::statecmp($node->key, $key) == 0) {
                /* Same key already stored: overwriting is not allowed. */
                return 0;
            }
        }
        /* Build the new entry */
        $fresh = new PHP_ParserGenerator_StateNode;
        $fresh->key = $key;
        $fresh->data = $state;
        self::$states[] = $fresh;
        // The original lemon code always points the from link at the node
        // itself, which yields a faulty double-linked list. The from links
        // are never used, so this is most likely a copy/paste slip from a
        // textbook algorithm that nobody noticed.
        if (isset(self::$x3a[$bucket])) {
            self::$x3a[$bucket]->from = $fresh; // lemon has $np->next here
        }
        // Push the new node in front of the bucket's chain.
        $fresh->next = $head;
        self::$x3a[$bucket] = $fresh;
        $fresh->from = $fresh;
        return 1;
    }

    /**
     * Return the list of entries recorded by State_insert(), in insertion
     * order.
     *
     * @return array
     */
    static function State_arrayof()
    {
        return self::$states;
    }
}

View File

@@ -0,0 +1,288 @@
<?php
/**
* PHP_ParserGenerator, a php 5 parser generator.
*
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: Symbol.php 302382 2010-08-17 06:08:09Z jespino $
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since File available since Release 0.1.0
*/
/**
* Symbols (terminals and nonterminals) of the grammar are stored in this class
*
* @category PHP
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/PHP_ParserGenerator
* @since Class available since Release 0.1.0
*/
class PHP_ParserGenerator_Symbol
{
    /**
     * Symbols that start with a capital letter like FOO.
     *
     * These are tokens directly from the lexer
     */
    const TERMINAL = 1;
    /**
     * Symbols that start with a lower-case letter like foo.
     *
     * These are grammar rules like "foo ::= BLAH."
     */
    const NONTERMINAL = 2;
    /**
     * Multiple terminal symbols.
     *
     * These are a grammar rule that consists of several terminals like
     * FOO|BAR|BAZ. Note that non-terminals cannot be in a multi-terminal,
     * and a multi-terminal acts like a single terminal.
     *
     * "FOO|BAR FOO|BAZ" is actually two multi-terminals, FOO|BAR and FOO|BAZ.
     */
    const MULTITERMINAL = 3;
    /**#@+
     * Associativity values stored in {@link $assoc}
     */
    const LEFT = 1;
    const RIGHT = 2;
    const NONE = 3;
    const UNK = 4;
    /**#@-*/
    /**
     * Name of the symbol
     *
     * @var string
     */
    public $name;
    /**
     * Index of this symbol.
     *
     * This will ultimately end up representing the symbol in the generated
     * parser
     * @var int
     */
    public $index;
    /**
     * Symbol type
     *
     * One of PHP_ParserGenerator_Symbol::TERMINAL,
     * PHP_ParserGenerator_Symbol::NONTERMINAL or
     * PHP_ParserGenerator_Symbol::MULTITERMINAL
     * @var int
     */
    public $type;
    /**
     * Linked list of rules that use this symbol, if it is a non-terminal.
     * @var PHP_ParserGenerator_Rule
     */
    public $rule;
    /**
     * Fallback token in case this token doesn't parse
     * @var PHP_ParserGenerator_Symbol
     */
    public $fallback;
    /**
     * Precedence, if defined.
     *
     * -1 if no unusual precedence
     * @var int
     */
    public $prec = -1;
    /**
     * Associativity if precedence is defined.
     *
     * One of PHP_ParserGenerator_Symbol::LEFT,
     * PHP_ParserGenerator_Symbol::RIGHT, PHP_ParserGenerator_Symbol::NONE
     * or PHP_ParserGenerator_Symbol::UNK
     * @var int
     */
    public $assoc;
    /**
     * First-set for all rules of this symbol
     *
     * @var array
     */
    public $firstset;
    /**
     * True if this symbol is a non-terminal and can generate an empty
     * result.
     *
     * For instance "foo ::= ."
     * @var boolean
     */
    public $lambda;
    /**
     * Code that executes whenever this symbol is popped from the stack during
     * error processing.
     *
     * @var string|0
     */
    public $destructor = 0;
    /**
     * Line number of destructor code
     * @var int
     */
    public $destructorln;
    /**
     * Unused relic of the C version of Lemon.
     *
     * The data type of information held by this object. Only used
     * if this is a non-terminal
     * @var string
     */
    public $datatype;
    /**
     * Unused relic of the C version of Lemon.
     *
     * The data type number. In the parser, the value
     * stack is a union. The .yy%d element of this
     * union is the correct data type for this object
     * @var string
     */
    public $dtnum;
    /**#@+
     * The following fields are used by MULTITERMINALs only
     */
    /**
     * Number of terminal symbols in the MULTITERMINAL
     *
     * This is of course the same as count($this->subsym)
     * @var int
     */
    public $nsubsym;
    /**
     * Array of terminal symbols in the MULTITERMINAL
     * @var array an array of {@link PHP_ParserGenerator_Symbol} objects
     */
    public $subsym = array();
    /**#@-*/
    /**
     * Singleton storage of symbols, keyed by symbol name
     *
     * @var array an array of PHP_ParserGenerator_Symbol objects
     */
    private static $_symbol_table = array();

    /**
     * Return the (terminal or nonterminal) symbol named "x".
     *
     * A new symbol is created and registered the first time "x" is seen;
     * every later call with the same name returns that very same object
     * (one singleton per name). The type is derived from the first
     * character: A-Z makes a TERMINAL, anything else a NONTERMINAL.
     *
     * @param string $x symbol name (expected to be non-empty)
     * @return PHP_ParserGenerator_Symbol
     */
    public static function Symbol_new($x)
    {
        if (isset(self::$_symbol_table[$x])) {
            return self::$_symbol_table[$x];
        }
        $sp = new PHP_ParserGenerator_Symbol;
        $sp->name = $x;
        $sp->type = preg_match('/[A-Z]/', $x[0]) ? self::TERMINAL : self::NONTERMINAL;
        $sp->rule = 0;
        $sp->fallback = 0;
        $sp->prec = -1;
        $sp->assoc = self::UNK;
        $sp->firstset = array();
        $sp->lambda = false;
        $sp->destructor = 0;
        $sp->datatype = 0;
        self::$_symbol_table[$sp->name] = $sp;
        return $sp;
    }

    /**
     * Return the number of unique symbols
     *
     * @return int
     */
    public static function Symbol_count()
    {
        return count(self::$_symbol_table);
    }

    /**
     * Return all registered symbols as a 0-based list, in registration order
     *
     * @return array an array of PHP_ParserGenerator_Symbol objects
     */
    public static function Symbol_arrayof()
    {
        return array_values(self::$_symbol_table);
    }

    /**
     * Look up a symbol by name without creating it
     *
     * @param string $x symbol name
     * @return PHP_ParserGenerator_Symbol|0 the symbol, or 0 if unknown
     */
    public static function Symbol_find($x)
    {
        if (isset(self::$_symbol_table[$x])) {
            return self::$_symbol_table[$x];
        }
        return 0;
    }

    /**
     * Sort function helper for symbols
     *
     * Symbols that begin with upper case letters (terminals or tokens)
     * must sort before symbols that begin with lower case letters
     * (non-terminals). Other than that, the order does not matter.
     *
     * We find experimentally that leaving the symbols in their original
     * order (the order they appeared in the grammar file) gives the
     * smallest parser tables in SQLite.
     *
     * @param PHP_ParserGenerator_Symbol $a
     * @param PHP_ParserGenerator_Symbol $b
     * @return int negative, zero or positive, as expected by usort()
     */
    public static function sortSymbols($a, $b)
    {
        // Names starting after 'Z' in ASCII are pushed behind every other
        // symbol by a large constant; the index keeps the original order
        // within each group.
        $i1 = $a->index + 10000000*(ord($a->name[0]) > ord('Z'));
        $i2 = $b->index + 10000000*(ord($b->name[0]) > ord('Z'));
        return $i1 - $i2;
    }

    /**
     * Return true if two symbols are the same.
     *
     * Two symbols are the same when they are the same object, or when both
     * are MULTITERMINALs made of the same sub-symbols in the same order.
     * Sub-symbols are compared by identity: symbols are singletons (see
     * {@link Symbol_new()}), and a loose "!=" between objects would instead
     * compare all their properties recursively, which is slower and would
     * treat distinct look-alike objects as equal.
     *
     * @param PHP_ParserGenerator_Symbol $a
     * @param PHP_ParserGenerator_Symbol $b
     * @return int 1 if the symbols are the same, 0 otherwise
     */
    public static function same_symbol(PHP_ParserGenerator_Symbol $a, PHP_ParserGenerator_Symbol $b)
    {
        if ($a === $b) return 1;
        if ($a->type != self::MULTITERMINAL) return 0;
        if ($b->type != self::MULTITERMINAL) return 0;
        if ($a->nsubsym != $b->nsubsym) return 0;
        for ($i = 0; $i < $a->nsubsym; $i++) {
            if ($a->subsym[$i] !== $b->subsym[$i]) return 0;
        }
        return 1;
    }
}

View File

@@ -0,0 +1,5 @@
<?php
// Command-line entry point of the parser generator: loads the
// PHP_ParserGenerator class (resolved through the include_path),
// instantiates it and hands control to its main() method.
require_once 'PHP/ParserGenerator.php';
$me = new PHP_ParserGenerator;
$me->main();
?>

View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Regenerates the OQL lexer and parser from their grammar files.
#
# The generators live in the PHP/ sub-directory next to this script, while
# the grammar files (oql-lexer.plex, oql-parser.y) sit one level up -- the
# same layout build.cmd relies on.

# Run from the script's own directory so the relative paths below resolve
# no matter where the script is invoked from.
cd "$(dirname "$0")" || exit 1

php PHP/LexerGenerator/cli.php ../oql-lexer.plex
php PHP/ParserGenerator/cli.php ../oql-parser.y

5
core/oql/build/build.cmd Normal file
View File

@@ -0,0 +1,5 @@
rem Regenerates the OQL lexer and parser from the grammar files located in the parent directory
rem must be run with current directory = the directory of the batch
rem PEAR is required to build
rem The include_path below points at a local PEAR installation; adjust C:\iTop\PHP\PEAR to your own setup
php -d include_path=".;C:\iTop\PHP\PEAR" ".\PHP\LexerGenerator\cli.php" ..\oql-lexer.plex
rem NOTE(review): the parser generator is started without the include_path override - confirm it does not need PEAR
php ".\PHP\ParserGenerator\cli.php" ..\oql-parser.y
pause

View File

@@ -2,7 +2,6 @@
# The following source files are not re-distributed with the "build" of the application
# since they are used solely for constructing other files during the build process
#
build.cmd
build.bash
build
oql-lexer.plex
oql-parser.y
oql-parser.y

View File

@@ -96,8 +96,8 @@ class OQLParser_yyStackEntry
// code external to the class is included here
// declare_class is output here
#line 24 "oql-parser.y"
class OQLParserRaw#line 102 "oql-parser.php"
#line 24 "..\oql-parser.y"
class OQLParserRaw#line 102 "..\oql-parser.php"
{
/* First off, code is included which follows the "include_class" declaration
** in the input file. */
@@ -1422,139 +1422,139 @@ static public $yy_action = array(
** function yy_r0($yymsp){ ... } // User supplied code
** #line <lineno> <thisfile>
*/
#line 29 "oql-parser.y"
#line 29 "..\oql-parser.y"
function yy_r0(){ $this->my_result = $this->yystack[$this->yyidx + 0]->minor; }
#line 1431 "oql-parser.php"
#line 32 "oql-parser.y"
#line 1431 "..\oql-parser.php"
#line 32 "..\oql-parser.y"
function yy_r2(){
$this->_retvalue = new OqlObjectQuery($this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor, $this->yystack[$this->yyidx + -1]->minor, array($this->yystack[$this->yyidx + -2]->minor));
}
#line 1436 "oql-parser.php"
#line 35 "oql-parser.y"
#line 1436 "..\oql-parser.php"
#line 35 "..\oql-parser.y"
function yy_r3(){
$this->_retvalue = new OqlObjectQuery($this->yystack[$this->yyidx + -4]->minor, $this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor, $this->yystack[$this->yyidx + -1]->minor, array($this->yystack[$this->yyidx + -2]->minor));
}
#line 1441 "oql-parser.php"
#line 39 "oql-parser.y"
#line 1441 "..\oql-parser.php"
#line 39 "..\oql-parser.y"
function yy_r4(){
$this->_retvalue = new OqlObjectQuery($this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor, $this->yystack[$this->yyidx + -1]->minor, $this->yystack[$this->yyidx + -4]->minor);
}
#line 1446 "oql-parser.php"
#line 42 "oql-parser.y"
#line 1446 "..\oql-parser.php"
#line 42 "..\oql-parser.y"
function yy_r5(){
$this->_retvalue = new OqlObjectQuery($this->yystack[$this->yyidx + -4]->minor, $this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor, $this->yystack[$this->yyidx + -1]->minor, $this->yystack[$this->yyidx + -6]->minor);
}
#line 1451 "oql-parser.php"
#line 47 "oql-parser.y"
#line 1451 "..\oql-parser.php"
#line 47 "..\oql-parser.y"
function yy_r6(){
$this->_retvalue = array($this->yystack[$this->yyidx + 0]->minor);
}
#line 1456 "oql-parser.php"
#line 50 "oql-parser.y"
#line 1456 "..\oql-parser.php"
#line 50 "..\oql-parser.y"
function yy_r7(){
array_push($this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor);
$this->_retvalue = $this->yystack[$this->yyidx + -2]->minor;
}
#line 1462 "oql-parser.php"
#line 55 "oql-parser.y"
#line 1462 "..\oql-parser.php"
#line 55 "..\oql-parser.y"
function yy_r8(){ $this->_retvalue = $this->yystack[$this->yyidx + 0]->minor; }
#line 1465 "oql-parser.php"
#line 56 "oql-parser.y"
#line 1465 "..\oql-parser.php"
#line 56 "..\oql-parser.y"
function yy_r9(){ $this->_retvalue = null; }
#line 1468 "oql-parser.php"
#line 58 "oql-parser.y"
#line 1468 "..\oql-parser.php"
#line 58 "..\oql-parser.y"
function yy_r10(){
// insert the join statement on top of the existing list
array_unshift($this->yystack[$this->yyidx + 0]->minor, $this->yystack[$this->yyidx + -1]->minor);
// and return the updated array
$this->_retvalue = $this->yystack[$this->yyidx + 0]->minor;
}
#line 1476 "oql-parser.php"
#line 64 "oql-parser.y"
#line 1476 "..\oql-parser.php"
#line 64 "..\oql-parser.y"
function yy_r11(){
$this->_retvalue = Array($this->yystack[$this->yyidx + 0]->minor);
}
#line 1481 "oql-parser.php"
#line 70 "oql-parser.y"
#line 1481 "..\oql-parser.php"
#line 70 "..\oql-parser.y"
function yy_r13(){
// create an array with one single item
$this->_retvalue = new OqlJoinSpec($this->yystack[$this->yyidx + -4]->minor, $this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor);
}
#line 1487 "oql-parser.php"
#line 75 "oql-parser.y"
#line 1487 "..\oql-parser.php"
#line 75 "..\oql-parser.y"
function yy_r14(){
// create an array with one single item
$this->_retvalue = new OqlJoinSpec($this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + 0]->minor);
}
#line 1493 "oql-parser.php"
#line 80 "oql-parser.y"
#line 1493 "..\oql-parser.php"
#line 80 "..\oql-parser.y"
function yy_r15(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, '=', $this->yystack[$this->yyidx + 0]->minor); }
#line 1496 "oql-parser.php"
#line 81 "oql-parser.y"
#line 1496 "..\oql-parser.php"
#line 81 "..\oql-parser.y"
function yy_r16(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'BELOW', $this->yystack[$this->yyidx + 0]->minor); }
#line 1499 "oql-parser.php"
#line 82 "oql-parser.y"
#line 1499 "..\oql-parser.php"
#line 82 "..\oql-parser.y"
function yy_r17(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'BELOW_STRICT', $this->yystack[$this->yyidx + 0]->minor); }
#line 1502 "oql-parser.php"
#line 83 "oql-parser.y"
#line 1502 "..\oql-parser.php"
#line 83 "..\oql-parser.y"
function yy_r18(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'NOT_BELOW', $this->yystack[$this->yyidx + 0]->minor); }
#line 1505 "oql-parser.php"
#line 84 "oql-parser.y"
#line 1505 "..\oql-parser.php"
#line 84 "..\oql-parser.y"
function yy_r19(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'NOT_BELOW_STRICT', $this->yystack[$this->yyidx + 0]->minor); }
#line 1508 "oql-parser.php"
#line 85 "oql-parser.y"
#line 1508 "..\oql-parser.php"
#line 85 "..\oql-parser.y"
function yy_r20(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'ABOVE', $this->yystack[$this->yyidx + 0]->minor); }
#line 1511 "oql-parser.php"
#line 86 "oql-parser.y"
#line 1511 "..\oql-parser.php"
#line 86 "..\oql-parser.y"
function yy_r21(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'ABOVE_STRICT', $this->yystack[$this->yyidx + 0]->minor); }
#line 1514 "oql-parser.php"
#line 87 "oql-parser.y"
#line 1514 "..\oql-parser.php"
#line 87 "..\oql-parser.y"
function yy_r22(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'NOT_ABOVE', $this->yystack[$this->yyidx + 0]->minor); }
#line 1517 "oql-parser.php"
#line 88 "oql-parser.y"
#line 1517 "..\oql-parser.php"
#line 88 "..\oql-parser.y"
function yy_r23(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, 'NOT_ABOVE_STRICT', $this->yystack[$this->yyidx + 0]->minor); }
#line 1520 "oql-parser.php"
#line 90 "oql-parser.y"
#line 1520 "..\oql-parser.php"
#line 90 "..\oql-parser.y"
function yy_r24(){ $this->_retvalue = $this->yystack[$this->yyidx + 0]->minor; }
#line 1523 "oql-parser.php"
#line 95 "oql-parser.y"
#line 1523 "..\oql-parser.php"
#line 95 "..\oql-parser.y"
function yy_r28(){ $this->_retvalue = new FunctionOqlExpression($this->yystack[$this->yyidx + -3]->minor, $this->yystack[$this->yyidx + -1]->minor); }
#line 1526 "oql-parser.php"
#line 96 "oql-parser.y"
#line 1526 "..\oql-parser.php"
#line 96 "..\oql-parser.y"
function yy_r29(){ $this->_retvalue = $this->yystack[$this->yyidx + -1]->minor; }
#line 1529 "oql-parser.php"
#line 97 "oql-parser.y"
#line 1529 "..\oql-parser.php"
#line 97 "..\oql-parser.y"
function yy_r30(){ $this->_retvalue = new BinaryOqlExpression($this->yystack[$this->yyidx + -2]->minor, $this->yystack[$this->yyidx + -1]->minor, $this->yystack[$this->yyidx + 0]->minor); }
#line 1532 "oql-parser.php"
#line 112 "oql-parser.y"
#line 1532 "..\oql-parser.php"
#line 112 "..\oql-parser.y"
function yy_r39(){
$this->_retvalue = new ListOqlExpression($this->yystack[$this->yyidx + -1]->minor);
}
#line 1537 "oql-parser.php"
#line 123 "oql-parser.y"
#line 1537 "..\oql-parser.php"
#line 123 "..\oql-parser.y"
function yy_r42(){
$this->_retvalue = array();
}
#line 1542 "oql-parser.php"
#line 134 "oql-parser.y"
#line 1542 "..\oql-parser.php"
#line 134 "..\oql-parser.y"
function yy_r46(){ $this->_retvalue = new IntervalOqlExpression($this->yystack[$this->yyidx + -1]->minor, $this->yystack[$this->yyidx + 0]->minor); }
#line 1545 "oql-parser.php"
#line 146 "oql-parser.y"
#line 1545 "..\oql-parser.php"
#line 146 "..\oql-parser.y"
function yy_r55(){ $this->_retvalue = new ScalarOqlExpression($this->yystack[$this->yyidx + 0]->minor); }
#line 1548 "oql-parser.php"
#line 149 "oql-parser.y"
#line 1548 "..\oql-parser.php"
#line 149 "..\oql-parser.y"
function yy_r57(){ $this->_retvalue = new FieldOqlExpression($this->yystack[$this->yyidx + 0]->minor); }
#line 1551 "oql-parser.php"
#line 150 "oql-parser.y"
#line 1551 "..\oql-parser.php"
#line 150 "..\oql-parser.y"
function yy_r58(){ $this->_retvalue = new FieldOqlExpression($this->yystack[$this->yyidx + 0]->minor, $this->yystack[$this->yyidx + -2]->minor); }
#line 1554 "oql-parser.php"
#line 151 "oql-parser.y"
#line 1554 "..\oql-parser.php"
#line 151 "..\oql-parser.y"
function yy_r59(){ $this->_retvalue=$this->yystack[$this->yyidx + 0]->minor; }
#line 1557 "oql-parser.php"
#line 154 "oql-parser.y"
#line 1557 "..\oql-parser.php"
#line 154 "..\oql-parser.y"
function yy_r60(){ $this->_retvalue = new VariableOqlExpression(substr($this->yystack[$this->yyidx + 0]->minor, 1)); }
#line 1560 "oql-parser.php"
#line 156 "oql-parser.y"
#line 1560 "..\oql-parser.php"
#line 156 "..\oql-parser.y"
function yy_r61(){
if ($this->yystack[$this->yyidx + 0]->minor[0] == '`')
{
@@ -1566,22 +1566,22 @@ static public $yy_action = array(
}
$this->_retvalue = new OqlName($name, $this->m_iColPrev);
}
#line 1573 "oql-parser.php"
#line 167 "oql-parser.y"
#line 1573 "..\oql-parser.php"
#line 167 "..\oql-parser.y"
function yy_r62(){$this->_retvalue=(int)$this->yystack[$this->yyidx + 0]->minor; }
#line 1576 "oql-parser.php"
#line 168 "oql-parser.y"
#line 1576 "..\oql-parser.php"
#line 168 "..\oql-parser.y"
function yy_r63(){$this->_retvalue=(int)-$this->yystack[$this->yyidx + 0]->minor; }
#line 1579 "oql-parser.php"
#line 169 "oql-parser.y"
#line 1579 "..\oql-parser.php"
#line 169 "..\oql-parser.y"
function yy_r64(){$this->_retvalue=new OqlHexValue($this->yystack[$this->yyidx + 0]->minor); }
#line 1582 "oql-parser.php"
#line 170 "oql-parser.y"
#line 1582 "..\oql-parser.php"
#line 170 "..\oql-parser.y"
function yy_r65(){$this->_retvalue=stripslashes(substr($this->yystack[$this->yyidx + 0]->minor, 1, strlen($this->yystack[$this->yyidx + 0]->minor) - 2)); }
#line 1585 "oql-parser.php"
#line 173 "oql-parser.y"
#line 1585 "..\oql-parser.php"
#line 173 "..\oql-parser.y"
function yy_r66(){$this->_retvalue=$this->yystack[$this->yyidx + 0]->minor; }
#line 1588 "oql-parser.php"
#line 1588 "..\oql-parser.php"
/**
* placeholder for the left hand side in a reduce operation.
@@ -1693,10 +1693,10 @@ static public $yy_action = array(
*/
function yy_syntax_error($yymajor, $TOKEN)
{
#line 25 "oql-parser.y"
#line 25 "..\oql-parser.y"
throw new OQLParserException($this->m_sSourceQuery, $this->m_iLine, $this->m_iCol, $this->tokenName($yymajor), $TOKEN);
#line 1704 "oql-parser.php"
#line 1704 "..\oql-parser.php"
}
/**
@@ -1863,7 +1863,7 @@ throw new OQLParserException($this->m_sSourceQuery, $this->m_iLine, $this->m_iCo
} while ($yymajor != self::YYNOCODE && $this->yyidx >= 0);
}
}
#line 231 "oql-parser.y"
#line 231 "..\oql-parser.y"
class OQLParserException extends OQLException
@@ -1928,4 +1928,4 @@ class OQLParser extends OQLParserRaw
}
}
#line 1937 "oql-parser.php"
#line 1937 "..\oql-parser.php"