Drop in replacement for native gettext. This file is part of PHP-gettext. PHP-gettext is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. PHP-gettext is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with PHP-gettext; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** * Lexical analyzer for gettext plurals expression. Takes a string to parse * during construction and returns a single token every time peek() or * fetch_token() are called. The special string '__END__' is returned if there * are no more tokens to be read. Spaces are ignored during tokenization. */ class PluralsLexer { private $string; private $position; /** * Constructor * * @param string string Contains the value gettext plurals expression to * analyze. */ public function __construct(string $string) { $this->string = $string; $this->position = 0; } /** * Return the next token and the length to advance the read position without * actually advancing the read position. Tokens for operators and variables * are simple strings containing the operator or variable. If there are no * more token to provide, the special value ['__END__', 0] is returned. If * there was an unexpected input an Exception is raised. * * @access private * @throws Exception If there is unexpected input in the provided string. * @return array The next token and length to advance the current position. */ private function _tokenize() { $buf = $this->string; // Consume all spaces until the next token $index = $this->position; while ($index < strlen($buf) && $buf[$index] == ' ') { $index++; } $this->position = $index; // Return special token if next of the string is reached. if (strlen($buf) - $index == 0) { return ['__END__', 0]; } // Operators with two characters $doubles = ['==', '!=', '>=', '<=', '&&', '||']; $next = substr($buf, $index, 2); if (in_array($next, $doubles)) { return [$next, 2]; } // Operators with single character or variable 'n'. $singles = [ 'n', '(', ')', '?', ':', '+', '-', '*', '/', '%', '!', '>', '<']; if (in_array($buf[$index], $singles)) { return [$buf[$index], 1]; } // Whole number constants, return an integer. $digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; $pos = $index; while ($pos < strlen($buf) && in_array($buf[$pos], $digits)) { $pos++; } if ($pos != $index) { $length = $pos - $index; return [(int)substr($buf, $index, $length), $length]; } // Throw and exception for all other unexpected input in the string. throw new Exception('Lexical analysis failed'); } /** * Return the next token without actually advancing the read position. * Tokens for operators and variables are simple strings containing the * operator or variable. If there are no more tokens to provide, the special * value '__END__' is returned. If there was an unexpected input an * Exception is raised. * * @throws Exception If there is unexpected input in the provided string. * @return string The next token. */ public function peek() { list($token, $length) = $this->_tokenize(); return $token; } /** * Return the next token after advancing the read position. Tokens for * operators and variables are simple strings containing the operator or * variable. If there are no more token to provide, the special value * '__END__' is returned. If there was an unexpected input an Exception is * raised. * * @throws Exception If there is unexpected input in the provided string. * @return string The next token. */ public function fetch_token() { list($token, $length) = $this->_tokenize(); $this->position += $length; return $token; } } /** * A parsed representation of the gettext plural expression. This is a tree * containing further expressions depending on how nested the given input is. * Calling the evaluate() function computes the value of the expression if the * variable 'n' is set a certain value. This is used to decide which plural * string translation to use based on the number items at hand. */ class PluralsExpression { private $operator; private $operands; const BINARY_OPERATORS = [ '==', '!=', '>=', '<=', '&&', '||', '+', '-', '*', '/', '%', '>', '<']; const UNARY_OPERATORS = ['!']; /** * Constructor * * @param string Operator for the expression. * @param (int|string|PuralsExpression)[] Variable number of operands of the * expression. One int operand is expected in case the operator is 'const'. * One string operand with value 'n' is expected in case the operator is * 'var'. For all other operators, the operands much be objects of type * PluralExpression. Unary operators expect one operand, binary operators * expect two operands and trinary operators expect three operands. */ public function __construct($operator, ...$operands) { $this->operator = $operator; $this->operands = $operands; } /** * Return a parenthesized string representation of the expression for * debugging purposes. * * @return string A string representation of the expression. */ public function to_string() { if ($this->operator == 'const' || $this->operator == 'var') { return $this->operands[0]; } elseif (in_array($this->operator, self::BINARY_OPERATORS)) { return sprintf( "(%s %s %s)", $this->operands[0]->to_string(), $this->operator, $this->operands[1]->to_string()); } elseif (in_array($this->operator, self::UNARY_OPERATORS)) { return sprintf( "(%s %s)", $this->operator, $this->operands[0]->to_string()); } elseif ($this->operator == '?') { return sprintf( "(%s ? %s : %s)", $this->operands[0]->to_string(), $this->operands[1]->to_string(), $this->operands[2]->to_string()); } } /** * Return the computed value of the expression if the variable 'n' is set to * a certain value. * * @param int The value of the variable n to use when evaluating. * @throws Exception If the expression has been constructed incorrectly. * @return int The value of the expression after evaluation. */ public function evaluate($n) { if (!in_array($this->operator, ['const', 'var'])) { $operand1 = $this->operands[0]->evaluate($n); } if (in_array($this->operator, self::BINARY_OPERATORS) || $this->operator == '?') { $operand2 = $this->operands[1]->evaluate($n); } if ($this->operator == '?') { $operand3 = $this->operands[2]->evaluate($n); } switch ($this->operator) { case 'const': return $this->operands[0]; case 'var': return $n; case '!': return !($operand1); case '==': return $operand1 == $operand2; case '!=': return $operand1 != $operand2; case '>=': return $operand1 >= $operand2; case '<=': return $operand1 <= $operand2; case '>': return $operand1 > $operand2; case '<': return $operand1 < $operand2; case '&&': return $operand1 && $operand2; case '||': return $operand1 || $operand2; case '+': return $operand1 + $operand2; case '-': return $operand1 - $operand2; case '*': return $operand1 * $operand2; case '/': return (int)($operand1 / $operand2); case '%': return $operand1 % $operand2; case '?': return $operand1 ? $operand2 : $operand3; default: throw new Exception('Invalid expression'); } } } /** * A simple operator-precedence parser for gettext plural expressions. Takes a * string during construction and returns a PluralsExpression tree when * parse() is called. */ class PluralsParser { private $lexer; /* * Operator precedence. The parsing only happens with minimum precedence of * 0. However, ':' and ')' exist here to make sure that parsing does not * proceed beyond them when they are not to be parsed. */ const PREC = [ ':' => -1, '?' => 0, '||' => 1, '&&' => 2, '==' => 3, '!=' => 3, '>' => 4, '<' => 4, '>=' => 4, '<=' => 4, '+' => 5, '-' => 5, '*' => 6, '/' => 6, '%' => 6, '!' => 7, '__END__' => -1, ')' => -1 ]; // List of right associative operators const RIGHT_ASSOC = ['?']; /** * Constructor * * @param string string the plural expression to be parsed. */ public function __construct(string $string) { $this->lexer = new PluralsLexer($string); } /** * Expect a primary next for parsing and return a PluralsExpression or throw * and exception otherwise. A primary can be the variable 'n', an whole * number constant, a unary operator expression string with '!', or a * parenthesis expression. * * @throws Exception If the next token is not a primary or if parenthesis * expression is not closes properly with ')'. * @return PluralsExpression That is constructed from the parsed primary. */ private function _parse_primary() { $token = $this->lexer->fetch_token(); if ($token === 'n') { return new PluralsExpression('var', 'n'); } elseif (is_int($token)) { return new PluralsExpression('const', (int)$token); } elseif ($token === '!') { return new PluralsExpression('!', $this->_parse_primary()); } elseif ($token === '(') { $result = $this->_parse($this->_parse_primary(), 0); if ($this->lexer->fetch_token() != ')') { throw new Exception('Mismatched parenthesis'); } return $result; } throw new Exception('Primary expected'); } /** * Fetch an operator from the lexical analyzer and test for it. Optionally * advance the position of the lexical analyzer to next token. Raise * exception if the token retrieved is not an operator. * * @access private * @param bool peek A flag to indicate whether the position of the lexical * analyzer should *not* be advanced. If false, the lexical analyzer is * advanced by one token. * @throws Exception If the token read is not an operator. * @return string The operator that has been fetched from the lexical * analyzer. */ private function _parse_operator($peek) { if ($peek) { $token = $this->lexer->peek(); } else { $token = $this->lexer->fetch_token(); } if ($token !== null && !array_key_exists($token, self::PREC)) { throw new Exception('Operator expected'); } return $token; } /** * A parsing method suitable for recursion. * * @access private * @param ParserExpression left_side A pre-parsed left-hand side expression * of the file expression to be constructed. This helps with recursion. * @param int min_precedence The minimum value of precedence for the * operators to be considered for parsing. Parsing will stop and current * expression is returned if an operator of a lower precedence is * encountered. * @throws Exception If the input string does not conform to the grammar of * the gettext plural expression. * @return ParserExpression A complete expression after parsing. */ private function _parse($left_side, $min_precedence) { $next_token = $this->_parse_operator(true); while (self::PREC[$next_token] >= $min_precedence) { $operator = $this->_parse_operator(false); $right_side = $this->_parse_primary(); $next_token = $this->_parse_operator(true); /* * Consume (recursively) into right hand side all expressions of higher * precedence. */ while ((self::PREC[$operator] < self::PREC[$next_token]) || ((self::PREC[$operator] == self::PREC[$next_token]) && in_array($operator, self::RIGHT_ASSOC))) { $right_side = $this->_parse( $right_side, self::PREC[$next_token]); $next_token = $this->_parse_operator(true); } if ($operator != '?') { /* * Handling for all binary operators. Consume into left hand side all * expressions of equal precedence. */ $left_side = new PluralsExpression($operator, $left_side, $right_side); } else { // Special handling for (a ? b : c) expression $operator = $this->lexer->fetch_token(); if ($operator != ':') { throw new Exception('Invalid ? expression'); } $right_side2 = $this->_parse( $this->_parse_primary(), self::PREC[$operator] + 1); $next_token = $this->_parse_operator(true); $left_side = new PluralsExpression( '?', $left_side, $right_side, $right_side2); } } return $left_side; } /** * A simple implementation of an operator-precedence parser. See: * https://en.wikipedia.org/wiki/Operator-precedence_parser for an analysis * of the algorithm. * * @throws Exception If the input string does not conform to the grammar of * the gettext plural expression. * @return ParserExpression A complete expression after parsing. */ public function parse() { $expression = $this->_parse($this->_parse_primary(), 0); // Special handling for an extra ')' at the end. if ($this->lexer->peek() != '__END__') { throw new Exception('Could not parse completely'); } return $expression; } } /** * Provides a class to parse the value of the 'Plural-Forms:' header in the * gettext translation files. Holds the expression tree and the number of * plurals after parsing. Parsing happens during construction which takes as * its only argument the string to parse. Error during parsing are silently * suppressed and the fallback behavior is used with the value for Germanic * languages as follows: "nplurals=2; plural=n == 1 ? 0 : 1;". */ class PluralHeader { public $total; public $expression; /** * Constructor * * @param string The value of the Plural-Forms: header as seen in .po files. */ function __construct($string) { try { list($total, $expression) = $this->parse($string); } catch (Exception $e) { $string = "nplurals=2; plural=n == 1 ? 0 : 1;"; list($total, $expression) = $this->parse($string); } $this->total = $total; $this->expression = $expression; } /** * Return the number of plural forms and the parsed expression tree. * * @access private * @param string string The value of the Plural-Forms: header. * @throws Exception If the string could not be parsed. * @return array The number of plural forms and parsed expression tree. */ private function parse($string) { $regex = "/^\s*nplurals\s*=\s*(\d+)\s*;\s*plural\s*=([^;]+);/i"; if (preg_match($regex, $string, $matches)) { $total = (int)$matches[1]; $expression_string = $matches[2]; } else { throw new Exception('Invalid header value'); } $parser = new PluralsParser($expression_string); $expression = $parser->parse(); return [$total, $expression]; } }