GuardExpressionParser.java
/*******************************************************************************
* Copyright (c) 2026 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer - initial API and implementation
*******************************************************************************/
package org.sandbox.jdt.triggerpattern.internal;
import java.util.ArrayList;
import java.util.List;
import org.sandbox.jdt.triggerpattern.api.GuardExpression;
/**
* Recursive descent parser for guard expressions.
*
* <p>Syntax:</p>
* <pre>
* expr = or_expr
* or_expr = and_expr ('||' and_expr)*
* and_expr = unary_expr ('&&' unary_expr)*
* unary_expr = '!' unary_expr | primary
* primary = '(' expr ')' | instanceof_expr | function_call
* function_call = IDENTIFIER '(' arg_list ')'
* instanceof_expr = PLACEHOLDER 'instanceof' TYPE
* arg_list = (arg (',' arg)*)?
* arg = PLACEHOLDER | IDENTIFIER | NUMBER
* </pre>
*
* <p>Examples:</p>
* <ul>
* <li>{@code sourceVersionGE(11)}</li>
* <li>{@code $x instanceof String}</li>
* <li>{@code $x instanceof String && sourceVersionGE(11)}</li>
* <li>{@code !isStatic($x)}</li>
* <li>{@code ($a || $b) && $c}</li>
* </ul>
*
* @since 1.3.2
*/
public final class GuardExpressionParser {
private static final class ParseState {
final String input;
int pos;
ParseState(String input) {
this.input = input;
this.pos = 0;
}
}
/**
* Parses a guard expression string into a {@link GuardExpression} AST.
*
* @param guardText the guard expression text
* @return the parsed guard expression
* @throws IllegalArgumentException if the expression cannot be parsed
*/
public GuardExpression parse(String guardText) {
if (guardText == null || guardText.isBlank()) {
throw new IllegalArgumentException("Guard expression cannot be null or blank"); //$NON-NLS-1$
}
ParseState state = new ParseState(guardText.trim());
GuardExpression expr = parseOrExpr(state);
skipWhitespace(state);
if (state.pos < state.input.length()) {
throw new IllegalArgumentException(
"Unexpected character at position " + state.pos + ": '" + state.input.charAt(state.pos) + "'"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
return expr;
}
/**
* or_expr = and_expr ('||' and_expr)*
*/
private GuardExpression parseOrExpr(ParseState state) {
GuardExpression left = parseAndExpr(state);
while (matchToken(state, "||")) { //$NON-NLS-1$
GuardExpression right = parseAndExpr(state);
left = new GuardExpression.Or(left, right);
}
return left;
}
/**
* and_expr = unary_expr (('&&' | ',') unary_expr)*
*
* <p>Both {@code &&} and {@code ,} are accepted as AND operators.
* The comma form is used in hint file guard lists, e.g.
* {@code sourceVersionGE(7), mode(ENFORCE_UTF8)}.</p>
*/
private GuardExpression parseAndExpr(ParseState state) {
GuardExpression left = parseUnaryExpr(state);
while (matchToken(state, "&&") || matchCommaAnd(state)) { //$NON-NLS-1$
GuardExpression right = parseUnaryExpr(state);
left = new GuardExpression.And(left, right);
}
return left;
}
/**
* Tries to match a comma used as an AND operator between guard expressions.
* Only matches when the comma is NOT inside a function argument list
* (those commas are consumed by {@link #parseArgList}).
*/
private boolean matchCommaAnd(ParseState state) {
skipWhitespace(state);
if (state.pos < state.input.length() && state.input.charAt(state.pos) == ',') {
state.pos++;
return true;
}
return false;
}
/**
* unary_expr = '!' unary_expr | primary
*/
private GuardExpression parseUnaryExpr(ParseState state) {
skipWhitespace(state);
if (state.pos < state.input.length() && state.input.charAt(state.pos) == '!') {
state.pos++;
GuardExpression operand = parseUnaryExpr(state);
return new GuardExpression.Not(operand);
}
return parsePrimary(state);
}
/**
* primary = '(' expr ')' | instanceof_expr | function_call
*/
private GuardExpression parsePrimary(ParseState state) {
skipWhitespace(state);
if (state.pos >= state.input.length()) {
throw new IllegalArgumentException("Unexpected end of expression"); //$NON-NLS-1$
}
// Parenthesized expression
if (state.input.charAt(state.pos) == '(') {
state.pos++;
GuardExpression expr = parseOrExpr(state);
skipWhitespace(state);
if (state.pos >= state.input.length() || state.input.charAt(state.pos) != ')') {
throw new IllegalArgumentException("Expected ')' at position " + state.pos); //$NON-NLS-1$
}
state.pos++;
return expr;
}
// Placeholder: might be instanceof expression or matchesAny($x) style
if (state.input.charAt(state.pos) == '$') {
String placeholder = readToken(state);
skipWhitespace(state);
// Check for instanceof
if (matchKeyword(state, "instanceof")) { //$NON-NLS-1$
skipWhitespace(state);
String typeName = readToken(state);
// Handle array types: Type[]
skipWhitespace(state);
if (state.pos + 1 < state.input.length() && state.input.charAt(state.pos) == '[' && state.input.charAt(state.pos + 1) == ']') {
typeName = typeName + "[]"; //$NON-NLS-1$
state.pos += 2;
}
return new GuardExpression.FunctionCall("instanceof", List.of(placeholder, typeName)); //$NON-NLS-1$
}
// Standalone placeholder treated as matchesAny($placeholder)
return new GuardExpression.FunctionCall("matchesAny", List.of(placeholder)); //$NON-NLS-1$
}
// Function call: IDENTIFIER '(' arg_list ')'
String name = readToken(state);
if (name.isEmpty()) {
throw new IllegalArgumentException("Expected identifier at position " + state.pos); //$NON-NLS-1$
}
skipWhitespace(state);
if (state.pos < state.input.length() && state.input.charAt(state.pos) == '(') {
state.pos++;
List<String> args = parseArgList(state);
skipWhitespace(state);
if (state.pos >= state.input.length() || state.input.charAt(state.pos) != ')') {
throw new IllegalArgumentException("Expected ')' at position " + state.pos); //$NON-NLS-1$
}
state.pos++;
return new GuardExpression.FunctionCall(name, args);
}
// Bare identifier treated as zero-arg function call
return new GuardExpression.FunctionCall(name, List.of());
}
/**
* arg_list = (arg (',' arg)*)?
*/
private List<String> parseArgList(ParseState state) {
List<String> args = new ArrayList<>();
skipWhitespace(state);
if (state.pos < state.input.length() && state.input.charAt(state.pos) == ')') {
return args;
}
args.add(readArg(state));
while (state.pos < state.input.length()) {
skipWhitespace(state);
if (state.pos >= state.input.length() || state.input.charAt(state.pos) != ',') {
break;
}
state.pos++;
args.add(readArg(state));
}
return args;
}
/**
* Reads a single argument (placeholder, identifier, or number).
*/
private String readArg(ParseState state) {
skipWhitespace(state);
if (state.pos >= state.input.length()) {
throw new IllegalArgumentException("Expected argument at position " + state.pos); //$NON-NLS-1$
}
return readToken(state);
}
/**
* Reads a token (identifier, placeholder with $ prefix, number, or quoted string literal).
*
* <p>Quoted string literals are returned with their surrounding quotes preserved
* (e.g., {@code "foo"} is returned as {@code "foo"}). Guard function implementations
* use {@code stripQuotes()} during evaluation to extract the literal value.</p>
*/
private String readToken(ParseState state) {
skipWhitespace(state);
if (state.pos >= state.input.length()) {
return ""; //$NON-NLS-1$
}
int start = state.pos;
char c = state.input.charAt(state.pos);
// Quoted string literal: "..."
if (c == '"') {
state.pos++;
StringBuilder sb = new StringBuilder();
sb.append('"');
while (state.pos < state.input.length() && state.input.charAt(state.pos) != '"') {
if (state.input.charAt(state.pos) == '\\' && state.pos + 1 < state.input.length()) {
char escaped = state.input.charAt(state.pos + 1);
// Handle escaped quote: \" becomes " in the output
if (escaped == '"') {
sb.append('"');
state.pos += 2;
continue;
}
// Preserve other escape sequences as-is
sb.append('\\');
sb.append(escaped);
state.pos += 2;
continue;
}
sb.append(state.input.charAt(state.pos));
state.pos++;
}
if (state.pos >= state.input.length()) {
throw new IllegalArgumentException(
"Unterminated string literal starting at position " + start); //$NON-NLS-1$
}
sb.append('"');
state.pos++; // consume closing quote
return sb.toString();
}
// Placeholder: $identifier
if (c == '$') {
state.pos++;
while (state.pos < state.input.length() && isIdentifierPart(state.input.charAt(state.pos))) {
state.pos++;
}
// Handle multi-placeholder ending with $
if (state.pos < state.input.length() && state.input.charAt(state.pos) == '$') {
state.pos++;
}
return state.input.substring(start, state.pos);
}
// Number
if (Character.isDigit(c)) {
while (state.pos < state.input.length() && (Character.isDigit(state.input.charAt(state.pos)) || state.input.charAt(state.pos) == '.')) {
state.pos++;
}
return state.input.substring(start, state.pos);
}
// Identifier (including qualified names like java.lang.String)
if (isIdentifierStart(c)) {
while (state.pos < state.input.length() && (isIdentifierPart(state.input.charAt(state.pos)) || state.input.charAt(state.pos) == '.')) {
state.pos++;
}
return state.input.substring(start, state.pos);
}
throw new IllegalArgumentException(
"Unexpected character at position " + state.pos + ": '" + c + "'"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
/**
* Tries to match and consume a two-character token.
*/
private boolean matchToken(ParseState state, String token) {
skipWhitespace(state);
if (state.pos + token.length() <= state.input.length()
&& state.input.substring(state.pos, state.pos + token.length()).equals(token)) {
state.pos += token.length();
return true;
}
return false;
}
/**
* Tries to match and consume a keyword (must be followed by non-identifier char).
*/
private boolean matchKeyword(ParseState state, String keyword) {
int savedPos = state.pos;
if (state.pos + keyword.length() <= state.input.length()
&& state.input.substring(state.pos, state.pos + keyword.length()).equals(keyword)) {
int afterKeyword = state.pos + keyword.length();
if (afterKeyword >= state.input.length() || !isIdentifierPart(state.input.charAt(afterKeyword))) {
state.pos += keyword.length();
return true;
}
}
state.pos = savedPos;
return false;
}
private void skipWhitespace(ParseState state) {
while (state.pos < state.input.length() && Character.isWhitespace(state.input.charAt(state.pos))) {
state.pos++;
}
}
private boolean isIdentifierStart(char c) {
return Character.isJavaIdentifierStart(c);
}
private boolean isIdentifierPart(char c) {
return Character.isJavaIdentifierPart(c);
}
}