Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable parsing of input seeds with syntax errors #261

Merged
Merged 1 commit on Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 32 additions & 13 deletions grammarinator/tool/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
error.ErrorListener.ConsoleErrorListener.INSTANCE = ConsoleListener()


class ExtendedErrorListener(error.ErrorListener.ErrorListener):
    """
    Custom error listener for the ANTLR lexer ensuring to insert the
    unrecognized tokens into the tree as well.
    """
    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        # ``recognizer`` is the lexer instance here. Consume the offending
        # character so lexing can continue past the error instead of stalling.
        recognizer.inputStream.consume()
        # Emit the consumed input as an INVALID_TYPE token on the default
        # channel so it still shows up in the token stream (and hence can be
        # kept in the resulting tree). Order matters: type and channel must be
        # set before emit().
        recognizer.type = Token.INVALID_TYPE
        recognizer.channel = Token.DEFAULT_CHANNEL
        recognizer.emit()
        # Reset the token type to a valid user token type so that subsequent
        # lexing resumes in the normal state.
        recognizer.type = Token.MIN_USER_TOKEN_TYPE


class ParserTool:
"""
Tool to parse existing sources and create a tree pool from them. These
Expand Down Expand Up @@ -185,8 +198,9 @@ def _antlr_to_grammarinator_tree(self, antlr_node, parser, visited=None):
depth = max(depth, child_depth + 1)
else:
assert isinstance(antlr_node, TerminalNode), f'An ANTLR node must either be a ParserRuleContext or a TerminalNode but {antlr_node.__class__.__name__} was found.'
name, text = parser.symbolicNames[antlr_node.symbol.type] if len(parser.symbolicNames) >= antlr_node.symbol.type else '<INVALID>', antlr_node.symbol.text
name, text = parser.symbolicNames[antlr_node.symbol.type] if len(parser.symbolicNames) > antlr_node.symbol.type else '<INVALID>', antlr_node.symbol.text
assert name, f'{name} is None or empty'

if antlr_node.symbol.type == Token.EOF:
return None, 0, []

Expand Down Expand Up @@ -310,7 +324,9 @@ def _match_seq(grammar_vertices, tree_node_pos):
# They MUST match, since ANTLR has already parsed them
# During matching, quantifier and alternation structures are identified
rule_children, rule_tree_node_pos = _match_seq(self._graph.vertices[rule.name].out_neighbours + [None], 0)
assert rule_children is not None, f'Failed to match {rule.name} tree node to the related grammar rule at {rule_tree_node_pos}.'
if rule_children is None:
logger.warning('Failed to match %s tree node to the related grammar rule at %d.', rule.name, rule_tree_node_pos)
return

# Detach all children from the tree node so that they can be reattached
# in a structured way afterwards
Expand Down Expand Up @@ -368,21 +384,24 @@ def _reattach_children(rule, children):
# Create an ANTLR tree from the input stream and convert it to Grammarinator tree.
def _create_tree(self, input_stream, fn):
try:
parser = self._parser_cls(CommonTokenStream(self._lexer_cls(input_stream)))
lexer = self._lexer_cls(input_stream)
lexer.addErrorListener(ExtendedErrorListener())
parser = self._parser_cls(CommonTokenStream(lexer))
parse_tree_root = getattr(parser, self._rule)()
if not parser._syntaxErrors:
root, depth, rules = self._antlr_to_grammarinator_tree(parse_tree_root, parser)
if depth > self._max_depth:
logger.info('The tree representation of %s is %s, too deep. Skipping.', fn, depth)
return None
if parser._syntaxErrors:
logger.warning('%s syntax errors detected in %s.', parser._syntaxErrors, fn)

root, depth, rules = self._antlr_to_grammarinator_tree(parse_tree_root, parser)
if depth > self._max_depth:
logger.info('The tree representation of %s is %s, too deep. Skipping.', fn, depth)
return None

self._adjust_tree_to_generator(rules)
for transformer in self._transformers:
root = transformer(root)
self._adjust_tree_to_generator(rules)
for transformer in self._transformers:
root = transformer(root)

return root
return root

logger.warning('%s syntax errors detected in %s.', parser._syntaxErrors, fn)
except Exception as e:
logger.warning('Exception while parsing %s.', fn, exc_info=e)
return None
Expand Down
1 change: 1 addition & 0 deletions tests/parser/exp6.grtj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"t": "p", "n": "start", "c": [{"t": "a", "ai": 0, "i": 1, "c": [{"t": "p", "n": "start_Quantifiers_test", "c": [{"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1], "i": false}, {"t": "q", "i": 0, "b": 0, "e": 1, "c": []}]}, {"t": "q", "i": 1, "b": 1, "e": -1, "c": [{"t": "qd", "c": [{"t": "l", "n": "<INVALID>", "s": " | ", "z": [1, 1], "i": false}, {"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "|", "z": [1, 1], "i": false}, {"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1], "i": false}]}]}]}]}]}]}
1 change: 1 addition & 0 deletions tests/parser/exp7.grtj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"t": "p", "n": "start", "c": [{"t": "a", "ai": 0, "i": 0, "c": [{"t": "p", "n": "start_Quantifiers_test", "c": [{"t": "l", "n": "<INVALID>", "s": "*", "z": [1, 1], "i": false}, {"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1], "i": false}, {"t": "q", "i": 0, "b": 0, "e": 1, "c": []}]}]}]}]}
1 change: 1 addition & 0 deletions tests/parser/inp6.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pass | | pass
1 change: 1 addition & 0 deletions tests/parser/inp7.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* pass
2 changes: 2 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
(os.path.join(parser_dir, 'inp3.txt'), os.path.join(parser_dir, 'exp3.grtj')),
(os.path.join(parser_dir, 'inp4.txt'), os.path.join(parser_dir, 'exp4.grtj')),
(os.path.join(parser_dir, 'inp5.txt'), os.path.join(parser_dir, 'exp5.grtj')),
(os.path.join(parser_dir, 'inp6.txt'), os.path.join(parser_dir, 'exp6.grtj')),
(os.path.join(parser_dir, 'inp7.txt'), os.path.join(parser_dir, 'exp7.grtj')),
])
def test_parser(inp, expected, tmpdir):
with open(inp, 'r') as f:
Expand Down
Loading