diff --git a/.pylintrc b/.pylintrc index eb40aea..60390e5 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,7 @@ [MASTER] # Files or directories to be skipped. They should be base names, not paths. -ignore=g4, grammars +ignore=g4, grammars, FBRule_generated.py # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may diff --git a/grammarinator/cli.py b/grammarinator/cli.py index 8fad336..1379cb4 100644 --- a/grammarinator/cli.py +++ b/grammarinator/cli.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Renata Hodovan, Akos Kiss. +# Copyright (c) 2020-2024 Renata Hodovan, Akos Kiss. # # Licensed under the BSD 3-Clause License # . @@ -10,7 +10,7 @@ from inators.imp import import_object -from .tool import JsonTreeCodec, PickleTreeCodec +from .tool import FlatBuffersTreeCodec, JsonTreeCodec, PickleTreeCodec logger = logging.getLogger('grammarinator') @@ -46,6 +46,7 @@ def add_encoding_errors_argument(parser): tree_formats = { 'pickle': {'extension': 'grtp', 'codec_class': PickleTreeCodec}, 'json': {'extension': 'grtj', 'codec_class': JsonTreeCodec}, + 'flatbuffers': {'extension': 'grtf', 'codec_class': FlatBuffersTreeCodec}, } diff --git a/grammarinator/tool/__init__.py b/grammarinator/tool/__init__.py index e729611..7fb690b 100644 --- a/grammarinator/tool/__init__.py +++ b/grammarinator/tool/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Renata Hodovan, Akos Kiss. +# Copyright (c) 2023-2024 Renata Hodovan, Akos Kiss. # # Licensed under the BSD 3-Clause License # . 
@@ -9,4 +9,4 @@ from .generator import DefaultGeneratorFactory, GeneratorFactory, GeneratorTool from .parser import ParserTool from .processor import ProcessorTool -from .tree_codec import AnnotatedTreeCodec, JsonTreeCodec, PickleTreeCodec, TreeCodec +from .tree_codec import AnnotatedTreeCodec, FlatBuffersTreeCodec, JsonTreeCodec, PickleTreeCodec, TreeCodec diff --git a/grammarinator/tool/fbs/FBRule_generated.py b/grammarinator/tool/fbs/FBRule_generated.py new file mode 100644 index 0000000..cf329c0 --- /dev/null +++ b/grammarinator/tool/fbs/FBRule_generated.py @@ -0,0 +1,178 @@ +# automatically generated by the FlatBuffers compiler, do not modify + +# namespace: fbs + +import flatbuffers +from flatbuffers.compat import import_numpy +np = import_numpy() + +class FBRuleType(object): + UnlexerRuleType = 0 + UnparserRuleType = 1 + UnparserRuleQuantifierType = 2 + UnparserRuleQuantifiedType = 3 + UnparserRuleAlternativeType = 4 + + +class FBRuleSize(object): + __slots__ = ['_tab'] + + @classmethod + def SizeOf(cls): + return 8 + + # FBRuleSize + def Init(self, buf, pos): + self._tab = flatbuffers.table.Table(buf, pos) + + # FBRuleSize + def Depth(self): return self._tab.Get(flatbuffers.number_types.Int32Flags, self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(0)) + # FBRuleSize + def Tokens(self): return self._tab.Get(flatbuffers.number_types.Int32Flags, self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(4)) + +def CreateFBRuleSize(builder, depth, tokens): + builder.Prep(4, 8) + builder.PrependInt32(tokens) + builder.PrependInt32(depth) + return builder.Offset() + + +class FBRule(object): + __slots__ = ['_tab'] + + @classmethod + def GetRootAs(cls, buf, offset=0): + n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset) + x = FBRule() + x.Init(buf, n + offset) + return x + + @classmethod + def GetRootAsFBRule(cls, buf, offset=0): + """This method is deprecated. 
Please switch to GetRootAs.""" + return cls.GetRootAs(buf, offset) + # FBRule + def Init(self, buf, pos): + self._tab = flatbuffers.table.Table(buf, pos) + + # FBRule + def Type(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4)) + if o != 0: + return self._tab.Get(flatbuffers.number_types.Int8Flags, o + self._tab.Pos) + return 0 + + # FBRule + def Name(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6)) + if o != 0: + return self._tab.String(o + self._tab.Pos) + return None + + # FBRule + def Children(self, j): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8)) + if o != 0: + x = self._tab.Vector(o) + x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4 + x = self._tab.Indirect(x) + obj = FBRule() + obj.Init(self._tab.Bytes, x) + return obj + return None + + # FBRule + def ChildrenLength(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8)) + if o != 0: + return self._tab.VectorLen(o) + return 0 + + # FBRule + def ChildrenIsNone(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8)) + return o == 0 + + # FBRule + def Src(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10)) + if o != 0: + return self._tab.String(o + self._tab.Pos) + return None + + # FBRule + def Size(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12)) + if o != 0: + x = o + self._tab.Pos + obj = FBRuleSize() + obj.Init(self._tab.Bytes, x) + return obj + return None + + # FBRule + def Idx(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14)) + if o != 0: + return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos) + return 0 + + # FBRule + def Start(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16)) + if o != 0: + return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos) + return 0 + + # FBRule + def 
Stop(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(18)) + if o != 0: + return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos) + return 0 + + # FBRule + def AltIdx(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20)) + if o != 0: + return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos) + return 0 + +def FBRuleStart(builder): + builder.StartObject(9) + +def FBRuleAddType(builder, type): + builder.PrependInt8Slot(0, type, 0) + +def FBRuleAddName(builder, name): + builder.PrependUOffsetTRelativeSlot(1, flatbuffers.number_types.UOffsetTFlags.py_type(name), 0) + +def FBRuleAddChildren(builder, children): + builder.PrependUOffsetTRelativeSlot(2, flatbuffers.number_types.UOffsetTFlags.py_type(children), 0) + +def FBRuleStartChildrenVector(builder, numElems): + return builder.StartVector(4, numElems, 4) + +def FBRuleAddSrc(builder, src): + builder.PrependUOffsetTRelativeSlot(3, flatbuffers.number_types.UOffsetTFlags.py_type(src), 0) + +def FBRuleAddSize(builder, size): + builder.PrependStructSlot(4, flatbuffers.number_types.UOffsetTFlags.py_type(size), 0) + +def FBRuleAddIdx(builder, idx): + builder.PrependInt32Slot(5, idx, 0) + +def FBRuleAddStart(builder, start): + builder.PrependInt32Slot(6, start, 0) + +def FBRuleAddStop(builder, stop): + builder.PrependInt32Slot(7, stop, 0) + +def FBRuleAddAltIdx(builder, altIdx): + builder.PrependInt32Slot(8, altIdx, 0) + +def FBRuleEnd(builder): + return builder.EndObject() + + + diff --git a/grammarinator/tool/fbs/__init__.py b/grammarinator/tool/fbs/__init__.py new file mode 100644 index 0000000..484d2bc --- /dev/null +++ b/grammarinator/tool/fbs/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2024 Renata Hodovan, Akos Kiss. +# +# Licensed under the BSD 3-Clause License +# . +# This file may not be copied, modified, or distributed except +# according to those terms. 
+
+from .FBRule_generated import CreateFBRuleSize, FBRule, FBRuleAddAltIdx, FBRuleAddChildren, FBRuleAddIdx, FBRuleAddName, FBRuleAddSize, FBRuleAddSrc, FBRuleAddStart, FBRuleAddStop, FBRuleAddType, FBRuleEnd, FBRuleSize, FBRuleStart, FBRuleStartChildrenVector, FBRuleType
diff --git a/grammarinator/tool/resources/fbs/FBRule.fbs b/grammarinator/tool/resources/fbs/FBRule.fbs
new file mode 100644
index 0000000..1735b6f
--- /dev/null
+++ b/grammarinator/tool/resources/fbs/FBRule.fbs
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2024 Renata Hodovan, Akos Kiss.
+ *
+ * Licensed under the BSD 3-Clause License
+ * .
+ * This file may not be copied, modified, or distributed except
+ * according to those terms.
+ */
+
+/*
+ * This is the flatbuffer schema of the tree format of Grammarinator.
+ * It was used as input for the flatc compiler to generate code for
+ * building and reading flatbuffer tree representations
+ * (grammarinator/tool/fbs/FBRule_generated.py).
+ *
+ * The used flatc command:
+ *   flatc --python --gen-onefile -o grammarinator/tool/fbs/ FBRule.fbs
+ */
+
+namespace grammarinator.tool.fbs;
+
+enum FBRuleType: byte {
+    UnlexerRuleType = 0,
+    UnparserRuleType = 1,
+    UnparserRuleQuantifierType = 2,
+    UnparserRuleQuantifiedType = 3,
+    UnparserRuleAlternativeType = 4
+}
+
+struct FBRuleSize {
+    depth: int;
+    tokens: int;
+}
+
+table FBRule {
+    type: FBRuleType;
+    name: string;
+    children: [FBRule];
+    src: string;
+    size: FBRuleSize;
+    idx: int;
+    start: int;
+    stop: int;
+    alt_idx: int;
+}
+
+root_type FBRule;
diff --git a/grammarinator/tool/tree_codec.py b/grammarinator/tool/tree_codec.py
index 61e4e63..abb457e 100644
--- a/grammarinator/tool/tree_codec.py
+++ b/grammarinator/tool/tree_codec.py
@@ -7,8 +7,14 @@
 
 import json
 import pickle
+import struct
+
+from math import inf
+
+import flatbuffers
 
 from ..runtime import RuleSize, UnlexerRule, UnparserRule, UnparserRuleAlternative, UnparserRuleQuantified, UnparserRuleQuantifier
+from .fbs import CreateFBRuleSize, FBRule, FBRuleAddAltIdx, FBRuleAddChildren, FBRuleAddIdx, FBRuleAddName, FBRuleAddSize, FBRuleAddSrc, FBRuleAddStart, FBRuleAddStop, FBRuleAddType, FBRuleEnd, FBRuleStart, FBRuleStartChildrenVector, FBRuleType
 
 
 class TreeCodec:
@@ -153,3 +159,119 @@ def _dict_to_rule(dct):
             return json.loads(data.decode(encoding=self._encoding, errors=self._encoding_errors), object_hook=_dict_to_rule)
         except json.JSONDecodeError:
             return None
+
+
+class FlatBuffersTreeCodec(TreeCodec):
+    """
+    FlatBuffers-based tree codec.
+    """
+
+    def __init__(self, encoding='utf-8', encoding_errors='surrogatepass'):
+        """
+        :param str encoding: The encoding to use when converting between
+            flatbuffers-encoded text and bytes (default: utf-8).
+        :param str encoding_errors: The error handling scheme to use when
+            converting between flatbuffers-encoded text and bytes
+            (default: surrogatepass).
+        """
+        self._encoding = encoding
+        self._encoding_errors = encoding_errors
+
+    def encode(self, root):
+        """
+        Serialize a tree into a FlatBuffers buffer.
+
+        :param root: The root rule of the tree to serialize.
+        :return: The finished buffer holding the encoded tree.
+        :rtype: bytes
+        """
+        def buildFBRule(rule):
+            # Strings and the children vector are created before FBRuleStart:
+            # the builder cannot nest them inside a table under construction.
+            if isinstance(rule, UnlexerRule):
+                fb_name = builder.CreateString(rule.name, encoding=self._encoding, errors=self._encoding_errors)
+                fb_src = builder.CreateString(rule.src, encoding=self._encoding, errors=self._encoding_errors)
+                FBRuleStart(builder)
+                FBRuleAddType(builder, FBRuleType.UnlexerRuleType)
+                FBRuleAddName(builder, fb_name)
+                FBRuleAddSrc(builder, fb_src)
+                FBRuleAddSize(builder, CreateFBRuleSize(builder, rule.size.depth, rule.size.tokens))
+            else:
+                children = [buildFBRule(child) for child in rule.children]
+                FBRuleStartChildrenVector(builder, len(children))
+                # Elements are prepended, so they are added in reverse to keep the order.
+                for fb_child in reversed(children):
+                    builder.PrependUOffsetTRelative(fb_child)
+                fb_children = builder.EndVector()
+                if isinstance(rule, UnparserRule):
+                    fb_name = builder.CreateString(rule.name, encoding=self._encoding, errors=self._encoding_errors)
+                FBRuleStart(builder)
+                FBRuleAddChildren(builder, fb_children)
+                if isinstance(rule, UnparserRule):
+                    FBRuleAddName(builder, fb_name)
+                    FBRuleAddType(builder, FBRuleType.UnparserRuleType)
+                elif isinstance(rule, UnparserRuleQuantifier):
+                    FBRuleAddType(builder, FBRuleType.UnparserRuleQuantifierType)
+                    FBRuleAddIdx(builder, rule.idx)
+                    FBRuleAddStart(builder, rule.start)
+                    # The stop field is written as an int32, so an unbounded
+                    # quantifier (inf) is stored as -1.
+                    FBRuleAddStop(builder, rule.stop if rule.stop != inf else -1)
+                elif isinstance(rule, UnparserRuleQuantified):
+                    FBRuleAddType(builder, FBRuleType.UnparserRuleQuantifiedType)
+                elif isinstance(rule, UnparserRuleAlternative):
+                    FBRuleAddType(builder, FBRuleType.UnparserRuleAlternativeType)
+                    FBRuleAddAltIdx(builder, rule.alt_idx)
+                    FBRuleAddIdx(builder, rule.idx)
+            return FBRuleEnd(builder)
+
+        builder = flatbuffers.Builder()
+        builder.Finish(buildFBRule(root))
+        return bytes(builder.Output())
+
+    def decode(self, data):
+        """
+        Rebuild a tree from a FlatBuffers buffer.
+
+        :param bytes data: The buffer to decode.
+        :return: The root of the decoded tree, or None if reading the buffer
+            fails with struct.error or ValueError (e.g., a mandatory field is
+            missing, text cannot be decoded, or a rule has an unknown type).
+        """
+        def readText(fb_text, field):
+            # Absent string fields are read back as None.
+            if fb_text is None:
+                raise ValueError(f'Missing {field} field')
+            return fb_text.decode(self._encoding, self._encoding_errors)
+
+        def readFBRule(fb_rule):
+            rule_type = fb_rule.Type()
+            if rule_type == FBRuleType.UnlexerRuleType:
+                fb_size = fb_rule.Size()
+                if fb_size is None:
+                    raise ValueError('Missing size field')
+                rule = UnlexerRule(name=readText(fb_rule.Name(), 'name'),
+                                   src=readText(fb_rule.Src(), 'src'),
+                                   size=RuleSize(depth=fb_size.Depth(), tokens=fb_size.Tokens()))
+            else:
+                children = [readFBRule(fb_rule.Children(i)) for i in range(fb_rule.ChildrenLength())]
+                if rule_type == FBRuleType.UnparserRuleType:
+                    rule = UnparserRule(name=readText(fb_rule.Name(), 'name'), children=children)
+                elif rule_type == FBRuleType.UnparserRuleQuantifierType:
+                    stop = fb_rule.Stop()
+                    # -1 marks an unbounded quantifier (the inverse of the mapping in encode).
+                    rule = UnparserRuleQuantifier(idx=fb_rule.Idx(), start=fb_rule.Start(), stop=stop if stop != -1 else inf, children=children)
+                elif rule_type == FBRuleType.UnparserRuleQuantifiedType:
+                    rule = UnparserRuleQuantified(children=children)
+                elif rule_type == FBRuleType.UnparserRuleAlternativeType:
+                    rule = UnparserRuleAlternative(alt_idx=fb_rule.AltIdx(), idx=fb_rule.Idx(), children=children)
+                else:
+                    # Raised explicitly instead of asserted: asserts are removed
+                    # under -O, which would leave rule unbound at the return below.
+                    raise ValueError(f'Unexpected type {rule_type}')
+            return rule
+
+        try:
+            return readFBRule(FBRule.GetRootAs(bytearray(data)))
+        except (struct.error, ValueError):
+            return None
diff --git a/setup.cfg b/setup.cfg
index f09411b..f437645 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -31,6 +31,7 @@ install_requires =
antlerinator>=1!3.0.0 antlr4-python3-runtime==4.13.1 autopep8 + flatbuffers inators jinja2 regex diff --git a/tests/grammars/LifeCycle.g4 b/tests/grammars/LifeCycle.g4 index 6918ee5..3303ff0 100644 --- a/tests/grammars/LifeCycle.g4 +++ b/tests/grammars/LifeCycle.g4 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Renata Hodovan, Akos Kiss. + * Copyright (c) 2023-2024 Renata Hodovan, Akos Kiss. * * Licensed under the BSD 3-Clause License * . @@ -34,7 +34,10 @@ // TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/j/ --tree-format json -o {tmpdir}/{grammar}JB%d.txt --keep-trees --no-generate --no-recombine // TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/j/ --tree-format json -o {tmpdir}/{grammar}JC%d.txt --keep-trees --no-generate --no-mutate // TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 2 -r start -n 6 --population {tmpdir}/population/j/ --tree-format json -o {tmpdir}/{grammar}JD%d.txt --no-generate - +// TEST-PARSE: {grammar}.g4 -j 1 -i {tmpdir}/LifeCycleA0.txt {tmpdir}/LifeCycleA1.txt {tmpdir}/LifeCycleA2.txt -r start --hidden WS -o {tmpdir}/population/f/ --tree-format flatbuffers +// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/f/ --tree-format flatbuffers -o {tmpdir}/{grammar}FB%d.txt --keep-trees --no-generate --no-recombine +// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 1 -r start -n 3 --population {tmpdir}/population/f/ --tree-format flatbuffers -o {tmpdir}/{grammar}FC%d.txt --keep-trees --no-generate --no-mutate +// TEST-GENERATE: {grammar}Generator.{grammar}Generator -j 2 -r start -n 6 --population {tmpdir}/population/f/ --tree-format flatbuffers -o {tmpdir}/{grammar}FD%d.txt --no-generate grammar LifeCycle; start : TEST testType ; diff --git a/tox.ini b/tox.ini index 425fb2c..d38366f 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = pytest commands = pylint 
grammarinator tests - pycodestyle grammarinator tests --ignore=E501,W503 --exclude=grammarinator/tool/g4/ANTLRv4*.py + pycodestyle grammarinator tests --ignore=E501,W503 --exclude=grammarinator/tool/g4/ANTLRv4*.py,grammarinator/tool/fbs/FBRule_generated.py [testenv:docs] deps = -rdocs/requirements.txt