Skip to content

Commit

Permalink
Support flatbuffer-based tree exports/imports (#223)
Browse files Browse the repository at this point in the history
  • Loading branch information
renatahodovan authored May 28, 2024
1 parent 4a17ada commit c1bcb39
Show file tree
Hide file tree
Showing 10 changed files with 334 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[MASTER]

# Files or directories to be skipped. They should be base names, not paths.
ignore=g4, grammars
ignore=g4, grammars, FBRule_generated.py

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
Expand Down
5 changes: 3 additions & 2 deletions grammarinator/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023 Renata Hodovan, Akos Kiss.
# Copyright (c) 2020-2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand All @@ -10,7 +10,7 @@

from inators.imp import import_object

from .tool import JsonTreeCodec, PickleTreeCodec
from .tool import FlatBuffersTreeCodec, JsonTreeCodec, PickleTreeCodec

logger = logging.getLogger('grammarinator')

Expand Down Expand Up @@ -46,6 +46,7 @@ def add_encoding_errors_argument(parser):
# Registry of the supported tree formats, keyed by format name. Each entry
# holds the 'extension' string and the 'codec_class' associated with the
# format.
tree_formats = {
    'pickle': {'extension': 'grtp', 'codec_class': PickleTreeCodec},
    'json': {'extension': 'grtj', 'codec_class': JsonTreeCodec},
    'flatbuffers': {'extension': 'grtf', 'codec_class': FlatBuffersTreeCodec},
}


Expand Down
4 changes: 2 additions & 2 deletions grammarinator/tool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 Renata Hodovan, Akos Kiss.
# Copyright (c) 2023-2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand All @@ -9,4 +9,4 @@
from .generator import DefaultGeneratorFactory, GeneratorFactory, GeneratorTool
from .parser import ParserTool
from .processor import ProcessorTool
from .tree_codec import AnnotatedTreeCodec, JsonTreeCodec, PickleTreeCodec, TreeCodec
from .tree_codec import AnnotatedTreeCodec, FlatBuffersTreeCodec, JsonTreeCodec, PickleTreeCodec, TreeCodec
178 changes: 178 additions & 0 deletions grammarinator/tool/fbs/FBRule_generated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# automatically generated by the FlatBuffers compiler, do not modify

# namespace: fbs

import flatbuffers
from flatbuffers.compat import import_numpy
np = import_numpy()

class FBRuleType(object):
    # Integer tags stored in the ``type`` field of an FBRule table. One tag
    # exists per kind of tree node; 0 (UnlexerRuleType) is also the value that
    # FBRule.Type() returns when the field is absent from a buffer.
    UnlexerRuleType = 0
    UnparserRuleType = 1
    UnparserRuleQuantifierType = 2
    UnparserRuleQuantifiedType = 3
    UnparserRuleAlternativeType = 4


class FBRuleSize(object):
    # Accessor over a serialized FBRuleSize struct: two 32-bit integers,
    # ``depth`` read at byte offset 0 and ``tokens`` read at byte offset 4.
    __slots__ = ['_tab']

    @classmethod
    def SizeOf(cls):
        # Two Int32 fields (offsets 0 and 4) give the fixed size of 8.
        return 8

    # FBRuleSize
    # Bind this accessor to the struct located at position ``pos`` of ``buf``.
    def Init(self, buf, pos):
        self._tab = flatbuffers.table.Table(buf, pos)

    # FBRuleSize
    # Return the ``depth`` value (Int32 at offset 0 from the struct position).
    def Depth(self): return self._tab.Get(flatbuffers.number_types.Int32Flags, self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(0))
    # FBRuleSize
    # Return the ``tokens`` value (Int32 at offset 4 from the struct position).
    def Tokens(self): return self._tab.Get(flatbuffers.number_types.Int32Flags, self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(4))

def CreateFBRuleSize(builder, depth, tokens):
    # Write an FBRuleSize struct through ``builder`` and return the builder
    # offset of the written struct.
    # ``tokens`` is prepended before ``depth``; since values are prepended,
    # ``depth`` ends up first, matching the offsets (0 and 4) that
    # FBRuleSize.Depth() and FBRuleSize.Tokens() read from.
    # NOTE(review): Prep(4, 8) presumably requests 4-byte alignment for the
    # 8 bytes that follow -- confirm against the flatbuffers Builder docs.
    builder.Prep(4, 8)
    builder.PrependInt32(tokens)
    builder.PrependInt32(depth)
    return builder.Offset()


class FBRule(object):
    # Read-only accessor over a serialized FBRule table. Every field getter
    # asks the table for the field's position via ``self._tab.Offset(<vtable
    # offset>)`` and returns a default (0 or None) when that lookup yields 0,
    # i.e. when the field was not written. The vtable offsets used below are
    # 4 (type), 6 (name), 8 (children), 10 (src), 12 (size), 14 (idx),
    # 16 (start), 18 (stop) and 20 (alt_idx).
    __slots__ = ['_tab']

    @classmethod
    def GetRootAs(cls, buf, offset=0):
        # Read the root offset stored at ``offset`` in ``buf`` and return an
        # FBRule bound to the position it points at.
        n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
        x = FBRule()
        x.Init(buf, n + offset)
        return x

    @classmethod
    def GetRootAsFBRule(cls, buf, offset=0):
        """This method is deprecated. Please switch to GetRootAs."""
        return cls.GetRootAs(buf, offset)
    # FBRule
    # Bind this accessor to the table located at position ``pos`` of ``buf``.
    def Init(self, buf, pos):
        self._tab = flatbuffers.table.Table(buf, pos)

    # FBRule
    # Return the node type tag (Int8, see FBRuleType); 0 if the field is absent.
    def Type(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int8Flags, o + self._tab.Pos)
        return 0

    # FBRule
    # Return the ``name`` string as stored in the buffer; None if absent.
    def Name(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
        if o != 0:
            return self._tab.String(o + self._tab.Pos)
        return None

    # FBRule
    # Return the j-th child as a new FBRule accessor; None if the ``children``
    # vector is absent. No bounds check is performed on ``j`` here.
    def Children(self, j):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if o != 0:
            x = self._tab.Vector(o)
            # Vector elements are 4 bytes wide; each one is resolved through
            # Indirect() to the position of the child table.
            x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
            x = self._tab.Indirect(x)
            obj = FBRule()
            obj.Init(self._tab.Bytes, x)
            return obj
        return None

    # FBRule
    # Return the number of children; 0 if the ``children`` vector is absent.
    def ChildrenLength(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if o != 0:
            return self._tab.VectorLen(o)
        return 0

    # FBRule
    # Return True if the ``children`` vector was not written at all.
    def ChildrenIsNone(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        return o == 0

    # FBRule
    # Return the ``src`` string as stored in the buffer; None if absent.
    def Src(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
        if o != 0:
            return self._tab.String(o + self._tab.Pos)
        return None

    # FBRule
    # Return an FBRuleSize accessor for the ``size`` struct; None if absent.
    def Size(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
        if o != 0:
            # The struct is read directly at the field position (no Indirect()).
            x = o + self._tab.Pos
            obj = FBRuleSize()
            obj.Init(self._tab.Bytes, x)
            return obj
        return None

    # FBRule
    # Return the ``idx`` value (Int32); 0 if the field is absent.
    def Idx(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
        return 0

    # FBRule
    # Return the ``start`` value (Int32); 0 if the field is absent.
    def Start(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
        return 0

    # FBRule
    # Return the ``stop`` value (Int32); 0 if the field is absent.
    def Stop(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(18))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
        return 0

    # FBRule
    # Return the ``alt_idx`` value (Int32); 0 if the field is absent.
    def AltIdx(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
        return 0

# Builder-side helpers for writing an FBRule table. The expected call
# sequence is FBRuleStart(), any number of FBRuleAdd*() calls, then
# FBRuleEnd(). Slot numbers 0..8 follow the field order type, name, children,
# src, size, idx, start, stop, alt_idx; the last argument of every
# Prepend*Slot call is the default value of the slot (0 for all of them).

def FBRuleStart(builder):
    # Begin a new FBRule table with 9 field slots.
    builder.StartObject(9)

def FBRuleAddType(builder, type):
    # Slot 0: node type tag (Int8, see FBRuleType).
    builder.PrependInt8Slot(0, type, 0)

def FBRuleAddName(builder, name):
    # Slot 1: offset of an already created ``name`` string.
    builder.PrependUOffsetTRelativeSlot(1, flatbuffers.number_types.UOffsetTFlags.py_type(name), 0)

def FBRuleAddChildren(builder, children):
    # Slot 2: offset of an already finished ``children`` vector.
    builder.PrependUOffsetTRelativeSlot(2, flatbuffers.number_types.UOffsetTFlags.py_type(children), 0)

def FBRuleStartChildrenVector(builder, numElems):
    # Begin a vector of ``numElems`` elements, each 4 bytes wide with 4-byte
    # alignment, to hold the offsets of the child tables.
    return builder.StartVector(4, numElems, 4)

def FBRuleAddSrc(builder, src):
    # Slot 3: offset of an already created ``src`` string.
    builder.PrependUOffsetTRelativeSlot(3, flatbuffers.number_types.UOffsetTFlags.py_type(src), 0)

def FBRuleAddSize(builder, size):
    # Slot 4: the ``size`` struct, as returned by CreateFBRuleSize().
    builder.PrependStructSlot(4, flatbuffers.number_types.UOffsetTFlags.py_type(size), 0)

def FBRuleAddIdx(builder, idx):
    # Slot 5: ``idx`` (Int32).
    builder.PrependInt32Slot(5, idx, 0)

def FBRuleAddStart(builder, start):
    # Slot 6: ``start`` (Int32).
    builder.PrependInt32Slot(6, start, 0)

def FBRuleAddStop(builder, stop):
    # Slot 7: ``stop`` (Int32).
    builder.PrependInt32Slot(7, stop, 0)

def FBRuleAddAltIdx(builder, altIdx):
    # Slot 8: ``alt_idx`` (Int32).
    builder.PrependInt32Slot(8, altIdx, 0)

def FBRuleEnd(builder):
    # Finish the table started by FBRuleStart() and return what the builder
    # reports for it (used by callers as the offset of the table).
    return builder.EndObject()



8 changes: 8 additions & 0 deletions grammarinator/tool/fbs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
# This file may not be copied, modified, or distributed except
# according to those terms.

from .FBRule_generated import CreateFBRuleSize, FBRule, FBRuleAddAltIdx, FBRuleAddChildren, FBRuleAddIdx, FBRuleAddName, FBRuleAddSize, FBRuleAddSrc, FBRuleAddStart, FBRuleAddStop, FBRuleAddType, FBRuleEnd, FBRuleSize, FBRuleStart, FBRuleStartChildrenVector, FBRuleType
47 changes: 47 additions & 0 deletions grammarinator/tool/resources/fbs/FBRule.fbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2024 Renata Hodovan, Akos Kiss.
*
* Licensed under the BSD 3-Clause License
* <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
* This file may not be copied, modified, or distributed except
* according to those terms.
*/

/*
* This is the flatbuffer schema of the tree format of Grammarinator.
* It was used as input for the flatc compiler to generate code for
* building and reading flatbuffer tree representations
* (grammarinator/tool/fbs/FBRule_generated.py).
*
* The flatc command used:
* flatc --python --gen-onefile -o grammarinator/tool/fbs/ FBRule.fbs
*/

namespace grammarinator.tool.fbs;

// Tag identifying the kind of tree node stored in an FBRule table.
// Stored as a single byte; 0 is the value of the first enumerator.
enum FBRuleType: byte {
    UnlexerRuleType = 0,
    UnparserRuleType = 1,
    UnparserRuleQuantifierType = 2,
    UnparserRuleQuantifiedType = 3,
    UnparserRuleAlternativeType = 4
}

// Size information of a node, stored inline as two integers.
struct FBRuleSize {
    depth: int;
    tokens: int;
}

// A single node of the serialized tree. `type` tells which of the other
// fields are meaningful. The per-field notes describe what
// FlatBuffersTreeCodec (grammarinator/tool/tree_codec.py) writes.
table FBRule {
    type: FBRuleType;    // kind of the node
    name: string;        // written for UnlexerRule and UnparserRule nodes
    children: [FBRule];  // written for every node that is not an UnlexerRule
    src: string;         // written for UnlexerRule nodes
    size: FBRuleSize;    // written for UnlexerRule nodes
    idx: int;            // written for quantifier and alternative nodes
    start: int;          // written for quantifier nodes
    stop: int;           // written for quantifier nodes; -1 encodes an infinite stop
    alt_idx: int;        // written for alternative nodes
}

root_type FBRule;
88 changes: 88 additions & 0 deletions grammarinator/tool/tree_codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@

import json
import pickle
import struct

from math import inf

import flatbuffers

from ..runtime import RuleSize, UnlexerRule, UnparserRule, UnparserRuleAlternative, UnparserRuleQuantified, UnparserRuleQuantifier
from .fbs import CreateFBRuleSize, FBRule, FBRuleAddAltIdx, FBRuleAddChildren, FBRuleAddIdx, FBRuleAddName, FBRuleAddSize, FBRuleAddSrc, FBRuleAddStart, FBRuleAddStop, FBRuleAddType, FBRuleEnd, FBRuleStart, FBRuleStartChildrenVector, FBRuleType


class TreeCodec:
Expand Down Expand Up @@ -153,3 +159,85 @@ def _dict_to_rule(dct):
return json.loads(data.decode(encoding=self._encoding, errors=self._encoding_errors), object_hook=_dict_to_rule)
except json.JSONDecodeError:
return None


class FlatBuffersTreeCodec(TreeCodec):
    """
    FlatBuffers-based tree codec.
    """

    def __init__(self, encoding='utf-8', encoding_errors='surrogatepass'):
        """
        :param str encoding: The encoding to use when converting between
            flatbuffers-encoded text and bytes (default: utf-8).
        :param str encoding_errors: The error handling scheme to use when
            encoding or decoding text (default: surrogatepass).
        """
        self._encoding = encoding
        self._encoding_errors = encoding_errors

    def encode(self, root):
        """
        Serialize a tree into a finished FlatBuffers buffer.

        :param root: Root node of the tree to serialize.
        :return: The serialized tree.
        :rtype: bytes
        """
        def buildFBRule(rule):
            # Strings and the children vector are created *before*
            # FBRuleStart: they are separate objects that the table only
            # references by offset, so they have to exist already when the
            # table is being assembled.
            if isinstance(rule, UnlexerRule):
                fb_name = builder.CreateString(rule.name, encoding=self._encoding, errors=self._encoding_errors)
                fb_src = builder.CreateString(rule.src, encoding=self._encoding, errors=self._encoding_errors)
                FBRuleStart(builder)
                FBRuleAddType(builder, FBRuleType.UnlexerRuleType)
                FBRuleAddName(builder, fb_name)
                FBRuleAddSrc(builder, fb_src)
                # The size struct is written while the table is open and is
                # stored directly in the table's slot.
                FBRuleAddSize(builder, CreateFBRuleSize(builder, rule.size.depth, rule.size.tokens))
            else:
                # Serialize the whole subtree first, then collect the child
                # offsets into a vector.
                children = [buildFBRule(child) for child in rule.children]
                FBRuleStartChildrenVector(builder, len(children))
                # Elements are prepended, hence the reversed iteration to
                # keep the original child order in the buffer.
                for fb_child in reversed(children):
                    builder.PrependUOffsetTRelative(fb_child)
                fb_children = builder.EndVector()
                # Only UnparserRule nodes carry a name among the parent
                # nodes; its string must also precede FBRuleStart.
                if isinstance(rule, UnparserRule):
                    fb_name = builder.CreateString(rule.name, encoding=self._encoding, errors=self._encoding_errors)
                FBRuleStart(builder)
                FBRuleAddChildren(builder, fb_children)
                if isinstance(rule, UnparserRule):
                    FBRuleAddName(builder, fb_name)
                    FBRuleAddType(builder, FBRuleType.UnparserRuleType)
                elif isinstance(rule, UnparserRuleQuantifier):
                    FBRuleAddType(builder, FBRuleType.UnparserRuleQuantifierType)
                    FBRuleAddIdx(builder, rule.idx)
                    FBRuleAddStart(builder, rule.start)
                    # An infinite upper bound cannot be stored in an integer
                    # field; it is encoded as -1 (decode() maps it back).
                    FBRuleAddStop(builder, rule.stop if rule.stop != inf else -1)
                elif isinstance(rule, UnparserRuleQuantified):
                    FBRuleAddType(builder, FBRuleType.UnparserRuleQuantifiedType)
                elif isinstance(rule, UnparserRuleAlternative):
                    FBRuleAddType(builder, FBRuleType.UnparserRuleAlternativeType)
                    FBRuleAddAltIdx(builder, rule.alt_idx)
                    FBRuleAddIdx(builder, rule.idx)
            return FBRuleEnd(builder)

        builder = flatbuffers.Builder()
        builder.Finish(buildFBRule(root))
        return bytes(builder.Output())

    def decode(self, data):
        """
        Rebuild a tree from a buffer produced by :meth:`encode`.

        :param data: The serialized tree (any bytes-like object accepted by
            ``bytearray``).
        :return: The root of the decoded tree, or None if reading the buffer
            fails with ``struct.error``.
        :raises AssertionError: If a node with an unknown type tag is found.
        """
        def readFBRule(fb_rule):
            rule_type = fb_rule.Type()
            if rule_type == FBRuleType.UnlexerRuleType:
                fb_size = fb_rule.Size()
                rule = UnlexerRule(name=fb_rule.Name().decode(self._encoding, self._encoding_errors),
                                   src=fb_rule.Src().decode(self._encoding, self._encoding_errors),
                                   size=RuleSize(depth=fb_size.Depth(), tokens=fb_size.Tokens()))
            else:
                children = [readFBRule(fb_rule.Children(i)) for i in range(fb_rule.ChildrenLength())]
                if rule_type == FBRuleType.UnparserRuleType:
                    rule = UnparserRule(name=fb_rule.Name().decode(self._encoding, self._encoding_errors), children=children)
                elif rule_type == FBRuleType.UnparserRuleQuantifierType:
                    stop = fb_rule.Stop()
                    # -1 is the on-disk marker of an infinite upper bound.
                    rule = UnparserRuleQuantifier(idx=fb_rule.Idx(), start=fb_rule.Start(), stop=stop if stop != -1 else inf, children=children)
                elif rule_type == FBRuleType.UnparserRuleQuantifiedType:
                    rule = UnparserRuleQuantified(children=children)
                elif rule_type == FBRuleType.UnparserRuleAlternativeType:
                    rule = UnparserRuleAlternative(alt_idx=fb_rule.AltIdx(), idx=fb_rule.Idx(), children=children)
                else:
                    # Raised explicitly instead of using an ``assert``
                    # statement: asserts are removed under ``python -O``,
                    # which would let control fall through to ``return rule``
                    # with ``rule`` unbound. The exception type and message
                    # are the same as those of the former assertion.
                    raise AssertionError(f'Unexpected type {rule_type}')
            return rule

        try:
            return readFBRule(FBRule.GetRootAs(bytearray(data)))
        except struct.error:
            return None
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ install_requires =
antlerinator>=1!3.0.0
antlr4-python3-runtime==4.13.1
autopep8
flatbuffers
inators
jinja2
regex
Expand Down
Loading

0 comments on commit c1bcb39

Please sign in to comment.