Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a compact JSON representation for messages #41

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
50 changes: 49 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ as format-independent representations of localizable and localized messages and
so that operations like linting and transforms can be applied to them.

The Message and Resource representations are drawn from work done for the
Unicode [MessageFormat 2 specification](https://github.com/unicode-org/message-format-wg/tree/main/spec)
Unicode [MessageFormat 2 specification](https://unicode.org/reports/tr35/tr35-messageFormat.html)
and the [Message resource specification](https://github.com/eemeli/message-resource-wg/).

The library currently supports the following resource formats:
Expand Down Expand Up @@ -74,6 +74,23 @@ All the serializers accept a `trim_comments` argument
which leaves out comments from the serialized result,
but additional input types and options vary by format.

### moz.l10n.formats.mf2

```python
from moz.l10n.formats.mf2 import (
MF2ParseError, # May be raised by mf2_parse_message()
MF2ValidationError, # May be raised by mf2_from_json() and mf2_validate_message()
mf2_parse_message, # Parse MF2 message syntax into a Message
mf2_serialize_message, # Serialize a Message using MF2 syntax
mf2_from_json, # Marshal an MF2 data model JSON Schema object into a Message
mf2_to_json, # Represent a Message using the MF2 data model JSON Schema
mf2_validate_message # Validate that a Message meets all of the MF2 validity constraints
)
```

Tools for working with [MessageFormat 2.0](https://unicode.org/reports/tr35/tr35-messageFormat.html) messages,
which may be embedded in resource formats.

### moz.l10n.formats.detect_format

```python
Expand All @@ -87,6 +104,37 @@ and/or contents.

Returns a `Format` enum value, or `None` if the input is not recognized.

### moz.l10n.message.data

```python
from moz.l10n.message.data import (
CatchallKey,
Expression,
Markup,
Message, # type alias for PatternMessage | SelectMessage
Pattern, # type alias for list[str | Expression | Markup]
PatternMessage,
SelectMessage,
VariableRef
)
```

Dataclasses defining the library's representation of a single message,
either as a single-pattern `PatternMessage`,
or as a `SelectMessage` with one or more selectors and multiple variant patterns.

### moz.l10n.message: message_from_json() and message_to_json()

```python
from moz.l10n.message import message_from_json, message_to_json

def message_from_json(json: list[Any] | dict[str, Any]) -> Message: ...
def message_to_json(msg: Message) -> list[Any] | dict[str, Any]: ...
```

Converters to and from a JSON-serializable representation of a `Message`.
The format of the output is defined by the [`schema.json`](./moz/l10n/message/schema.json) JSON Schema.

### moz.l10n.paths.L10nConfigPaths

Wrapper for localization config files.
Expand Down
3 changes: 2 additions & 1 deletion moz/l10n/formats/android/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from collections.abc import Callable, Iterable, Iterator
from re import compile
from typing import Literal

from lxml import etree

Expand Down Expand Up @@ -277,7 +278,7 @@ def flatten(el: etree._Element) -> Iterator[str | Expression | Markup]:
for gc in body:
if isinstance(gc, str):
options: dict[str, str | VariableRef] = dict(child.attrib)
attr: dict[str, str | None] = {"translate": "no"}
attr: dict[str, str | Literal[True]] = {"translate": "no"}
arg: str | VariableRef | None
if id:
arg = VariableRef(get_var_name(id))
Expand Down
16 changes: 16 additions & 0 deletions moz/l10n/formats/mf2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .from_json import mf2_from_json
from .message_parser import MF2ParseError, mf2_parse_message
from .serialize import mf2_serialize_message, mf2_serialize_pattern
from .to_json import mf2_to_json
from .validate import MF2ValidationError, mf2_validate_message

# Public API of the moz.l10n.formats.mf2 package.
# Every name listed here is brought into scope by the relative imports above.
__all__ = [
"MF2ParseError",
"MF2ValidationError",
"mf2_from_json",
"mf2_parse_message",
"mf2_serialize_message",
"mf2_serialize_pattern",
"mf2_to_json",
"mf2_validate_message",
]
153 changes: 153 additions & 0 deletions moz/l10n/formats/mf2/from_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Any, Literal, cast

from ...message import data as msg
from .validate import MF2ValidationError


def mf2_from_json(json: dict[str, Any]) -> msg.Message:
    """
    Marshal a MessageFormat 2 data model [JSON Schema](https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model/message.json)
    object into a parsed `moz.l10n.message.data.Message`.

    The input must have a `type` of `"message"` or `"select"`
    and a `declarations` list of `"input"` and `"local"` declarations.
    A `"message"` is read from its `pattern`,
    a `"select"` from its `selectors` and `variants`.

    An `"input"` declaration must have a variable operand
    with the same name as the declaration,
    and no variable may be declared more than once.

    May raise `MF2ValidationError`.
    Structural errors in the input (missing keys, values of the wrong shape)
    are also reported as `MF2ValidationError`, chained to the underlying error.
    """
    try:
        msg_type = json["type"]
        if msg_type not in {"message", "select"}:
            raise MF2ValidationError(f"Invalid JSON message: {json}")

        declarations: dict[str, msg.Expression] = {}
        for decl in json["declarations"]:
            decl_type = decl["type"]
            if decl_type not in {"input", "local"}:
                raise MF2ValidationError(f"Invalid JSON declaration type: {decl}")
            decl_name = _string(decl, "name")
            decl_expr = _expression(decl["value"])
            # An .input declaration must annotate the variable it declares.
            if decl_type == "input" and (
                not isinstance(decl_expr.arg, msg.VariableRef)
                or decl_expr.arg.name != decl_name
            ):
                raise MF2ValidationError(f"Invalid JSON .input declaration: {decl}")
            if decl_name in declarations:
                raise MF2ValidationError(f"Duplicate JSON declaration for ${decl_name}")
            declarations[decl_name] = decl_expr

        if msg_type == "message":
            pattern = _pattern(json["pattern"])
            return msg.PatternMessage(pattern, declarations)

        # Only "select" remains, as msg_type was validated above.
        assert msg_type == "select"
        selectors = tuple(_variable(sel) for sel in json["selectors"])
        variants = {
            tuple(_key(key) for key in vari["keys"]): _pattern(vari["value"])
            for vari in json["variants"]
        }
        return msg.SelectMessage(declarations, selectors, variants)
    except (AttributeError, IndexError, KeyError, TypeError) as err:
        # AttributeError is included because helpers call .get() and .items()
        # on values that malformed input may supply as non-mappings.
        raise MF2ValidationError(f"Invalid JSON: {err!r}") from err


def _pattern(json: list[Any]) -> msg.Pattern:
return [
part
if isinstance(part, str)
else _markup(part)
if part["type"] == "markup"
else _expression(part)
for part in json
]


def _expression(json: dict[str, Any]) -> msg.Expression:
    """
    Convert a JSON expression object into an `Expression`.

    The object must have type `"expression"` and at least one of
    an `arg` operand or a `function`; otherwise `MF2ValidationError` is raised.
    Function `options` and expression `attributes` default to empty dicts.
    """
    if json["type"] != "expression":
        raise MF2ValidationError(f"Invalid JSON expression type: {json}")

    operand = None
    if "arg" in json:
        operand = _value(json["arg"])

    func_name = None
    func_options: dict[str, str | msg.VariableRef] = {}
    func_json = json.get("function", None)
    # A missing or falsy `function` entry means the expression has no function.
    if func_json:
        if func_json["type"] != "function":
            raise MF2ValidationError(f"Invalid JSON function type: {func_json}")
        func_name = _string(func_json, "name")
        if "options" in func_json:
            func_options = _options(func_json["options"])

    if operand is None and func_name is None:
        raise MF2ValidationError(
            f"Invalid JSON expression with no operand and no function: {json}"
        )

    attrs: dict[str, str | Literal[True]] = {}
    if "attributes" in json:
        attrs = _attributes(json["attributes"])
    return msg.Expression(operand, func_name, func_options, attrs)


def _markup(json: dict[str, Any]) -> msg.Markup:
    """
    Convert a JSON markup object into a `Markup`.

    The `kind` must be one of `"open"`, `"standalone"` or `"close"`;
    `options` and `attributes` default to empty dicts.
    """
    # Internal invariant: this file's only call site checks the type first.
    assert json["type"] == "markup"

    raw_kind = _string(json, "kind")
    if raw_kind not in {"open", "standalone", "close"}:
        raise MF2ValidationError(f"Invalid JSON markup kind: {json}")
    kind = cast(Literal["open", "standalone", "close"], raw_kind)

    name = _string(json, "name")

    options: dict[str, str | msg.VariableRef] = {}
    if "options" in json:
        options = _options(json["options"])

    attributes: dict[str, str | Literal[True]] = {}
    if "attributes" in json:
        attributes = _attributes(json["attributes"])

    return msg.Markup(kind, name, options, attributes)


def _options(json: dict[str, Any]) -> dict[str, str | msg.VariableRef]:
return {name: _value(json_value) for name, json_value in json.items()}


def _attributes(json: dict[str, Any]) -> dict[str, str | Literal[True]]:
return {
name: True if json_value is True else _literal(json_value)
for name, json_value in json.items()
}


def _key(json: dict[str, Any]) -> str | msg.CatchallKey:
    """
    Convert a JSON variant key into a string key or a `CatchallKey`.

    A `"literal"` key gives its string `value`.
    A `"*"` key gives a `CatchallKey`, carrying its `value` if one is present.
    Any other type raises `MF2ValidationError`.
    """
    # Named `key_type` rather than `type` so the builtin is not shadowed,
    # and read once so both branches test the same value.
    key_type = json["type"]
    if key_type == "literal":
        return _string(json, "value")
    if key_type == "*":
        value = _string(json, "value") if "value" in json else None
        return msg.CatchallKey(value)
    raise MF2ValidationError(f"Invalid JSON variant key: {json}")


def _value(json: dict[str, Any]) -> str | msg.VariableRef:
    """Convert a JSON literal into its string value, or anything else as a variable reference."""
    if json["type"] == "literal":
        return _string(json, "value")
    return _variable(json)


def _literal(json: dict[str, Any]) -> str:
    """Return the string `value` of a JSON literal, raising `MF2ValidationError` for other types."""
    if json["type"] == "literal":
        return _string(json, "value")
    raise MF2ValidationError(f"Invalid JSON literal: {json}")


def _variable(json: dict[str, Any]) -> msg.VariableRef:
    """Convert a JSON variable object into a `VariableRef`, raising `MF2ValidationError` for other types."""
    if json["type"] == "variable":
        var_name = _string(json, "name")
        return msg.VariableRef(var_name)
    raise MF2ValidationError(f"Invalid JSON variable: {json}")


def _string(obj: dict[str, Any], key: str | None = None) -> str:
value = obj if key is None else obj.get(key, None)
if isinstance(value, str):
return value
else:
raise MF2ValidationError(f"Expected a string value for {key} in {obj}")
Loading
Loading