Skip to content

Commit

Permalink
Add JsonWriteFeature counterpart (important!), update release notes
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Sep 17, 2024
1 parent 6fad16d commit f57c128
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 26 deletions.
9 changes: 9 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,15 @@ Antonin Janec (@xtonic)
* Contributed #1218: Simplify Unicode surrogate pair conversion for generation
(2.17.0)

Ian Roberts (@ianroberts)
* Reported #223: `UTF8JsonGenerator` writes supplementary characters as a
surrogate pair: should use 4-byte encoding
(2.18.0)

Radovan Netuka (@rnetuka)
* Contributed fix for #223: `UTF8JsonGenerator` writes supplementary characters as a
surrogate pair: should use 4-byte encoding
(2.18.0)

Jared Stehler (@jaredstehler)
* Reported, contributed fix for #1274: `NUL`-corrupted keys, values on JSON serialization
(2.18.0)
Expand Down
4 changes: 4 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ a pure JSON library.

2.18.0 (not yet released)

#223: `UTF8JsonGenerator` writes supplementary characters as a surrogate pair:
should use 4-byte encoding
(reported by Ian R)
(fix contributed by Radovan N)
#1230: Improve performance of `float` and `double` parsing from `TextBuffer`
(implemented by @pjfanning)
#1251: `InternCache` replace synchronized with `ReentrantLock` - the cache
Expand Down
14 changes: 5 additions & 9 deletions src/main/java/com/fasterxml/jackson/core/JsonGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -269,24 +269,20 @@ public enum Feature {
WRITE_HEX_UPPER_CASE(true),

/**
* Feature that specifies whether {@link JsonGenerator} should escape forward slashes.
* <p>
* Feature is disabled by default for Jackson 2.x version, and enabled by default in Jackson 3.0.
* See {@link com.fasterxml.jackson.core.json.JsonWriteFeature#ESCAPE_FORWARD_SLASHES}.
*
* @since 2.17
*/
ESCAPE_FORWARD_SLASHES(false),

/**
* Feature that specifies how 4-byte characters should be handled in {@link JsonGenerator}. If enabled,
* 4-byte characters made by surrogate pairs are combined and flushed as a single character encoded in UTF-8.
* If disabled, each pair is written as UTF-16 escape.
* <p>
* Feature is disabled by default
* See {@link com.fasterxml.jackson.core.json.JsonWriteFeature#COMBINE_UNICODE_SURROGATES_IN_UTF8}.
*
* @since 2.18
*/
COMBINE_UNICODE_SURROGATES(false);
COMBINE_UNICODE_SURROGATES_IN_UTF8(false),

;

private final boolean _defaultState;
private final int _mask;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public enum JsonWriteFeature
{
// // // Support for non-standard data format constructs: comments

// // Quoting/ecsaping-related features
// // Quoting/escaping-related features

/**
* Feature that determines whether JSON Object field names are
Expand Down Expand Up @@ -117,6 +117,28 @@ public enum JsonWriteFeature
*/
ESCAPE_FORWARD_SLASHES(false, JsonGenerator.Feature.ESCAPE_FORWARD_SLASHES),

/**
* Feature that specifies how characters outside "Basic Multilingual Plane" (BMP) -- ones encoded
* as 4-byte UTF-8 sequences but represented in JVM memory as 2 16-bit "surrogate" {@code chars} --
* should be encoded as UTF-8 by {@link JsonGenerator}.
* If enabled, surrogate pairs are combined and flushed as a
* single, 4-byte UTF-8 character.
* If disabled, each {@code char} of the pair is written separately: that is, as 2
* separate 6-byte JSON Unicode escapes (backslash, {@code u}, 4 hex digits) with values
* in Surrogate character ranges
* ({@code 0xD800} - {@code 0xDBFF} and {@code 0xDC00} - {@code 0xDFFF}).
* <p>
* Note that this feature only has effect for {@link JsonGenerator}s that directly encode
* {@code byte}-based output, as UTF-8 (target {@link java.io.OutputStream}, {@code byte[]}
* and so on); it will not (cannot) change handling of
* {@code char}-based output (like {@link java.io.Writer} or {@link java.lang.String}).
* <p>
* Feature is disabled by default in 2.x for backwards-compatibility (will be enabled
* in 3.0).
*
* @since 2.18
*/
COMBINE_UNICODE_SURROGATES_IN_UTF8(false, JsonGenerator.Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8),

;

final private boolean _defaultState;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
final byte[] outputBuffer = _outputBuffer;
final int[] escCodes = _outputEscapes;

boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES.enabledIn(_features);
boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);

while (offset < end) {
int ch = cbuf[offset++];
Expand Down Expand Up @@ -1541,7 +1541,7 @@ private final void _writeStringSegment2(final String text, int offset, final int
final byte[] outputBuffer = _outputBuffer;
final int[] escCodes = _outputEscapes;

boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES.enabledIn(_features);
boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);

while (offset < end) {
int ch = text.charAt(offset++);
Expand Down
38 changes: 24 additions & 14 deletions src/test/java/com/fasterxml/jackson/core/json/Surrogate223Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@

import org.junit.jupiter.api.Test;

import static com.fasterxml.jackson.core.JsonGenerator.Feature;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;

class Surrogate223Test extends JUnit5TestBase
{
private final JsonFactory JSON_F = new JsonFactory();
private final JsonFactory DEFAULT_JSON_F = newStreamFactory();

// for [core#223]
@Test
void surrogatesDefaultSetting() throws Exception {
// In 2.x a factory with default settings must NOT combine surrogate pairs:
// look the setting up first, then assert on the named result.
final boolean combinesByDefault = DEFAULT_JSON_F.isEnabled(
JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8.mappedFeature());
assertFalse(combinesByDefault);
}

// for [core#223]
@Test
Expand All @@ -24,35 +31,40 @@ void surrogatesByteBacked() throws Exception
final String toQuote = new String(Character.toChars(0x1F602));
assertEquals(2, toQuote.length()); // just sanity check

// default should be disabled:
// assertFalse(JSON_F.isEnabled(JsonGenerator.Feature.ESCAPE_UTF8_SURROGATES));

out = new ByteArrayOutputStream();
g = JSON_F.createGenerator(out).enable(Feature.COMBINE_UNICODE_SURROGATES);

JsonFactory f = JsonFactory.builder()
.enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
.build();
g = f.createGenerator(out);
g.writeStartArray();
g.writeString(toQuote);
g.writeEndArray();
g.close();
assertEquals(2 + 2 + 4, out.size()); // brackets, quotes, 4-byte encoding

// Also parse back to ensure correctness
JsonParser p = JSON_F.createParser(out.toByteArray());
JsonParser p = f.createParser(out.toByteArray());
assertToken(JsonToken.START_ARRAY, p.nextToken());
assertToken(JsonToken.VALUE_STRING, p.nextToken());
assertToken(JsonToken.END_ARRAY, p.nextToken());
p.close();

// but may revert back to original behavior
out = new ByteArrayOutputStream();
g = JSON_F.createGenerator(out).disable(Feature.COMBINE_UNICODE_SURROGATES);
f = JsonFactory.builder()
.disable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
.build();

g = f.createGenerator(out);
g.writeStartArray();
g.writeString(toQuote);
g.writeEndArray();
g.close();
assertEquals(2 + 2 + 12, out.size()); // brackets, quotes, 2 x 6 byte JSON escape
}

// for [core#223]
// for [core#223]: no change for character-backed (cannot do anything)
@Test
void surrogatesCharBacked() throws Exception
{
Expand All @@ -61,21 +73,19 @@ void surrogatesCharBacked() throws Exception
final String toQuote = new String(Character.toChars(0x1F602));
assertEquals(2, toQuote.length()); // just sanity check

// default should be disabled:
// assertFalse(JSON_F.isEnabled(JsonGenerator.Feature.ESCAPE_UTF8_SURROGATES));

out = new StringWriter();
g = JSON_F.createGenerator(out);
g = DEFAULT_JSON_F.createGenerator(out);
g.writeStartArray();
g.writeString(toQuote);
g.writeEndArray();
g.close();
assertEquals(2 + 2 + 2, out.toString().length()); // brackets, quotes, 2 chars as is

// Also parse back to ensure correctness
JsonParser p = JSON_F.createParser(out.toString());
JsonParser p = DEFAULT_JSON_F.createParser(out.toString());
assertToken(JsonToken.START_ARRAY, p.nextToken());
assertToken(JsonToken.VALUE_STRING, p.nextToken());
assertEquals(toQuote, p.getText());
assertToken(JsonToken.END_ARRAY, p.nextToken());
p.close();
}
Expand Down

0 comments on commit f57c128

Please sign in to comment.