From 48c4783625ac1e5989ca5c1c005d5a30820aeb5b Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Wed, 23 Aug 2023 21:31:22 -0700 Subject: [PATCH] Refactor construction and use of CharsToNameCanonicalizer (#1088) --- .../fasterxml/jackson/core/JsonFactory.java | 14 ++- .../core/json/ByteSourceJsonBootstrapper.java | 2 +- .../core/sym/CharsToNameCanonicalizer.java | 101 ++++++++++++++---- .../fasterxml/jackson/core/TestVersions.java | 17 +-- .../core/sym/SymbolTableMergingTest.java | 2 +- .../jackson/core/sym/TestSymbolTables.java | 30 +++--- .../core/sym/TestSymbolsWithMediaItem.java | 2 +- 7 files changed, 119 insertions(+), 49 deletions(-) diff --git a/src/main/java/com/fasterxml/jackson/core/JsonFactory.java b/src/main/java/com/fasterxml/jackson/core/JsonFactory.java index 87a905f376..07317f1fc6 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonFactory.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonFactory.java @@ -221,7 +221,7 @@ public static int collectDefaults() { * It should not be linked back to the original blueprint, to * avoid contents from leaking between factories. */ - protected final transient CharsToNameCanonicalizer _rootCharSymbols = CharsToNameCanonicalizer.createRoot(); + protected final transient CharsToNameCanonicalizer _rootCharSymbols; /** * Alternative to the basic symbol table, some stream-based @@ -377,6 +377,8 @@ public JsonFactory(ObjectCodec oc) { _streamWriteConstraints = StreamWriteConstraints.defaults(); _errorReportConfiguration = ErrorReportConfiguration.defaults(); _generatorDecorators = null; + + _rootCharSymbols = CharsToNameCanonicalizer.createRoot(this); } /** @@ -408,6 +410,8 @@ protected JsonFactory(JsonFactory src, ObjectCodec codec) _rootValueSeparator = src._rootValueSeparator; _maximumNonEscapedChar = src._maximumNonEscapedChar; _quoteChar = src._quoteChar; + + _rootCharSymbols = CharsToNameCanonicalizer.createRoot(this); } /** @@ -437,6 +441,8 @@ public JsonFactory(JsonFactoryBuilder b) { _rootValueSeparator = b._rootValueSeparator; _maximumNonEscapedChar = b._maximumNonEscapedChar; _quoteChar = b._quoteChar; + + _rootCharSymbols = CharsToNameCanonicalizer.createRoot(this); } /** @@ -466,6 +472,8 @@ protected JsonFactory(TSFBuilder b, boolean bogus) { _rootValueSeparator = null; _maximumNonEscapedChar = 0; _quoteChar = DEFAULT_QUOTE_CHAR; + + _rootCharSymbols = CharsToNameCanonicalizer.createRoot(this); } /** @@ -1890,7 +1898,7 @@ protected JsonParser _createParser(InputStream in, IOContext ctxt) throws IOExce */ protected JsonParser _createParser(Reader r, IOContext ctxt) throws IOException { return new ReaderBasedJsonParser(ctxt, _parserFeatures, r, _objectCodec, - _rootCharSymbols.makeChild(_factoryFeatures)); + _rootCharSymbols.makeChild()); } /** @@ -1912,7 +1920,7 @@ protected JsonParser _createParser(Reader r, IOContext ctxt) throws IOException protected JsonParser _createParser(char[] data, int offset, int len, IOContext ctxt, boolean recyclable) throws IOException { return new ReaderBasedJsonParser(ctxt, _parserFeatures, null, _objectCodec, - _rootCharSymbols.makeChild(_factoryFeatures), + _rootCharSymbols.makeChild(), data, offset, offset+len, recyclable); } diff --git a/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java b/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java index 90e53099dd..f6efe43ffe 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java +++ b/src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java @@ -276,7 +276,7 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec, } } return new ReaderBasedJsonParser(_context, parserFeatures, constructReader(), codec, - rootCharSymbols.makeChild(factoryFeatures)); + rootCharSymbols.makeChild()); } /* diff --git a/src/main/java/com/fasterxml/jackson/core/sym/CharsToNameCanonicalizer.java b/src/main/java/com/fasterxml/jackson/core/sym/CharsToNameCanonicalizer.java index ca2e0d4a09..88316cbf1c 100644 --- a/src/main/java/com/fasterxml/jackson/core/sym/CharsToNameCanonicalizer.java +++ b/src/main/java/com/fasterxml/jackson/core/sym/CharsToNameCanonicalizer.java @@ -6,6 +6,8 @@ import java.util.concurrent.atomic.AtomicReference; import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.core.TokenStreamFactory; import com.fasterxml.jackson.core.exc.StreamConstraintsException; import com.fasterxml.jackson.core.util.InternCache; @@ -114,6 +116,14 @@ public final class CharsToNameCanonicalizer */ protected final AtomicReference _tableInfo; + /** + * Constraints used by {@link TokenStreamFactory} that uses + * this canonicalizer. + * + * @since 2.16 + */ + protected final StreamReadConstraints _streamReadConstraints; + /** * Seed value we use as the base to make hash codes non-static between * different runs, but still stable for lifetime of a single symbol table @@ -125,7 +135,11 @@ public final class CharsToNameCanonicalizer */ protected final int _seed; - protected final int _flags; + /** + * Feature flags of {@link TokenStreamFactory} that uses + * this canonicalizer. + */ + protected final int _factoryFeatures; /** * Whether any canonicalization should be attempted (whether using @@ -229,14 +243,16 @@ public final class CharsToNameCanonicalizer /** * Main method for constructing a root symbol table instance. */ - private CharsToNameCanonicalizer(int seed) + private CharsToNameCanonicalizer(StreamReadConstraints src, int factoryFeatures, + int seed) { _parent = null; _seed = seed; + _streamReadConstraints = src; // these settings don't really matter for the bootstrap instance _canonicalize = true; - _flags = -1; + _factoryFeatures = factoryFeatures; // And we'll also set flags so no copying of buckets is needed: _hashShared = false; // doesn't really matter for root instance _longestCollisionList = 0; @@ -249,14 +265,16 @@ private CharsToNameCanonicalizer(int seed) /** * Internal constructor used when creating child instances. */ - private CharsToNameCanonicalizer(CharsToNameCanonicalizer parent, int flags, int seed, + private CharsToNameCanonicalizer(CharsToNameCanonicalizer parent, + StreamReadConstraints src, int factoryFeatures, int seed, TableInfo parentState) { _parent = parent; + _streamReadConstraints = src; _seed = seed; _tableInfo = null; // not used by child tables - _flags = flags; - _canonicalize = JsonFactory.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(flags); + _factoryFeatures = factoryFeatures; + _canonicalize = JsonFactory.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(factoryFeatures); // Then copy shared state _symbols = parentState.symbols; @@ -282,25 +300,61 @@ private CharsToNameCanonicalizer(CharsToNameCanonicalizer parent, int flags, int /********************************************************** */ + /** + * @deprecated Since 2.16 use {@link #createRoot(TokenStreamFactory)} instead + * + * @return Root instance to use for constructing new child instances + */ + @Deprecated + public static CharsToNameCanonicalizer createRoot() { + return createRoot(null); + } + + /** + * @deprecated Since 2.16 use {@link #createRoot(TokenStreamFactory)} instead + * + * @return Root instance to use for constructing new child instances + */ + @Deprecated + public static CharsToNameCanonicalizer createRoot(int seed) { + return createRoot(null, seed); + } + /** * Method called to create root canonicalizer for a {@link com.fasterxml.jackson.core.JsonFactory} * instance. Root instance is never used directly; its main use is for * storing and sharing underlying symbol arrays as needed. * + * @param owner Factory that will use the root instance; used for accessing + * configuration + * * @return Root instance to use for constructing new child instances */ - public static CharsToNameCanonicalizer createRoot() { + public static CharsToNameCanonicalizer createRoot(TokenStreamFactory owner) { + return createRoot(owner, 0); + } + + public static CharsToNameCanonicalizer createRoot(TokenStreamFactory owner, int seed) { // Need to use a variable seed, to thwart hash-collision based attacks. // 14-Feb-2017, tatu: not sure it actually helps, at all, since it won't // change mixing or any of the steps. Should likely just remove in future. - long now = System.currentTimeMillis(); - // ensure it's not 0; and might as well require to be odd so: - int seed = (((int) now) + ((int) (now >>> 32))) | 1; - return createRoot(seed); - } + if (seed == 0) { + long now = System.currentTimeMillis(); + // ensure it's not 0; and might as well require to be odd so: + seed = (((int) now) + ((int) (now >>> 32))) | 1; + } - protected static CharsToNameCanonicalizer createRoot(int seed) { - return new CharsToNameCanonicalizer(seed); + StreamReadConstraints src; + int factoryFeatures; + + if (owner == null) { + src = StreamReadConstraints.defaults(); + factoryFeatures = 0; + } else { + src = owner.streamReadConstraints(); + factoryFeatures = owner.getFactoryFeatures(); + } + return new CharsToNameCanonicalizer(src, factoryFeatures, seed); } /** @@ -315,14 +369,21 @@ protected static CharsToNameCanonicalizer createRoot(int seed) { * on which only makeChild/mergeChild are called, but instance itself * is not used as a symbol table. * - * @param flags Bit flags of active {@link com.fasterxml.jackson.core.JsonFactory.Feature}s enabled. - * * @return Actual canonicalizer instance that can be used by a parser */ - public CharsToNameCanonicalizer makeChild(int flags) { - return new CharsToNameCanonicalizer(this, flags, _seed, _tableInfo.get()); + public CharsToNameCanonicalizer makeChild() { + return new CharsToNameCanonicalizer(this, _streamReadConstraints, + _factoryFeatures, _seed, _tableInfo.get()); } + /** + * @deprecated Since 2.16 use {@link #makeChild()} instead. + */ + @Deprecated + public CharsToNameCanonicalizer makeChild(int flags) { + return makeChild(); + } + /** * Method called by the using code to indicate it is done with this instance. * This lets instance merge accumulated changes into parent (if need be), @@ -502,7 +563,7 @@ private String _addSymbol(char[] buffer, int start, int len, int h, int index) t } String newSymbol = new String(buffer, start, len); - if (JsonFactory.Feature.INTERN_FIELD_NAMES.enabledIn(_flags)) { + if (JsonFactory.Feature.INTERN_FIELD_NAMES.enabledIn(_factoryFeatures)) { newSymbol = InternCache.instance.intern(newSymbol); } ++_size; @@ -540,7 +601,7 @@ private void _handleSpillOverflow(int bucketIndex, Bucket newBucket, int mainInd } else { if (_overflows.get(bucketIndex)) { // Has happened once already for this bucket index, so probably not coincidental... - if (JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW.enabledIn(_flags)) { + if (JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW.enabledIn(_factoryFeatures)) { _reportTooManyCollisions(MAX_COLL_CHAIN_LENGTH); } // but even if we don't fail, we will stop canonicalizing as safety measure diff --git a/src/test/java/com/fasterxml/jackson/core/TestVersions.java b/src/test/java/com/fasterxml/jackson/core/TestVersions.java index db1f2179b5..6df0ca7048 100644 --- a/src/test/java/com/fasterxml/jackson/core/TestVersions.java +++ b/src/test/java/com/fasterxml/jackson/core/TestVersions.java @@ -13,14 +13,15 @@ public class TestVersions extends com.fasterxml.jackson.core.BaseTest { public void testCoreVersions() throws Exception { - assertVersion(new JsonFactory().version()); - ReaderBasedJsonParser jp = new ReaderBasedJsonParser(getIOContext(), 0, null, null, - CharsToNameCanonicalizer.createRoot()); - assertVersion(jp.version()); - jp.close(); - JsonGenerator jgen = new WriterBasedJsonGenerator(getIOContext(), 0, null, null, '"'); - assertVersion(jgen.version()); - jgen.close(); + final JsonFactory f = new JsonFactory(); + assertVersion(f.version()); + try (ReaderBasedJsonParser p = new ReaderBasedJsonParser(getIOContext(), 0, null, null, + CharsToNameCanonicalizer.createRoot(f))) { + assertVersion(p.version()); + } + try (JsonGenerator g = new WriterBasedJsonGenerator(getIOContext(), 0, null, null, '"')) { + assertVersion(g.version()); + } } public void testMisc() { diff --git a/src/test/java/com/fasterxml/jackson/core/sym/SymbolTableMergingTest.java b/src/test/java/com/fasterxml/jackson/core/sym/SymbolTableMergingTest.java index c683449477..a2f1acd64d 100644 --- a/src/test/java/com/fasterxml/jackson/core/sym/SymbolTableMergingTest.java +++ b/src/test/java/com/fasterxml/jackson/core/sym/SymbolTableMergingTest.java @@ -48,7 +48,7 @@ public void testByteSymbolsWithEOF() throws Exception public void testHashCalc() throws Exception { - CharsToNameCanonicalizer sym = CharsToNameCanonicalizer.createRoot(123); + CharsToNameCanonicalizer sym = CharsToNameCanonicalizer.createRoot(new JsonFactory()); char[] str1 = "foo".toCharArray(); char[] str2 = " foo ".toCharArray(); diff --git a/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolTables.java b/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolTables.java index 57d0ff32dd..9b41b855bd 100644 --- a/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolTables.java +++ b/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolTables.java @@ -13,12 +13,14 @@ */ public class TestSymbolTables extends com.fasterxml.jackson.core.BaseTest { + private final static JsonFactory JSON_F = new JsonFactory(); + // Test for verifying stability of hashCode, wrt collisions, using // synthetic field name generation and character-based input public void testSyntheticWithChars() throws IOException { // pass seed, to keep results consistent: - CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1).makeChild(-1); + CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(JSON_F, -1).makeChild(); final int COUNT = 12000; for (int i = 0; i < COUNT; ++i) { String id = fieldNameFor(i); @@ -35,7 +37,7 @@ public void testSyntheticWithChars() throws IOException // holy guacamoley... there are way too many. 31 gives 3567 (!), 33 gives 2747 // ... at least before shuffling. Shuffling helps quite a lot, so: - assertEquals(3431, symbols.collisionCount()); + assertEquals(3417, symbols.collisionCount()); assertEquals(6, symbols.maxCollisionLength()); @@ -74,14 +76,12 @@ public void testSyntheticWithBytesNew() throws IOException // [Issue#145] public void testThousandsOfSymbolsWithChars() throws IOException { - final int SEED = 33333; - - CharsToNameCanonicalizer symbolsCRoot = CharsToNameCanonicalizer.createRoot(SEED); + CharsToNameCanonicalizer symbolsCRoot = CharsToNameCanonicalizer.createRoot(JSON_F); int exp = 0; for (int doc = 0; doc < 100; ++doc) { CharsToNameCanonicalizer symbolsC = - symbolsCRoot.makeChild(JsonFactory.Feature.collectDefaults()); + symbolsCRoot.makeChild(); for (int i = 0; i < 250; ++i) { String name = "f_"+doc+"_"+i; char[] ch = name.toCharArray(); @@ -191,7 +191,7 @@ private ByteQuadsCanonicalizer _findSymbols(JsonParser p) throws Exception // [core#187]: unexpectedly high number of collisions for straight numbers public void testCollisionsWithChars187() throws IOException { - CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1).makeChild(-1); + CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(JSON_F, -1).makeChild(); final int COUNT = 30000; for (int i = 0; i < COUNT; ++i) { String id = String.valueOf(10000 + i); @@ -202,9 +202,9 @@ public void testCollisionsWithChars187() throws IOException assertEquals(65536, symbols.bucketCount()); // collision count rather high, but has to do - assertEquals(7127, symbols.collisionCount()); - // as well as collision counts - assertEquals(4, symbols.maxCollisionLength()); + assertEquals(7194, symbols.collisionCount()); + // as well as max collision chain length + assertEquals(5, symbols.maxCollisionLength()); } // [core#187]: unexpectedly high number of collisions for straight numbers @@ -304,7 +304,7 @@ public void testShortQuotedDirectChars() throws IOException { final int COUNT = 400; - CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1).makeChild(-1); + CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(JSON_F, -1).makeChild(); for (int i = 0; i < COUNT; ++i) { String id = String.format("\\u%04x", i); char[] ch = id.toCharArray(); @@ -313,7 +313,7 @@ public void testShortQuotedDirectChars() throws IOException assertEquals(COUNT, symbols.size()); assertEquals(1024, symbols.bucketCount()); - assertEquals(50, symbols.collisionCount()); + assertEquals(54, symbols.collisionCount()); assertEquals(2, symbols.maxCollisionLength()); } @@ -343,7 +343,7 @@ public void testShortNameCollisionsDirect() throws IOException // First, char-based { - CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(1).makeChild(-1); + CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(JSON_F, -1).makeChild(); for (int i = 0; i < COUNT; ++i) { String id = String.valueOf((char) i); char[] ch = id.toCharArray(); @@ -352,7 +352,7 @@ public void testShortNameCollisionsDirect() throws IOException assertEquals(COUNT, symbols.size()); assertEquals(1024, symbols.bucketCount()); - assertEquals(16, symbols.collisionCount()); + assertEquals(24, symbols.collisionCount()); assertEquals(1, symbols.maxCollisionLength()); } } @@ -389,7 +389,7 @@ public void testLongSymbols17Bytes() throws Exception { ByteQuadsCanonicalizer symbolsB = ByteQuadsCanonicalizer.createRoot(3).makeChild(JsonFactory.Feature.collectDefaults()); - CharsToNameCanonicalizer symbolsC = CharsToNameCanonicalizer.createRoot(3).makeChild(-1); + CharsToNameCanonicalizer symbolsC = CharsToNameCanonicalizer.createRoot(JSON_F, 3).makeChild(); for (int i = 1001; i <= 1050; ++i) { String id = "lengthmatters"+i; diff --git a/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolsWithMediaItem.java b/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolsWithMediaItem.java index 14d91818b2..ab3d3465bf 100644 --- a/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolsWithMediaItem.java +++ b/src/test/java/com/fasterxml/jackson/core/sym/TestSymbolsWithMediaItem.java @@ -67,8 +67,8 @@ public void testSmallSymbolSetWithChars() throws IOException { final int SEED = 33333; - CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(SEED).makeChild(-1); JsonFactory f = new JsonFactory(); + CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot(f, SEED).makeChild(); JsonParser p = f.createParser(JSON); JsonToken t;