From f2b428f4d3d0d5fa1fcb3e526e7dccb1e41ff3fb Mon Sep 17 00:00:00 2001 From: mepan Date: Fri, 14 Aug 2020 00:51:38 +0000 Subject: [PATCH 1/4] Develop the Proto for ParseToken and ParseTokenOptions --- zetasql/public/BUILD | 15 +++++++ zetasql/public/parse_tokens.h | 65 +++++++++++++++++++++++++++ zetasql/public/parse_tokens.proto | 61 +++++++++++++++++++++++++ zetasql/public/parse_tokens_test.cc | 70 +++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+) create mode 100644 zetasql/public/parse_tokens.proto diff --git a/zetasql/public/BUILD b/zetasql/public/BUILD index 889876b56..fd34c5cb8 100644 --- a/zetasql/public/BUILD +++ b/zetasql/public/BUILD @@ -779,6 +779,20 @@ java_proto_library( deps = [":parse_location_range_proto"], ) +proto_library( + name = "parse_tokens_proto", + srcs = ["parse_tokens.proto"], + deps = [ + ":parse_location_range_proto", + ":value_proto", + ], +) + +cc_proto_library( + name = "parse_tokens_cc_proto", + deps = [":parse_tokens_proto"], +) + proto_library( name = "type_proto", srcs = ["type.proto"], @@ -1624,6 +1638,7 @@ cc_library( "//zetasql/parser:keywords", "//zetasql/public/functions:convert_string", "//zetasql/resolved_ast:resolved_node_kind_cc_proto", + "//zetasql/public:parse_tokens_cc_proto", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", diff --git a/zetasql/public/parse_tokens.h b/zetasql/public/parse_tokens.h index 90f44532e..8b0b1f1b4 100644 --- a/zetasql/public/parse_tokens.h +++ b/zetasql/public/parse_tokens.h @@ -25,6 +25,7 @@ #include "zetasql/public/value.h" #include "absl/strings/string_view.h" #include "zetasql/base/status.h" +#include "zetasql/public/parse_tokens.pb.h" namespace zetasql { @@ -119,6 +120,32 @@ class ParseToken { // Returns the location of the token in the input. ParseLocationRange GetLocationRange() const { return location_range_; } + // Convert the token into its proto. 
+ zetasql_base::StatusOr ToProto() const { + ParseTokenProto token_proto; + + // Create a location range proto from the field location_range_. Transform the proto into heap and + // uses a pointer to manage it. Then assign the pointer to the token proto, which will release the + // assigned location range proto when the token proto is about to be released. + auto status_or_location_range_proto = location_range_.ToProto(); + if (!status_or_location_range_proto.ok()) { + return status_or_location_range_proto.status(); + } + auto range_proto = new ParseLocationRangeProto(status_or_location_range_proto.value()); + token_proto.set_allocated_parse_location_range(range_proto); + + // New a value proto in heap, assign the fields of the value_ field to the proto, + // and transfer the ownership of the value proto to the token proto. + auto value_proto = new ValueProto(); + if (value_.is_valid()) { + ZETASQL_RETURN_IF_ERROR(value_.Serialize(value_proto)); + token_proto.set_allocated_value(value_proto); + } + token_proto.set_image(image_); + token_proto.set_kind(serialize_kind(kind_)); + return token_proto; + } + // The declarations below are intended for internal use. enum Kind { @@ -146,6 +173,26 @@ class ParseToken { ParseLocationRange location_range_; Value value_; + // Convert a Token Kind into its proto form. It is used by the ToProto method that converts + // a token to its proto. 
+ static ParseTokenProto_Kind serialize_kind(const ParseToken::Kind kind) { + using zetasql::ParseToken; + switch (kind) { + case ParseToken::Kind::KEYWORD: + return ParseTokenProto_Kind::ParseTokenProto_Kind_KEYWORD; + case ParseToken::IDENTIFIER: + return ParseTokenProto_Kind::ParseTokenProto_Kind_IDENTIFIER; + case ParseToken::IDENTIFIER_OR_KEYWORD: + return ParseTokenProto_Kind::ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD; + case ParseToken::VALUE: + return ParseTokenProto_Kind::ParseTokenProto_Kind_VALUE; + case ParseToken::COMMENT: + return ParseTokenProto_Kind::ParseTokenProto_Kind_COMMENT; + case ParseToken::END_OF_INPUT: + return ParseTokenProto_Kind::ParseTokenProto_Kind_END_OF_INPUT; + } + } + // Copyable }; @@ -160,6 +207,24 @@ struct ParseTokenOptions { // Return the comments in the ParseToken vector or silently drop them. bool include_comments = false; + + // Convert the token options into its proto. + ParseTokenOptionsProto ToProto() const { + ParseTokenOptionsProto options_proto; + options_proto.set_max_tokens(max_tokens); + options_proto.set_stop_at_end_of_statement(stop_at_end_of_statement); + options_proto.set_include_comments(include_comments); + return options_proto; + } + + // Create a ParseTokenOption object from its proto. 
+ static ParseTokenOptions FromProto(const ParseTokenOptionsProto& proto) { + ParseTokenOptions options; + options.max_tokens = proto.max_tokens(); + options.stop_at_end_of_statement = proto.stop_at_end_of_statement(); + options.include_comments = proto.include_comments(); + return options; + } }; // Gets a vector of ParseTokens starting from , and updates diff --git a/zetasql/public/parse_tokens.proto b/zetasql/public/parse_tokens.proto new file mode 100644 index 000000000..09ded785f --- /dev/null +++ b/zetasql/public/parse_tokens.proto @@ -0,0 +1,61 @@ +// +// Copyright 2020 ZetaSQL Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto2"; + +package zetasql; + +import "zetasql/public/parse_location_range.proto"; +import "zetasql/public/value.proto"; + +option java_package = "com.google.zetasql"; +option java_outer_classname = "ZetaSqlParseTokenProtos"; + +message ParseTokenProto { + + enum Kind { + KEYWORD = 0; // A zetasql keyword or symbol. + IDENTIFIER = 1; // An identifier that was quoted. + IDENTIFIER_OR_KEYWORD = 2; // An unquoted identifier. + VALUE = 3; // A literal value. + COMMENT = 4; // A comment. + END_OF_INPUT = 5; // The end of the input string was reached. + }; + + // The image of a token. + optional string image = 1; + // The kind of a token. + optional Kind kind = 2; + // The start and end position of a token. + optional zetasql.ParseLocationRangeProto parse_location_range = 3; + // The value of a token. 
This field is filled when Kind is VALUE. + optional zetasql.ValueProto value = 4; +} + +message ParseTokenOptionsProto { + // Return at most this many tokens (only if positive). It is not possible to + // resume a GetParseTokens() call for which max_tokens was set. + optional int32 max_tokens = 1 [default = 0]; + + // Stop parsing after a ";" token. The last token returned will be either + // a ";" or an EOF. + optional bool stop_at_end_of_statement = 2 [default = false]; + + // Return the comments in the ParseToken vector or silently drop them. + optional bool include_comments = 3 [default = false]; +} + + diff --git a/zetasql/public/parse_tokens_test.cc b/zetasql/public/parse_tokens_test.cc index dd877ce59..d45324d76 100644 --- a/zetasql/public/parse_tokens_test.cc +++ b/zetasql/public/parse_tokens_test.cc @@ -250,4 +250,74 @@ TEST(GetNextTokensTest, ResumeLocationIsAdjustedOnError) { EXPECT_EQ(location.byte_position(), 0); } +static void checkEqualBetweenTokenAndProto(const ParseToken& token, ParseTokenProto_Kind proto_kind) { + auto status_or_proto = token.ToProto(); + + // Confirm the ToProto of the token runs successfully and assign the proto as a local variable. + EXPECT_TRUE(status_or_proto.ok()); + auto proto = status_or_proto.value(); + + EXPECT_EQ(proto_kind, proto.kind()); + EXPECT_EQ(token.GetImage(), proto.image()); + + // Check whether the Location Range objects in both token and its proto are identical. + EXPECT_EQ(token.GetLocationRange().start().GetByteOffset(), proto.parse_location_range().start()); + EXPECT_EQ(token.GetLocationRange().end().GetByteOffset(), proto.parse_location_range().end()); + EXPECT_EQ(token.GetLocationRange().start().filename(), proto.parse_location_range().filename()); + + // If the token has no valid value, then return directly. + if (!token.GetValue().is_valid()) { + return; + } + // Check whether the values at token and its proto are identical. 
+ switch (token.GetValue().type_kind()) { + case TypeKind::TYPE_BOOL: + EXPECT_EQ(token.GetValue().bool_value(), proto.value().bool_value()); + break; + case TypeKind::TYPE_STRING: + EXPECT_EQ(token.GetValue().string_value(), proto.value().string_value()); + break; + case TypeKind::TYPE_NUMERIC: + EXPECT_EQ(token.GetValue().numeric_value().ToString(), proto.value().numeric_value()); + break; + } +} + +TEST(GetNextTokensTest, ParseTokenToProto) { + ParseTokenOptions options; + std::vector parse_tokens; + + ParseResumeLocation location = ParseResumeLocation::FromString("SELECT 'abc' 1 \ntrue `HASH`\n foo"); + // Check error in bison tokenizer. + ZETASQL_ASSERT_OK(GetParseTokens(options, &location, &parse_tokens)); + EXPECT_EQ(7, parse_tokens.size()); + + // Check individual tokens. + checkEqualBetweenTokenAndProto(parse_tokens[0], ParseTokenProto_Kind_KEYWORD); + checkEqualBetweenTokenAndProto(parse_tokens[1], ParseTokenProto_Kind_VALUE); + checkEqualBetweenTokenAndProto(parse_tokens[2], ParseTokenProto_Kind_VALUE); + checkEqualBetweenTokenAndProto(parse_tokens[3], ParseTokenProto_Kind_KEYWORD); + checkEqualBetweenTokenAndProto(parse_tokens[4], ParseTokenProto_Kind_IDENTIFIER); + checkEqualBetweenTokenAndProto(parse_tokens[5], ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD); + checkEqualBetweenTokenAndProto(parse_tokens[6], ParseTokenProto_Kind_END_OF_INPUT); + +} + +TEST(GetNextTokensTest, ConvertBetweenParseTokenOptionsAndProto) { + ParseTokenOptions options; + options.max_tokens = 100; + options.include_comments = true; + options.stop_at_end_of_statement = true; + + auto proto = options.ToProto(); + EXPECT_EQ(100, proto.max_tokens()); + EXPECT_EQ(true, proto.include_comments()); + EXPECT_EQ(true, proto.stop_at_end_of_statement()); + + options = ParseTokenOptions::FromProto(proto); + EXPECT_EQ(100, options.max_tokens); + EXPECT_EQ(true, options.include_comments); + EXPECT_EQ(true, options.stop_at_end_of_statement); +} + } // namespace zetasql From 
a89ec683761e7dc36360cba74d839097f5b8c20e Mon Sep 17 00:00:00 2001 From: mepan Date: Fri, 14 Aug 2020 02:26:59 +0000 Subject: [PATCH 2/4] Integrate the GetParseTokens into the gRPC service --- zetasql/local_service/BUILD | 4 +++ zetasql/local_service/local_service.cc | 19 ++++++++++++ zetasql/local_service/local_service.h | 3 ++ zetasql/local_service/local_service.proto | 14 +++++++++ zetasql/local_service/local_service_grpc.cc | 6 ++++ zetasql/local_service/local_service_grpc.h | 6 +++- zetasql/local_service/local_service_test.cc | 32 +++++++++++++++++++++ 7 files changed, 83 insertions(+), 1 deletion(-) diff --git a/zetasql/local_service/BUILD b/zetasql/local_service/BUILD index 2ea034a57..ed25a37a1 100644 --- a/zetasql/local_service/BUILD +++ b/zetasql/local_service/BUILD @@ -53,8 +53,10 @@ cc_library( "//zetasql/public:function", "//zetasql/public:id_string", "//zetasql/public:language_options", + "//zetasql/public:parse_helpers", "//zetasql/public:parse_resume_location", "//zetasql/public:parse_resume_location_cc_proto", + "//zetasql/public:parse_tokens_cc_proto", "//zetasql/public:simple_catalog", "//zetasql/public:sql_formatter", "//zetasql/public:templated_sql_tvf", @@ -98,6 +100,7 @@ cc_test( "//zetasql/proto:function_cc_proto", "//zetasql/proto:simple_catalog_cc_proto", "//zetasql/public:parse_resume_location_cc_proto", + "//zetasql/public:parse_tokens_cc_proto", "//zetasql/public:simple_catalog", "//zetasql/public:simple_table_cc_proto", "//zetasql/public:type", @@ -145,6 +148,7 @@ proto_library( "//zetasql/proto:simple_catalog_proto", "//zetasql/public:options_proto", "//zetasql/public:parse_resume_location_proto", + "//zetasql/public:parse_tokens_proto", "//zetasql/public:simple_table_proto", "//zetasql/public:type_proto", "//zetasql/public:value_proto", diff --git a/zetasql/local_service/local_service.cc b/zetasql/local_service/local_service.cc index 06901a966..b30d6fe0b 100644 --- a/zetasql/local_service/local_service.cc +++ 
b/zetasql/local_service/local_service.cc @@ -34,6 +34,7 @@ #include "zetasql/public/function.h" #include "zetasql/public/id_string.h" #include "zetasql/public/language_options.h" +#include "zetasql/public/parse_tokens.h" #include "zetasql/public/simple_catalog.h" #include "zetasql/public/sql_formatter.h" #include "zetasql/public/table_from_proto.h" @@ -807,6 +808,24 @@ absl::Status ZetaSqlLocalServiceImpl::GetLanguageOptions( return absl::OkStatus(); } +absl::Status ZetaSqlLocalServiceImpl::GetParseTokens(const GetParseTokensRequest &request, GetParseTokensResponse *response) { + auto options = ParseTokenOptions::FromProto(request.options()); + auto resume_location = ParseResumeLocation::FromProto(request.resume_location()); + std::vector tokens; + ZETASQL_RETURN_IF_ERROR(::zetasql::GetParseTokens(options, &resume_location, &tokens)); + + for (auto& token : tokens) { + auto status_or_token_proto = token.ToProto(); + // Return error if a token cannot be converted to a token proto. + if (!status_or_token_proto.ok()) { + return status_or_token_proto.status(); + } + response->add_tokens()->CopyFrom(status_or_token_proto.value()); + } + + return absl::OkStatus(); +} + size_t ZetaSqlLocalServiceImpl::NumSavedPreparedExpression() const { return prepared_expressions_->NumSavedStates(); } diff --git a/zetasql/local_service/local_service.h b/zetasql/local_service/local_service.h index 4a053930f..e76a186cb 100644 --- a/zetasql/local_service/local_service.h +++ b/zetasql/local_service/local_service.h @@ -112,6 +112,9 @@ class ZetaSqlLocalServiceImpl { absl::Status GetLanguageOptions(const LanguageOptionsRequest& request, LanguageOptionsProto* response); + absl::Status GetParseTokens(const GetParseTokensRequest& request, + GetParseTokensResponse* response); + private: std::unique_ptr registered_catalogs_; std::unique_ptr prepared_expressions_; diff --git a/zetasql/local_service/local_service.proto b/zetasql/local_service/local_service.proto index b9c036f4f..f0e2739da 100644 --- 
a/zetasql/local_service/local_service.proto +++ b/zetasql/local_service/local_service.proto @@ -25,6 +25,7 @@ import "zetasql/proto/options.proto"; import "zetasql/proto/simple_catalog.proto"; import "zetasql/public/options.proto"; import "zetasql/public/parse_resume_location.proto"; +import "zetasql/public/parse_tokens.proto"; import "zetasql/public/simple_table.proto"; import "zetasql/public/type.proto"; import "zetasql/public/value.proto"; @@ -139,6 +140,10 @@ service ZetaSqlLocalService { rpc GetLanguageOptions(LanguageOptionsRequest) returns (LanguageOptionsProto) { } + + // Tokenize an input query (enclosed in ParseResumeLocation) and return its parse tokens. + rpc GetParseTokens(GetParseTokensRequest) returns (GetParseTokensResponse) { + } } message PrepareRequest { @@ -330,3 +335,12 @@ message LanguageOptionsRequest { optional bool maximum_features = 1; optional LanguageVersion language_version = 2; } + +message GetParseTokensRequest { + optional ParseTokenOptionsProto options = 1; + optional ParseResumeLocationProto resume_location = 2; +} + +message GetParseTokensResponse { + repeated ParseTokenProto tokens = 1; +} diff --git a/zetasql/local_service/local_service_grpc.cc b/zetasql/local_service/local_service_grpc.cc index 704617bed..85469dff6 100644 --- a/zetasql/local_service/local_service_grpc.cc +++ b/zetasql/local_service/local_service_grpc.cc @@ -184,5 +184,11 @@ grpc::Status ZetaSqlLocalServiceGrpcImpl::GetLanguageOptions( return ToGrpcStatus(service_.GetLanguageOptions(*req, resp)); } + grpc::Status ZetaSqlLocalServiceGrpcImpl::GetParseTokens(grpc::ServerContext *context, + const GetParseTokensRequest *req, + GetParseTokensResponse *resp) { + return ToGrpcStatus(service_.GetParseTokens(*req, resp)); +} + } // namespace local_service } // namespace zetasql diff --git a/zetasql/local_service/local_service_grpc.h b/zetasql/local_service/local_service_grpc.h index ad8f26b33..4889fbcff 100644 --- a/zetasql/local_service/local_service_grpc.h +++ 
b/zetasql/local_service/local_service_grpc.h @@ -97,7 +97,11 @@ class ZetaSqlLocalServiceGrpcImpl const LanguageOptionsRequest* req, LanguageOptionsProto* resp) override; - private: + grpc::Status GetParseTokens(grpc::ServerContext *context, + const GetParseTokensRequest* req, + GetParseTokensResponse* resp) override; + +private: ZetaSqlLocalServiceImpl service_; }; diff --git a/zetasql/local_service/local_service_test.cc b/zetasql/local_service/local_service_test.cc index 9cda8c69f..6bdd392db 100644 --- a/zetasql/local_service/local_service_test.cc +++ b/zetasql/local_service/local_service_test.cc @@ -140,6 +140,12 @@ class ZetaSqlLocalServiceImplTest : public ::testing::Test { return service_.GetBuiltinFunctions(proto, response); } + absl::Status GetParseTokens( + const GetParseTokensRequest& request, + GetParseTokensResponse* response) { + return service_.GetParseTokens(request, response); + } + ZetaSqlLocalServiceImpl service_; google::protobuf::compiler::DiskSourceTree source_tree_; std::unique_ptr proto_importer_; @@ -1188,5 +1194,31 @@ TEST_F(ZetaSqlLocalServiceImplTest, GetBuiltinFunctions) { EXPECT_EQ(function2.DebugString(), response.function(1).DebugString()); } +TEST_F(ZetaSqlLocalServiceImplTest, GetParseTokens) { + GetParseTokensRequest request; + // ParseTokenOptionsProto has the default value as same as the ParseTokenOptions. + // Therefore, it can be created directly without setting values. + request.set_allocated_options(new ParseTokenOptionsProto()); + + // Create ResumeLocation object and convert it to its proto. + // Then assign the ResumeLocationProto to the Request. 
+ auto resume_location = ParseResumeLocation::FromString( + "some_filename", + "Select foo from bar"); + auto resume_location_proto = new ParseResumeLocationProto(); + resume_location.Serialize(resume_location_proto); + request.set_allocated_resume_location(resume_location_proto); + + GetParseTokensResponse response; + ZETASQL_EXPECT_OK(GetParseTokens(request, &response)); + + EXPECT_EQ(5, response.tokens().size()); + EXPECT_EQ("Select", response.tokens().Get(0).image()); + EXPECT_EQ("foo", response.tokens().Get(1).image()); + EXPECT_EQ("from", response.tokens().Get(2).image()); + EXPECT_EQ("bar", response.tokens().Get(3).image()); + EXPECT_EQ(ParseTokenProto_Kind_END_OF_INPUT, response.tokens().Get(4).kind()); +} + } // namespace local_service } // namespace zetasql From 5dbefa4eeeb868378d1caae9ed982daaeb8be4b8 Mon Sep 17 00:00:00 2001 From: mepan Date: Fri, 14 Aug 2020 20:06:17 +0000 Subject: [PATCH 3/4] Integrate Type into TokenProto --- zetasql/public/BUILD | 8 +++++- zetasql/public/parse_tokens.h | 46 +++++++++++++++++-------------- zetasql/public/parse_tokens.proto | 3 ++ 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/zetasql/public/BUILD b/zetasql/public/BUILD index fd34c5cb8..d234d025a 100644 --- a/zetasql/public/BUILD +++ b/zetasql/public/BUILD @@ -784,6 +784,7 @@ proto_library( srcs = ["parse_tokens.proto"], deps = [ ":parse_location_range_proto", + ":type_proto", ":value_proto", ], ) @@ -793,6 +794,11 @@ cc_proto_library( deps = [":parse_tokens_proto"], ) +java_proto_library( + name = "parse_tokens_java_proto", + deps = [":parse_tokens_proto"], +) + proto_library( name = "type_proto", srcs = ["type.proto"], @@ -1636,9 +1642,9 @@ cc_library( "//zetasql/parser", "//zetasql/parser:bison_parser_generated_lib", "//zetasql/parser:keywords", + "//zetasql/public:parse_tokens_cc_proto", "//zetasql/public/functions:convert_string", "//zetasql/resolved_ast:resolved_node_kind_cc_proto", - "//zetasql/public:parse_tokens_cc_proto", 
"@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", diff --git a/zetasql/public/parse_tokens.h b/zetasql/public/parse_tokens.h index 8b0b1f1b4..10d6462e8 100644 --- a/zetasql/public/parse_tokens.h +++ b/zetasql/public/parse_tokens.h @@ -122,28 +122,34 @@ class ParseToken { // Convert the token into its proto. zetasql_base::StatusOr ToProto() const { - ParseTokenProto token_proto; - - // Create a location range proto from the field location_range_. Transform the proto into heap and - // uses a pointer to manage it. Then assign the pointer to the token proto, which will release the - // assigned location range proto when the token proto is about to be released. - auto status_or_location_range_proto = location_range_.ToProto(); - if (!status_or_location_range_proto.ok()) { - return status_or_location_range_proto.status(); - } - auto range_proto = new ParseLocationRangeProto(status_or_location_range_proto.value()); - token_proto.set_allocated_parse_location_range(range_proto); + ParseTokenProto token_proto; + + // Create a location range proto from the field location_range_. Transform the proto into heap and + // uses a pointer to manage it. Then assign the pointer to the token proto, which will release the + // assigned location range proto when the token proto is about to be released. + auto status_or_location_range_proto = location_range_.ToProto(); + if (!status_or_location_range_proto.ok()) { + return status_or_location_range_proto.status(); + } + auto range_proto = new ParseLocationRangeProto(status_or_location_range_proto.value()); + token_proto.set_allocated_parse_location_range(range_proto); + + // New a value proto in heap and immediately assign it to the token proto, so that + // the token proto can manage the memory of the value proto. 
+ // Besides assigning the value proto, a type proto is created and assigned, because + // it is required to identify the type of value when the proto is deserialized. + if (value_.is_valid()) { + auto type_proto = new TypeProto(); + token_proto.set_allocated_type(type_proto); + ZETASQL_RETURN_IF_ERROR(value_.type()->SerializeToSelfContainedProto(type_proto)); - // New a value proto in heap, assign the fields of the value_ field to the proto, - // and transfer the ownership of the value proto to the token proto. auto value_proto = new ValueProto(); - if (value_.is_valid()) { - ZETASQL_RETURN_IF_ERROR(value_.Serialize(value_proto)); - token_proto.set_allocated_value(value_proto); - } - token_proto.set_image(image_); - token_proto.set_kind(serialize_kind(kind_)); - return token_proto; + token_proto.set_allocated_value(value_proto); + ZETASQL_RETURN_IF_ERROR(value_.Serialize(value_proto)); + } + token_proto.set_image(image_); + token_proto.set_kind(serialize_kind(kind_)); + return token_proto; } // The declarations below are intended for internal use. diff --git a/zetasql/public/parse_tokens.proto b/zetasql/public/parse_tokens.proto index 09ded785f..0fea72b7b 100644 --- a/zetasql/public/parse_tokens.proto +++ b/zetasql/public/parse_tokens.proto @@ -20,6 +20,7 @@ package zetasql; import "zetasql/public/parse_location_range.proto"; import "zetasql/public/value.proto"; +import "zetasql/public/type.proto"; option java_package = "com.google.zetasql"; option java_outer_classname = "ZetaSqlParseTokenProtos"; @@ -43,6 +44,8 @@ message ParseTokenProto { optional zetasql.ParseLocationRangeProto parse_location_range = 3; // The value of a token. This field is filled when Kind is VALUE. optional zetasql.ValueProto value = 4; + // The type of the value. It is needed to deserialize the value proto back to value. 
+ optional zetasql.TypeProto type = 5; } message ParseTokenOptionsProto { From 54febd61b6ffb7f2f638acf18cf8406b9f276d02 Mon Sep 17 00:00:00 2001 From: mepan Date: Fri, 14 Aug 2020 20:29:52 +0000 Subject: [PATCH 4/4] Refactor the parse token proto --- zetasql/local_service/local_service.cc | 30 ++--- zetasql/local_service/local_service_grpc.cc | 8 +- zetasql/local_service/local_service_grpc.h | 6 +- zetasql/local_service/local_service_test.cc | 46 ++++---- zetasql/public/parse_tokens.cc | 62 ++++++++++ zetasql/public/parse_tokens.h | 71 ++---------- zetasql/public/parse_tokens_test.cc | 120 ++++++++++---------- 7 files changed, 175 insertions(+), 168 deletions(-) diff --git a/zetasql/local_service/local_service.cc b/zetasql/local_service/local_service.cc index b30d6fe0b..2ebd51e5a 100644 --- a/zetasql/local_service/local_service.cc +++ b/zetasql/local_service/local_service.cc @@ -808,22 +808,24 @@ absl::Status ZetaSqlLocalServiceImpl::GetLanguageOptions( return absl::OkStatus(); } -absl::Status ZetaSqlLocalServiceImpl::GetParseTokens(const GetParseTokensRequest &request, GetParseTokensResponse *response) { - auto options = ParseTokenOptions::FromProto(request.options()); - auto resume_location = ParseResumeLocation::FromProto(request.resume_location()); - std::vector tokens; - ZETASQL_RETURN_IF_ERROR(::zetasql::GetParseTokens(options, &resume_location, &tokens)); - - for (auto& token : tokens) { - auto status_or_token_proto = token.ToProto(); - // Return error if a token cannot be converted to a token proto. 
- if (!status_or_token_proto.ok()) { - return status_or_token_proto.status(); - } - response->add_tokens()->CopyFrom(status_or_token_proto.value()); +absl::Status ZetaSqlLocalServiceImpl::GetParseTokens( + const GetParseTokensRequest &request, GetParseTokensResponse *response) { + + auto options = ParseTokenOptions::FromProto(request.options()); + auto resume_location = ParseResumeLocation::FromProto(request.resume_location()); + std::vector tokens; + ZETASQL_RETURN_IF_ERROR(::zetasql::GetParseTokens(options, &resume_location, &tokens)); + + for (auto& token : tokens) { + auto status_or_token_proto = token.ToProto(); + // Return error if a token cannot be converted to a token proto. + if (!status_or_token_proto.ok()) { + return status_or_token_proto.status(); } + response->add_tokens()->CopyFrom(status_or_token_proto.value()); + } - return absl::OkStatus(); + return absl::OkStatus(); } size_t ZetaSqlLocalServiceImpl::NumSavedPreparedExpression() const { diff --git a/zetasql/local_service/local_service_grpc.cc b/zetasql/local_service/local_service_grpc.cc index 85469dff6..259fef80f 100644 --- a/zetasql/local_service/local_service_grpc.cc +++ b/zetasql/local_service/local_service_grpc.cc @@ -184,10 +184,10 @@ grpc::Status ZetaSqlLocalServiceGrpcImpl::GetLanguageOptions( return ToGrpcStatus(service_.GetLanguageOptions(*req, resp)); } - grpc::Status ZetaSqlLocalServiceGrpcImpl::GetParseTokens(grpc::ServerContext *context, - const GetParseTokensRequest *req, - GetParseTokensResponse *resp) { - return ToGrpcStatus(service_.GetParseTokens(*req, resp)); +grpc::Status ZetaSqlLocalServiceGrpcImpl::GetParseTokens(grpc::ServerContext *context, + const GetParseTokensRequest *req, + GetParseTokensResponse *resp) { + return ToGrpcStatus(service_.GetParseTokens(*req, resp)); } } // namespace local_service diff --git a/zetasql/local_service/local_service_grpc.h b/zetasql/local_service/local_service_grpc.h index 4889fbcff..eb2709e1c 100644 --- 
a/zetasql/local_service/local_service_grpc.h +++ b/zetasql/local_service/local_service_grpc.h @@ -97,9 +97,9 @@ class ZetaSqlLocalServiceGrpcImpl const LanguageOptionsRequest* req, LanguageOptionsProto* resp) override; - grpc::Status GetParseTokens(grpc::ServerContext *context, - const GetParseTokensRequest* req, - GetParseTokensResponse* resp) override; + grpc::Status GetParseTokens(grpc::ServerContext *context, + const GetParseTokensRequest* req, + GetParseTokensResponse* resp) override; private: ZetaSqlLocalServiceImpl service_; diff --git a/zetasql/local_service/local_service_test.cc b/zetasql/local_service/local_service_test.cc index 6bdd392db..a846a299b 100644 --- a/zetasql/local_service/local_service_test.cc +++ b/zetasql/local_service/local_service_test.cc @@ -1195,29 +1195,29 @@ TEST_F(ZetaSqlLocalServiceImplTest, GetBuiltinFunctions) { } TEST_F(ZetaSqlLocalServiceImplTest, GetParseTokens) { - GetParseTokensRequest request; - // ParseTokenOptionsProto has the default value as same as the ParseTokenOptions. - // Therefore, it can be created directly without setting values. - request.set_allocated_options(new ParseTokenOptionsProto()); - - // Create ResumeLocation object and convert it to its proto. - // Then assign the ResumeLocationProto to the Request. 
- auto resume_location = ParseResumeLocation::FromString( - "some_filename", - "Select foo from bar"); - auto resume_location_proto = new ParseResumeLocationProto(); - resume_location.Serialize(resume_location_proto); - request.set_allocated_resume_location(resume_location_proto); - - GetParseTokensResponse response; - ZETASQL_EXPECT_OK(GetParseTokens(request, &response)); - - EXPECT_EQ(5, response.tokens().size()); - EXPECT_EQ("Select", response.tokens().Get(0).image()); - EXPECT_EQ("foo", response.tokens().Get(1).image()); - EXPECT_EQ("from", response.tokens().Get(2).image()); - EXPECT_EQ("bar", response.tokens().Get(3).image()); - EXPECT_EQ(ParseTokenProto_Kind_END_OF_INPUT, response.tokens().Get(4).kind()); + GetParseTokensRequest request; + // ParseTokenOptionsProto has the same default values as ParseTokenOptions, + // so a default-constructed proto can be used without setting any field. + request.set_allocated_options(new ParseTokenOptionsProto()); + + // Create a ParseResumeLocation and serialize it into its proto. + // Then hand the ParseResumeLocationProto over to the request. 
+ auto resume_location = ParseResumeLocation::FromString( + "some_filename", + "Select foo from bar"); + auto resume_location_proto = new ParseResumeLocationProto(); + resume_location.Serialize(resume_location_proto); + request.set_allocated_resume_location(resume_location_proto); + + GetParseTokensResponse response; + ZETASQL_EXPECT_OK(GetParseTokens(request, &response)); + + EXPECT_EQ(5, response.tokens().size()); + EXPECT_EQ("Select", response.tokens().Get(0).image()); + EXPECT_EQ("foo", response.tokens().Get(1).image()); + EXPECT_EQ("from", response.tokens().Get(2).image()); + EXPECT_EQ("bar", response.tokens().Get(3).image()); + EXPECT_EQ(ParseTokenProto_Kind_END_OF_INPUT, response.tokens().Get(4).kind()); } } // namespace local_service diff --git a/zetasql/public/parse_tokens.cc b/zetasql/public/parse_tokens.cc index b74b23c1e..d7c1d0859 100644 --- a/zetasql/public/parse_tokens.cc +++ b/zetasql/public/parse_tokens.cc @@ -384,4 +384,66 @@ ParseToken::ParseToken(ParseLocationRange location_range, std::string image, DCHECK(!value_.is_null()); } +zetasql_base::StatusOr ParseToken::ToProto() const { + ParseTokenProto token_proto; + + // Serialize location_range_ into a heap-allocated ParseLocationRangeProto and pass the pointer to + // set_allocated_parse_location_range(), which transfers ownership to the token proto. The token + // proto releases the range proto when it is itself destroyed, so no explicit delete is needed. + // NOTE(review): mutable_parse_location_range() may avoid the raw new and the extra copy; confirm. + auto status_or_location_range_proto = location_range_.ToProto(); + if (!status_or_location_range_proto.ok()) { + return status_or_location_range_proto.status(); + } + auto range_proto = new ParseLocationRangeProto(status_or_location_range_proto.value()); + token_proto.set_allocated_parse_location_range(range_proto); + + // Allocate the value proto on the heap and immediately hand it to the token proto, so that + // the token proto owns the value proto and is responsible for freeing it. 
+ // Besides assigning the value proto, a type proto is created and assigned, because + // it is required to identify the type of value when the proto is deserialized. + if (value_.is_valid()) { + auto type_proto = new TypeProto(); + token_proto.set_allocated_type(type_proto); + ZETASQL_RETURN_IF_ERROR(value_.type()->SerializeToSelfContainedProto(type_proto)); + + auto value_proto = new ValueProto(); + token_proto.set_allocated_value(value_proto); + ZETASQL_RETURN_IF_ERROR(value_.Serialize(value_proto)); + } + token_proto.set_image(image_); + token_proto.set_kind(serialize_kind(kind_)); + return token_proto; +} + +ParseTokenProto_Kind ParseToken::serialize_kind(const ParseToken::Kind kind) { + using zetasql::ParseToken; + switch (kind) { + case ParseToken::Kind::KEYWORD:return ParseTokenProto_Kind::ParseTokenProto_Kind_KEYWORD; + case ParseToken::IDENTIFIER:return ParseTokenProto_Kind::ParseTokenProto_Kind_IDENTIFIER; + case ParseToken::IDENTIFIER_OR_KEYWORD:return ParseTokenProto_Kind::ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD; + case ParseToken::VALUE:return ParseTokenProto_Kind::ParseTokenProto_Kind_VALUE; + case ParseToken::COMMENT:return ParseTokenProto_Kind::ParseTokenProto_Kind_COMMENT; + case ParseToken::END_OF_INPUT:return ParseTokenProto_Kind::ParseTokenProto_Kind_END_OF_INPUT; + } +} + + +ParseTokenOptionsProto ParseTokenOptions::ToProto() const { + ParseTokenOptionsProto options_proto; + options_proto.set_max_tokens(max_tokens); + options_proto.set_stop_at_end_of_statement(stop_at_end_of_statement); + options_proto.set_include_comments(include_comments); + return options_proto; +} + + +ParseTokenOptions ParseTokenOptions::FromProto(const ParseTokenOptionsProto& proto) { + ParseTokenOptions options; + options.max_tokens = proto.max_tokens(); + options.stop_at_end_of_statement = proto.stop_at_end_of_statement(); + options.include_comments = proto.include_comments(); + return options; +} + } // namespace zetasql diff --git a/zetasql/public/parse_tokens.h 
b/zetasql/public/parse_tokens.h index 10d6462e8..88652defc 100644 --- a/zetasql/public/parse_tokens.h +++ b/zetasql/public/parse_tokens.h @@ -120,37 +120,8 @@ class ParseToken { // Returns the location of the token in the input. ParseLocationRange GetLocationRange() const { return location_range_; } - // Convert the token into its proto. - zetasql_base::StatusOr ToProto() const { - ParseTokenProto token_proto; - - // Create a location range proto from the field location_range_. Transform the proto into heap and - // uses a pointer to manage it. Then assign the pointer to the token proto, which will release the - // assigned location range proto when the token proto is about to be released. - auto status_or_location_range_proto = location_range_.ToProto(); - if (!status_or_location_range_proto.ok()) { - return status_or_location_range_proto.status(); - } - auto range_proto = new ParseLocationRangeProto(status_or_location_range_proto.value()); - token_proto.set_allocated_parse_location_range(range_proto); - - // New a value proto in heap and immediately assign it to the token proto, so that - // the token proto can manage the memory of the value proto. - // Besides assigning the value proto, a type proto is created and assigned, because - // it is required to identify the type of value when the proto is deserialized. - if (value_.is_valid()) { - auto type_proto = new TypeProto(); - token_proto.set_allocated_type(type_proto); - ZETASQL_RETURN_IF_ERROR(value_.type()->SerializeToSelfContainedProto(type_proto)); - - auto value_proto = new ValueProto(); - token_proto.set_allocated_value(value_proto); - ZETASQL_RETURN_IF_ERROR(value_.Serialize(value_proto)); - } - token_proto.set_image(image_); - token_proto.set_kind(serialize_kind(kind_)); - return token_proto; - } + // Convert a ParseToken object into its proto. + zetasql_base::StatusOr ToProto() const; // The declarations below are intended for internal use. 
@@ -181,23 +152,7 @@ class ParseToken { // Convert a Token Kind into its proto form. It is used by the ToProto method that converts // a token to its proto. - static ParseTokenProto_Kind serialize_kind(const ParseToken::Kind kind) { - using zetasql::ParseToken; - switch (kind) { - case ParseToken::Kind::KEYWORD: - return ParseTokenProto_Kind::ParseTokenProto_Kind_KEYWORD; - case ParseToken::IDENTIFIER: - return ParseTokenProto_Kind::ParseTokenProto_Kind_IDENTIFIER; - case ParseToken::IDENTIFIER_OR_KEYWORD: - return ParseTokenProto_Kind::ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD; - case ParseToken::VALUE: - return ParseTokenProto_Kind::ParseTokenProto_Kind_VALUE; - case ParseToken::COMMENT: - return ParseTokenProto_Kind::ParseTokenProto_Kind_COMMENT; - case ParseToken::END_OF_INPUT: - return ParseTokenProto_Kind::ParseTokenProto_Kind_END_OF_INPUT; - } - } + static ParseTokenProto_Kind serialize_kind(ParseToken::Kind kind); // Copyable }; @@ -214,23 +169,11 @@ struct ParseTokenOptions { // Return the comments in the ParseToken vector or silently drop them. bool include_comments = false; - // Convert the token options into its proto. - ParseTokenOptionsProto ToProto() const { - ParseTokenOptionsProto options_proto; - options_proto.set_max_tokens(max_tokens); - options_proto.set_stop_at_end_of_statement(stop_at_end_of_statement); - options_proto.set_include_comments(include_comments); - return options_proto; - } + // Convert the token options into its proto. + ParseTokenOptionsProto ToProto() const; - // Create a ParseTokenOption object from its proto. - static ParseTokenOptions FromProto(const ParseTokenOptionsProto& proto) { - ParseTokenOptions options; - options.max_tokens = proto.max_tokens(); - options.stop_at_end_of_statement = proto.stop_at_end_of_statement(); - options.include_comments = proto.include_comments(); - return options; - } + // Create a ParseTokenOption object from its proto. 
+ static ParseTokenOptions FromProto(const ParseTokenOptionsProto& proto); }; // Gets a vector of ParseTokens starting from , and updates diff --git a/zetasql/public/parse_tokens_test.cc b/zetasql/public/parse_tokens_test.cc index d45324d76..aa8238c9a 100644 --- a/zetasql/public/parse_tokens_test.cc +++ b/zetasql/public/parse_tokens_test.cc @@ -251,73 +251,73 @@ TEST(GetNextTokensTest, ResumeLocationIsAdjustedOnError) { } static void checkEqualBetweenTokenAndProto(const ParseToken& token, ParseTokenProto_Kind proto_kind) { - auto status_or_proto = token.ToProto(); - - // Confirm the ToProto of the token runs successfully and assign the proto as a local variable. - EXPECT_TRUE(status_or_proto.ok()); - auto proto = status_or_proto.value(); - - EXPECT_EQ(proto_kind, proto.kind()); - EXPECT_EQ(token.GetImage(), proto.image()); - - // Check whether the Location Range objects in both token and its proto are identical. - EXPECT_EQ(token.GetLocationRange().start().GetByteOffset(), proto.parse_location_range().start()); - EXPECT_EQ(token.GetLocationRange().end().GetByteOffset(), proto.parse_location_range().end()); - EXPECT_EQ(token.GetLocationRange().start().filename(), proto.parse_location_range().filename()); - - // If the token has no valid value, then return directly. - if (!token.GetValue().is_valid()) { - return; - } - // Check whether the values at token and its proto are identical. - switch (token.GetValue().type_kind()) { - case TypeKind::TYPE_BOOL: - EXPECT_EQ(token.GetValue().bool_value(), proto.value().bool_value()); - break; - case TypeKind::TYPE_STRING: - EXPECT_EQ(token.GetValue().string_value(), proto.value().string_value()); - break; - case TypeKind::TYPE_NUMERIC: - EXPECT_EQ(token.GetValue().numeric_value().ToString(), proto.value().numeric_value()); - break; - } + auto status_or_proto = token.ToProto(); + + // Confirm the ToProto of the token runs successfully and assign the proto as a local variable. 
+ EXPECT_TRUE(status_or_proto.ok()); + auto proto = status_or_proto.value(); + + EXPECT_EQ(proto_kind, proto.kind()); + EXPECT_EQ(token.GetImage(), proto.image()); + + // Check whether the Location Range objects in both token and its proto are identical. + EXPECT_EQ(token.GetLocationRange().start().GetByteOffset(), proto.parse_location_range().start()); + EXPECT_EQ(token.GetLocationRange().end().GetByteOffset(), proto.parse_location_range().end()); + EXPECT_EQ(token.GetLocationRange().start().filename(), proto.parse_location_range().filename()); + + // If the token has no valid value, then return directly. + if (!token.GetValue().is_valid()) { + return; + } + // Check whether the values at token and its proto are identical. + switch (proto.type().type_kind()) { + case TypeKind::TYPE_BOOL: + EXPECT_EQ(token.GetValue().bool_value(), proto.value().bool_value()); + break; + case TypeKind::TYPE_STRING: + EXPECT_EQ(token.GetValue().string_value(), proto.value().string_value()); + break; + case TypeKind::TYPE_NUMERIC: + EXPECT_EQ(token.GetValue().numeric_value().ToString(), proto.value().numeric_value()); + break; + } } TEST(GetNextTokensTest, ParseTokenToProto) { - ParseTokenOptions options; - std::vector parse_tokens; - - ParseResumeLocation location = ParseResumeLocation::FromString("SELECT 'abc' 1 \ntrue `HASH`\n foo"); - // Check error in bison tokenizer. - ZETASQL_ASSERT_OK(GetParseTokens(options, &location, &parse_tokens)); - EXPECT_EQ(7, parse_tokens.size()); - - // Check individual tokens. 
- checkEqualBetweenTokenAndProto(parse_tokens[0], ParseTokenProto_Kind_KEYWORD); - checkEqualBetweenTokenAndProto(parse_tokens[1], ParseTokenProto_Kind_VALUE); - checkEqualBetweenTokenAndProto(parse_tokens[2], ParseTokenProto_Kind_VALUE); - checkEqualBetweenTokenAndProto(parse_tokens[3], ParseTokenProto_Kind_KEYWORD); - checkEqualBetweenTokenAndProto(parse_tokens[4], ParseTokenProto_Kind_IDENTIFIER); - checkEqualBetweenTokenAndProto(parse_tokens[5], ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD); - checkEqualBetweenTokenAndProto(parse_tokens[6], ParseTokenProto_Kind_END_OF_INPUT); + ParseTokenOptions options; + std::vector parse_tokens; + + ParseResumeLocation location = ParseResumeLocation::FromString("SELECT 'abc' 1 \ntrue `HASH`\n foo"); + // Check error in bison tokenizer. + ZETASQL_ASSERT_OK(GetParseTokens(options, &location, &parse_tokens)); + EXPECT_EQ(7, parse_tokens.size()); + + // Check individual tokens. + checkEqualBetweenTokenAndProto(parse_tokens[0], ParseTokenProto_Kind_KEYWORD); + checkEqualBetweenTokenAndProto(parse_tokens[1], ParseTokenProto_Kind_VALUE); + checkEqualBetweenTokenAndProto(parse_tokens[2], ParseTokenProto_Kind_VALUE); + checkEqualBetweenTokenAndProto(parse_tokens[3], ParseTokenProto_Kind_KEYWORD); + checkEqualBetweenTokenAndProto(parse_tokens[4], ParseTokenProto_Kind_IDENTIFIER); + checkEqualBetweenTokenAndProto(parse_tokens[5], ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD); + checkEqualBetweenTokenAndProto(parse_tokens[6], ParseTokenProto_Kind_END_OF_INPUT); } TEST(GetNextTokensTest, ConvertBetweenParseTokenOptionsAndProto) { - ParseTokenOptions options; - options.max_tokens = 100; - options.include_comments = true; - options.stop_at_end_of_statement = true; - - auto proto = options.ToProto(); - EXPECT_EQ(100, proto.max_tokens()); - EXPECT_EQ(true, proto.include_comments()); - EXPECT_EQ(true, proto.stop_at_end_of_statement()); - - options = ParseTokenOptions::FromProto(proto); - EXPECT_EQ(100, options.max_tokens); - EXPECT_EQ(true, 
options.include_comments); - EXPECT_EQ(true, options.stop_at_end_of_statement); + ParseTokenOptions options; + options.max_tokens = 100; + options.include_comments = true; + options.stop_at_end_of_statement = true; + + auto proto = options.ToProto(); + EXPECT_EQ(100, proto.max_tokens()); + EXPECT_EQ(true, proto.include_comments()); + EXPECT_EQ(true, proto.stop_at_end_of_statement()); + + options = ParseTokenOptions::FromProto(proto); + EXPECT_EQ(100, options.max_tokens); + EXPECT_EQ(true, options.include_comments); + EXPECT_EQ(true, options.stop_at_end_of_statement); } } // namespace zetasql