diff --git a/Changes b/Changes index bba5ee5..f863d4c 100644 --- a/Changes +++ b/Changes @@ -1,16 +1,22 @@ 2024-06-15: Version 2.01 [New Features] - PSGI-compatible applications can now be mounted directly by using 'psgi' route flag - - Added Kelp::Util::adapt_psgi (used by the new 'psgi' route flag) - - Added Kelp::Request::json_content + - Added Kelp::Util::adapt_psgi function (used by the new 'psgi' route flag) + - Added a bunch of new methods with suffix '_param' to Kelp::Request, which work like 'param' but fetch from specific place + - Methods with prefix 'raw_' were added to Kelp::Request, returning encoded request data (see "Bug fixes" below) + - Added charset_encode, charset_decode methods to Kelp + - Added charset method to Kelp::Request + - Added charset_decode method to Kelp::Request, which decodes using either request or application charset [Changes] - Scalar context behavior of the param method called without arguments on json request is now deprecated * this is done in effort to make param method easier to use and harder to misuse + - Kelp::Test no longer uses HTTP::Cookies, implements a much slimmer cookie jar with the same interface + * The new cookie jar only stores key/value pairs without any special data for cookies like domains, paths or expiration dates + - Kelp::Test now has a new import flag: '-utf8' + * Importing with this flag will automatically set Test::More to encode wide characters on output - Repeatedly fetching parameters from json request with the param method is now much faster - - 'kelp' template has been adjusted to match framework looks - Documentation improvements - - Added a homepage on github [Bug fixes] - Route destination will no longer be executed if a response was already rendered by a previous one @@ -19,6 +25,18 @@ * Delayed responses will no longer override a previously rendered normal response * The destination will still be run if the render happened inside 'before_dispatch' hook + [Backward-Incompatible Changes] + - 
Request data will now be properly decoded using either charset from Content-Type or application charset + * Request paths, query parameters and body are automatically decoded + * Headers, cookies and sessions are unaffected (session encoding must be configured on the middleware level) + * Please use methods with prefix 'raw_' from Kelp::Request to access encoded request data if needed + * Not decoding input was a bug which needed to be fixed, but the application was already encoding the response correctly + + [Tweaks] + - 'kelp' template has been adjusted to match framework looks + - Added a homepage on GitHub + - Kelp now has a logo and is developed by the Kelp-framework organization on GitHub + 2024-06-10: Version 2.00 [New Features] diff --git a/cpanfile b/cpanfile index b87ea58..cc01741 100644 --- a/cpanfile +++ b/cpanfile @@ -5,8 +5,9 @@ requires 'Path::Tiny' => '0'; requires 'Template::Tiny' => 0; requires 'Try::Tiny' => 0; requires 'Class::Inspector' => '0'; -requires 'HTTP::Cookies' => '0'; requires 'namespace::autoclean' => '0'; +requires 'URI' => '0'; +requires 'Hash::MultiValue' => '0'; on 'test' => sub { requires 'Test::Deep' => '0'; diff --git a/lib/Kelp/Request.pm b/lib/Kelp/Request.pm index f4451b6..47c84e2 100644 --- a/lib/Kelp/Request.pm +++ b/lib/Kelp/Request.pm @@ -4,6 +4,8 @@ use Kelp::Base 'Plack::Request'; use Carp; use Try::Tiny; +use Encode qw(decode); +use Hash::MultiValue; attr -app => sub { croak "app is required" }; @@ -16,17 +18,61 @@ attr named => sub { {} }; # The name of the matched route for this request attr route_name => sub { undef }; -# copy of json_content, used repeatedly by param -attr _param_json_content => sub { - my $hash = $_[0]->json_content // {}; - ref $hash eq 'HASH' ? 
$hash : { ref $hash, $hash }; +attr query_parameters => sub { + my $self = shift; + my $raw = $self->_charset_decode_array( $self->_query_parameters ); + return Hash::MultiValue->new(@{$raw}); +}; + +attr body_parameters => sub { + my $self = shift; + my $raw = $self->_charset_decode_array( $self->_body_parameters ); + return Hash::MultiValue->new(@{$raw}); +}; + +attr parameters => sub { + my $self = shift; + + my $raw_query = $self->_charset_decode_array( $self->_query_parameters ); + my $raw_body = $self->_charset_decode_array( $self->_body_parameters ); + return Hash::MultiValue->new(@{$raw_query}, @{$raw_body}); }; +# Raw methods - methods in Plack::Request (without decoding) +# in Kelp::Request, they are replaced with decoding versions + +sub raw_path { + my $self = shift; + return $self->SUPER::path( @_ ); +} + +sub raw_body { + my $self = shift; + return $self->SUPER::content( @_ ); +} + +sub raw_body_parameters { + my $self = shift; + return $self->SUPER::body_parameters( @_ ); +} + +sub raw_query_parameters { + my $self = shift; + return $self->SUPER::query_parameters( @_ ); +} + +sub raw_parameters { + my $self = shift; + return $self->SUPER::parameters( @_ ); +} + # If you're running the web app as a proxy, use Plack::Middleware::ReverseProxy sub address { $_[0]->env->{REMOTE_ADDR} } sub remote_host { $_[0]->env->{REMOTE_HOST} } sub user { $_[0]->env->{REMOTE_USER} } +# Interface + sub new { my ( $class, %args ) = @_; my $self = $class->SUPER::new( delete $args{env} ); @@ -46,12 +92,51 @@ sub is_json { return $self->content_type =~ m{^application/json}i; } +sub charset { + my $self = shift; + return undef unless $self->content_type; + return undef unless $self->content_type =~ m{;\s*charset=([^;\$]+)}; + return $1; +} + +sub charset_decode { + my ( $self, $string ) = @_; + + if ( $self->charset ) { + + # If the charset is unsupported by Encode, try to decode using + # application charset. 
Worst case scenario is a server error with code + # 500 + state $supported = { map { $_ => $_ } Encode->encodings(':all') }; + my $charset = $supported->{$self->charset} // $self->app->charset; + + return decode $charset, $string; + } + + return $self->app->charset_decode($string); +} + +sub _charset_decode_array { + my ( $self, $arr ) = @_; + return [ map { $self->charset_decode($_) } @$arr ]; +} + +sub path { + my $self = shift; + return $self->charset_decode( $self->SUPER::path( @_ ) ); +} + +sub content { + my $self = shift; + return $self->charset_decode( $self->SUPER::content( @_ ) ); +} + sub json_content { my $self = shift; return undef unless $self->is_json; return try { - $self->app->json->decode( $self->content ); + $self->app->json->decode( $self->raw_body ); } catch { undef; @@ -62,23 +147,46 @@ sub param { my $self = shift; if ( $self->is_json && $self->app->can('json') ) { - my $hash = $self->_param_json_content; - - return $hash->{ $_[0] } if @_; - if (!wantarray) { - carp "param() called in scalar context on json request is deprecated and will return the number of keys in the future. Use json_content instead"; - return $hash; - } - return keys %$hash; + return $self->json_param( @_ ); } - # safe method of calling Plack::Request::param - return scalar $self->cgi_param($_[0]) if @_; - return $self->cgi_param; + # safe method without calling Plack::Request::param + return $self->parameters->get($_[0]) if @_; + return keys %{ $self->parameters }; } sub cgi_param { - shift->SUPER::param(@_); + shift->SUPER::param( @_ ); +} + +sub query_param { + my $self = shift; + + return $self->query_parameters->get($_[0]) if @_; + return keys %{ $self->query_parameters }; +} + +sub body_param { + my $self = shift; + + return $self->body_parameters->get($_[0]) if @_; + return keys %{ $self->body_parameters }; +} + +sub json_param { + my $self = shift; + + my $hash = $self->{_param_json_content} //= do { + my $hash = $self->json_content // {}; + ref $hash eq 'HASH' ? 
$hash : { ref $hash, $hash }; + }; + + return $hash->{ $_[0] } if @_; + if (!wantarray) { + carp "param() called in scalar context on json request is deprecated and will return the number of keys in the future. Use json_content instead"; + return $hash; + } + return keys %$hash; } sub session { @@ -116,7 +224,75 @@ Kelp::Request - Request class for a Kelp application =head1 DESCRIPTION This module provides a convenience layer on top of L. It extends -it to add several convenience methods. +it to add several convenience methods and support for application encoding. + +=head1 ENCODING + +Starting with version 2.01, Kelp::Request simplifies input handling and +improves correctness by automatically decoding path, query parameters and body +parameters. + +Headers (so cookies as well) are unaffected, as they aren't +consistently supported outside of ASCII range. JSON and session are configured +separately in modules and middlewares, so they must themselves do the proper +decoding. + +Following methods will return values decoded with charset either from +C header or the one specified in the app's configuration: + +=over + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=back + +If you wish to get input in the original request encoding, use these instead +(note: there is no C): + +=over + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C (instead of C) + +=back + +Following methods will return decoded values if the other parts of the system +are configured to decode them: + +=over + +=item * C - depends on JSON module (on JSON requests) + +=item * C - depends on JSON module + +=item * C - depends on JSON module + +=item * C - depends on session middleware + +=back =head1 ATTRIBUTES @@ -152,16 +328,21 @@ current route is processing. Contains a string name of the route matched for this request. Contains route pattern if the route was not named. 
+=head1 METHODS + =head2 param -Returns the HTTP parameters of the request. It has two modes of operation. +Shortcut for returning the HTTP parameters of the request with a heavy amount of +dwimmery. It has two modes of operation and behaves differently for JSON and +non-JSON requests. =over =item If passed with a parameter, returns the value value of a parameter with that -name from either request body or query. This always returns a scalar value. +name from either request body or query (body is preferred). This always returns +a scalar value. =item @@ -186,21 +367,81 @@ entire contents of json when called without arguments in scalar context. This will be later removed, so that C will work exactly the same regardless of whether the request was json. Use L for that instead. -Since this method has so many ways to use it, you're encouraged to use -other, more specific methods from L. +Since this method behaves differently based on the form of input, you're +encouraged to use other, more specific methods listed below. + +=head2 query_param + +Same as L, but always returns parameters from query string. + +=head2 body_param + +Same as L, but always returns parameters from body form. + +=head2 json_param + +Same as L, but always returns parameters from JSON body. =head2 cgi_param -Calls C in L, which is CGI.pm compatible. It is B to use this method, unless for some reason you have to maintain -CGI.pm compatibility. Misusing this method can lead to bugs and security -vulnerabilities. +CGI.pm compatible implementation of C (but does not set parameters). It +is B<not recommended> to use this method, unless for some reason you have to +maintain CGI.pm compatibility. Misusing this method can lead to bugs and +security vulnerabilities. + +=head2 parameters + +Same as L, but the keys and values in the hash are decoded. + +=head2 raw_parameters + +Same as L. The hash keys and values are B<not> decoded. + +=head2 query_parameters + +Same as L, but the keys and values in the hash are decoded. 
+ +=head2 raw_query_parameters + +Same as L. The hash keys and values are B<not> decoded. + +=head2 body_parameters + +Same as L, but the keys and values in the hash are decoded. + +=head2 raw_body_parameters + +Same as L. The hash keys and values are B<not> decoded. + +=head2 content + +Same as L, but the result is decoded. + +This is the go-to method for getting the request body for string manipulation +character by character. It can be useful when you, for example, want to run a +regex on the body. Use this instead of L. + +=head2 raw_body + +Same as L. The result is B<not> decoded. + +This is the go-to method for getting the request body for string manipulation +byte by byte. An example would be deserializing the body with a custom +serializer. Use this instead of L. =head2 json_content Returns the json-decoded body of the request or undef if the request is not json, there is no json decoder or an error occured. +=head2 path + +Same as L, but the result is decoded. + +=head2 raw_path + +Same as L. The result is B<not> decoded. + =head2 address, remote_host, user These are shortcuts to the REMOTE_ADDR, REMOTE_HOST and REMOTE_USER environment @@ -241,7 +482,7 @@ Set values in the session using key-value pairs: ); } -Set values using a Hashref: +Replace all values with a hash: sub set_session_hashref { my $self = shift; @@ -267,5 +508,13 @@ Returns true if the request was called with C. Returns true if the request's content type was C. +=head2 charset + +Returns the charset from the C HTTP header or C if there is none. + +=head2 charset_decode + +Same as L, but will prefer using L to L. 
+ =cut diff --git a/t/params.t b/t/params.t new file mode 100644 index 0000000..56855ee --- /dev/null +++ b/t/params.t @@ -0,0 +1,57 @@ +use Kelp::Base -strict; + +use Kelp; +use Kelp::Test -utf8; +use Test::More; +use HTTP::Request::Common; +use Encode; +use URI::Escape; +use utf8; + +my $app = Kelp->new( mode => 'test' ); +my $t = Kelp::Test->new( app => $app ); + +$app->add_route( [ POST => '/dump_params/:field' ] => sub { + my ( $self, $field ) = @_; + my $req = $self->req; + + return { + param => $req->param( $field ), + query_param => $req->query_param( $field ), + body_param => $req->body_param( $field ), + json_param => $req->json_param( $field ), + }; +} ); + +my $target = '/dump_params/fld?fld=query'; + +subtest 'testing normal request' => sub { + $t->request( POST $target, + 'Content-Type' => 'application/x-www-form-urlencoded', + 'Content' => 'fld=body', + )->code_is(200); + + $t->json_cmp({ + param => 'body', + query_param => 'query', + body_param => 'body', + json_param => undef, + }); +}; + +subtest 'testing json request' => sub { + $t->request( POST $target, + 'Content-Type' => 'application/json', + 'Content' => '{"fld": "json"}', + )->code_is(200); + + $t->json_cmp({ + param => 'json', + query_param => 'query', + body_param => undef, + json_param => 'json', + }); +}; + +done_testing; + diff --git a/t/run.t b/t/run.t index 6cc5d9d..f907e50 100644 --- a/t/run.t +++ b/t/run.t @@ -1,9 +1,11 @@ use Kelp::Base -strict; use Kelp; -use Kelp::Test; +use Kelp::Test -utf8; use HTTP::Request::Common; use Test::More; +use URI::Escape; +use utf8; my $app = Kelp->new( mode => 'test' ); $app->routes->base("main"); @@ -23,7 +25,8 @@ $app->add_route("/named/:a", sub { return "Got: " . 
$self->req->named->{a}; }); for my $a (qw{boo дума 123}) { - $t->request( GET "/named/$a" ) + my $encoded = uri_escape $app->charset_encode($a); + $t->request( GET "/named/$encoded" ) ->code_is(200) ->content_is("Got: $a"); } @@ -77,7 +80,8 @@ $app->add_route("/array/:a/:b", sub { return "Got: $a and $b"; }); for my $a (qw{boo дума 123}) { - $t->request( GET "/array/one/$a" ) + my $encoded = uri_escape $app->charset_encode($a); + $t->request( GET "/array/one/$encoded" ) ->code_is(200) ->content_is("Got: one and $a"); } @@ -88,7 +92,8 @@ $app->add_route("/param", sub { return "We have " . $self->param('word'); }); for my $word ('word', 'дума', 'كلمة', 'բառ', 'sözcük') { - $t->request( GET '/param?word=' . $word ) + my $encoded = uri_escape $app->charset_encode($word); + $t->request( GET "/param?word=$encoded" ) ->code_is(200) ->content_like(qr{$word}); } diff --git a/t/unicode.t b/t/unicode.t new file mode 100644 index 0000000..493079b --- /dev/null +++ b/t/unicode.t @@ -0,0 +1,83 @@ +use Kelp::Base -strict; + +use Kelp; +use Kelp::Test -utf8; +use Test::More; +use HTTP::Request::Common; +use Encode; +use URI::Escape; +use utf8; + +my $app = Kelp->new( mode => 'test' ); +my $t = Kelp::Test->new( app => $app ); + +my $test_string = 'zażółć gęslą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ'; + +$app->add_route( [ POST => '/path_echo/:echo' ] => sub { return $_[1]; } ); +$app->add_route( [ POST => '/body_echo' ] => sub { return $_[0]->param('śś'); } ); +$app->add_route( [ POST => '/json_echo' ] => sub { return { 'śś' => $_[0]->param('śś') }; } ); + +subtest 'path encoding no charset ok' => sub { + my $string = uri_escape $app->charset_encode($test_string); + + _t("/path_echo/$string", 'application/x-www-form-urlencoded', '', 200, encode($app->charset, $test_string)); +}; + +subtest 'path encoding cp1250 ok' => sub { + my $string = uri_escape encode 'cp1250', $test_string; + + _t("/path_echo/$string", 'application/x-www-form-urlencoded; charset=cp1250', '', 200, encode($app->charset, 
$test_string)); +}; + +subtest 'plaintext encoding no charset ok' => sub { + my $string = join '=', map { uri_escape $app->charset_encode($_) } 'śś', $test_string; + + _t('/body_echo', 'application/x-www-form-urlencoded', $string, 200, encode($app->charset, $test_string)); +}; + +subtest 'plaintext encoding utf8 ok' => sub { + my $string = join '=', map { uri_escape encode 'utf-8', $_ } 'śś', $test_string; + + _t('/body_echo', 'application/x-www-form-urlencoded; charset=utf-8', $string, 200, encode($app->charset, $test_string)); +}; + +subtest 'plaintext encoding cp1250 ok' => sub { + my $string = join '=', map { uri_escape encode 'cp1250', $_ } 'śś', $test_string; + + _t('/body_echo', 'application/x-www-form-urlencoded; charset=cp1250', $string, 200, encode($app->charset, $test_string)); +}; + +subtest 'plaintext encoding unknown is utf8 ok' => sub { + my $string = join '=', map { uri_escape encode 'utf-8', $_ } 'śś', $test_string; + + _t('/body_echo', 'application/x-www-form-urlencoded; charset=xxnotanencoding', $string, 200, encode($app->charset, $test_string)); +}; + +subtest 'plaintext encoding unknown is not utf8 error ok' => sub { + my $string = join '=', map { uri_escape encode 'cp1252', $_ } 'śś', $test_string; + + _t('/body_echo', 'application/x-www-form-urlencoded; charset=xxnotanencoding', $string, 500); +}; + +subtest 'json encoding ok' => sub { + my $string = Encode::encode('UTF-8', '{"śś":"' . $test_string . '"}'); + + _t('/json_echo', 'application/json', $string, 200, $string); +}; + +sub _t { + my ( $target, $ct, $content, $code, $expected, %headers) = @_; + + $t->request( POST $target, + 'Content-Type' => $ct, + %headers, + 'Content' => $content, + )->code_is($code); + + if ($expected) { + is $t->res->content, $expected, "expected string to $target ($ct) ok" + } +} + +done_testing; +