From 421246df6e5de6d8720b38201fed86e400cbe70e Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Sun, 11 Mar 2018 17:00:45 +0000 Subject: [PATCH 1/2] Allow decoding of application/json. This will detect if the file is UTF-8, UTF-16, or UTF-32, and try and return the content decoded. It will allow use of the charset/ default_charset options. Also allow text/json (if UTF-8). --- README.md | 6 +++--- lib/HTTP/Headers.pm | 12 ++++++++++++ lib/HTTP/Message.pm | 8 ++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ea32f2fe..adda679a 100644 --- a/README.md +++ b/README.md @@ -104,9 +104,9 @@ The following methods are available: - $mess->decoded\_content( %options ) Returns the content with any `Content-Encoding` undone and for textual content - the raw content encoded to Perl's Unicode strings. If the `Content-Encoding` - or `charset` of the message is unknown this method will fail by returning - `undef`. + (text/*, XML, or JSON) the raw content encoded to Perl's Unicode strings. If + the `Content-Encoding` or `charset` of the message is unknown this method will + fail by returning `undef`. The following options can be specified. diff --git a/lib/HTTP/Headers.pm b/lib/HTTP/Headers.pm index 1224de4e..275273db 100644 --- a/lib/HTTP/Headers.pm +++ b/lib/HTTP/Headers.pm @@ -405,6 +405,13 @@ sub content_is_xml { return 0; } +sub content_is_json { + my $ct = shift->content_type; + # text/json is not standard but still used by various servers. + # No issue including it as well. + return $ct eq 'application/json' || $ct eq 'text/json' || $ct =~ /\+json$/; +} + sub referer { my $self = shift; if (@_ && $_[0] =~ /#/) { @@ -737,6 +744,11 @@ content is XHTML. This method can't be used to set Content-Type. Returns TRUE if the Content-Type header field indicate that the content is XML. This method can't be used to set Content-Type. 
+=item $h->content_is_json + +Returns TRUE if the Content-Type header field indicate that the +content is JSON. This method can't be used to set Content-Type. + =item $h->content_encoding The Content-Encoding header field is used as a modifier to the diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm index abffb09e..e588cdb1 100644 --- a/lib/HTTP/Message.pm +++ b/lib/HTTP/Message.pm @@ -351,7 +351,7 @@ sub decoded_content } } - if ($self->content_is_text || (my $is_xml = $self->content_is_xml)) { + if ($self->content_is_text || (my $is_xml = $self->content_is_xml) || $self->content_is_json) { my $charset = lc( $opt{charset} || $self->content_type_charset || @@ -879,9 +879,9 @@ for details about how charset is determined. =item $mess->decoded_content( %options ) Returns the content with any C<Content-Encoding> undone and for textual content -the raw content encoded to Perl's Unicode strings. If the C<Content-Encoding> -or C<charset> of the message is unknown this method will fail by returning -C<undef>. +(text/*, XML, or JSON) the raw content encoded to Perl's Unicode strings. If +the C<Content-Encoding> or C<charset> of the message is unknown this method +will fail by returning C<undef>. The following options can be specified. From 3a6f94815f1552fa46c20b2d6d6d120cc0640905 Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Sun, 11 Mar 2018 17:14:11 +0000 Subject: [PATCH 2/2] Allow decoding of application/javascript. This media type has a charset parameter as per RFC4329, so can be treated in the same way that XML is. --- README.md | 6 +++--- lib/HTTP/Headers.pm | 12 ++++++++++++ lib/HTTP/Message.pm | 8 ++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index adda679a..4a51f630 100644 --- a/README.md +++ b/README.md @@ -104,9 +104,9 @@ The following methods are available: - $mess->decoded\_content( %options ) Returns the content with any `Content-Encoding` undone and for textual content - (text/*, XML, or JSON) the raw content encoded to Perl's Unicode strings.
If - the `Content-Encoding` or `charset` of the message is unknown this method will - fail by returning `undef`. + (text/*, XML, JSON, or JavaScript) the raw content encoded to Perl's Unicode + strings. If the `Content-Encoding` or `charset` of the message is unknown this + method will fail by returning `undef`. The following options can be specified. diff --git a/lib/HTTP/Headers.pm b/lib/HTTP/Headers.pm index 275273db..67a02f11 100644 --- a/lib/HTTP/Headers.pm +++ b/lib/HTTP/Headers.pm @@ -412,6 +412,13 @@ sub content_is_json { return $ct eq 'application/json' || $ct eq 'text/json' || $ct =~ /\+json$/; } +sub content_is_javascript { + my $ct = shift->content_type; + # text/javascript is obsolete in RFC4329 but still used. + # No issue including it as well. + return $ct eq 'application/javascript' || $ct eq 'text/javascript'; +} + sub referer { my $self = shift; if (@_ && $_[0] =~ /#/) { @@ -749,6 +756,11 @@ content is XML. This method can't be used to set Content-Type. Returns TRUE if the Content-Type header field indicate that the content is JSON. This method can't be used to set Content-Type. +=item $h->content_is_javascript + +Returns TRUE if the Content-Type header field indicate that the +content is JavaScript. This method can't be used to set Content-Type. + =item $h->content_encoding The Content-Encoding header field is used as a modifier to the diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm index e588cdb1..af1c6e12 100644 --- a/lib/HTTP/Message.pm +++ b/lib/HTTP/Message.pm @@ -351,7 +351,7 @@ sub decoded_content } } - if ($self->content_is_text || (my $is_xml = $self->content_is_xml) || $self->content_is_json) { + if ($self->content_is_text || (my $is_xml = $self->content_is_xml) || $self->content_is_json || $self->content_is_javascript) { my $charset = lc( $opt{charset} || $self->content_type_charset || @@ -879,9 +879,9 @@ for details about how charset is determined. 
=item $mess->decoded_content( %options ) Returns the content with any C<Content-Encoding> undone and for textual content -(text/*, XML, or JSON) the raw content encoded to Perl's Unicode strings. If -the C<Content-Encoding> or C<charset> of the message is unknown this method -will fail by returning C<undef>. +(text/*, XML, JSON, or JavaScript) the raw content encoded to Perl's Unicode +strings. If the C<Content-Encoding> or C<charset> of the message is unknown +this method will fail by returning C<undef>. The following options can be specified.