-
Notifications
You must be signed in to change notification settings - Fork 33
Opens gzip compressed content #513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6dd2889
c55cbe0
8a18b2a
401babb
2ef2d3a
0aafe05
0bddba1
8621e7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| /* | ||
| * Copyright 2013, 2022 Deutsche Nationalbibliothek et al | ||
| * Copyright 2013, 2023 Deutsche Nationalbibliothek et al | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
|
|
@@ -32,40 +32,52 @@ | |
| import java.io.SequenceInputStream; | ||
| import java.net.HttpURLConnection; | ||
| import java.net.URL; | ||
| import java.net.URLDecoder; | ||
| import java.util.Arrays; | ||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
| import java.util.regex.Pattern; | ||
| import java.util.zip.GZIPInputStream; | ||
|
|
||
| /** | ||
| * Opens an {@link HttpURLConnection} and passes a reader to the receiver. | ||
| * | ||
| * @author Christoph Böhme | ||
| * @author Jan Schnasse | ||
| * @author Jens Wille | ||
| * @author Pascal Christoph (dr0i) | ||
| */ | ||
| @Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header = `*/*`, `Accept-Charset` header (`encoding`) = `UTF-8`, `errorPrefix` = `ERROR: `.") | ||
| @Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset`, `Accept-Encoding`, `Content-Encoding` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header (`accept`) = `*/*`, `Accept-Charset` header (`acceptcharset`) = `UTF-8`, `errorprefix` = `ERROR: `.") | ||
| @In(String.class) | ||
| @Out(Reader.class) | ||
| @FluxCommand("open-http") | ||
| public final class HttpOpener extends DefaultObjectPipe<String, ObjectReceiver<Reader>> { | ||
|
|
||
| public static final String ACCEPT_DEFAULT = "*/*"; | ||
| public static final String ACCEPT_HEADER = "accept"; | ||
| public static final String ACCEPT_CHARSET_HEADER = "accept-charset"; | ||
| public static final String ACCEPT_ENCODING_HEADER = "accept-encoding"; | ||
| public static final String CONTENT_ENCODING_HEADER = "content-encoding"; | ||
| public static final String CONTENT_TYPE_HEADER = "content-type"; | ||
|
|
||
| public static final String ACCEPT_DEFAULT = "*/*"; | ||
| public static final String CHARSET_DEFAULT = "UTF-8"; | ||
| public static final String DEFAULT_PREFIX = "ERROR: "; | ||
| public static final String ENCODING_DEFAULT = "UTF-8"; | ||
| public static final String ENCODING_HEADER = "accept-charset"; | ||
| public static final String HEADER_FIELD_SEPARATOR = "\n"; | ||
| public static final String HEADER_VALUE_SEPARATOR = ":"; | ||
| public static final String INPUT_DESIGNATOR = "@-"; | ||
| public static final String MIME_PARAMETER_CHARSET = "charset"; | ||
| public static final String MIME_PARAMETER_SEPARATOR = ";"; | ||
| public static final String MIME_PARAMETER_VALUE_SEPARATOR = "="; | ||
|
|
||
| public static final String DEFAULT_METHOD_NAME = "GET"; | ||
| public static final Method DEFAULT_METHOD = Method.valueOf(DEFAULT_METHOD_NAME); | ||
|
|
||
| public static final String HEADER_FIELD_SEPARATOR = "\n"; | ||
| public static final String HEADER_VALUE_SEPARATOR = ":"; | ||
|
|
||
| private static final Pattern HEADER_FIELD_SEPARATOR_PATTERN = Pattern.compile(HEADER_FIELD_SEPARATOR); | ||
| private static final Pattern HEADER_VALUE_SEPARATOR_PATTERN = Pattern.compile(HEADER_VALUE_SEPARATOR); | ||
| private static final Pattern MIME_PARAMETER_SEPARATOR_PATTERN = Pattern.compile(MIME_PARAMETER_SEPARATOR); | ||
|
|
||
| private static final int ALLOWED_REDIRECTIONS = 3; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this value be configurable?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say: let's wait and implement it if the need arises. Would you be OK with this? |
||
| private static final int CONNECTION_TIMEOUT = 11000; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How did you arrive at this value? 11 seconds seems kind of arbitrary. Should this value be configurable?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's kind of arbitrary. Setting these values at least easily prevents possible infinite loops. |
||
|
|
||
| private final Map<String, String> headers = new HashMap<>(); | ||
|
|
||
|
|
@@ -118,7 +130,7 @@ public boolean getResponseHasBody() { | |
| */ | ||
| public HttpOpener() { | ||
| setAccept(ACCEPT_DEFAULT); | ||
| setEncoding(ENCODING_DEFAULT); | ||
| setAcceptCharset(CHARSET_DEFAULT); | ||
| setErrorPrefix(DEFAULT_PREFIX); | ||
| setMethod(DEFAULT_METHOD); | ||
| setUrl(INPUT_DESIGNATOR); | ||
|
|
@@ -137,43 +149,59 @@ public void setAccept(final String accept) { | |
| } | ||
|
|
||
| /** | ||
| * Sets the HTTP request body. The default value for the request body is | ||
| * {@value INPUT_DESIGNATOR} <i>if the {@link #setMethod(Method) request | ||
| * method} accepts a request body</i>, which means it will use the {@link | ||
| * #process(String) input data} data as request body <i>if the input has | ||
| * not already been used</i>; otherwise, no request body will be set by | ||
| * default. | ||
| * Sets the HTTP {@value CONTENT_TYPE_HEADER} header value. This is a | ||
| * MIME type such as {@code text/plain} or {@code application/json}. | ||
| * | ||
| * <p>If a request body has been set, but the request method does not | ||
| * accept a body, the method <i>may</i> be changed to {@code POST}. | ||
| * @param contentType MIME type to use for the HTTP content-type header | ||
| */ | ||
| public void setContentType(final String contentType) { | ||
| setHeader(CONTENT_TYPE_HEADER, contentType); | ||
| } | ||
|
|
||
| /** | ||
| * Sets the HTTP {@value ACCEPT_CHARSET_HEADER} header value. This is the | ||
| * preferred charset for the HTTP response. | ||
| * The default charset is {@value CHARSET_DEFAULT}. | ||
| * | ||
| * @param body the request body | ||
| * @param charset name of the charset used for the accept-charset HTTP header | ||
| */ | ||
| public void setBody(final String body) { | ||
| this.body = body; | ||
| public void setAcceptCharset(final String charset) { | ||
| setHeader(ACCEPT_CHARSET_HEADER, charset); | ||
| } | ||
|
|
||
| /** | ||
| * Sets the HTTP {@value CONTENT_TYPE_HEADER} header value. This is a | ||
| * MIME type such as {@code text/plain} or {@code application/json}. | ||
| * @deprecated Use {@link #setAcceptCharset} instead. | ||
| * @param charset name of the charset used for the accept-charset HTTP header | ||
| */ | ||
| @Deprecated | ||
| public void setEncoding(final String charset) { | ||
| setAcceptCharset(charset); | ||
| } | ||
|
|
||
| /** | ||
| * Sets the HTTP {@value ACCEPT_ENCODING_HEADER} header value. This is the | ||
| * preferred content encoding for the HTTP response. It accepts HTTP compression. | ||
| * Allowed values are i.a. "gzip" and "Brotli". | ||
| * The default for the content encoding is null, which means "no compression". | ||
| * | ||
| * @param contentType MIME type to use for the HTTP content-type header | ||
| * @param acceptEncoding name of content encoding used for the accept-encoding HTTP | ||
| * header | ||
| */ | ||
| public void setContentType(final String contentType) { | ||
| setHeader(CONTENT_TYPE_HEADER, contentType); | ||
| public void setAcceptEncoding(final String acceptEncoding) { | ||
| setHeader(ACCEPT_ENCODING_HEADER, acceptEncoding); | ||
| } | ||
|
|
||
| /** | ||
| * Sets the HTTP {@value ENCODING_HEADER} header value. This is the | ||
| * preferred encoding for the HTTP response. Additionally, the encoding | ||
| * is used for reading the HTTP response if it does not specify a content | ||
| * encoding. The default for the encoding is {@value ENCODING_DEFAULT}. | ||
| * Sets the HTTP {@value CONTENT_ENCODING_HEADER} header value. This is the | ||
| * content encoding for the HTTP request. It enables HTTP compression. | ||
| * Allowed values are "gzip". | ||
| * The default for the content encoding is null, which means "no compression". | ||
| * | ||
| * @param encoding name of the encoding used for the accept-charset HTTP | ||
| * @param contentEncoding name of content encoding used for the content-encoding HTTP | ||
| * header | ||
| */ | ||
| public void setEncoding(final String encoding) { | ||
| setHeader(ENCODING_HEADER, encoding); | ||
| public void setContentEncoding(final String contentEncoding) { | ||
| setHeader(CONTENT_ENCODING_HEADER, contentEncoding); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -239,28 +267,40 @@ public void setUrl(final String url) { | |
| this.url = url; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the HTTP request body. The default value for the request body is | ||
| * {@value INPUT_DESIGNATOR} <i>if the {@link #setMethod(Method) request | ||
| * method} accepts a request body</i>, which means it will use the {@link | ||
| * #process(String) input data} data as request body <i>if the input has | ||
| * not already been used</i>; otherwise, no request body will be set by | ||
| * default. | ||
| * | ||
| * <p>If a request body has been set, but the request method does not | ||
| * accept a body, the method <i>may</i> be changed to {@code POST}. | ||
| * | ||
| * @param body the request body | ||
| */ | ||
| public void setBody(final String body) { | ||
| this.body = body; | ||
| } | ||
|
|
||
| @Override | ||
| public void process(final String input) { | ||
| try { | ||
| final String requestUrl = getInput(input, url); | ||
| final String requestBody = getInput(input, | ||
| body == null && method.getRequestHasBody() ? INPUT_DESIGNATOR : body); | ||
|
|
||
| final HttpURLConnection connection = | ||
| (HttpURLConnection) new URL(requestUrl).openConnection(); | ||
| body == null && method.getRequestHasBody() ? INPUT_DESIGNATOR : body); | ||
|
|
||
| connection.setRequestMethod(method.name()); | ||
| headers.forEach(connection::addRequestProperty); | ||
|
|
||
| if (requestBody != null) { | ||
| connection.setDoOutput(true); | ||
| connection.getOutputStream().write(requestBody.getBytes()); | ||
| } | ||
| final URL urlToOpen = new URL(requestUrl); | ||
| final HttpURLConnection connection = requestBody != null ? | ||
| doOutput(urlToOpen, requestBody) : doRedirects(urlToOpen); | ||
|
|
||
| final InputStream inputStream = getInputStream(connection); | ||
| final String contentEncoding = getEncoding(connection.getContentEncoding()); | ||
| final String charset = getContentCharset(connection); | ||
|
|
||
| getReceiver().process(new InputStreamReader(inputStream, contentEncoding)); | ||
| getReceiver().process(new InputStreamReader( | ||
| "gzip".equalsIgnoreCase(connection.getContentEncoding()) ? | ||
| new GZIPInputStream(inputStream) : inputStream, charset)); | ||
| } | ||
| catch (final IOException e) { | ||
| throw new MetafactureException(e); | ||
|
|
@@ -287,6 +327,46 @@ else if (inputUsed) { | |
| return result; | ||
| } | ||
|
|
||
| private HttpURLConnection doOutput(final URL urlToOpen, final String requestBody) throws IOException { | ||
| final HttpURLConnection connection = openConnection(urlToOpen); | ||
|
|
||
| connection.setDoOutput(true); | ||
| connection.getOutputStream().write(requestBody.getBytes()); | ||
|
|
||
| return connection; | ||
| } | ||
|
|
||
| private HttpURLConnection doRedirects(final URL startingUrl) throws IOException { | ||
| URL urlToFollow = startingUrl; | ||
|
|
||
| for (int i = 0; i < ALLOWED_REDIRECTIONS; ++i) { | ||
| final HttpURLConnection connection = openConnection(urlToFollow); | ||
| connection.setInstanceFollowRedirects(false); // Make the logic below easier to detect redirections | ||
|
|
||
| switch (connection.getResponseCode()) { | ||
| case HttpURLConnection.HTTP_MOVED_PERM: | ||
| case HttpURLConnection.HTTP_MOVED_TEMP: | ||
| final String location = URLDecoder.decode(connection.getHeaderField("Location"), "UTF-8"); | ||
| urlToFollow = new URL(urlToFollow, location); // Deal with relative URLs | ||
| break; | ||
| default: | ||
| return connection; | ||
| } | ||
| } | ||
|
|
||
| throw new IOException("Too many redirects"); | ||
| } | ||
|
|
||
| private HttpURLConnection openConnection(final URL urlToOpen) throws IOException { | ||
| final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); | ||
|
|
||
| connection.setRequestMethod(method.name()); | ||
| connection.setConnectTimeout(CONNECTION_TIMEOUT); | ||
| headers.forEach(connection::setRequestProperty); | ||
|
|
||
| return connection; | ||
| } | ||
|
|
||
| private InputStream getInputStream(final HttpURLConnection connection) throws IOException { | ||
| try { | ||
| return connection.getInputStream(); | ||
|
|
@@ -312,8 +392,23 @@ private InputStream getErrorStream(final InputStream errorStream) { | |
| } | ||
| } | ||
|
|
||
| private String getEncoding(final String contentEncoding) { | ||
| return contentEncoding != null ? contentEncoding : headers.get(ENCODING_HEADER); | ||
| private String getContentCharset(final HttpURLConnection connection) { | ||
| final String contentType = connection.getContentType(); | ||
|
|
||
| if (contentType != null) { | ||
| final String[] parts = MIME_PARAMETER_SEPARATOR_PATTERN.split(contentType); | ||
|
|
||
| for (int i = 1; i < parts.length; ++i) { | ||
| final String parameter = parts[i].trim(); | ||
| final int index = parameter.indexOf(MIME_PARAMETER_VALUE_SEPARATOR); | ||
|
|
||
| if (index != -1 && MIME_PARAMETER_CHARSET.equalsIgnoreCase(parameter.substring(0, index))) { | ||
| return parameter.substring(index + 1); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return CHARSET_DEFAULT; | ||
| } | ||
|
|
||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.