From 6dd2889a7a149cf5f420dfd8fa4ac93e14abcbb1 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 27 Nov 2023 16:49:40 +0100 Subject: [PATCH 1/8] Opens gzip compressed content (#511) - follows redirects - fixes misconception of "Content-Encoding" --- .../java/org/metafacture/io/HttpOpener.java | 147 ++++++++++++++---- .../org/metafacture/io/HttpOpenerTest.java | 61 ++++---- 2 files changed, 146 insertions(+), 62 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index 307a65482..b27368ee8 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -1,5 +1,5 @@ /* - * Copyright 2013, 2022 Deutsche Nationalbibliothek et al + * Copyright 2013, 2023 Deutsche Nationalbibliothek et al * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. @@ -32,10 +32,12 @@ import java.io.SequenceInputStream; import java.net.HttpURLConnection; import java.net.URL; +import java.net.URLDecoder; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; /** * Opens an {@link HttpURLConnection} and passes a reader to the receiver. @@ -43,8 +45,9 @@ * @author Christoph Böhme * @author Jan Schnasse * @author Jens Wille + * @author Pascal Christoph (dr0i) */ -@Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header = `*/*`, `Accept-Charset` header (`encoding`) = `UTF-8`, `errorPrefix` = `ERROR: `.") +@Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset`, `Accept-Encoding` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header = `*/*`, `Accept-Charset` header (`encoding`) = `UTF-8`, `errorPrefix` = `ERROR: `.") @In(String.class) @Out(Reader.class) @FluxCommand("open-http") @@ -53,22 +56,21 @@ public final class HttpOpener extends DefaultObjectPipe headers = new HashMap<>(); - private Method method; private String body; private String errorPrefix; @@ -118,7 +120,7 @@ public boolean getResponseHasBody() { */ public HttpOpener() { setAccept(ACCEPT_DEFAULT); - setEncoding(ENCODING_DEFAULT); + setAcceptCharset(CHARSET_DEFAULT); setErrorPrefix(DEFAULT_PREFIX); setMethod(DEFAULT_METHOD); setUrl(INPUT_DESIGNATOR); @@ -163,17 +165,50 @@ public void setContentType(final String contentType) { setHeader(CONTENT_TYPE_HEADER, contentType); } + /** + * Sets the HTTP {@value ACCEPT_CHARSET_HEADER} header value. This is the + * preferred charset for the HTTP response. + * The default charset is {@value CHARSET_DEFAULT}. + * + * @param charset name of the charset used for the accept-charset HTTP header + */ + public void setAcceptCharset(final String charset) { + setHeader(ACCEPT_CHARSET_HEADER, charset); + } + + /** + * @deprecated Use {@link #setAcceptCharset} instead. + * @param charset name of the charset used for the accept-charset HTTP header + */ + @Deprecated + public void setEncoding(final String charset) { + setAcceptCharset(charset); + } + + /** + * Sets the HTTP {@value ACCEPT_ENCODING_HEADER} header value. This is the + * preferred content encoding for the HTTP response. It accepts HTTP compression. + * Allowed values are i.a. "gzip" and "Brotli". + * The default for the content encoding is null, which means "no compression". + * + * @param contentEncoding name of content encoding used for the accept-encoding HTTP + * header + */ + public void setAcceptContentEncoding(final String contentEncoding) { + setHeader(ACCEPT_ENCODING_HEADER, contentEncoding); + } + /** * Sets the HTTP {@value ENCODING_HEADER} header value. This is the - * preferred encoding for the HTTP response. Additionally, the encoding - * is used for reading the HTTP response if it does not specify a content - * encoding. The default for the encoding is {@value ENCODING_DEFAULT}. + * content encoding for the HTTP GET. It enables HTTP compression. + * Allowed values are "gzip". + * The default for the content encoding is null, which means "no compression". * - * @param encoding name of the encoding used for the accept-charset HTTP + * @param contentEncoding name of content encoding used for the content-encoding HTTP * header */ - public void setEncoding(final String encoding) { - setHeader(ENCODING_HEADER, encoding); + public void setContentEncoding(final String contentEncoding) { + setHeader(ENCODING_HEADER, contentEncoding); } /** @@ -244,23 +279,15 @@ public void process(final String input) { try { final String requestUrl = getInput(input, url); final String requestBody = getInput(input, - body == null && method.getRequestHasBody() ? INPUT_DESIGNATOR : body); - - final HttpURLConnection connection = - (HttpURLConnection) new URL(requestUrl).openConnection(); - - connection.setRequestMethod(method.name()); - headers.forEach(connection::addRequestProperty); - + body == null && method.getRequestHasBody() ? INPUT_DESIGNATOR : body); + Reader reader = null; if (requestBody != null) { - connection.setDoOutput(true); - connection.getOutputStream().write(requestBody.getBytes()); + reader = doPostOrPut(requestBody, new URL(requestUrl)); } - - final InputStream inputStream = getInputStream(connection); - final String contentEncoding = getEncoding(connection.getContentEncoding()); - - getReceiver().process(new InputStreamReader(inputStream, contentEncoding)); + else { + reader = doGet(requestUrl); + } + getReceiver().process(reader); } catch (final IOException e) { throw new MetafactureException(e); @@ -270,6 +297,32 @@ public void process(final String input) { } } + private Reader doPostOrPut(final String requestBody, final URL urlToOpen) throws IOException { + final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); + connection.setDoOutput(true); + connection.setRequestMethod(method.name()); + headers.forEach(connection::setRequestProperty); + connection.getOutputStream().write(requestBody.getBytes()); + final InputStream inputStream = getInputStream(connection); + return new InputStreamReader(inputStream, headers.get(ACCEPT_CHARSET_HEADER)); + } + + private Reader doGet(final String requestUrl) throws IOException { + final Reader reader; + final HttpURLConnection connection; + connection = followRedirects(new URL(requestUrl)); + final InputStream inputStream = getInputStream(connection); + + if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) { + final GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream); + reader = new InputStreamReader(gzipInputStream); + } + else { + reader = new InputStreamReader(inputStream, headers.get(ACCEPT_CHARSET_HEADER)); + } + return reader; + } + private String getInput(final String input, final String value) { final String result; @@ -312,8 +365,36 @@ private InputStream getErrorStream(final InputStream errorStream) { } } - private String getEncoding(final String contentEncoding) { - return contentEncoding != null ? contentEncoding : headers.get(ENCODING_HEADER); + private HttpURLConnection followRedirects(final URL startingUrl) throws IOException { + int times = 0; + HttpURLConnection conn; + URL urlToFollow = startingUrl; + while (true) { + times = times + 1; + + if (times > ALLOWED_REDIRECTIONS) { + throw new IOException("Stuck in redirect loop"); + } + + conn = (HttpURLConnection) urlToFollow.openConnection(); + headers.forEach(conn::setRequestProperty); + conn.setRequestMethod(method.name()); + conn.setConnectTimeout(CONNECTION_TIMEOUT); + conn.setInstanceFollowRedirects(false); // Make the logic below easier to detect redirections + + switch (conn.getResponseCode()) { + case HttpURLConnection.HTTP_MOVED_PERM: + case HttpURLConnection.HTTP_MOVED_TEMP: + String location = conn.getHeaderField("Location"); + location = URLDecoder.decode(location, "UTF-8"); + urlToFollow = new URL(urlToFollow, location); // Deal with relative URLs + continue; + default: + break; + } + break; + } + return conn; } } diff --git a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java index 3ef2974a0..a477d7084 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java @@ -16,13 +16,12 @@ package org.metafacture.io; -import org.metafacture.commons.ResourceUtil; -import org.metafacture.framework.ObjectReceiver; - import com.github.tomakehurst.wiremock.client.MappingBuilder; import com.github.tomakehurst.wiremock.client.ResponseDefinitionBuilder; import com.github.tomakehurst.wiremock.client.WireMock; import com.github.tomakehurst.wiremock.core.WireMockConfiguration; +import com.github.tomakehurst.wiremock.http.HttpHeader; +import com.github.tomakehurst.wiremock.http.HttpHeaders; import com.github.tomakehurst.wiremock.http.RequestMethod; import com.github.tomakehurst.wiremock.junit.WireMockRule; import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; @@ -32,6 +31,8 @@ import org.junit.ComparisonFailure; import org.junit.Rule; import org.junit.Test; +import org.metafacture.commons.ResourceUtil; +import org.metafacture.framework.ObjectReceiver; import org.mockito.ArgumentCaptor; import org.mockito.Captor; import org.mockito.Mock; @@ -39,13 +40,13 @@ import org.mockito.junit.MockitoJUnit; import org.mockito.junit.MockitoRule; -import static org.mockito.Mockito.times; - -import java.io.IOException; -import java.io.Reader; +import java.io.*; import java.util.Arrays; import java.util.function.BiConsumer; import java.util.function.Consumer; +import java.util.zip.GZIPOutputStream; + +import static org.mockito.Mockito.times; /** * Tests for class {@link HttpOpener}. @@ -62,6 +63,18 @@ public final class HttpOpenerTest { private static final String REQUEST_BODY = "request body"; private static final String RESPONSE_BODY = "response bödy"; // UTF-8 + private static byte[] GZIPPED_RESPONSE_BODY; + static { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + GZIPOutputStream gzip = new GZIPOutputStream(out); + gzip.write(RESPONSE_BODY.getBytes("UTF-8")); + gzip.close(); + GZIPPED_RESPONSE_BODY = out.toByteArray(); + }catch (Exception e){ + e.printStackTrace(); + } + } @Rule public MockitoRule mockitoRule = MockitoJUnit.rule(); @@ -226,16 +239,16 @@ public void shouldPerformPostRequestWithContentTypeParameter() throws IOExceptio } @Test - public void shouldPerformPostRequestWithEncodingParameter() throws IOException { - final String encoding = "ISO-8859-1"; + public void shouldPerformPostRequestWithCharsetParameter() throws IOException { + final String charset = "ISO-8859-1"; final String header = "Accept-Charset"; - final StringValuePattern value = WireMock.equalTo(encoding); + final StringValuePattern value = WireMock.equalTo(charset); try { shouldPerformRequest(REQUEST_BODY, HttpOpener.Method.POST, (o, u) -> { o.setMethod(HttpOpener.Method.POST); o.setUrl(u); - o.setEncoding(encoding); + o.setAcceptCharset(charset); }, s -> s.withHeader(header, value), q -> q.withHeader(header, value), null); } catch (final ComparisonFailure e) { @@ -243,23 +256,6 @@ public void shouldPerformPostRequestWithEncodingParameter() throws IOException { } } - @Test - public void shouldPerformPostRequestWithEncodingParameterAndContentEncodingResponseHeader() throws IOException { - final String encoding = "ISO-8859-1"; - final String header = "Accept-Charset"; - final StringValuePattern value = WireMock.equalTo(encoding); - - shouldPerformRequest(REQUEST_BODY, HttpOpener.Method.POST, (o, u) -> { - o.setMethod(HttpOpener.Method.POST); - o.setUrl(u); - o.setEncoding(encoding); - }, - s -> s.withHeader(header, value), - q -> q.withHeader(header, value), - r -> r.withHeader("Content-Encoding", "UTF-8") - ); - } - @Test public void shouldPerformGetRequestWithErrorResponse() throws IOException { shouldPerformRequest(TEST_URL, HttpOpener.Method.GET, (o, u) -> {}, @@ -278,6 +274,14 @@ public void shouldPerformGetRequestWithErrorResponseAndWithoutErrorPrefixParamet null, null, WireMock.badRequest().withBody(RESPONSE_BODY), RESPONSE_BODY); } + @Test + public void shouldPerformGetRequestWithGzipedContentEncoding() throws IOException { + shouldPerformRequest(TEST_URL, HttpOpener.Method.GET, (o, u) -> o.setAcceptContentEncoding("gzip"), + null, null, + WireMock.ok().withBody(GZIPPED_RESPONSE_BODY).withHeaders(new HttpHeaders(new HttpHeader(HttpOpener.ENCODING_HEADER,"gzip"))), + RESPONSE_BODY); + } + private void shouldPerformRequest(final String input, final HttpOpener.Method method, final BiConsumer consumer, final String... headers) throws IOException { shouldPerformRequest(input, method, consumer, s -> Arrays.stream(headers).forEach(h -> s.withHeader(h, TEST_VALUE)), @@ -289,7 +293,6 @@ private void shouldPerformRequest(final String input, final HttpOpener.Method me if (responseConsumer != null) { responseConsumer.accept(response); } - shouldPerformRequest(input, method, consumer, stubConsumer, requestConsumer, response, method.getResponseHasBody() ? RESPONSE_BODY : ""); From c55cbe047937500fcad7aae8a3c0444241663e64 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:08:37 +0100 Subject: [PATCH 2/8] Update HttpOpener Flux command description. (#513) --- metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index b27368ee8..4d4a9e0e3 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -47,7 +47,7 @@ * @author Jens Wille * @author Pascal Christoph (dr0i) */ -@Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset`, `Accept-Encoding` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header = `*/*`, `Accept-Charset` header (`encoding`) = `UTF-8`, `errorPrefix` = `ERROR: `.") +@Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset`, `Accept-Encoding`, `Content-Encoding` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header (`accept`) = `*/*`, `Accept-Charset` header (`acceptcharset`) = `UTF-8`, `errorprefix` = `ERROR: `.") @In(String.class) @Out(Reader.class) @FluxCommand("open-http") From 8a18b2acd361f5ce1d9c1fae90d5e4752a5e3411 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:12:01 +0100 Subject: [PATCH 3/8] Update HttpOpener `Content-Encoding` header constant name and JavaDoc. (#513) Also, group constants. --- .../java/org/metafacture/io/HttpOpener.java | 26 ++++++++++++------- .../org/metafacture/io/HttpOpenerTest.java | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index 4d4a9e0e3..1f10e2405 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -53,24 +53,30 @@ @FluxCommand("open-http") public final class HttpOpener extends DefaultObjectPipe> { - public static final String ACCEPT_DEFAULT = "*/*"; public static final String ACCEPT_HEADER = "accept"; - public static final String CONTENT_TYPE_HEADER = "content-type"; + public static final String ACCEPT_CHARSET_HEADER = "accept-charset"; public static final String ACCEPT_ENCODING_HEADER = "accept-encoding"; - public static final String ENCODING_HEADER = "content-encoding"; - public static final String DEFAULT_PREFIX = "ERROR: "; + public static final String CONTENT_ENCODING_HEADER = "content-encoding"; + public static final String CONTENT_TYPE_HEADER = "content-type"; + + public static final String ACCEPT_DEFAULT = "*/*"; public static final String CHARSET_DEFAULT = "UTF-8"; - public static final String ACCEPT_CHARSET_HEADER = "accept-charset"; + public static final String DEFAULT_PREFIX = "ERROR: "; + public static final String HEADER_FIELD_SEPARATOR = "\n"; + public static final String HEADER_VALUE_SEPARATOR = ":"; public static final String INPUT_DESIGNATOR = "@-"; + public static final String DEFAULT_METHOD_NAME = "GET"; public static final Method DEFAULT_METHOD = Method.valueOf(DEFAULT_METHOD_NAME); - public static final String HEADER_FIELD_SEPARATOR = "\n"; - public static final String HEADER_VALUE_SEPARATOR = ":"; + private static final Pattern HEADER_FIELD_SEPARATOR_PATTERN = Pattern.compile(HEADER_FIELD_SEPARATOR); private static final Pattern HEADER_VALUE_SEPARATOR_PATTERN = Pattern.compile(HEADER_VALUE_SEPARATOR); + private static final int ALLOWED_REDIRECTIONS = 3; private static final int CONNECTION_TIMEOUT = 11000; + private final Map headers = new HashMap<>(); + private Method method; private String body; private String errorPrefix; @@ -199,8 +205,8 @@ public void setAcceptContentEncoding(final String contentEncoding) { } /** - * Sets the HTTP {@value ENCODING_HEADER} header value. This is the - * content encoding for the HTTP GET. It enables HTTP compression. + * Sets the HTTP {@value CONTENT_ENCODING_HEADER} header value. This is the + * content encoding for the HTTP request. It enables HTTP compression. * Allowed values are "gzip". * The default for the content encoding is null, which means "no compression". * @@ -208,7 +214,7 @@ public void setAcceptContentEncoding(final String contentEncoding) { * header */ public void setContentEncoding(final String contentEncoding) { - setHeader(ENCODING_HEADER, contentEncoding); + setHeader(CONTENT_ENCODING_HEADER, contentEncoding); } /** diff --git a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java index a477d7084..7499511c6 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java @@ -278,7 +278,7 @@ public void shouldPerformGetRequestWithErrorResponseAndWithoutErrorPrefixParamet public void shouldPerformGetRequestWithGzipedContentEncoding() throws IOException { shouldPerformRequest(TEST_URL, HttpOpener.Method.GET, (o, u) -> o.setAcceptContentEncoding("gzip"), null, null, - WireMock.ok().withBody(GZIPPED_RESPONSE_BODY).withHeaders(new HttpHeaders(new HttpHeader(HttpOpener.ENCODING_HEADER,"gzip"))), + WireMock.ok().withBody(GZIPPED_RESPONSE_BODY).withHeaders(new HttpHeaders(new HttpHeader(HttpOpener.CONTENT_ENCODING_HEADER,"gzip"))), RESPONSE_BODY); } From 401babb22049e38a30ed6404ae2ab055e09947a1 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:16:21 +0100 Subject: [PATCH 4/8] Update HttpOpener `Accept-Encoding` header setter. (#513) --- .../src/main/java/org/metafacture/io/HttpOpener.java | 6 +++--- .../src/test/java/org/metafacture/io/HttpOpenerTest.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index 1f10e2405..030c35bb0 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -197,11 +197,11 @@ public void setEncoding(final String charset) { * Allowed values are i.a. "gzip" and "Brotli". * The default for the content encoding is null, which means "no compression". * - * @param contentEncoding name of content encoding used for the accept-encoding HTTP + * @param acceptEncoding name of content encoding used for the accept-encoding HTTP * header */ - public void setAcceptContentEncoding(final String contentEncoding) { - setHeader(ACCEPT_ENCODING_HEADER, contentEncoding); + public void setAcceptEncoding(final String acceptEncoding) { + setHeader(ACCEPT_ENCODING_HEADER, acceptEncoding); } /** diff --git a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java index 7499511c6..8f1e95208 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java @@ -276,7 +276,7 @@ public void shouldPerformGetRequestWithErrorResponseAndWithoutErrorPrefixParamet @Test public void shouldPerformGetRequestWithGzipedContentEncoding() throws IOException { - shouldPerformRequest(TEST_URL, HttpOpener.Method.GET, (o, u) -> o.setAcceptContentEncoding("gzip"), + shouldPerformRequest(TEST_URL, HttpOpener.Method.GET, (o, u) -> o.setAcceptEncoding("gzip"), null, null, WireMock.ok().withBody(GZIPPED_RESPONSE_BODY).withHeaders(new HttpHeaders(new HttpHeader(HttpOpener.CONTENT_ENCODING_HEADER,"gzip"))), RESPONSE_BODY); From 2ef2d3aed503e84ee20f576230d8a517e7bc1161 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:34:49 +0100 Subject: [PATCH 5/8] Extract HttpOpener response charset from `Content-Type` header. (#513) The `Accept-Charset` request header value is generally not suitable as a charset name. Should be applied to compressed content as well. --- .../java/org/metafacture/io/HttpOpener.java | 29 +++++++++++++++++-- .../org/metafacture/io/HttpOpenerTest.java | 19 ++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index 030c35bb0..1a426e08d 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -65,12 +65,16 @@ public final class HttpOpener extends DefaultObjectPipe but was:"); + } + + private void shouldPerformPostRequestWithCharsetParameter(final String expectedMessage) throws IOException { final String charset = "ISO-8859-1"; final String header = "Accept-Charset"; final StringValuePattern value = WireMock.equalTo(charset); + String actualMessage; try { shouldPerformRequest(REQUEST_BODY, HttpOpener.Method.POST, (o, u) -> { o.setMethod(HttpOpener.Method.POST); o.setUrl(u); o.setAcceptCharset(charset); - }, s -> s.withHeader(header, value), q -> q.withHeader(header, value), null); + }, s -> s.withHeader(header, value), q -> q.withHeader(header, value), expectedMessage != null ? + r -> r.withHeader(HttpOpener.CONTENT_TYPE_HEADER, "text/plain; charset=" + charset) : null); + + actualMessage = null; } catch (final ComparisonFailure e) { - Assert.assertEquals("expected: but was:", e.getMessage()); + actualMessage = e.getMessage(); } + + Assert.assertEquals(expectedMessage, actualMessage); } @Test From 0aafe0596459658cb861797a0dc5f3459a8f32ae Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:40:57 +0100 Subject: [PATCH 6/8] Clean up HttpOpener test. (#513) --- .../org/metafacture/io/HttpOpenerTest.java | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java index d6b2a460f..245ba4d11 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/HttpOpenerTest.java @@ -16,6 +16,9 @@ package org.metafacture.io; +import org.metafacture.commons.ResourceUtil; +import org.metafacture.framework.ObjectReceiver; + import com.github.tomakehurst.wiremock.client.MappingBuilder; import com.github.tomakehurst.wiremock.client.ResponseDefinitionBuilder; import com.github.tomakehurst.wiremock.client.WireMock; @@ -31,8 +34,6 @@ import org.junit.ComparisonFailure; import org.junit.Rule; import org.junit.Test; -import org.metafacture.commons.ResourceUtil; -import org.metafacture.framework.ObjectReceiver; import org.mockito.ArgumentCaptor; import org.mockito.Captor; import org.mockito.Mock; @@ -40,7 +41,9 @@ import org.mockito.junit.MockitoJUnit; import org.mockito.junit.MockitoRule; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.Reader; import java.util.Arrays; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -63,15 +66,17 @@ public final class HttpOpenerTest { private static final String REQUEST_BODY = "request body"; private static final String RESPONSE_BODY = "response bödy"; // UTF-8 + private static byte[] GZIPPED_RESPONSE_BODY; static { - try { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - GZIPOutputStream gzip = new GZIPOutputStream(out); + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + final GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(RESPONSE_BODY.getBytes("UTF-8")); gzip.close(); + GZIPPED_RESPONSE_BODY = out.toByteArray(); - }catch (Exception e){ + } + catch (final IOException e) { e.printStackTrace(); } } @@ -290,11 +295,9 @@ public void shouldPerformGetRequestWithErrorResponseAndWithoutErrorPrefixParamet } @Test - public void shouldPerformGetRequestWithGzipedContentEncoding() throws IOException { + public void shouldPerformGetRequestWithGzippedContentEncoding() throws IOException { shouldPerformRequest(TEST_URL, HttpOpener.Method.GET, (o, u) -> o.setAcceptEncoding("gzip"), - null, null, - WireMock.ok().withBody(GZIPPED_RESPONSE_BODY).withHeaders(new HttpHeaders(new HttpHeader(HttpOpener.CONTENT_ENCODING_HEADER,"gzip"))), - RESPONSE_BODY); + null, null, WireMock.ok().withBody(GZIPPED_RESPONSE_BODY).withHeader(HttpOpener.CONTENT_ENCODING_HEADER, "gzip"), RESPONSE_BODY); } private void shouldPerformRequest(final String input, final HttpOpener.Method method, final BiConsumer consumer, final String... headers) throws IOException { @@ -308,6 +311,7 @@ private void shouldPerformRequest(final String input, final HttpOpener.Method me if (responseConsumer != null) { responseConsumer.accept(response); } + shouldPerformRequest(input, method, consumer, stubConsumer, requestConsumer, response, method.getResponseHasBody() ? RESPONSE_BODY : ""); From 0bddba19ff199acb58bee89f706291179258389c Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:53:13 +0100 Subject: [PATCH 7/8] Group HttpOpener body setter together with URL setter. (#513) --- .../java/org/metafacture/io/HttpOpener.java | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index 1a426e08d..4e2fd52c5 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -148,23 +148,6 @@ public void setAccept(final String accept) { setHeader(ACCEPT_HEADER, accept); } - /** - * Sets the HTTP request body. The default value for the request body is - * {@value INPUT_DESIGNATOR} if the {@link #setMethod(Method) request - * method} accepts a request body, which means it will use the {@link - * #process(String) input data} data as request body if the input has - * not already been used; otherwise, no request body will be set by - * default. - * - *

If a request body has been set, but the request method does not - * accept a body, the method may be changed to {@code POST}. - * - * @param body the request body - */ - public void setBody(final String body) { - this.body = body; - } - /** * Sets the HTTP {@value CONTENT_TYPE_HEADER} header value. This is a * MIME type such as {@code text/plain} or {@code application/json}. @@ -284,6 +267,23 @@ public void setUrl(final String url) { this.url = url; } + /** + * Sets the HTTP request body. The default value for the request body is + * {@value INPUT_DESIGNATOR} if the {@link #setMethod(Method) request + * method} accepts a request body, which means it will use the {@link + * #process(String) input data} data as request body if the input has + * not already been used; otherwise, no request body will be set by + * default. + * + *

If a request body has been set, but the request method does not + * accept a body, the method may be changed to {@code POST}. + * + * @param body the request body + */ + public void setBody(final String body) { + this.body = body; + } + @Override public void process(final String input) { try { From 8621e7c0d4d53005be75f87c850a94931dc668de Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 20 Dec 2023 15:57:06 +0100 Subject: [PATCH 8/8] Refactor HttpOpener connection handling. (#513) Apply connect timeout to all requests. --- .../java/org/metafacture/io/HttpOpener.java | 117 ++++++++---------- 1 file changed, 51 insertions(+), 66 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java index 4e2fd52c5..adf5de307 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java @@ -290,14 +290,17 @@ public void process(final String input) { final String requestUrl = getInput(input, url); final String requestBody = getInput(input, body == null && method.getRequestHasBody() ? INPUT_DESIGNATOR : body); - Reader reader = null; - if (requestBody != null) { - reader = doPostOrPut(requestBody, new URL(requestUrl)); - } - else { - reader = doGet(requestUrl); - } - getReceiver().process(reader); + + final URL urlToOpen = new URL(requestUrl); + final HttpURLConnection connection = requestBody != null ? + doOutput(urlToOpen, requestBody) : doRedirects(urlToOpen); + + final InputStream inputStream = getInputStream(connection); + final String charset = getContentCharset(connection); + + getReceiver().process(new InputStreamReader( + "gzip".equalsIgnoreCase(connection.getContentEncoding()) ? + new GZIPInputStream(inputStream) : inputStream, charset)); } catch (final IOException e) { throw new MetafactureException(e); @@ -307,32 +310,6 @@ public void process(final String input) { } } - private Reader doPostOrPut(final String requestBody, final URL urlToOpen) throws IOException { - final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); - connection.setDoOutput(true); - connection.setRequestMethod(method.name()); - headers.forEach(connection::setRequestProperty); - connection.getOutputStream().write(requestBody.getBytes()); - final InputStream inputStream = getInputStream(connection); - return new InputStreamReader(inputStream, getContentCharset(connection)); - } - - private Reader doGet(final String requestUrl) throws IOException { - final Reader reader; - final HttpURLConnection connection; - connection = followRedirects(new URL(requestUrl)); - final InputStream inputStream = getInputStream(connection); - - if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) { - final GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream); - reader = new InputStreamReader(gzipInputStream, getContentCharset(connection)); - } - else { - reader = new InputStreamReader(inputStream, getContentCharset(connection)); - } - return reader; - } - private String getInput(final String input, final String value) { final String result; @@ -350,6 +327,46 @@ else if (inputUsed) { return result; } + private HttpURLConnection doOutput(final URL urlToOpen, final String requestBody) throws IOException { + final HttpURLConnection connection = openConnection(urlToOpen); + + connection.setDoOutput(true); + connection.getOutputStream().write(requestBody.getBytes()); + + return connection; + } + + private HttpURLConnection doRedirects(final URL startingUrl) throws IOException { + URL urlToFollow = startingUrl; + + for (int i = 0; i < ALLOWED_REDIRECTIONS; ++i) { + final HttpURLConnection connection = openConnection(urlToFollow); + connection.setInstanceFollowRedirects(false); // Make the logic below easier to detect redirections + + switch (connection.getResponseCode()) { + case HttpURLConnection.HTTP_MOVED_PERM: + case HttpURLConnection.HTTP_MOVED_TEMP: + final String location = URLDecoder.decode(connection.getHeaderField("Location"), "UTF-8"); + urlToFollow = new URL(urlToFollow, location); // Deal with relative URLs + break; + default: + return connection; + } + } + + throw new IOException("Too many redirects"); + } + + private HttpURLConnection openConnection(final URL urlToOpen) throws IOException { + final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); + + connection.setRequestMethod(method.name()); + connection.setConnectTimeout(CONNECTION_TIMEOUT); + headers.forEach(connection::setRequestProperty); + + return connection; + } + private InputStream getInputStream(final HttpURLConnection connection) throws IOException { try { return connection.getInputStream(); @@ -394,36 +411,4 @@ private String getContentCharset(final HttpURLConnection connection) { return CHARSET_DEFAULT; } - private HttpURLConnection followRedirects(final URL startingUrl) throws IOException { - int times = 0; - HttpURLConnection conn; - URL urlToFollow = startingUrl; - while (true) { - times = times + 1; - - if (times > ALLOWED_REDIRECTIONS) { - throw new IOException("Stuck in redirect loop"); - } - - conn = (HttpURLConnection) urlToFollow.openConnection(); - headers.forEach(conn::setRequestProperty); - conn.setRequestMethod(method.name()); - conn.setConnectTimeout(CONNECTION_TIMEOUT); - conn.setInstanceFollowRedirects(false); // Make the logic below easier to detect redirections - - switch (conn.getResponseCode()) { - case HttpURLConnection.HTTP_MOVED_PERM: - case HttpURLConnection.HTTP_MOVED_TEMP: - String location = conn.getHeaderField("Location"); - location = URLDecoder.decode(location, "UTF-8"); - urlToFollow = new URL(urlToFollow, location); // Deal with relative URLs - continue; - default: - break; - } - break; - } - return conn; - } - }