diff --git a/build.gradle b/build.gradle index 6928ab50d..43fd477ab 100644 --- a/build.gradle +++ b/build.gradle @@ -28,6 +28,7 @@ subprojects { versions = [ 'assertj_core': '3.11.1', 'commons_compress': '1.21', + 'guava': '29.0-jre', 'jackson_databind': '2.15.1', 'junit': '4.12', 'mockito': '2.5.7', diff --git a/metamorph/build.gradle b/metamorph/build.gradle index db8c80532..3a3e329bc 100644 --- a/metamorph/build.gradle +++ b/metamorph/build.gradle @@ -25,6 +25,7 @@ dependencies { implementation project(':metafacture-io') implementation project(':metafacture-mangling') implementation project(':metafacture-javaintegration') + implementation "com.google.guava:guava:${versions.guava}" implementation "org.slf4j:slf4j-api:${versions.slf4j}" testRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}" testImplementation "junit:junit:${versions.junit}" diff --git a/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java b/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java index 90debe481..2117faef5 100644 --- a/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java +++ b/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java @@ -1,5 +1,5 @@ /* - * Copyright 2013, 2014 Deutsche Nationalbibliothek + * Copyright 2013, 2023 Deutsche Nationalbibliothek et al * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. @@ -16,19 +16,24 @@ package org.metafacture.metamorph.functions; -import org.metafacture.metamorph.api.MorphExecutionException; import org.metafacture.metamorph.api.helpers.AbstractSimpleStatelessFunction; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; +import com.google.common.net.PercentEscaper; /** * URL encodes the received value. + * Default is to convert a whitespace " " to a plus sign "+". This can be set so that a whitespace " " is escaped to + * "%20". 
+ * Safe characters for this escaper are the ranges 0..9, a..z and A..Z. These are always safe and should not be + * specified. Note: unlike java.net.URLEncoder, ".", "-", "*" and "_" are escaped unless listed as safe chars. * * @author Markus Michael Geipel - * + * @author Pascal Christoph (dr0i) */ public final class URLEncode extends AbstractSimpleStatelessFunction { + private String safeChars = ""; + private boolean plusForSpace = true; + private PercentEscaper percentEscaper = new PercentEscaper(safeChars, plusForSpace); /** * Creates an instance of {@link URLEncode}. @@ -38,12 +43,29 @@ public URLEncode() { @Override public String process(final String value) { - try { - return URLEncoder.encode(value, "UTF-8"); - } - catch (final UnsupportedEncodingException e) { - throw new MorphExecutionException("urlencode: unsupported encoding UTF-8", e); - } + return percentEscaper.escape(value); + } + + /** + * Sets a URI escaper with the specified safe characters. The ranges 0..9, a..z and A..Z are always safe + * and should not be specified. + * + * @param safeChars the chars which will not be escaped; must not contain alphanumeric characters + */ + public void setSafeChars(final String safeChars) { + this.safeChars = safeChars; + percentEscaper = new PercentEscaper(safeChars, plusForSpace); } + /** + * Sets if a space should be converted into a plus sign "+" or percent escaped as "%20". + *

+ * Default is "true", i.e. to escape the space character as "+". + * + * @param plusForSpace true if space character " " should be converted into a plus sign "+", false for "%20" + */ + public void setPlusForSpace(final boolean plusForSpace) { + this.plusForSpace = plusForSpace; + percentEscaper = new PercentEscaper(safeChars, plusForSpace); + } } diff --git a/metamorph/src/main/resources/schemata/metamorph.xsd b/metamorph/src/main/resources/schemata/metamorph.xsd index 19195c797..e6b00b5c4 100644 --- a/metamorph/src/main/resources/schemata/metamorph.xsd +++ b/metamorph/src/main/resources/schemata/metamorph.xsd @@ -977,6 +977,21 @@ + + + Chars which will not be escaped. The ranges + 0..9, a..z and A..Z are always safe and should not be + specified. + + + + + + Sets if a space should be converted into a + plus sign "+" or percent escaped as "%20". + + + diff --git a/metamorph/src/test/java/org/metafacture/metamorph/functions/URLEncodeTest.java b/metamorph/src/test/java/org/metafacture/metamorph/functions/URLEncodeTest.java new file mode 100644 index 000000000..583bfedb8 --- /dev/null +++ b/metamorph/src/test/java/org/metafacture/metamorph/functions/URLEncodeTest.java @@ -0,0 +1,68 @@ +/* + * Copyright 2023 hbz + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.metafacture.metamorph.functions; + +import static org.junit.Assert.assertEquals; +import org.junit.Test; + +/** + * Tests {@link URLEncode}. + * + * @author Pascal Christoph (dr0i) + */ + +public final class URLEncodeTest { + + private static final String CAFE_UTF8 = "café"; + private static final String CAFE_ENCODED = "caf%C3%A9"; + private static final String SOME_CHARS = "/&%\\+"; + private static final String SOME_CHARS_ENCODED = "%2F%26%25%5C%2B"; + private static final String WHITESPACE = " "; + private static final String WHITESPACE_AS_PLUS_ENCODED = "+"; + private static final String WHITESPACE_PERCENT_ENCODED = "%20"; + + @Test + public void testUtf8() { + final URLEncode urlEncode = new URLEncode(); + assertEquals(CAFE_ENCODED, urlEncode.process(CAFE_UTF8)); + } + @Test + public void testSomeChars() { + final URLEncode urlEncode = new URLEncode(); + assertEquals(SOME_CHARS_ENCODED, urlEncode.process(SOME_CHARS)); + } + @Test + public void testEscapeSpaceAsPlus() { + final URLEncode urlEncode = new URLEncode(); + assertEquals(WHITESPACE_AS_PLUS_ENCODED, urlEncode.process(WHITESPACE)); + } + + @Test + public void testEscapeSpaceAsPercentEncoded() { + final URLEncode urlEncode = new URLEncode(); + urlEncode.setPlusForSpace(false); + assertEquals(WHITESPACE_PERCENT_ENCODED, urlEncode.process(WHITESPACE)); + } + + @Test + public void testSafeChars() { + final URLEncode urlEncode = new URLEncode(); + urlEncode.setSafeChars(SOME_CHARS); + assertEquals(SOME_CHARS, urlEncode.process(SOME_CHARS)); + } + +}