diff --git a/build.gradle b/build.gradle index 6928ab50d..43fd477ab 100644 --- a/build.gradle +++ b/build.gradle @@ -28,6 +28,7 @@ subprojects { versions = [ 'assertj_core': '3.11.1', 'commons_compress': '1.21', + 'guava': '29.0-jre', 'jackson_databind': '2.15.1', 'junit': '4.12', 'mockito': '2.5.7', diff --git a/metamorph/build.gradle b/metamorph/build.gradle index db8c80532..3a3e329bc 100644 --- a/metamorph/build.gradle +++ b/metamorph/build.gradle @@ -25,6 +25,7 @@ dependencies { implementation project(':metafacture-io') implementation project(':metafacture-mangling') implementation project(':metafacture-javaintegration') + implementation "com.google.guava:guava:${versions.guava}" implementation "org.slf4j:slf4j-api:${versions.slf4j}" testRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}" testImplementation "junit:junit:${versions.junit}" diff --git a/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java b/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java index 90debe481..2117faef5 100644 --- a/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java +++ b/metamorph/src/main/java/org/metafacture/metamorph/functions/URLEncode.java @@ -1,5 +1,5 @@ /* - * Copyright 2013, 2014 Deutsche Nationalbibliothek + * Copyright 2013, 2023 Deutsche Nationalbibliothek et al * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. @@ -16,19 +16,24 @@ package org.metafacture.metamorph.functions; -import org.metafacture.metamorph.api.MorphExecutionException; import org.metafacture.metamorph.api.helpers.AbstractSimpleStatelessFunction; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; +import com.google.common.net.PercentEscaper; /** * URL encodes the received value. + * Default is to convert a whitespace " " to a plus sign "+". This can be set so that a whitespace " " is escaped to + * "%20". 
+ * Safe characters for this escaper are the ranges 0..9, a..z and A..Z. These are always safe and should not be + * specified. * * @author Markus Michael Geipel - * + * @author Pascal Christoph (dr0i) */ public final class URLEncode extends AbstractSimpleStatelessFunction { + private String safeChars = ""; + private Boolean plusForSpace = true; + private PercentEscaper percentEscaper = new PercentEscaper(safeChars, plusForSpace); /** * Creates an instance of {@link URLEncode}. @@ -38,12 +43,29 @@ public URLEncode() { @Override public String process(final String value) { - try { - return URLEncoder.encode(value, "UTF-8"); - } - catch (final UnsupportedEncodingException e) { - throw new MorphExecutionException("urlencode: unsupported encoding UTF-8", e); - } + return percentEscaper.escape(value); + } + + /** + * Sets a URI escaper with the specified safe characters. The ranges 0..9, a..z and A..Z are always safe + * and should not be specified. + * + * @param safeChars the chars which will not be escaped + */ + public void setSafeChars(final String safeChars) { + this.safeChars = safeChars; + percentEscaper = new PercentEscaper(safeChars, plusForSpace); } + /** + * Sets whether a space should be converted into a plus sign "+" or percent escaped as "%20". + *
+ * Default is "true", i.e. to escape the space character as "+".
+ *
+ * @param plusForSpace true if space character " " should be converted into a plus sign "+"
+ */
+ public void setPlusForSpace(final Boolean plusForSpace) {
+ this.plusForSpace = plusForSpace; // NOTE(review): boxed Boolean; a null would presumably fail on unboxing in the next line, after this field is already overwritten - confirm callers never pass null
+ percentEscaper = new PercentEscaper(safeChars, plusForSpace); // rebuild the escaper with the current safe chars so process() uses the new flag
+ }
 }
diff --git a/metamorph/src/main/resources/schemata/metamorph.xsd b/metamorph/src/main/resources/schemata/metamorph.xsd
index 19195c797..e6b00b5c4 100644
--- a/metamorph/src/main/resources/schemata/metamorph.xsd
+++ b/metamorph/src/main/resources/schemata/metamorph.xsd
@@ -977,6 +977,21 @@