From cecd164c359823d934d2e2bedc9e29f13a6a5104 Mon Sep 17 00:00:00 2001 From: Zabuzard Date: Wed, 30 Nov 2022 18:21:43 +0100 Subject: [PATCH 1/4] Ensure that link preview failure does not go silent and stuck --- .../tjbot/commands/utils/LinkPreviews.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java index 502d3db61b..e1d9db4c74 100644 --- a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java +++ b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java @@ -17,10 +17,12 @@ import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; +import java.time.Duration; import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import java.util.function.Predicate; import java.util.stream.IntStream; @@ -34,7 +36,8 @@ public final class LinkPreviews { private static final String IMAGE_CONTENT_TYPE_PREFIX = "image"; private static final String IMAGE_META_NAME = "image"; - private static final HttpClient CLIENT = HttpClient.newHttpClient(); + private static final HttpClient CLIENT = + HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(); private LinkPreviews() { throw new UnsupportedOperationException("Utility class"); @@ -75,7 +78,10 @@ public static CompletableFuture> createLinkPreviews(List extractResults(tasks)); + return allDoneTask.thenApply(any -> extractResults(tasks)).exceptionally(e -> { + logger.error("Unknown error during link preview creation", e); + return List.of(); + }); } private static List extractResults( @@ -103,6 +109,9 @@ private static CompletableFuture> createLinkPreview(String return parseWebsite(link, attachmentName, content.dataStream); } return noResult(); + }).orTimeout(10, TimeUnit.SECONDS).exceptionally(e -> { + logger.warn("Failed to create link preview for {}", link, e); + return Optional.empty(); }); } From db4a17c515d0fe2404c24b67c836f76ee18384e1 Mon Sep 17 00:00:00 2001 From: Zabuzard Date: Wed, 30 Nov 2022 18:53:59 +0100 Subject: [PATCH 2/4] Fixed some issues with false positive URL matches --- .../tjbot/commands/utils/LinkPreviews.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java index e1d9db4c74..ae1b143aa4 100644 --- a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java +++ b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java @@ -52,10 +52,22 @@ private LinkPreviews() { public static List extractLinks(String content) { return new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect() .stream() + .filter(LinkPreviews::considerUrl) .map(Url::getFullUrl) .toList(); } + private static boolean considerUrl(Url url) { + String raw = url.getOriginalUrl(); + if (raw.contains(">")) { + // URL escapes, such as "" should be skipped + return false; + } + // Not interested in other schemes, also to filter out matches without scheme. + // It detects a lot of such false-positives in Java snippets + return raw.startsWith("http"); + } + /** * Attempts to create previews of all given links. *

@@ -151,7 +163,8 @@ private static CompletableFuture> parseWebsite(String link try { doc = Jsoup.parse(websiteContent, null, link); } catch (IOException e) { - logger.warn("Attempted to create a preview for {}, but the content invalid.", link, e); + logger.warn("Attempted to create a preview for {}, but the content is invalid.", link, + e); return noResult(); } From 1c2a3f037c2054caa78e101d19842517ac122bed Mon Sep 17 00:00:00 2001 From: Zabuzard Date: Thu, 1 Dec 2022 09:21:50 +0100 Subject: [PATCH 3/4] fixed more issues with link preview * removed trailing punctuation * some website use meta name instead of meta property --- .../tjbot/commands/utils/LinkPreviews.java | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java index ae1b143aa4..b2e4fb40f5 100644 --- a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java +++ b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java @@ -52,20 +52,31 @@ private LinkPreviews() { public static List extractLinks(String content) { return new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect() .stream() - .filter(LinkPreviews::considerUrl) - .map(Url::getFullUrl) + .map(LinkPreviews::toLink) + .flatMap(Optional::stream) .toList(); } - private static boolean considerUrl(Url url) { + private static Optional toLink(Url url) { String raw = url.getOriginalUrl(); if (raw.contains(">")) { // URL escapes, such as "" should be skipped - return false; + return Optional.empty(); } // Not interested in other schemes, also to filter out matches without scheme. // It detects a lot of such false-positives in Java snippets - return raw.startsWith("http"); + if (!raw.startsWith("http")) { + return Optional.empty(); + } + + String link = url.getFullUrl(); + + if (link.endsWith(",") || link.endsWith(".")) { + // Remove trailing punctuation + link = link.substring(0, link.length() - 1); + } + + return Optional.of(link); } /** @@ -197,8 +208,12 @@ private static Optional parseOpenGraphTwitterMeta(Document doc, String m @Nullable String fallback) { String value = Optional .ofNullable(doc.selectFirst("meta[property=og:%s]".formatted(metaProperty))) + .or(() -> Optional + .ofNullable(doc.selectFirst("meta[name=og:%s]".formatted(metaProperty)))) .or(() -> Optional .ofNullable(doc.selectFirst("meta[property=twitter:%s]".formatted(metaProperty)))) + .or(() -> Optional + .ofNullable(doc.selectFirst("meta[name=twitter:%s]".formatted(metaProperty)))) .map(element -> element.attr("content")) .orElse(fallback); if (value == null) { @@ -209,6 +224,8 @@ private static Optional parseOpenGraphTwitterMeta(Document doc, String m private static Optional parseOpenGraphMeta(Document doc, String metaProperty) { return Optional.ofNullable(doc.selectFirst("meta[property=og:%s]".formatted(metaProperty))) + .or(() -> Optional + .ofNullable(doc.selectFirst("meta[name=og:%s]".formatted(metaProperty)))) .map(element -> element.attr("content")) .filter(Predicate.not(String::isBlank)); } From 346312d9028a186d3fa6f7b6b8a6787fae954e8c Mon Sep 17 00:00:00 2001 From: Zabuzard Date: Thu, 1 Dec 2022 09:42:21 +0100 Subject: [PATCH 4/4] Code polish --- .../tjbot/commands/utils/LinkPreviews.java | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java index b2e4fb40f5..57261f82bb 100644 --- a/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java +++ b/application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java @@ -185,7 +185,7 @@ private static CompletableFuture> parseWebsite(String link LinkPreview textPreview = LinkPreview.ofText(title, link, description); - String image = parseOpenGraphMeta(doc, IMAGE_META_NAME).orElse(null); + String image = parseOpenGraphTwitterMeta(doc, IMAGE_META_NAME, null).orElse(null); if (image == null) { return result(textPreview); } @@ -206,30 +206,27 @@ private static CompletableFuture> parseWebsite(String link private static Optional parseOpenGraphTwitterMeta(Document doc, String metaProperty, @Nullable String fallback) { - String value = Optional - .ofNullable(doc.selectFirst("meta[property=og:%s]".formatted(metaProperty))) - .or(() -> Optional - .ofNullable(doc.selectFirst("meta[name=og:%s]".formatted(metaProperty)))) - .or(() -> Optional - .ofNullable(doc.selectFirst("meta[property=twitter:%s]".formatted(metaProperty)))) - .or(() -> Optional - .ofNullable(doc.selectFirst("meta[name=twitter:%s]".formatted(metaProperty)))) - .map(element -> element.attr("content")) + String value = parseMetaProperty(doc, "og:" + metaProperty) + .or(() -> parseMetaProperty(doc, "twitter:" + metaProperty)) .orElse(fallback); + if (value == null) { return Optional.empty(); } return value.isBlank() ? Optional.empty() : Optional.of(value); } - private static Optional parseOpenGraphMeta(Document doc, String metaProperty) { - return Optional.ofNullable(doc.selectFirst("meta[property=og:%s]".formatted(metaProperty))) - .or(() -> Optional - .ofNullable(doc.selectFirst("meta[name=og:%s]".formatted(metaProperty)))) - .map(element -> element.attr("content")) + private static Optional parseMetaProperty(Document doc, String metaProperty) { + return selectFirstMetaTag(doc, "property", metaProperty) + .or(() -> selectFirstMetaTag(doc, "name", metaProperty)) .filter(Predicate.not(String::isBlank)); } + private static Optional selectFirstMetaTag(Document doc, String key, String value) { + return Optional.ofNullable(doc.selectFirst("meta[%s=%s]".formatted(key, value))) + .map(element -> element.attr("content")); + } + private static CompletableFuture> noResult() { return CompletableFuture.completedFuture(Optional.empty()); }