Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions application/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ dependencies {

implementation 'com.github.ben-manes.caffeine:caffeine:3.2.0'

implementation 'org.kohsuke:github-api:1.327'
implementation 'org.kohsuke:github-api:1.329'

implementation 'org.apache.commons:commons-text:1.14.0'
implementation 'com.apptasticsoftware:rssreader:3.9.3'
implementation 'com.apptasticsoftware:rssreader:3.10.0'

testImplementation 'org.mockito:mockito-core:5.18.0'
testImplementation "org.junit.jupiter:junit-jupiter-api:$junitVersion"
Expand Down
4 changes: 3 additions & 1 deletion application/config.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@
"gradle.org",
"help.gradle.org",
"youtube.com",
"www.youtube.com"
"www.youtube.com",
"cdn.discordapp.com",
"media.discordapp.net"
],
"hostBlacklist": [
"bit.ly",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package org.togetherjava.tjbot.features.moderation.scam;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Objects;
import java.util.StringJoiner;

/**
 * Mutable accumulator for the findings gathered while analysing a message for scam indicators.
 * <p>
 * The boolean flags start out unset and can only be raised through their {@code mark...}
 * methods. The "only contains urls" flag is the exception: it starts out {@code true} and is
 * cleared by {@link #markNonUrlTokenFound()}.
 */
final class AnalyseResults {
    private final Collection<AnalyseUrlResult> urls = new ArrayList<>();
    private boolean pingsEveryone;
    private boolean containsSuspiciousKeyword;
    private boolean containsDollarSign;
    private boolean onlyContainsUrls = true;

    /**
     * Records the analysis result of one url.
     *
     * @param result the per-url result to remember
     */
    void addUrlResult(AnalyseUrlResult result) {
        urls.add(result);
    }

    /**
     * @return whether at least one url result has been added
     */
    boolean hasUrl() {
        return !urls.isEmpty();
    }

    /**
     * @return whether any of the added url results is marked suspicious
     */
    boolean hasSuspiciousUrl() {
        for (AnalyseUrlResult url : urls) {
            if (url.isSuspicious()) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return whether every added url result carries an attachment; also {@code true} if no url
     *         result has been added at all
     */
    boolean areAllUrlsWithAttachments() {
        return urls.stream().noneMatch(url -> url.containedAttachment == null);
    }

    /**
     * @return the attachments of all url results that carry one, in insertion order
     */
    Collection<Attachment> getUrlAttachments() {
        return urls.stream()
            .filter(url -> Objects.nonNull(url.containedAttachment))
            .map(url -> url.containedAttachment)
            .toList();
    }

    boolean pingsEveryone() {
        return pingsEveryone;
    }

    void markPingsEveryone() {
        pingsEveryone = true;
    }

    boolean containsSuspiciousKeyword() {
        return containsSuspiciousKeyword;
    }

    void markContainsSuspiciousKeyword() {
        containsSuspiciousKeyword = true;
    }

    boolean containsDollarSign() {
        return containsDollarSign;
    }

    void markContainsDollarSign() {
        containsDollarSign = true;
    }

    /**
     * @return {@code true} unless {@link #markNonUrlTokenFound()} has been called
     */
    boolean onlyContainsUrls() {
        return onlyContainsUrls;
    }

    /**
     * Clears the "only contains urls" flag.
     */
    void markNonUrlTokenFound() {
        onlyContainsUrls = false;
    }

    @Override
    public String toString() {
        String prefix = AnalyseResults.class.getSimpleName() + "[";
        StringJoiner description = new StringJoiner(", ", prefix, "]");
        description.add("pingsEveryone=" + pingsEveryone);
        description.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword);
        description.add("containsDollarSign=" + containsDollarSign);
        description.add("onlyContainsUrls=" + onlyContainsUrls);
        description.add("urls=" + urls);
        return description.toString();
    }

    /**
     * Mutable findings for a single url: whether it is suspicious and, optionally, the attachment
     * it contains.
     */
    static final class AnalyseUrlResult {
        private boolean isSuspicious;
        // Stays null until an attachment is set for this url
        @Nullable
        private Attachment containedAttachment;

        boolean isSuspicious() {
            return isSuspicious;
        }

        void markSuspicious() {
            isSuspicious = true;
        }

        /**
         * @param containedAttachment the attachment associated with this url
         */
        void setContainedAttachment(Attachment containedAttachment) {
            this.containedAttachment = containedAttachment;
        }

        @Override
        public String toString() {
            String prefix = AnalyseUrlResult.class.getSimpleName() + "[";
            StringJoiner description = new StringJoiner(", ", prefix, "]");
            description.add("isSuspicious=" + isSuspicious);
            description.add("containedAttachment=" + containedAttachment);
            return description.toString();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.togetherjava.tjbot.features.moderation.scam;

import net.dv8tion.jda.api.entities.Message;

import java.util.Locale;
import java.util.Optional;
import java.util.Set;

/**
 * Lightweight description of a file that is attached to, or linked from, a message. It is
 * identified solely by its file name.
 *
 * @param fileName the name of the file, including its extension if it has one
 */
record Attachment(String fileName) {
    // All entries are lower case; extensions are lower-cased before the lookup
    private static final Set<String> IMAGE_EXTENSIONS =
            Set.of("jpg", "jpeg", "png", "gif", "webp", "tiff", "svg", "apng");

    /**
     * Tells whether the file name ends with a known image extension.
     * <p>
     * The extension is compared case-insensitively, so {@code "PHOTO.PNG"} is an image as well.
     *
     * @return whether the file extension is one of the known image extensions; {@code false} if
     *         the file name has no extension
     */
    boolean isImage() {
        return getFileExtension().map(extension -> extension.toLowerCase(Locale.ROOT))
            .map(IMAGE_EXTENSIONS::contains)
            .orElse(false);
    }

    /**
     * Extracts everything after the last {@code '.'} of the file name, with its case untouched.
     *
     * @return the extension without the dot, or empty if the file name contains no dot
     */
    private Optional<String> getFileExtension() {
        int dot = fileName.lastIndexOf('.');
        if (dot == -1) {
            return Optional.empty();
        }
        String extension = fileName.substring(dot + 1);
        return Optional.of(extension);
    }

    /**
     * Creates an instance from a Discord message attachment, using its file name.
     *
     * @param attachment the Discord attachment to convert
     * @return the corresponding attachment description
     */
    static Attachment fromDiscord(Message.Attachment attachment) {
        return new Attachment(attachment.getFileName());
    }

    /**
     * Creates an instance from the path of a url, using the part after the last {@code '/'} as
     * file name.
     *
     * @param urlPath the path to extract the file name from, e.g. {@code "/a/b/image.png"}
     * @return the corresponding attachment description; its file name is empty if the path
     *         contains no {@code '/'}
     */
    static Attachment fromUrlPath(String urlPath) {
        int fileNameStart = urlPath.lastIndexOf('/');
        String fileName = fileNameStart == -1 ? "" : urlPath.substring(fileNameStart + 1);
        return new Attachment(fileName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,9 @@

import org.togetherjava.tjbot.config.Config;
import org.togetherjava.tjbot.config.ScamBlockerConfig;
import org.togetherjava.tjbot.features.utils.StringDistances;

import java.net.URI;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Stream;
Expand All @@ -28,6 +24,7 @@ public final class ScamDetector {
private final ScamBlockerConfig config;
private final Predicate<String> isSuspiciousAttachmentName;
private final Predicate<String> hasTrustedRole;
private final TokenAnalyse tokenAnalyse;

/**
* Creates a new instance with the given configuration
Expand All @@ -42,6 +39,8 @@ public ScamDetector(Config config) {
.asMatchPredicate();
hasTrustedRole =
Pattern.compile(this.config.getTrustedUserRolePattern()).asMatchPredicate();

tokenAnalyse = new TokenAnalyse(this.config);
}

/**
Expand All @@ -59,10 +58,11 @@ public boolean isScam(Message message) {
}

String content = message.getContentDisplay();
List<Message.Attachment> attachments = message.getAttachments();
List<Attachment> attachments =
message.getAttachments().stream().map(Attachment::fromDiscord).toList();

if (content.isBlank()) {
return areAttachmentsSuspicious(attachments);
return areAttachmentsScam(attachments);
}

return isScam(content);
Expand All @@ -76,158 +76,36 @@ public boolean isScam(Message message) {
*/
public boolean isScam(CharSequence message) {
AnalyseResults results = new AnalyseResults();
TOKENIZER.splitAsStream(message).forEach(token -> analyzeToken(token, results));
TOKENIZER.splitAsStream(message).forEach(token -> tokenAnalyse.analyze(token, results));
return isScam(results);
}

private boolean isScam(AnalyseResults results) {
if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl
|| results.containsDollarSign)) {
if (results.pingsEveryone() && (results.containsSuspiciousKeyword() || results.hasUrl()
|| results.containsDollarSign())) {
return true;
}

return Stream
.of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl,
results.containsDollarSign)
boolean hasTooManySuspiciousFlags = Stream
.of(results.containsSuspiciousKeyword(), results.hasSuspiciousUrl(),
results.containsDollarSign())
.filter(flag -> flag)
.count() >= 2;
}

/**
 * Inspects a single token of a message and raises the matching flags on the given results.
 * <p>
 * Blank tokens are ignored. A flag that is already set is never re-evaluated. Tokens starting
 * with {@code "http"} are additionally analysed as url.
 *
 * @param token the token to inspect
 * @param results the results to raise flags on
 */
private void analyzeToken(String token, AnalyseResults results) {
    if (token.isBlank()) {
        return;
    }

    boolean isMassPing = "@everyone".equalsIgnoreCase(token) || "@here".equalsIgnoreCase(token);
    results.pingsEveryone = results.pingsEveryone || isMassPing;

    // Short-circuits, the keyword lookup only runs while the flag is still unset
    results.containsSuspiciousKeyword =
            results.containsSuspiciousKeyword || containsSuspiciousKeyword(token);

    boolean mentionsDollar = token.contains("$") || "usd".equalsIgnoreCase(token);
    results.containsDollarSign = results.containsDollarSign || mentionsDollar;

    boolean looksLikeUrl = token.startsWith("http");
    if (looksLikeUrl) {
        analyzeUrl(token, results);
    }
}

private void analyzeUrl(String url, AnalyseResults results) {
String host;
try {
host = URI.create(url).getHost();
} catch (IllegalArgumentException _) {
// Invalid urls are not scam
return;
}

if (host == null) {
return;
}

results.hasUrl = true;

if (config.getHostWhitelist().contains(host)) {
return;
}

if (config.getHostBlacklist().contains(host)) {
results.hasSuspiciousUrl = true;
return;
}

for (String keyword : config.getSuspiciousHostKeywords()) {
if (isHostSimilarToKeyword(host, keyword)) {
results.hasSuspiciousUrl = true;
break;
}
if (hasTooManySuspiciousFlags) {
return true;
}
}

private boolean containsSuspiciousKeyword(String token) {
String preparedToken = token.toLowerCase(Locale.US);

return config.getSuspiciousKeywords()
.stream()
.map(keyword -> keyword.toLowerCase(Locale.US))
.anyMatch(keyword -> {
// Exact match "^foo$"
if (startsWith(keyword, '^') && endsWith(keyword, '$')) {
return preparedToken.equals(keyword.substring(1, keyword.length() - 1));
}
// Simple regex-inspired syntax "^foo"
if (startsWith(keyword, '^')) {
return preparedToken.startsWith(keyword.substring(1));
}
// Simple regex-inspired syntax "foo$"
if (endsWith(keyword, '$')) {
return preparedToken.endsWith(keyword.substring(0, keyword.length() - 1));
}
return preparedToken.contains(keyword);
});
return results.onlyContainsUrls() && results.areAllUrlsWithAttachments()
&& areAttachmentsScam(results.getUrlAttachments());
}

private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
private boolean areAttachmentsScam(Collection<Attachment> attachments) {
long suspiciousAttachments =
attachments.stream().filter(this::isAttachmentSuspicious).count();
return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
}

private boolean isAttachmentSuspicious(Message.Attachment attachment) {
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
}

/**
 * Checks whether any keyword-sized substring of the host is within the configured edit distance
 * of the keyword.
 *
 * @param host the host to search in
 * @param keyword the keyword to compare against
 * @return whether a similar window was found; {@code false} if the keyword is longer than the
 *         host
 */
private boolean isHostSimilarToKeyword(String host, String keyword) {
    // NOTE This algorithm is far from optimal.
    // It is good enough for our purpose though and not that complex.

    // Slide a keyword-sized window over the host, one character at a time.
    // The host is similar as soon as one window is close enough to the keyword.
    int windowSize = keyword.length();
    int lastWindowStart = host.length() - windowSize;

    for (int start = 0; start <= lastWindowStart; start++) {
        String candidate = host.substring(start, start + windowSize);
        int distance = StringDistances.editDistance(keyword, candidate);

        if (distance <= config.getIsHostSimilarToKeywordDistanceThreshold()) {
            return true;
        }
    }

    return false;
}

/**
 * Checks whether the first character of the text is the given character.
 *
 * @param text the text to inspect
 * @param prefixToTest the character expected at the start
 * @return whether the text is non-empty and starts with the character
 */
private static boolean startsWith(CharSequence text, char prefixToTest) {
    if (text.isEmpty()) {
        return false;
    }
    char first = text.charAt(0);
    return first == prefixToTest;
}

/**
 * Checks whether the last character of the text is the given character.
 *
 * @param text the text to inspect
 * @param suffixToTest the character expected at the end
 * @return whether the text is non-empty and ends with the character
 */
private static boolean endsWith(CharSequence text, char suffixToTest) {
    if (text.isEmpty()) {
        return false;
    }
    char last = text.charAt(text.length() - 1);
    return last == suffixToTest;
}

private static class AnalyseResults {
private boolean pingsEveryone;
private boolean containsSuspiciousKeyword;
private boolean containsDollarSign;
private boolean hasUrl;
private boolean hasSuspiciousUrl;

@Override
public String toString() {
return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
.add("pingsEveryone=" + pingsEveryone)
.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
.add("containsDollarSign=" + containsDollarSign)
.add("hasUrl=" + hasUrl)
.add("hasSuspiciousUrl=" + hasSuspiciousUrl)
.toString();
}
private boolean isAttachmentSuspicious(Attachment attachment) {
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.fileName());
}
}
Loading
Loading