Skip to content

Commit c05f237

Browse files
committed
Added scam detector and first version of scam blocker
1 parent 47d6a82 commit c05f237

File tree

7 files changed

+319
-3
lines changed

7 files changed

+319
-3
lines changed

application/config.json.template

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,12 @@
2121
"channelPattern": "tj_suggestions",
2222
"upVoteEmoteName": "peepo_yes",
2323
"downVoteEmoteName": "peepo_no"
24+
},
25+
"scamBlocker": {
26+
"mode": "AUTO_DELETE_BUT_APPROVE_QUARANTINE",
27+
"hostWhitelist": ["discord.com", "discord.gg", "discord.media", "discordapp.com", "discordapp.net", "discordstatus.com"],
28+
"hostBlacklist": ["bit.ly"],
29+
"suspiciousHostKeywords": ["discord", "nitro", "premium"],
30+
"isHostSimilarToKeywordDistanceThreshold": 2
2431
}
2532
}

application/src/main/java/org/togetherjava/tjbot/commands/Features.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.togetherjava.tjbot.commands.free.FreeCommand;
1010
import org.togetherjava.tjbot.commands.mathcommands.TeXCommand;
1111
import org.togetherjava.tjbot.commands.moderation.*;
12+
import org.togetherjava.tjbot.commands.moderation.scam.ScamBlocker;
1213
import org.togetherjava.tjbot.commands.moderation.temp.TemporaryModerationRoutine;
1314
import org.togetherjava.tjbot.commands.system.BotCore;
1415
import org.togetherjava.tjbot.commands.tags.TagCommand;
@@ -65,6 +66,7 @@ public enum Features {
6566
// Message receivers
6667
features.add(new TopHelpersMessageListener(database, config));
6768
features.add(new SuggestionsUpDownVoter(config));
69+
features.add(new ScamBlocker(config));
6870

6971
// Event receivers
7072
features.add(new RejoinMuteListener(actionsStore, config));
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package org.togetherjava.tjbot.commands.moderation.scam;
2+
3+
import net.dv8tion.jda.api.events.message.guild.GuildMessageReceivedEvent;
4+
import org.jetbrains.annotations.NotNull;
5+
import org.slf4j.Logger;
6+
import org.slf4j.LoggerFactory;
7+
import org.togetherjava.tjbot.commands.MessageReceiverAdapter;
8+
import org.togetherjava.tjbot.config.Config;
9+
import org.togetherjava.tjbot.config.ScamBlockerConfig;
10+
11+
import java.util.regex.Pattern;
12+
13+
/**
14+
* Listener that receives all sent messages from channels, checks them for scam and takes
15+
* appropriate action.
16+
* <p>
17+
* If scam is detected, depending on the configuration, the blockers actions range from deleting the
18+
* message and banning the author to just logging the message for auditing.
19+
*/
20+
public final class ScamBlocker extends MessageReceiverAdapter {
21+
private static final Logger logger = LoggerFactory.getLogger(ScamBlocker.class);
22+
23+
private final ScamBlockerConfig.Mode mode;
24+
private final ScamDetector scamDetector;
25+
26+
/**
27+
* Creates a new listener to receive all message sent in any channel.
28+
*
29+
* @param config the config to use for this
30+
*/
31+
public ScamBlocker(@NotNull Config config) {
32+
super(Pattern.compile(".*"));
33+
34+
mode = config.getScamBlocker().getMode();
35+
scamDetector = new ScamDetector(config);
36+
}
37+
38+
@Override
39+
public void onMessageReceived(@NotNull GuildMessageReceivedEvent event) {
40+
if (event.getAuthor().isBot() || event.isWebhookMessage()) {
41+
return;
42+
}
43+
44+
if (mode == ScamBlockerConfig.Mode.OFF) {
45+
return;
46+
}
47+
48+
String content = event.getMessage().getContentDisplay();
49+
if (!scamDetector.isScam(content)) {
50+
return;
51+
}
52+
53+
takeAction(event);
54+
}
55+
56+
private void takeAction(@NotNull GuildMessageReceivedEvent event) {
57+
switch (mode) {
58+
case OFF -> throw new AssertionError(
59+
"The OFF-mode should be detected earlier already to prevent expensive computation");
60+
case ONLY_LOG -> takeActionLogOnly(event);
61+
case APPROVE_FIRST, AUTO_DELETE_BUT_APPROVE_QUARANTINE, AUTO_DELETE_AND_QUARANTINE -> throw new UnsupportedOperationException(
62+
"Mode not supported yet: " + mode);
63+
default -> throw new IllegalArgumentException("Mode not supported: " + mode);
64+
}
65+
}
66+
67+
private static void takeActionLogOnly(@NotNull GuildMessageReceivedEvent event) {
68+
logger.warn("Detected a scam message ('{}') from user '{}' in channel '{}' of guild '{}'.",
69+
event.getMessageId(), event.getAuthor().getId(), event.getChannel().getId(),
70+
event.getGuild().getId());
71+
}
72+
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package org.togetherjava.tjbot.commands.moderation.scam;
2+
3+
import org.jetbrains.annotations.NotNull;
4+
import org.togetherjava.tjbot.commands.utils.StringDistances;
5+
import org.togetherjava.tjbot.config.Config;
6+
import org.togetherjava.tjbot.config.ScamBlockerConfig;
7+
8+
import java.net.URI;
9+
import java.util.regex.Pattern;
10+
11+
public class ScamDetector {
12+
private static final Pattern TOKENIZER = Pattern.compile("[\\s,]");
13+
private final ScamBlockerConfig config;
14+
15+
public ScamDetector(@NotNull Config config) {
16+
this.config = config.getScamBlocker();
17+
}
18+
19+
public boolean isScam(@NotNull CharSequence message) {
20+
AnalyseResults results = new AnalyseResults();
21+
TOKENIZER.splitAsStream(message).forEach(token -> analyzeToken(token, results));
22+
return isScam(results);
23+
}
24+
25+
private boolean isScam(@NotNull AnalyseResults results) {
26+
if (results.pingsEveryone && results.containsNitroKeyword && results.hasUrl) {
27+
return true;
28+
}
29+
return results.containsNitroKeyword && results.hasSuspiciousUrl;
30+
}
31+
32+
private void analyzeToken(@NotNull String token, @NotNull AnalyseResults results) {
33+
if ("@everyone".equalsIgnoreCase(token)) {
34+
results.pingsEveryone = true;
35+
}
36+
if ("nitro".equalsIgnoreCase(token)) {
37+
results.containsNitroKeyword = true;
38+
}
39+
40+
if (token.startsWith("http")) {
41+
analyzeUrl(token, results);
42+
}
43+
}
44+
45+
private void analyzeUrl(@NotNull String url, @NotNull AnalyseResults results) {
46+
String host;
47+
try {
48+
host = URI.create(url).getHost();
49+
} catch (IllegalArgumentException e) {
50+
// Invalid urls are not scam
51+
return;
52+
}
53+
if (host == null) {
54+
return;
55+
}
56+
57+
results.hasUrl = true;
58+
59+
if (config.getHostWhitelist().contains(host)) {
60+
return;
61+
}
62+
63+
if (config.getHostBlacklist().contains(host)) {
64+
results.hasSuspiciousUrl = true;
65+
return;
66+
}
67+
68+
for (String keyword : config.getSuspiciousHostKeywords()) {
69+
if (isHostSimilarToKeyword(host, keyword)) {
70+
results.hasSuspiciousUrl = true;
71+
break;
72+
}
73+
}
74+
}
75+
76+
private boolean isHostSimilarToKeyword(String host, String keyword) {
77+
// NOTE This algorithm is far from optimal.
78+
// It is good enough for our purpose though and not that complex.
79+
80+
// Rolling window of keyword-size over host.
81+
// If any window has a small distance, it is similar
82+
int windowStart = 0;
83+
int windowEnd = keyword.length();
84+
while (windowEnd <= host.length()) {
85+
String window = host.substring(windowStart, windowEnd);
86+
int distance = StringDistances.editDistance(keyword, window);
87+
88+
if (distance <= config.getIsHostSimilarToKeywordDistanceThreshold()) {
89+
return true;
90+
}
91+
92+
windowStart++;
93+
windowEnd++;
94+
}
95+
96+
return false;
97+
}
98+
99+
private static class AnalyseResults {
100+
private boolean pingsEveryone;
101+
private boolean containsNitroKeyword;
102+
private boolean hasUrl;
103+
private boolean hasSuspiciousUrl;
104+
}
105+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/**
2+
* This package offers classes dealing with detecting scam messages and taking appropriate action,
3+
* see {@link org.togetherjava.tjbot.commands.moderation.scam.ScamBlocker} as main entry point.
4+
*/
5+
package org.togetherjava.tjbot.commands.moderation.scam;

application/src/main/java/org/togetherjava/tjbot/config/Config.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ public final class Config {
2727
private final List<FreeCommandConfig> freeCommand;
2828
private final String helpChannelPattern;
2929
private final SuggestionsConfig suggestions;
30+
private final ScamBlockerConfig scamBlocker;
3031

3132
@SuppressWarnings("ConstructorWithTooManyParameters")
3233
@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
@@ -41,7 +42,8 @@ private Config(@JsonProperty("token") String token,
4142
@JsonProperty("tagManageRolePattern") String tagManageRolePattern,
4243
@JsonProperty("freeCommand") List<FreeCommandConfig> freeCommand,
4344
@JsonProperty("helpChannelPattern") String helpChannelPattern,
44-
@JsonProperty("suggestions") SuggestionsConfig suggestions) {
45+
@JsonProperty("suggestions") SuggestionsConfig suggestions,
46+
@JsonProperty("scamBlocker") ScamBlockerConfig scamBlocker) {
4547
this.token = token;
4648
this.databasePath = databasePath;
4749
this.projectWebsite = projectWebsite;
@@ -54,6 +56,7 @@ private Config(@JsonProperty("token") String token,
5456
this.freeCommand = Collections.unmodifiableList(freeCommand);
5557
this.helpChannelPattern = helpChannelPattern;
5658
this.suggestions = suggestions;
59+
this.scamBlocker = scamBlocker;
5760
}
5861

5962
/**
@@ -169,7 +172,7 @@ public String getTagManageRolePattern() {
169172
*
170173
* @return the channel name pattern
171174
*/
172-
public String getHelpChannelPattern() {
175+
public @NotNull String getHelpChannelPattern() {
173176
return helpChannelPattern;
174177
}
175178

@@ -178,7 +181,16 @@ public String getHelpChannelPattern() {
178181
*
179182
* @return the suggestion system config
180183
*/
181-
public SuggestionsConfig getSuggestions() {
184+
public @NotNull SuggestionsConfig getSuggestions() {
182185
return suggestions;
183186
}
187+
188+
/**
189+
* Gets the config for the scam blocker system.
190+
*
191+
* @return the scam blocker system config
192+
*/
193+
public @NotNull ScamBlockerConfig getScamBlocker() {
194+
return scamBlocker;
195+
}
184196
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package org.togetherjava.tjbot.config;
2+
3+
import com.fasterxml.jackson.annotation.JsonCreator;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import com.fasterxml.jackson.annotation.JsonRootName;
6+
import org.jetbrains.annotations.NotNull;
7+
8+
import java.util.Collections;
9+
import java.util.HashSet;
10+
import java.util.Set;
11+
12+
/**
13+
* Configuration for the scam blocker system, see
14+
* {@link org.togetherjava.tjbot.commands.moderation.scam.ScamBlocker}.
15+
*/
16+
@SuppressWarnings("ClassCanBeRecord")
17+
@JsonRootName("scamBlocker")
18+
public final class ScamBlockerConfig {
19+
private final Mode mode;
20+
private final Set<String> hostWhitelist;
21+
private final Set<String> hostBlacklist;
22+
private final Set<String> suspiciousHostKeywords;
23+
private final int isHostSimilarToKeywordDistanceThreshold;
24+
25+
@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
26+
private ScamBlockerConfig(@JsonProperty("mode") Mode mode,
27+
@JsonProperty("hostWhitelist") Set<String> hostWhitelist,
28+
@JsonProperty("hostBlacklist") Set<String> hostBlacklist,
29+
@JsonProperty("suspiciousHostKeywords") Set<String> suspiciousHostKeywords,
30+
@JsonProperty("isHostSimilarToKeywordDistanceThreshold") int isHostSimilarToKeywordDistanceThreshold) {
31+
this.mode = mode;
32+
this.hostWhitelist = new HashSet<>(hostWhitelist);
33+
this.hostBlacklist = new HashSet<>(hostBlacklist);
34+
this.suspiciousHostKeywords = new HashSet<>(suspiciousHostKeywords);
35+
this.isHostSimilarToKeywordDistanceThreshold = isHostSimilarToKeywordDistanceThreshold;
36+
}
37+
38+
/**
39+
* Gets the mode of the scam blocker. Controls which actions it takes when detecting scam.
40+
*
41+
* @return the scam blockers mode
42+
*/
43+
public @NotNull Mode getMode() {
44+
return mode;
45+
}
46+
47+
/**
48+
* Gets the set of trusted hosts. Urls using those hosts are not considered scam.
49+
*
50+
* @return the whitelist of hosts
51+
*/
52+
public @NotNull Set<String> getHostWhitelist() {
53+
return Collections.unmodifiableSet(hostWhitelist);
54+
}
55+
56+
/**
57+
* Gets the set of known scam hosts. Urls using those hosts are considered scam.
58+
*
59+
* @return the blacklist of hosts
60+
*/
61+
public @NotNull Set<String> getHostBlacklist() {
62+
return Collections.unmodifiableSet(hostBlacklist);
63+
}
64+
65+
/**
66+
* Gets the set of keywords that are considered suspicious if they appear in host names. Urls
67+
* using hosts that have those, or similar, keywords in their name, are considered suspicious.
68+
*
69+
* @return the set of suspicious host keywords
70+
*/
71+
public @NotNull Set<String> getSuspiciousHostKeywords() {
72+
return Collections.unmodifiableSet(suspiciousHostKeywords);
73+
}
74+
75+
/**
76+
* Gets the threshold used to determine whether a host is similar to a given keyword. If the
77+
* host contains an infix with an edit distance that is below this threshold, they are
78+
* considered similar.
79+
*
80+
* @return the threshold to determine similarity
81+
*/
82+
public int getIsHostSimilarToKeywordDistanceThreshold() {
83+
return isHostSimilarToKeywordDistanceThreshold;
84+
}
85+
86+
/**
87+
* Mode of a scam blocker. Controls which actions it takes when detecting scam.
88+
*/
89+
public enum Mode {
90+
/**
91+
* The blocker is turned off and will not scan any messages for scam.
92+
*/
93+
OFF,
94+
/**
95+
* The blocker will log any detected scam but will not take action on them.
96+
*/
97+
ONLY_LOG,
98+
/**
99+
* Detected scam will be sent to moderators for review. Any action has to be approved
100+
* explicitly first.
101+
*/
102+
APPROVE_FIRST,
103+
/**
104+
* Detected scam will automatically be deleted. A moderator will be informed for review.
105+
* They can then decide whether the user should be put into quarantine.
106+
*/
107+
AUTO_DELETE_BUT_APPROVE_QUARANTINE,
108+
/**
109+
* The blocker will automatically delete any detected scam and put the user into quarantine.
110+
*/
111+
AUTO_DELETE_AND_QUARANTINE
112+
}
113+
}

0 commit comments

Comments
 (0)