From 64a0b827cef687ff7ab2e3511b3484da1440d67d Mon Sep 17 00:00:00 2001 From: Zabuzard Date: Mon, 21 Jul 2025 10:02:10 +0200 Subject: [PATCH 1/2] added new scam and false positives, fine-tuned, ability to use "foo$" --- application/config.json.template | 17 ++- .../moderation/scam/ScamDetector.java | 20 +++ .../moderation/scam/ScamDetectorTest.java | 137 ++++++++++++++++-- 3 files changed, 156 insertions(+), 18 deletions(-) diff --git a/application/config.json.template b/application/config.json.template index 02835ca9e0..bd3b9be3b9 100644 --- a/application/config.json.template +++ b/application/config.json.template @@ -38,21 +38,27 @@ "trading", "whatsapp", "crypto", - "claim", + "^claim", "teen", "adobe", "hack", "steamcommunity", "freenitro", - "^earn", - ".exe" + "^earn$", + "^earning", + ".exe$" ], "hostWhitelist": [ "discord.com", "discord.media", "discordapp.com", "discordapp.net", - "discordstatus.com" + "discordstatus.com", + "thehackernews.com", + "gradle.org", + "help.gradle.org", + "youtube.com", + "www.youtube.com" ], "hostBlacklist": [ "bit.ly", @@ -75,7 +81,8 @@ "free", "cheat", "crypto", - "tele" + "telegra", + "telety" ], "isHostSimilarToKeywordDistanceThreshold": 2, "suspiciousAttachmentsThreshold": 3, diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java index 035de5ca0c..a1b0ae71dd 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java @@ -10,6 +10,7 @@ import java.util.Collection; import java.util.List; import java.util.Locale; +import java.util.StringJoiner; import java.util.function.Predicate; import java.util.regex.Pattern; import java.util.stream.Stream; @@ -145,6 +146,10 @@ private boolean containsSuspiciousKeyword(String token) { if (startsWith(keyword, '^')) { return preparedToken.startsWith(keyword.substring(1)); } + // Simple regex-inspired syntax "foo$" + if (endsWith(keyword, '$')) { + return preparedToken.endsWith(keyword.substring(0, keyword.length() - 1)); + } return preparedToken.contains(keyword); }); } @@ -186,11 +191,26 @@ private static boolean startsWith(CharSequence text, char prefixToTest) { return !text.isEmpty() && text.charAt(0) == prefixToTest; } + private static boolean endsWith(CharSequence text, char suffixToTest) { + return !text.isEmpty() && text.charAt(text.length() - 1) == suffixToTest; + } + private static class AnalyseResults { private boolean pingsEveryone; private boolean containsSuspiciousKeyword; private boolean containsDollarSign; private boolean hasUrl; private boolean hasSuspiciousUrl; + + @Override + public String toString() { + return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]") + .add("pingsEveryone=" + pingsEveryone) + .add("containsSuspiciousKeyword=" + containsSuspiciousKeyword) + .add("containsDollarSign=" + containsDollarSign) + .add("hasUrl=" + hasUrl) + .add("hasSuspiciousUrl=" + hasSuspiciousUrl) + .toString(); + } } } diff --git a/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java b/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java index 19127401b6..b554ce5b4a 100644 --- a/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java +++ b/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java @@ -32,17 +32,18 @@ void setUp() { ScamBlockerConfig scamConfig = mock(ScamBlockerConfig.class); when(config.getScamBlocker()).thenReturn(scamConfig); - when(scamConfig.getSuspiciousKeywords()) - .thenReturn(Set.of("nitro", "boob", "sexy", "sexi", "esex", "steam", "gift", "onlyfans", - "bitcoin", "btc", "promo", "trader", "trading", "whatsapp", "crypto", "claim", - "teen", "adobe", "hack", "steamcommunity", "freenitro", "^earn", ".exe")); + when(scamConfig.getSuspiciousKeywords()).thenReturn(Set.of("nitro", "boob", "sexy", "sexi", + "esex", "steam", "gift", "onlyfans", "bitcoin", "btc", "promo", "trader", "trading", + "whatsapp", "crypto", "^claim", "teen", "adobe", "hack", "steamcommunity", + "freenitro", "^earn$", "^earning", ".exe$")); when(scamConfig.getHostWhitelist()).thenReturn(Set.of("discord.com", "discord.media", - "discordapp.com", "discordapp.net", "discordstatus.com")); + "discordapp.com", "discordapp.net", "discordstatus.com", "thehackernews.com", + "gradle.org", "help.gradle.org", "youtube.com", "www.youtube.com")); when(scamConfig.getHostBlacklist()).thenReturn(Set.of("bit.ly", "discord.gg", "teletype.in", "t.me", "corematrix.us", "u.to", "steamcommunity.com", "goo.su", "telegra.ph", "shorturl.at", "cheatings.xyz", "transfer.sh")); - when(scamConfig.getSuspiciousHostKeywords()) - .thenReturn(Set.of("discord", "nitro", "premium", "free", "cheat", "crypto", "tele")); + when(scamConfig.getSuspiciousHostKeywords()).thenReturn(Set.of("discord", "nitro", + "premium", "free", "cheat", "crypto", "telegra", "telety")); when(scamConfig.getIsHostSimilarToKeywordDistanceThreshold()).thenReturn(2); when(scamConfig.getSuspiciousAttachmentsThreshold()) .thenReturn(SUSPICIOUS_ATTACHMENTS_THRESHOLD); @@ -335,14 +336,124 @@ private static List provideRealScamMessages() { Or via TG: https://t.me/Charlie_Adamo """, "Urgently looking for mods & collab managers https://discord.gg/cryptohireo", - "Check this - https://transfer.sh/get/ajmkh3l7tzop/Setup.exe"); + "Check this - https://transfer.sh/get/ajmkh3l7tzop/Setup.exe", + """ + Secrets of the crypto market that top traders don’t want you to know! I’m looking to help some individuals who + are serious about earning over $100K weekly in the market. Remember, I’ll require just 15% of your profits once + you start seeing earnings. Note: I’m only looking for serious and truly interested individuals. + Text me on TG/WhatApps for more info on how to get started +(123)123-1230 https://t.me/officialjohnsmith""", + """ + 💻 Senior Full Stack Engineer | 8+ Years Experience with me + Hi, I’m a Senior Software Engineer with over 8 years of experience building scalable website, cloud-native software solutions across industries like healthcare, fintech, e-commerce, gaming, logistics, and energy. + 🧰 Core Skills: + Frontend: React, Vue, Angular, Next.js, TypeScript, Web3 integration, Svelte, Three.js, Pixi.js + Backend: Node.js, NestJS, PHP (Laravel, Symfony), Python (FastAPI/Flask), .Net, Rails + Databases: MongoDB, MySQL, PostgreSQL, Redis + Ecommerce platforms: MedusaJS, MercurJS, Shopify (Gadget) + Automation & Bots: Token Swap / Trading Bots, AI/ML & Generative AI & CRM, Automation online sites + 🔍 Notable Projects: + Property Shield: Scalable backend with NestJS, Redis Streams, MongoDB, Supabase + Ready Education: Frontend state architecture with NgRx, Next / Vue, TypeScript with Web3, + Kozoom Multimedia: Secure enterprise login using React, Redux, Azure + B2CWorkflow Builder (React Flow) + 📂 Portfolio: https://tobimoller.space/ + 📬 Open to freelance gigs, contracts, and bounties — let’s talk!""", + """ + I'll help the first 10 people interested on how to start earning $100k or more within a week, + but you will reimburse me 10% of your profits when you receive it. Note: only interested people should + send a friend request or send me a dm! ask me (HOW) via Telegram username @JohnSmith_123""", + """ + Ready to unlock your earning potential in the digital market? you can start earning $100,000 and even more + as a beginner from the digital market, DM me for expert guidance or contact me directly on telegram and start building your financial future. + Telegram username @JohnSmith123""", + "Grab it before it's deleted (available for Windows and macOS): https://www.reddit.com/r/TVBaFreeHub/comments/12345t/ninaatradercrackedfullpowertradingfreefor123/"); } private static List provideRealFalsePositiveMessages() { - return List - .of(""" - https://learn.microsoft.com/en-us/dotnet/csharp/fundamentals/types/anonymous-types""", - """ - And according to quick google search. Median wage is about $23k usd"""); + return List.of( + """ + https://learn.microsoft.com/en-us/dotnet/csharp/fundamentals/types/anonymous-types""", + "And according to quick google search. Median wage is about $23k usd", + """ + $ docker image prune -a + WARNING! This will remove all images without at least one container associated to them. + Are you sure you want to continue? [y/N] y + ... + Total reclaimed space: 37.73GB""", + """ + Exception in thread "main" java.lang.NoSuchMethodError: 'java.lang.String org.junit.platform.engine.discovery.MethodSelector.getMethodParameterTypes()' + at com.intellij.junit5.JUnit5TestRunnerUtil.loadMethodByReflection(JUnit5TestRunnerUtil.java:127) + at com.intellij.junit5.JUnit5TestRunnerUtil.buildRequest(JUnit5TestRunnerUtil.java:102) + at com.intellij.junit5.JUnit5IdeaTestRunner.startRunnerWithArgs(JUnit5IdeaTestRunner.java:43) + at com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38) + at com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11) + at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35) + at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:232) + at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:55)""", + """ + The average wage here (not the median, which is lower) gives you a take-home of about $68k in New Zealand dollars. + The median house-price in my city (which is not at all the most expensive city) is ~$740k. + That's an 11 year save for an average earner for an average house without spending anything.""", + "https://thehackernews.com/2025/07/alert-exposed-jdwp-interfaces-lead-to.html", + """ + ~/Developer/TJ-Bot develop ❯ ./gradlew build 10:20:05 PM + FAILURE: Build failed with an exception. + What went wrong: + class name.remal.gradleplugins.sonarlint.SonarLintPlugin + tried to access private field org.gradle.api.plugins.quality.internal.AbstractCodeQualityPlugin.extension + (name.remal.gradleplugins.sonarlint.SonarLintPlugin is in unnamed module of loader + org.gradle.internal.classloader.VisitableURLClassLoader$InstrumentingVisitableURLClassLoader @55f4c79b; + org.gradle.api.plugins.quality.internal.AbstractCodeQualityPlugin is in unnamed module of + loader org.gradle.initialization.MixInLegacyTypesClassLoader @49b2a47d) + Try: + Run with --stacktrace option to get the stack trace. + Run with --info or --debug option to get more log output. + Run with --scan to get full insights. + Get more help at https://help.gradle.org/. + BUILD FAILED in 795ms + 7 actionable tasks: 7 up-to-date + ~/Developer/TJ-Bot develop ❯""", + """ + For example. I enter 3.45 for the price and 3 for the count. It results in 10.350000000000001 for some reason. I followed Bro Code's video: + https://www.youtube.com/watch?v=P8CVPIaRmys&list=PLZPZq0rRZOOjNOZYq_R2PECIMglLemc&index=6 + and his does not do this. Why is this? + import java.util.Scanner; + public class ShoppingCart { + public static void main(String[] args){ + // Shopping Cart Arithmetic Practice + Scanner input = new Scanner(System.in); + String item; + double price; + int count; + char currency = '$'; + double total; + System.out.print("What item would you like to buy?: "); + item = input.nextLine(); + System.out.print("What is the price of this item?: "); + price = input.nextDouble(); + System.out.print("How many " + item + "(s) would you like to buy?: "); + count = input.nextInt(); + total = price * count; + System.out.println("\\nYou bought " + count + " " + item + "(s).\\n"); + System.out.println("Your total is " + currency + total); + } + }""", + "@squidxtv https://cdn.steamusercontent.com/ugc/12827361819537692968/A7B3AC5A176E7B2287B5E84B9A0BE9754F5A6388/", + """ + today i understood, why security is joke, even for people on top + https://micahsmith.com/ddosecrets-publishes-410-gb-of-heap-dumps-hacked-from-telemessages-archive-server/""", + """ + Hey guys @everyone, apologise for disturbing, + I wanted to ask what's the scope of Java in future like after 2030 in USA, like the newer frameworks will + replace Spring Boot ... and how AI will play it role ... + I am very much confused, what to do, I tired exploring Machine Learning, but I don't know why it felt more + like a burden then enjoyment, but spring boot was fun, although exploring microservice architecture + is was tricky mostly when it came to deployment and it become really confusing...""", + "https://www.cloudflare.com/learning/email-security/dmarc-dkim-spf/", + """ + It was pretty pricey, and the costs likely differ a lot from country to country + (keeping in mind that a portion is importing of equipment to NZ and some is labour in a very different market). + We have 13.5KW of storage, a 10KW inverter, 11.5KW of generation and an EV charger. + All up, on a 1% 'green loan', it was $40k NZD (~$23k USD)"""); } } From 160a6a4cf2a07824251b05c37eb4705f6ee7c154 Mon Sep 17 00:00:00 2001 From: Zabuzard Date: Mon, 21 Jul 2025 10:31:54 +0200 Subject: [PATCH 2/2] bugfix for exact matches (^foo$), added a blacklisted host --- application/config.json.template | 3 ++- .../tjbot/features/moderation/scam/ScamDetector.java | 4 ++++ .../tjbot/features/moderation/scam/ScamDetectorTest.java | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/application/config.json.template b/application/config.json.template index bd3b9be3b9..c53120ddd8 100644 --- a/application/config.json.template +++ b/application/config.json.template @@ -72,7 +72,8 @@ "telegra.ph", "shorturl.at", "cheatings.xyz", - "transfer.sh" + "transfer.sh", + "tobimoller.space" ], "suspiciousHostKeywords": [ "discord", diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java index a1b0ae71dd..b914f03ecc 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java @@ -142,6 +142,10 @@ private boolean containsSuspiciousKeyword(String token) { .stream() .map(keyword -> keyword.toLowerCase(Locale.US)) .anyMatch(keyword -> { + // Exact match "^foo$" + if (startsWith(keyword, '^') && endsWith(keyword, '$')) { + return preparedToken.equals(keyword.substring(1, keyword.length() - 1)); + } // Simple regex-inspired syntax "^foo" if (startsWith(keyword, '^')) { return preparedToken.startsWith(keyword.substring(1)); diff --git a/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java b/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java index b554ce5b4a..10d624d108 100644 --- a/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java +++ b/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java @@ -41,7 +41,7 @@ void setUp() { "gradle.org", "help.gradle.org", "youtube.com", "www.youtube.com")); when(scamConfig.getHostBlacklist()).thenReturn(Set.of("bit.ly", "discord.gg", "teletype.in", "t.me", "corematrix.us", "u.to", "steamcommunity.com", "goo.su", "telegra.ph", - "shorturl.at", "cheatings.xyz", "transfer.sh")); + "shorturl.at", "cheatings.xyz", "transfer.sh", "tobimoller.space")); when(scamConfig.getSuspiciousHostKeywords()).thenReturn(Set.of("discord", "nitro", "premium", "free", "cheat", "crypto", "telegra", "telety")); when(scamConfig.getIsHostSimilarToKeywordDistanceThreshold()).thenReturn(2);