From 86f2192b3c552f129c5b8885b7cccef5772382f4 Mon Sep 17 00:00:00 2001 From: wagoid Date: Sun, 10 Apr 2016 01:58:40 -0300 Subject: [PATCH 01/10] Add the checkAsync method and several other languages --- README.md | 2 +- lib/profanity.js | 259 ++++++++++++++++------- lib/swearwords/ar.json | 40 ++++ lib/swearwords/cs.json | 43 ++++ lib/swearwords/da.json | 22 ++ lib/swearwords/de.json | 67 ++++++ lib/swearwords/en.json | 452 ++++++++++++++++++++++++++++++++++++++++ lib/swearwords/eo.json | 39 ++++ lib/swearwords/es.json | 70 +++++++ lib/swearwords/fa.json | 47 +++++ lib/swearwords/fi.json | 132 ++++++++++++ lib/swearwords/fr.json | 93 +++++++++ lib/swearwords/hi.json | 98 +++++++++ lib/swearwords/hu.json | 98 +++++++++ lib/swearwords/it.json | 182 ++++++++++++++++ lib/swearwords/ja.json | 188 +++++++++++++++++ lib/swearwords/ko.json | 74 +++++++ lib/swearwords/nl.json | 193 +++++++++++++++++ lib/swearwords/no.json | 15 ++ lib/swearwords/pl.json | 55 +++++ lib/swearwords/pt.json | 270 ++++++++++++++++++++++++ lib/swearwords/ru.json | 154 ++++++++++++++ lib/swearwords/sv.json | 45 ++++ lib/swearwords/th.json | 33 +++ lib/swearwords/tlh.json | 5 + lib/swearwords/tr.json | 144 +++++++++++++ lib/swearwords/zh.json | 296 ++++++++++++++++++++++++++ lib/util.js | 7 +- 28 files changed, 3041 insertions(+), 82 deletions(-) create mode 100644 lib/swearwords/ar.json create mode 100644 lib/swearwords/cs.json create mode 100644 lib/swearwords/da.json create mode 100644 lib/swearwords/de.json create mode 100644 lib/swearwords/en.json create mode 100644 lib/swearwords/eo.json create mode 100644 lib/swearwords/es.json create mode 100644 lib/swearwords/fa.json create mode 100644 lib/swearwords/fi.json create mode 100644 lib/swearwords/fr.json create mode 100644 lib/swearwords/hi.json create mode 100644 lib/swearwords/hu.json create mode 100644 lib/swearwords/it.json create mode 100644 lib/swearwords/ja.json create mode 100644 lib/swearwords/ko.json create mode 100644 
lib/swearwords/nl.json create mode 100644 lib/swearwords/no.json create mode 100644 lib/swearwords/pl.json create mode 100644 lib/swearwords/pt.json create mode 100644 lib/swearwords/ru.json create mode 100644 lib/swearwords/sv.json create mode 100644 lib/swearwords/th.json create mode 100644 lib/swearwords/tlh.json create mode 100644 lib/swearwords/tr.json create mode 100644 lib/swearwords/zh.json diff --git a/README.md b/README.md index 7a32e02..507ab0e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ > Utility for detection, filtering and replacement / obscuration of forbidden words -The original list of swearwords used by default was taken from [here](https://gist.github.com/jamiew/1112488). +The original lists of swearwords used by default were taken from [here](https://gist.github.com/jamiew/1112488) and [here](https://github.com/shutterstock/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words). **Please note:** This small utility module is written to prevent or monitor the use of certain words in your content without keeping context in account. An improper use may compromise the meaning of your content. Keep in account when using. diff --git a/lib/profanity.js b/lib/profanity.js index 2edbed6..ae6bc5f 100644 --- a/lib/profanity.js +++ b/lib/profanity.js @@ -3,120 +3,219 @@ Copyright (C) 2014 Kano Computing Ltd. License: http://opensource.org/licenses/MIT The MIT License (MIT) */ -var swearwords = require('./swearwords.json'), - util = require('./util'); +var util = require('./util'); +var fs = require('fs'); +var _ = require('underscore'); var DEFAULT_REPLACEMENTS = [ - 'bunnies', - 'butterfly', - 'kitten', - 'love', - 'gingerly', - 'flowers', - 'puppy', - 'joyful', - 'rainbows', - 'unicorn' - ], - DEFAULT_REGEX = getListRegex(swearwords); - -function getListRegex (list) { - // we want to treat all characters in the word as literals, not as regex specials (e.g. 
shi+) - function escapeRegexChars(word) { return word.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); }; - return new RegExp('\\b(' + list.map(escapeRegexChars).join('|') + ')\\b', 'gi'); + 'bunnies', + 'butterfly', + 'kitten', + 'love', + 'gingerly', + 'flowers', + 'puppy', + 'joyful', + 'rainbows', + 'unicorn' +]; +var DEFAULT_LANGUAGES = ['en']; +var AVAILABLE_LANGUAGES = ['ar', 'cs', 'da', 'de', 'en', 'eo', 'es', 'fa', 'fi', 'fr', 'hi', 'hu', 'it', 'ja', 'ko', 'nl', 'no', 'pl', 'pt', 'ru', 'sv', 'th', 'tlh', 'tr', 'zh']; +var DEFAULT_FS_OPTIONS = {encoding: 'utf8' }; + +function promisiFyReadFile(fileName, option) { + return new Promise(function (resolve, reject) { + fs.readFile(fileName, options, function (err, data) { + if (err) { + reject(err); + } else { + var jsonData = JSON.parse(data); + resolve(jsonData); + } + }); + }); +} + +function getWordListsPromises (languages) { + var promises = []; + _.each(languages, function(language) { + promises.push(promisiFyReadFile('./swearwords/' + language, DEFAULT_FS_OPTIONS)); + }); + + return promises; } -function check (target, forbiddenList) { - var targets = [], - regex = forbiddenList ? 
getListRegex(forbiddenList) : DEFAULT_REGEX; +function getWordListsConcatenated (languages) { + var lists = []; + + _.each(languages, function (language) { + lists.concat(JSON.parse(fs.readFileSync('./swearwords/' + DEFAULT_FS_OPTIONS))); + }); + + return lists; +} - if (typeof target === 'string') { - targets.push(target); - } else if (typeof target === 'object') { +function escapeRegexChars (word) { + return word.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); +} - util.eachRecursive(target, function (val) { - if (typeof val === 'string') { - targets.push(val); - } +function getListRegexAsync (list, languages) { + var listRegex = []; + if (list) { + listRegex = new Promise(function (resolve, reject) { resolve(_getListRegex(list)); } ); + } else { + Promise.all(getWordListsPromises(languages)) + .then(function (listsArray) { + _.each(listsArray, function (lst) { + listRegex.concat(_getListRegex(lst)); }); + resolve(listRegex); + }) + .catch(function (err) { + reject(err); + }); + } +} + +function getListRegex (list, languages) { + if (!list) { + list = getWordListsConcatenated(languages); + } + return _getListRegex(list); +} + +function _getListRegex(list) { + // we want to treat all characters in the word as literals, not as regex specials (e.g. shi+) + return new RegExp('\\b(' + list.map(escapeRegexChars).join('|') + ')\\b', 'gi'); +} + +function getDefaultLanguagesValue (languages) { + if (util.isString(languages)) { + if (languages.toLowerCase() === 'all') { + languages = AVAILABLE_LANGUAGES; + } else { + languages = [languages]; } + } else if (!languages) { + languages = DEFAULT_LANGUAGES; + } + + return languages; +} + +function check (target, forbiddenList, languages) { + languages = getDefaultLanguagesValue(languages); - return targets.join(' ').match(regex) || []; + var regex = forbiddenList ? 
getListRegex(forbiddenList) : getListRegex(null, languages); + + return _check(target, regex); } -function purifyString (str, options) { - options = options || {}; +function _check (target, regex) { + var targets = []; + + if (util.isString(target)) { + targets.push(target); + } else if (typeof target === 'object') { + util.eachRecursive(target, function(val) { + if (util.isString(val)) { + targets.push(val); + } + }); + } - var matches = [], - purified, - forbiddenList = options.forbiddenList || null, - replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS, - regex = forbiddenList ? getListRegex(forbiddenList) : DEFAULT_REGEX, - replace = options.replace || false, - obscureSymbol = options.obscureSymbol || '*'; + return targets.join(' ').match(regex) || []; +} - purified = str.replace(regex, function (val) { - matches.push(val); +function checkAsync (target, forbiddenList, languages) { + languages = getDefaultLanguagesValue(languages); + + return new Promise(function(resolve, reject) { + getListRegexAsync(forbiddenList, languages) + .then(function (regex) { + var result = _check(target, regex); + resolve(result); + }) + .catch(function (err) { + reject(err); + }); + }); +} - if (replace) { - return replacementsList[Math.floor(Math.random() * replacementsList.length)]; - } +function purifyString (str, regex, options) { + options = options || {}; - var str = val.substr(0, 1); + var matches = [], + purified, + forbiddenList = options.forbiddenList || null, + replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS, + replace = options.replace || false, + obscureSymbol = options.obscureSymbol || '*'; - for (var i = 0; i < val.length - 2; i += 1) { - str += obscureSymbol; - } + purified = str.replace(regex, function(val) { + matches.push(val); - return str + val.substr(-1); - }); + if (replace) { + return replacementsList[Math.floor(Math.random() * replacementsList.length)]; + } + + var str = val.substr(0, 1); + + for (var i = 0; i < val.length 
- 2; i += 1) { + str += obscureSymbol; + } - return [ purified, matches ]; + return str + val.substr(-1); + }); + + return [purified, matches]; } function purify (target, options) { - options = options || {}; + options = options || {}; + options.languages = getDefaultLanguagesValue(options.languages); - var matches = [], - fields = options.fields || ( target instanceof Object ? Object.keys(target) : [] ), - result; + var matches = [], + fields = options.fields || (target instanceof Object ? Object.keys(target) : []), + result, regex = forbiddenList ? getListRegex(forbiddenList) : getListRegex(null, languages); - if (typeof target === 'string') { + if (typeof target === 'string') { - return purifyString(target, options); + return purifyString(target, regex, options); - } else if (typeof target === 'object') { - fields.forEach(function (field) { + } else if (typeof target === 'object') { + fields.forEach(function(field) { - // TODO: Use better recursive checking, make DRYer - if (typeof target[field] === 'string') { + // TODO: Use better recursive checking, make DRYer + if (typeof target[field] === 'string') { - result = purifyString(target[field], options); - target[field] = result[0]; - matches = matches.concat(result[1]); + result = purifyString(target[field], regex, options); + target[field] = result[0]; + matches = matches.concat(result[1]); - } else if (typeof target[field] === 'object') { - util.eachRecursive(target[field], function (val, key, root) { + } else if (typeof target[field] === 'object') { + util.eachRecursive(target[field], function(val, key, root) { - if (fields && fields.indexOf(key) === -1) { - return; - } + if (fields && fields.indexOf(key) === -1) { + return; + } - if (typeof val === 'string') { - result = purifyString(val, options); - root[key] = result[0]; - matches = matches.concat(result[1]); - } + if (typeof val === 'string') { + result = purifyString(val, regex, options); + root[key] = result[0]; + matches = matches.concat(result[1]); + 
} - }); - } }); + } + }); - return [ target, matches ]; - } + return [target, matches]; + } } module.exports = { - check: check, - purify: purify + check: check, + checkAsync: checkAsync, + purify: purify }; \ No newline at end of file diff --git a/lib/swearwords/ar.json b/lib/swearwords/ar.json new file mode 100644 index 0000000..f7d6e66 --- /dev/null +++ b/lib/swearwords/ar.json @@ -0,0 +1,40 @@ +[ + "سكس", + "طيز", + "شرج", + "لعق", + "لحس", + "مص", + "تمص", + "بيضان", + "ثدي", + "بز", + "بزاز", + "حلمة", + "مفلقسة", + "بظر", + "كس", + "فرج", + "شهوة", + "شاذ", + "مبادل", + "عاهرة", + "جماع", + "قضيب", + "زب", + "لوطي", + "لواط", + "سحاق", + "سحاقية", + "اغتصاب", + "خنثي", + "احتلام", + "نيك", + "متناك", + "متناكة", + "شرموطة", + "عرص", + "خول", + "قحبة", + "لبوة" +] \ No newline at end of file diff --git a/lib/swearwords/cs.json b/lib/swearwords/cs.json new file mode 100644 index 0000000..a655b94 --- /dev/null +++ b/lib/swearwords/cs.json @@ -0,0 +1,43 @@ +[ + "bordel", + "buzna", + "čumět", + "čurák", + "debil", + "do piče", + "do prdele", + "dršťka", + "držka", + "flundra", + "hajzl", + "hovno", + "chcanky", + "chuj", + "jebat", + "kokot", + "kokotina", + "koňomrd", + "kunda", + "kurva", + "mamrd", + "mrdat", + "mrdka", + "mrdník", + "oslošoust", + "piča", + "píčus", + "píchat", + "pizda", + "prcat", + "prdel", + "prdelka", + "sračka", + "srát", + "šoustat", + "šulin", + "vypíčenec", + "zkurvit", + "zkurvysyn", + "zmrd", + "žrát" +] \ No newline at end of file diff --git a/lib/swearwords/da.json b/lib/swearwords/da.json new file mode 100644 index 0000000..e423b8a --- /dev/null +++ b/lib/swearwords/da.json @@ -0,0 +1,22 @@ +[ + "anus", + "bøsserøv", + "cock", + "fisse", + "fissehår", + "fuck", + "hestepik", + "kussekryller", + "lort", + "luder", + "pik", + "pikhår", + "pikslugeri", + "piksutteri", + "pis", + "røv", + "røvhul", + "røvskæg", + "røvspræke", + "shit" +] \ No newline at end of file diff --git a/lib/swearwords/de.json b/lib/swearwords/de.json new 
file mode 100644 index 0000000..a0dfa96 --- /dev/null +++ b/lib/swearwords/de.json @@ -0,0 +1,67 @@ +[ + "analritter", + "arsch", + "arschficker", + "arschlecker", + "arschloch", + "bimbo", + "bratze", + "bumsen", + "bonze", + "dödel", + "fick", + "ficken", + "flittchen", + "fotze", + "fratze", + "hackfresse", + "hure", + "hurensohn", + "ische", + "kackbratze", + "kacke", + "kacken", + "kackwurst", + "kampflesbe", + "kanake", + "kimme", + "lümmel", + "MILF", + "möpse", + "morgenlatte", + "möse", + "mufti", + "muschi", + "nackt", + "neger", + "nigger", + "nippel", + "nutte", + "onanieren", + "orgasmus", + "pimmel", + "pimpern", + "pinkeln", + "pissen", + "pisser", + "popel", + "poppen", + "porno", + "reudig", + "rosette", + "schabracke", + "schlampe", + "scheiße", + "scheisser", + "schiesser", + "schnackeln", + "schwanzlutscher", + "schwuchtel", + "tittchen", + "titten", + "vögeln", + "vollpfosten", + "wichse", + "wichsen", + "wichser" +] \ No newline at end of file diff --git a/lib/swearwords/en.json b/lib/swearwords/en.json new file mode 100644 index 0000000..cd948bc --- /dev/null +++ b/lib/swearwords/en.json @@ -0,0 +1,452 @@ +[ + "4r5e", + "5h1t", + "5hit", + "a55", + "anal", + "anus", + "ar5e", + "arrse", + "arse", + "ass", + "ass-fucker", + "asses", + "assfucker", + "assfukka", + "asshole", + "assholes", + "asswhole", + "a_s_s", + "b!tch", + "b00bs", + "b17ch", + "b1tch", + "ballbag", + "ballsack", + "bastard", + "beastial", + "beastiality", + "bellend", + "bestial", + "bestiality", + "bi+ch", + "biatch", + "bitch", + "bitcher", + "bitchers", + "bitches", + "bitchin", + "bitching", + "bloody", + "blow job", + "blowjob", + "blowjobs", + "boiolas", + "bollock", + "bollok", + "boner", + "boob", + "boobs", + "booobs", + "boooobs", + "booooobs", + "booooooobs", + "breasts", + "buceta", + "bugger", + "bum", + "bunny fucker", + "butt", + "butthole", + "buttmuch", + "buttplug", + "c0ck", + "c0cksucker", + "carpet muncher", + "cawk", + "chink", + "cipa", + "cl1t", + 
"clit", + "clitoris", + "clits", + "cnut", + "cock", + "cock-sucker", + "cockface", + "cockhead", + "cockmunch", + "cockmuncher", + "cocks", + "cocksuck ", + "cocksucked ", + "cocksucker", + "cocksucking", + "cocksucks ", + "cocksuka", + "cocksukka", + "cok", + "cokmuncher", + "coksucka", + "coon", + "crap", + "cum", + "cummer", + "cumming", + "cums", + "cumshot", + "cunilingus", + "cunillingus", + "cunnilingus", + "cunt", + "cuntlick ", + "cuntlicker ", + "cuntlicking ", + "cunts", + "cyalis", + "cyberfuc", + "cyberfuck ", + "cyberfucked ", + "cyberfucker", + "cyberfuckers", + "cyberfucking ", + "d1ck", + "damn", + "dick", + "dickhead", + "dildo", + "dildos", + "dink", + "dinks", + "dirsa", + "dlck", + "dog-fucker", + "doggin", + "dogging", + "donkeyribber", + "doosh", + "duche", + "dyke", + "ejaculate", + "ejaculated", + "ejaculates ", + "ejaculating ", + "ejaculatings", + "ejaculation", + "ejakulate", + "f u c k", + "f u c k e r", + "f4nny", + "fag", + "fagging", + "faggitt", + "faggot", + "faggs", + "fagot", + "fagots", + "fags", + "fanny", + "fannyflaps", + "fannyfucker", + "fanyy", + "fatass", + "fcuk", + "fcuker", + "fcuking", + "feck", + "fecker", + "felching", + "fellate", + "fellatio", + "fingerfuck ", + "fingerfucked ", + "fingerfucker ", + "fingerfuckers", + "fingerfucking ", + "fingerfucks ", + "fistfuck", + "fistfucked ", + "fistfucker ", + "fistfuckers ", + "fistfucking ", + "fistfuckings ", + "fistfucks ", + "flange", + "fook", + "fooker", + "fuck", + "fucka", + "fucked", + "fucker", + "fuckers", + "fuckhead", + "fuckheads", + "fuckin", + "fucking", + "fuckings", + "fuckingshitmotherfucker", + "fuckme ", + "fucks", + "fuckwhit", + "fuckwit", + "fudge packer", + "fudgepacker", + "fuk", + "fuker", + "fukker", + "fukkin", + "fuks", + "fukwhit", + "fukwit", + "fux", + "fux0r", + "f_u_c_k", + "gangbang", + "gangbanged ", + "gangbangs ", + "gaylord", + "gaysex", + "goatse", + "God", + "god-dam", + "god-damned", + "goddamn", + "goddamned", + "hardcoresex 
", + "hell", + "heshe", + "hoar", + "hoare", + "hoer", + "homo", + "hore", + "horniest", + "horny", + "hotsex", + "jack-off ", + "jackoff", + "jap", + "jerk-off ", + "jism", + "jiz ", + "jizm ", + "jizz", + "kawk", + "knob", + "knobead", + "knobed", + "knobend", + "knobhead", + "knobjocky", + "knobjokey", + "kock", + "kondum", + "kondums", + "kum", + "kummer", + "kumming", + "kums", + "kunilingus", + "l3i+ch", + "l3itch", + "labia", + "lmfao", + "lust", + "lusting", + "m0f0", + "m0fo", + "m45terbate", + "ma5terb8", + "ma5terbate", + "masochist", + "master-bate", + "masterb8", + "masterbat*", + "masterbat3", + "masterbate", + "masterbation", + "masterbations", + "masturbate", + "mo-fo", + "mof0", + "mofo", + "mothafuck", + "mothafucka", + "mothafuckas", + "mothafuckaz", + "mothafucked ", + "mothafucker", + "mothafuckers", + "mothafuckin", + "mothafucking ", + "mothafuckings", + "mothafucks", + "mother fucker", + "motherfuck", + "motherfucked", + "motherfucker", + "motherfuckers", + "motherfuckin", + "motherfucking", + "motherfuckings", + "motherfuckka", + "motherfucks", + "muff", + "mutha", + "muthafecker", + "muthafuckker", + "muther", + "mutherfucker", + "n1gga", + "n1gger", + "nazi", + "nigg3r", + "nigg4h", + "nigga", + "niggah", + "niggas", + "niggaz", + "nigger", + "niggers ", + "nob", + "nob jokey", + "nobhead", + "nobjocky", + "nobjokey", + "numbnuts", + "nutsack", + "orgasim ", + "orgasims ", + "orgasm", + "orgasms ", + "p0rn", + "pawn", + "pecker", + "penis", + "penisfucker", + "phonesex", + "phuck", + "phuk", + "phuked", + "phuking", + "phukked", + "phukking", + "phuks", + "phuq", + "pigfucker", + "pimpis", + "piss", + "pissed", + "pisser", + "pissers", + "pisses ", + "pissflaps", + "pissin ", + "pissing", + "pissoff ", + "poop", + "poo", + "porn", + "porno", + "pornography", + "pornos", + "prick", + "pricks ", + "pron", + "pube", + "pusse", + "pussi", + "pussies", + "pussy", + "pussys ", + "rectum", + "retard", + "rimjaw", + "rimming", + "s hit", + 
"s.o.b.", + "sadist", + "schlong", + "screwing", + "scroat", + "scrote", + "scrotum", + "semen", + "sex", + "sh!+", + "sh!t", + "sh1t", + "shag", + "shagger", + "shaggin", + "shagging", + "shemale", + "shi+", + "shit", + "shitdick", + "shite", + "shited", + "shitey", + "shitfuck", + "shitfull", + "shithead", + "shiting", + "shitings", + "shits", + "shitted", + "shitter", + "shitters ", + "shitting", + "shittings", + "shitty", + "skank", + "slut", + "sluts", + "smegma", + "smut", + "snatch", + "son-of-a-bitch", + "spac", + "spunk", + "s_h_i_t", + "t1tt1e5", + "t1tties", + "teets", + "teez", + "testical", + "testicle", + "tit", + "titfuck", + "tits", + "titt", + "tittie5", + "tittiefucker", + "titties", + "tittyfuck", + "tittywank", + "titwank", + "tosser", + "turd", + "tw4t", + "twat", + "twathead", + "twatty", + "twunt", + "twunter", + "v14gra", + "v1gra", + "vagina", + "viagra", + "vulva", + "w00se", + "wang", + "wank", + "wanker", + "wanky", + "whoar", + "whore", + "willies", + "willy", + "xrated", + "xxx" +] \ No newline at end of file diff --git a/lib/swearwords/eo.json b/lib/swearwords/eo.json new file mode 100644 index 0000000..2f7145d --- /dev/null +++ b/lib/swearwords/eo.json @@ -0,0 +1,39 @@ +[ + "bugren", + "bugri", + "bugru", + "ĉiesulino", + "ĉiesulo", + "diofek", + "diofeka", + "fek", + "feken", + "fekfikanto", + "feklekulo", + "fekulo", + "fik", + "fikado", + "fikema", + "fikfek", + "fiki", + "fikiĝi", + "fikiĝu", + "fikilo", + "fikklaŭno", + "fikota", + "fiku", + "forfiki", + "forfikiĝu", + "forfiku", + "forfurzu", + "forpisi", + "forpisu", + "furzulo", + "kacen", + "kaco", + "kacsuĉulo", + "kojono", + "piĉen", + "piĉo", + "zamenfek" +] \ No newline at end of file diff --git a/lib/swearwords/es.json b/lib/swearwords/es.json new file mode 100644 index 0000000..4d63118 --- /dev/null +++ b/lib/swearwords/es.json @@ -0,0 +1,70 @@ +[ + "Asesinato", + "asno", + "bastardo", + "Bollera", + "Cabron", + "Cabrón", + "Caca", + "Chupada", + "Chupapollas", + 
"Chupetón", + "concha", + "Concha de tu madre", + "Coño", + "Coprofagía", + "Culo", + "Drogas", + "Esperma", + "Fiesta de salchichas", + "Follador", + "Follar", + "Gilipichis", + "Gilipollas", + "Hacer una paja", + "Haciendo el amor", + "Heroína", + "Hija de puta", + "Hijaputa", + "Hijo de puta", + "Hijoputa", + "Idiota", + "Imbécil", + "infierno", + "Jilipollas", + "Kapullo", + "Lameculos", + "Maciza", + "Macizorra", + "maldito", + "Mamada", + "Marica", + "Maricón", + "Mariconazo", + "martillo", + "Mierda", + "Nazi", + "Orina", + "Pedo", + "Pervertido", + "Pezón", + "Pinche", + "Pis", + "Prostituta", + "Puta", + "Racista", + "Ramera", + "Sádico", + "Semen", + "Sexo", + "Sexo oral", + "Soplagaitas", + "Soplapollas", + "Tetas grandes", + "Tía buena", + "Travesti", + "Trio", + "Verga", + "vete a la mierda", + "Vulva" +] \ No newline at end of file diff --git a/lib/swearwords/fa.json b/lib/swearwords/fa.json new file mode 100644 index 0000000..b77f3c3 --- /dev/null +++ b/lib/swearwords/fa.json @@ -0,0 +1,47 @@ +[ + "آب کیر", + "ارگاسم", + "برهنه", + "پورن", + "پورنو", + "تجاوز", + "تخمی", + "جق", + "جقی", + "جلق", + "جنده", + "چوچول", + "حشر", + "حشری", + "داف", + "دودول", + "ساک زدن", + "سکس", + "سکس کردن", + "سکسی", + "سوپر", + "شق کردن", + "شهوت", + "شهوتی", + "شونبول", + "فیلم سوپر", + "کس", + "کس دادن", + "کس کردن", + "کسکش", + "کوس", + "کون", + "کون دادن", + "کون کردن", + "کونکش", + "کونی", + "کیر", + "کیری", + "لاپا", + "لاپایی", + "لاشی", + "لخت", + "لش", + "منی", + "هرزه" +] \ No newline at end of file diff --git a/lib/swearwords/fi.json b/lib/swearwords/fi.json new file mode 100644 index 0000000..af862f8 --- /dev/null +++ b/lib/swearwords/fi.json @@ -0,0 +1,132 @@ +[ + "alfred nussi", + "bylsiä", + "haahka", + "haista paska", + "haista vittu", + "hatullinen", + "helvetisti", + "hevonkuusi", + "hevonpaska", + "hevonperse", + "hevonvittu", + "hevonvitunperse", + "hitosti", + "hitto", + "huorata", + "hässiä", + "juosten kustu", + "jutku", + "jutsku", + "jätkä", 
+ "kananpaska", + "koiranpaska", + "kuin esterin perseestä", + "kulli", + "kullinluikaus", + "kuppainen", + "kusaista", + "kuseksia", + "kusettaa", + "kusi", + "kusipää", + "kusta", + "kyrpiintynyt", + "kyrpiintyä", + "kyrpiä", + "kyrpä", + "kyrpänaama", + "kyrvitys", + "lahtari", + "lutka", + "molo", + "molopää", + "mulkero", + "mulkku", + "mulkvisti", + "muna", + "munapää", + "munaton", + "mutakuono", + "mutiainen", + "naida", + "nainti", + "narttu", + "neekeri", + "nekru", + "nuolla persettä", + "nussia", + "nussija", + "nussinta", + "paljaalla", + "palli", + "pallit", + "paneskella", + "panettaa", + "panna", + "pano", + "pantava", + "paska", + "paskainen", + "paskamainen", + "paskanmarjat", + "paskantaa", + "paskapuhe", + "paskapää", + "paskattaa", + "paskiainen", + "paskoa", + "pehko", + "pentele", + "perkele", + "perkeleesti", + "persaukinen", + "perse", + "perseennuolija", + "perseet olalla", + "persereikä", + "perseääliö", + "persläpi", + "perspano", + "persvako", + "pilkunnussija", + "pillu", + "pillut", + "pipari", + "piru", + "pistää", + "pyllyvako", + "reikä", + "reva", + "ripsipiirakka", + "runkata", + "runkkari", + "runkkaus", + "runkku", + "ryssä", + "rättipää", + "saatanasti", + "suklaaosasto", + "tavara", + "toosa", + "tuhkaluukku", + "tumputtaa", + "turpasauna", + "tussu", + "tussukka", + "tussut", + "vakipano", + "vetää käteen", + "viiksi", + "vittu", + "vittuilla", + "vittuilu", + "vittumainen", + "vittuuntua", + "vittuuntunut", + "vitun", + "vitusti", + "vituttaa", + "vitutus", + "äpärä" +] \ No newline at end of file diff --git a/lib/swearwords/fr.json b/lib/swearwords/fr.json new file mode 100644 index 0000000..7b85676 --- /dev/null +++ b/lib/swearwords/fr.json @@ -0,0 +1,93 @@ +[ + "baiser", + "bander", + "bigornette", + "bite", + "bitte", + "bloblos", + "bordel", + "bosser", + "bourré", + "bourrée", + "brackmard", + "branlage", + "branler", + "branlette", + "branleur", + "branleuse", + "brouter le cresson", + "caca", + "cailler", + 
"chatte", + "chiasse", + "chier", + "chiottes", + "clito", + "clitoris", + "con", + "connard", + "connasse", + "conne", + "couilles", + "cramouille", + "cul", + "déconne", + "déconner", + "drague", + "emmerdant", + "emmerder", + "emmerdeur", + "emmerdeuse", + "enculé", + "enculée", + "enculeur", + "enculeurs", + "enfoiré", + "enfoirée", + "étron", + "fille de pute", + "fils de pute", + "folle", + "foutre", + "gerbe", + "gerber", + "gouine", + "grande folle", + "grogniasse", + "gueule", + "jouir", + "la putain de ta mère", + "MALPT", + "ménage à trois", + "merde", + "merdeuse", + "merdeux", + "meuf", + "nègre", + "nique ta mère", + "nique ta race", + "palucher", + "pédale", + "pédé", + "péter", + "pipi", + "pisser", + "pouffiasse", + "pousse-crotte", + "putain", + "pute", + "ramoner", + "sac à merde", + "salaud", + "salope", + "suce", + "tapette", + "teuf", + "tringler", + "trique", + "trou du cul", + "turlute", + "veuve", + "zigounette", + "zizi" +] \ No newline at end of file diff --git a/lib/swearwords/hi.json b/lib/swearwords/hi.json new file mode 100644 index 0000000..3d0a9af --- /dev/null +++ b/lib/swearwords/hi.json @@ -0,0 +1,98 @@ +[ + "aand", + "aandu", + "balatkar", + "beti chod", + "bhadva", + "bhadve", + "bhandve", + "bhootni ke", + "bhosad", + "bhosadi ke", + "boobe", + "chakke", + "chinaal", + "chinki", + "chod", + "chodu", + "chodu bhagat", + "chooche", + "choochi", + "choot", + "choot ke baal", + "chootia", + "chootiya", + "chuche", + "chuchi", + "chudai khanaa", + "chudan chudai", + "chut", + "chut ke baal", + "chut ke dhakkan", + "chut maarli", + "chutad", + "chutadd", + "chutan", + "chutia", + "chutiya", + "gaand", + "gaandfat", + "gaandmasti", + "gaandufad", + "gandu", + "gashti", + "gasti", + "ghassa", + "ghasti", + "harami", + "haramzade", + "hawas", + "hawas ke pujari", + "hijda", + "hijra", + "jhant", + "jhant chaatu", + "jhant ke baal", + "jhantu", + "kamine", + "kaminey", + "kanjar", + "kutta", + "kutta kamina", + "kutte ki aulad", + 
"kutte ki jat", + "kuttiya", + "loda", + "lodu", + "lund", + "lund choos", + "lund khajoor", + "lundtopi", + "lundure", + "maa ki chut", + "maal", + "madar chod", + "mooh mein le", + "mutth", + "najayaz", + "najayaz aulaad", + "najayaz paidaish", + "paki", + "pataka", + "patakha", + "raand", + "randi", + "saala", + "saala kutta", + "saali kutti", + "saali randi", + "suar", + "suar ki aulad", + "tatte", + "tatti", + "teri maa ka bhosada", + "teri maa ka boba chusu", + "teri maa ki chut", + "tharak", + "tharki" +] \ No newline at end of file diff --git a/lib/swearwords/hu.json b/lib/swearwords/hu.json new file mode 100644 index 0000000..d12e340 --- /dev/null +++ b/lib/swearwords/hu.json @@ -0,0 +1,98 @@ +[ + "balfasz", + "balfaszok", + "balfaszokat", + "balfaszt", + "barmok", + "barmokat", + "barmot", + "barom", + "baszik", + "bazmeg", + "buksza", + "bukszák", + "bukszákat", + "bukszát", + "búr", + "búrok", + "csöcs", + "csöcsök", + "csöcsöket", + "csöcsöt", + "fasz", + "faszfej", + "faszfejek", + "faszfejeket", + "faszfejet", + "faszok", + "faszokat", + "faszt", + "fing", + "fingok", + "fingokat", + "fingot", + "franc", + "francok", + "francokat", + "francot", + "geci", + "gecibb", + "gecik", + "geciket", + "gecit", + "kibaszott", + "kibaszottabb", + "kúr", + "kurafi", + "kurafik", + "kurafikat", + "kurafit", + "kurva", + "kurvák", + "kurvákat", + "kurvát", + "leggecibb", + "legkibaszottabb", + "legszarabb", + "marha", + "marhák", + "marhákat", + "marhát", + "megdöglik", + "pele", + "pelék", + "picsa", + "picsákat", + "picsát", + "pina", + "pinák", + "pinákat", + "pinát", + "pofa", + "pofákat", + "pofát", + "pöcs", + "pöcsök", + "pöcsöket", + "pöcsöt", + "punci", + "puncik", + "segg", + "seggek", + "seggeket", + "segget", + "seggfej", + "seggfejek", + "seggfejeket", + "seggfejet", + "szajha", + "szajhák", + "szajhákat", + "szajhát", + "szar", + "szarabb", + "szarik", + "szarok", + "szarokat", + "szart" +] \ No newline at end of file diff --git 
a/lib/swearwords/it.json b/lib/swearwords/it.json new file mode 100644 index 0000000..ba3da42 --- /dev/null +++ b/lib/swearwords/it.json @@ -0,0 +1,182 @@ +[ + "allupato", + "ammucchiata", + "anale", + "arrapato", + "arrusa", + "arruso", + "assatanato", + "bagascia", + "bagassa", + "bagnarsi", + "baldracca", + "balle", + "battere", + "battona", + "belino", + "biga", + "bocchinara", + "bocchino", + "bofilo", + "boiata", + "bordello", + "brinca", + "bucaiolo", + "budiùlo", + "buona donna", + "busone", + "cacca", + "caccati in mano e prenditi a schiaffi", + "caciocappella", + "cadavere", + "cagare", + "cagata", + "cagna", + "cammello", + "cappella", + "carciofo", + "carità", + "casci", + "cazzata", + "cazzimma", + "cazzo", + "checca", + "chiappa", + "chiavare", + "chiavata", + "ciospo", + "ciucciami il cazzo", + "coglione", + "coglioni", + "cornuto", + "cozza", + "culattina", + "culattone", + "culo", + "di merda", + "ditalino", + "duro", + "fare unaŠ", + "fava", + "femminuccia", + "fica", + "figa", + "figlio di buona donna", + "figlio di puttana", + "figone", + "finocchio", + "fottere", + "fottersi", + "fracicone", + "fregna", + "frocio", + "froscio", + "fuori come un balcone", + "goldone", + "grilletto", + "guanto", + "guardone", + "incazzarsi", + "incoglionirsi", + "ingoio", + "l'arte bolognese", + "leccaculo", + "lecchino", + "lofare", + "loffa", + "loffare", + "lumaca", + "manico", + "mannaggia", + "merda", + "merdata", + "merdoso", + "mignotta", + "minchia", + "minchione", + "mona", + "monta", + "montare", + "mussa", + "nave scuola", + "nerchia", + "nudo", + "padulo", + "palle", + "palloso", + "patacca", + "patonza", + "pecorina", + "pesce", + "picio", + "pincare", + "pipa", + "pipì", + "pippone", + "pirla", + "pisciare", + "piscio", + "pisello", + "pistola", + "pistolotto", + "pomiciare", + "pompa", + "pompino", + "porca", + "porca madonna", + "porca miseria", + "porca puttana", + "porco due", + "porco zio", + "potta", + "puppami", + "puttana", + "quaglia", + 
"recchione", + "regina", + "rincoglionire", + "rizzarsi", + "rompiballe", + "ruffiano", + "sbattere", + "sbattersi", + "sborra", + "sborrata", + "sborrone", + "sbrodolata", + "scopare", + "scopata", + "scorreggiare", + "sega", + "slinguare", + "slinguata", + "smandrappata", + "soccia", + "socmel", + "sorca", + "spagnola", + "spompinare", + "sticchio", + "stronza", + "stronzata", + "stronzo", + "succhiami", + "sveltina", + "sverginare", + "tarzanello", + "terrone", + "testa di cazzo", + "tette", + "tirare", + "topa", + "troia", + "trombare", + "uccello", + "vacca", + "vaffanculo", + "vangare", + "venire", + "zinne", + "zio cantante", + "zoccola" +] \ No newline at end of file diff --git a/lib/swearwords/ja.json b/lib/swearwords/ja.json new file mode 100644 index 0000000..cfd30d4 --- /dev/null +++ b/lib/swearwords/ja.json @@ -0,0 +1,188 @@ +[ + "3p", + "g スポット", + "s & m", + "sm", + "sm女王", + "xx", + "アジアのかわいい女の子", + "アスホール", + "アナリングス", + "アナル", + "いたずら", + "イラマチオ", + "ウェブカメラ", + "エクスタシー", + "エスコート", + "エッチ", + "エロティズム", + "エロティック", + "オーガズム", + "オカマ", + "おしっこ", + "おしり", + "オシリ", + "おしりのあな", + "おっぱい", + "オッパイ", + "オナニー", + "オマンコ", + "おもらし", + "お尻", + "カーマスートラ", + "カント", + "クリトリス", + "グループ・セックス", + "グロ", + "クンニリングス", + "ゲイ・セックス", + "ゲイの男性", + "ゲイボーイ", + "ゴールデンシャワー", + "コカイン", + "ゴックン", + "サディズム", + "しばり", + "スウィンガー", + "スカートの中", + "スカトロ", + "ストラップオン", + "ストリップ劇場", + "スラット", + "スリット", + "セクシーな", + "セクシーな 10 代", + "セックス", + "ソドミー", + "ちんこ", + "ディープ・スロート", + "ディック", + "ディルド", + "デートレイプ", + "デブ", + "テレフォンセックス", + "ドッグスタイル", + "トップレス", + "なめ", + "ニガー", + "ヌード", + "ネオ・ナチ", + "ハードコア", + "パイパン", + "バイブレーター", + "バック・スタイル", + "パンティー", + "ビッチ", + "ファック", + "ファンタジー", + "フィスト", + "フェティッシュ", + "フェラチオ", + "ふたなり", + "ぶっかけ", + "フック", + "プリンス アルバート ピアス", + "プレイボーイ", + "ベアバック", + "ペニス", + "ペニスバンド", + "ボーイズラブ", + "ボールギャグ", + "ボールを蹴る", + "ぽっちゃり", + "ホモ", + "ポルノ", + "ポルノグラフィー", + "ボンテージ", + "マザー・ファッカー", + "マスターベーション", + "まんこ", + "やおい", + "やりまん", + "ユダヤ人", + "ラティーナ", + "ラバー", + "ランジェリー", + 
"レイプ", + "レズビアン", + "ローター", + "ロリータ", + "淫乱", + "陰毛", + "革抑制", + "騎上位", + "巨根", + "巨乳", + "強姦犯", + "玉なめ", + "玉舐め", + "緊縛", + "近親相姦", + "嫌い", + "後背位", + "合意の性交", + "拷問", + "黒人", + "殺し方", + "殺人事件", + "殺人方法", + "支配", + "児童性虐待", + "自己愛性", + "射精", + "手コキ", + "獣姦", + "女の子", + "女王様", + "女子高生", + "女装", + "新しいポルノ", + "人妻", + "人種", + "性交", + "正常位", + "生殖器", + "精液", + "挿入", + "足フェチ", + "足を広げる", + "大陰唇", + "脱衣", + "茶色のシャワー", + "中出し", + "潮吹き女", + "潮吹き男性", + "直腸", + "剃毛", + "貞操帯", + "奴隷", + "二穴", + "乳首", + "尿道プレイ", + "覗き", + "売春婦", + "縛り", + "噴出", + "糞", + "糞尿愛好症", + "糞便", + "平手打ち", + "変態", + "勃起する", + "夢精", + "毛深い", + "誘惑", + "幼児", + "幼児性愛者", + "裸", + "裸の女性", + "乱交", + "両性", + "両性具有", + "両刀", + "輪姦", + "卍", + "宦官", + "肛門", + "膣" +] \ No newline at end of file diff --git a/lib/swearwords/ko.json b/lib/swearwords/ko.json new file mode 100644 index 0000000..0f2ec82 --- /dev/null +++ b/lib/swearwords/ko.json @@ -0,0 +1,74 @@ +[ + "강간", + "개새끼", + "개자식", + "개좆", + "개차반", + "거유", + "계집년", + "고자", + "근친", + "노모", + "니기미", + "뒤질래", + "딸딸이", + "때씹", + "또라이", + "뙤놈", + "로리타", + "망가", + "몰카", + "미친", + "미친새끼", + "바바리맨", + "변태", + "병신", + "보지", + "불알", + "빠구리", + "사까시", + "섹스", + "스와핑", + "쌍놈", + "씨발", + "씨발놈", + "씨팔", + "씹", + "씹물", + "씹빨", + "씹새끼", + "씹알", + "씹창", + "씹팔", + "암캐", + "애자", + "야동", + "야사", + "야애니", + "엄창", + "에로", + "염병", + "옘병", + "유모", + "육갑", + "은꼴", + "자위", + "자지", + "잡년", + "종간나", + "좆", + "좆만", + "죽일년", + "쥐좆", + "직촬", + "짱깨", + "쪽바리", + "창녀", + "포르노", + "하드코어", + "호로", + "화냥년", + "후레아들", + "후장", + "희쭈그리" +] \ No newline at end of file diff --git a/lib/swearwords/nl.json b/lib/swearwords/nl.json new file mode 100644 index 0000000..b58dfa9 --- /dev/null +++ b/lib/swearwords/nl.json @@ -0,0 +1,193 @@ +[ + "aardappels afgieteng", + "achter het raam zitten", + "afberen", + "aflebberen", + "afrossen", + "afrukken", + "aftrekken", + "afwerkplaats", + "afzeiken", + "afzuigen", + "anderhalve man en een paardekop", + "anita", + "asbak", + "aso", + "bagger schijten", + "balen", 
+ "bedonderen", + "befborstelg", + "beffen", + "bekken", + "belazeren", + "besodemieterd zijn", + "besodemieteren", + "beurt", + "boemelen", + "boerelul", + "boerenpummelg", + "bokkelul", + "botergeil", + "broekhoesten", + "brugpieperg", + "buffelen", + "buiten de pot piesen", + "da's kloten van de bok", + "de ballen", + "de hoer spelen", + "de hond uitlaten", + "de koffer induiken", + "delg", + "de pijp aan maarten geven", + "de pijp uitgaan", + "dombo", + "draaikontg", + "driehoog achter wonen", + "drolg", + "drooggeiler", + "droogkloot", + "een beurt geven", + "een nummertje maken", + "een wip maken", + "eikel", + "engerd", + "flamoes", + "flikken", + "flikker", + "gadverdamme", + "galbak", + "gat", + "gedoogzone", + "geilneef", + "gesodemieter", + "godverdomme", + "graftak", + "gras maaien", + "gratenkutg", + "greppeldel", + "griet", + "hoempert", + "hoer", + "hoerenbuurt", + "hoerenloper", + "hoerig", + "hol", + "hufter", + "huisdealer", + "johny", + "kanen", + "kettingzeugg", + "klaarkomen", + "klerebeer", + "klojo", + "klooien", + "klootjesvolk", + "klootoog", + "klootzak", + "kloten", + "knor", + "kontg", + "kontneuken", + "krentekakker", + "kut", + "kuttelikkertje", + "kwakkieg", + "liefdesgrot", + "lul", + "lul-de-behanger", + "lulhannes", + "lummel", + "mafketel", + "matennaaierg", + "matje", + "mof", + "mutsg", + "naaien", + "naakt", + "neuken", + "neukstier", + "nicht", + "oetlul", + "opgeilen", + "opkankeren", + "oprotten", + "opsodemieteren", + "op z'n hondjes", + "op z'n sodemieter geven", + "opzouten", + "ouwehoer", + "ouwehoeren", + "ouwe rukker", + "paal", + "paardelul", + "palen", + "penozeg", + "piesen", + "pijpbekkieg", + "pijpen", + "pik", + "pleurislaaier", + "poep", + "poepen", + "poot", + "portiekslet", + "pot", + "potverdorie", + "publiciteitsgeil", + "raaskallen", + "reet", + "reetridder", + "reet trappen, voor zijn", + "remsporeng", + "reutelen", + "rothoer", + "rotzak", + "rukhond", + "rukken", + "schatje", + "schijt", + "schijten", + 
"schoft", + "schuinsmarcheerder", + "shit", + "slempen", + "sletg", + "sletterig", + "slik mijn zaad", + "snolg", + "spuiten", + "standje", + "standje-69g", + "stoephoer", + "stootje", + "strontg", + "sufferdg", + "tapijtnek", + "teefg", + "temeier", + "teringlijer", + "toeter", + "tongzoeng", + "triootjeg", + "trottoir prostituée", + "trottoirteef", + "vergallen", + "verkloten", + "verneuken", + "viespeuk", + "vingeren", + "vleesroos", + "voor jan lul", + "voor jan-met-de-korte-achternaam", + "watje", + "welzijnsmafia", + "wijf", + "wippen", + "wuftje", + "zaadje", + "zakkenwasser", + "zeiken", + "zeiker", + "zuigen", + "zuiplap" +] \ No newline at end of file diff --git a/lib/swearwords/no.json b/lib/swearwords/no.json new file mode 100644 index 0000000..f1a44e9 --- /dev/null +++ b/lib/swearwords/no.json @@ -0,0 +1,15 @@ +[ + "drittsekk", + "faen i helvete", + "fitte", + "jævla", + "kuk", + "kukene", + "kuker", + "nigger", + "pikk", + "sotrør", + "ståpikk", + "ståpikkene", + "ståpikker" +] \ No newline at end of file diff --git a/lib/swearwords/pl.json b/lib/swearwords/pl.json new file mode 100644 index 0000000..8078048 --- /dev/null +++ b/lib/swearwords/pl.json @@ -0,0 +1,55 @@ +[ + "burdel", + "burdelmama", + "chuj", + "chujnia", + "ciota", + "cipa", + "cyc", + "debil", + "dmuchać", + "do kurwy nędzy", + "dupa", + "dupek", + "duperele", + "dziwka", + "fiut", + "gówno", + "gówno prawda", + "huj", + "jajco", + "jajeczko", + "jajko", + "jajo", + "ja pierdolę", + "jebać", + "jebany", + "kurwa", + "kurwy", + "kutafon", + "kutas", + "lizać pałę", + "obciągać chuja", + "obciągać fiuta", + "obciągać loda", + "pieprzyć", + "pierdolec", + "pierdolić", + "pierdolnięty", + "pierdoła", + "pierdzieć", + "pizda", + "pojeb", + "popierdolony", + "robic loda", + "robić loda", + "ruchać", + "rzygać", + "skurwysyn", + "sraczka", + "srać", + "suka", + "syf", + "wkurwiać", + "zajebisty" +] \ No newline at end of file diff --git a/lib/swearwords/pt.json b/lib/swearwords/pt.json new 
file mode 100644 index 0000000..1bc754d --- /dev/null +++ b/lib/swearwords/pt.json @@ -0,0 +1,270 @@ +[ + "aborto", + "amador", + "anus", + "ânus", + "aranha", + "ariano", + "baba-ovo", + "babaca", + "babaovo", + "bacura", + "bagos", + "baitola", + "balalao", + "bastardo", + "bebum", + "besta", + "bicha", + "bisca", + "biscate", + "bissexual", + "bixa", + "boazuda", + "boceta", + "boco", + "bocó", + "boiola", + "bolagato", + "boob", + "bosseta", + "bosta", + "bostana", + "braulio de borracha", + "brioco", + "bronha", + "buca", + "buceta", + "bumbum", + "bunda", + "bunduda", + "burra", + "burro", + "busseta", + "cabrao", + "caceta", + "cacete", + "caga", + "cagado", + "cagalhao", + "cagalhão", + "cagao", + "cagar", + "cagona", + "camisinha", + "canalha", + "caralho", + "casseta", + "cassete", + "cerveja", + "checheca", + "chereca", + "chibumba", + "chibumbo", + "chifruda", + "chifrudo", + "chochota", + "chota", + "chupar", + "clitoris", + "clitóris", + "cocaína", + "cocô", + "colhoes", + "comer", + "cona", + "consolo", + "corna", + "corno", + "cornuda", + "cornudo", + "cretina", + "cretino", + "cu", + "cuzao", + "cuzuda", + "cuzudo", + "cuzão", + "cú", + "dar o rabo", + "debil", + "debiloide", + "defunto", + "demonio", + "demônio", + "doida", + "doido", + "dum raio", + "débil", + "escrota", + "escroto", + "esporra", + "estupido", + "estúpido", + "fecal", + "fedida", + "fedido", + "fedorenta", + "fedorento", + "feia", + "feio", + "feiosa", + "feioso", + "feioza", + "feiozo", + "filho da puta", + "foda", + "foda-se", + "fodace", + "fodao", + "fodasse", + "fode", + "foder", + "fodida", + "fodido", + "fodão", + "frango assado", + "gaiata", + "gaiato", + "gozar", + "grelho", + "grelo", + "heroína", + "heterosexual", + "homem gay", + "idiota", + "imbecil", + "inferno", + "iscrota", + "iscroto", + "ladra", + "ladrao", + "ladrona", + "ladrão", + "lalau", + "leprosa", + "leproso", + "lesbico", + "lolita", + "lésbica", + "lésbico", + "machao", + "machona", + "machorra", + 
"machão", + "mama", + "meleca", + "merda", + "mija", + "mijada", + "mijado", + "mijar", + "mijo", + "mocrea", + "mocreia", + "mocréa", + "mocréia", + "naba", + "otaria", + "otario", + "otária", + "otário", + "paneleiro", + "paspalha", + "paspalhao", + "paspalho", + "paspalhona", + "paspalhão", + "passar um cheque", + "pau", + "peidar", + "peido", + "peidorreira", + "peidorreiro", + "pemba", + "perereca", + "pica", + "pilantra", + "pinto", + "piroca", + "pirocao", + "pirocão", + "piru", + "porra", + "precheca", + "prostituta", + "prostituto", + "punheta", + "punhetao", + "punheteira", + "punheteiro", + "punhetão", + "puta", + "puta que pariu", + "puta que te pariu", + "puto", + "puxa-saco", + "puxasaco", + "pênis", + "queca", + "rabuda", + "rabudao", + "rabudo", + "rabudona", + "rabudão", + "ramela", + "ramelona", + "ramelão", + "ridicula", + "ridiculo", + "ridícula", + "ridículo", + "rola", + "rolas", + "rolona", + "sacanagem", + "saco", + "safada", + "safado", + "siririca", + "tarada", + "tarado", + "testuda", + "testudao", + "testudo", + "testudona", + "testudão", + "tesuda", + "tesudo", + "tezuda", + "tezudo", + "torneira", + "transar", + "trolha", + "trolxa", + "trouxa", + "troxa", + "vagabunda", + "vagabundo", + "vai tomar no cu", + "vai-te foder", + "veadao", + "veado", + "veadão", + "viada", + "viadao", + "viadinha", + "viadinho", + "viadona", + "viadão", + "vibrador", + "xana", + "xaninha", + "xavasca", + "xerereca", + "xibiu", + "xibumba", + "xochota", + "xota", + "xoxota" +] \ No newline at end of file diff --git a/lib/swearwords/ru.json b/lib/swearwords/ru.json new file mode 100644 index 0000000..172deda --- /dev/null +++ b/lib/swearwords/ru.json @@ -0,0 +1,154 @@ +[ + "bychara", + "byk", + "chernozhopyi", + "dolboy'eb", + "ebalnik", + "ebalo", + "ebalom sch'elkat", + "gol", + "mudack", + "opizdenet", + "osto'eblo", + "ostokhuitel'no", + "ot'ebis", + "otmudohat", + "otpizdit", + "otsosi", + "padlo", + "pedik", + "perdet", + "petuh", + "pidar gnoinyj", + 
"pizda", + "pizdato", + "pizdatyi", + "piz'det", + "pizdetc", + "pizdoi nakryt'sja", + "pizd'uk", + "piz`dyulina", + "podi ku'evo", + "poeben", + "po'imat' na konchik", + "po'iti posrat", + "po khuy", + "poluchit pizdy", + "pososi moyu konfetku", + "prissat", + "proebat", + "promudobl'adsksya pizdopro'ebina", + "propezdoloch", + "prosrat", + "raspeezdeyi", + "raspizdatyi", + "raz'yebuy", + "raz'yoba", + "s'ebat'sya", + "shalava", + "styervo", + "sukin syn", + "svodit posrat", + "svoloch", + "trakhat'sya", + "trimandoblydskiy pizdoproyob", + "ubl'yudok", + "uboy", + "u'ebitsche", + "vafl'a", + "vafli lovit", + "v pizdu", + "vyperdysh", + "vzdrochennyi", + "yeb vas", + "za'ebat", + "zaebis", + "zalupa", + "zalupat", + "zasranetc", + "zassat", + "zlo'ebuchy", + "бардак", + "бздёнок", + "блядки", + "блядовать", + "блядство", + "блядь", + "бугор", + "во пизду", + "встать раком", + "выёбываться", + "гандон", + "говно", + "говнюк", + "голый", + "дать пизды", + "дерьмо", + "дрочить", + "другой дразнится", + "ёбарь", + "ебать", + "ебать-копать", + "ебло", + "ебнуть", + "ёб твою мать", + "жопа", + "жополиз", + "играть на кожаной флейте", + "измудохать", + "каждый дрочит как он хочет", + "какая разница", + "как два пальца обоссать", + "курите мою трубку", + "лысого в кулаке гонять", + "малофя", + "манда", + "мандавошка", + "мент", + "муда", + "мудило", + "мудозмон", + "наебать", + "наебениться", + "наебнуться", + "на фиг", + "на хуй", + "на хую вертеть", + "на хуя", + "нахуячиться", + "невебенный", + "не ебет", + "ни за хуй собачу", + "ни хуя", + "обнаженный", + "обоссаться можно", + "один ебётся", + "опесдол", + "офигеть", + "охуеть", + "охуйтельно", + "половое сношение", + "секс", + "сиски", + "спиздить", + "срать", + "ссать", + "траxать", + "ты мне ваньку не валяй", + "фига", + "хапать", + "хер с ней", + "хер с ним", + "хохол", + "хрен", + "хуёво", + "хуёвый", + "хуем груши околачивать", + "хуеплет", + "хуило", + "хуиней страдать", + "хуиня", + "хуй", + "хуйнуть", + "хуй 
пинать" +] \ No newline at end of file diff --git a/lib/swearwords/sv.json b/lib/swearwords/sv.json new file mode 100644 index 0000000..e8b9555 --- /dev/null +++ b/lib/swearwords/sv.json @@ -0,0 +1,45 @@ +[ + "arsle", + "brutta", + "discofitta", + "dra åt helvete", + "fan", + "fitta", + "fittig", + "för helvete", + "helvete", + "hård", + "jävlar", + "knulla", + "kuk", + "kuksås", + "kötthuvud", + "köttnacke", + "moona", + "moonade", + "moonar", + "moonat", + "mutta", + "nigger", + "neger", + "olla", + "pippa", + "pitt", + "prutt", + "pök", + "runka", + "röv", + "rövhål", + "rövknulla", + "satan", + "skita", + "skit ner dig", + "skäggbiff", + "snedfitta", + "snefitta", + "stake", + "subba", + "sås", + "sätta på", + "tusan" +] \ No newline at end of file diff --git a/lib/swearwords/th.json b/lib/swearwords/th.json new file mode 100644 index 0000000..d6ee348 --- /dev/null +++ b/lib/swearwords/th.json @@ -0,0 +1,33 @@ +[ + "กระดอ", + "กระเด้า", + "กระหรี่", + "กะปิ", + "กู", + "ขี้", + "ควย", + "จิ๋ม", + "จู๋", + "เจ๊ก", + "เจี๊ยว", + "ดอกทอง", + "ตอแหล", + "ตูด", + "น้ําแตก", + "มึง", + "แม่ง", + "เย็ด", + "รูตูด", + "ล้างตู้เย็น", + "ส้นตีน", + "สัด", + "เสือก", + "หญิงชาติชั่ว", + "หลั่ง", + "ห่า", + "หํา", + "หี", + "เหี้ย", + "อมนกเขา", + "ไอ้ควาย" +] \ No newline at end of file diff --git a/lib/swearwords/tlh.json b/lib/swearwords/tlh.json new file mode 100644 index 0000000..317224a --- /dev/null +++ b/lib/swearwords/tlh.json @@ -0,0 +1,5 @@ +[ + "ghuy'cha'", + "QI'yaH", + "Qu'vatlh" +] \ No newline at end of file diff --git a/lib/swearwords/tr.json b/lib/swearwords/tr.json new file mode 100644 index 0000000..47c2385 --- /dev/null +++ b/lib/swearwords/tr.json @@ -0,0 +1,144 @@ +[ + "am", + "amcığa", + "amcığı", + "amcığın", + "amcık", + "amcıklar", + "amcıklara", + "amcıklarda", + "amcıklardan", + "amcıkları", + "amcıkların", + "amcıkta", + "amcıktan", + "amı", + "amlar", + "çingene", + "Çingenede", + "Çingeneden", + "Çingeneler", + "Çingenelerde", + 
"Çingenelerden", + "Çingenelere", + "Çingeneleri", + "Çingenelerin", + "Çingenenin", + "Çingeneye", + "Çingeneyi", + "göt", + "göte", + "götler", + "götlerde", + "götlerden", + "götlere", + "götleri", + "götlerin", + "götte", + "götten", + "götü", + "götün", + "götveren", + "götverende", + "götverenden", + "götverene", + "götvereni", + "götverenin", + "götverenler", + "götverenlerde", + "götverenlerden", + "götverenlere", + "götverenleri", + "götverenlerin", + "kaltağa", + "kaltağı", + "kaltağın", + "kaltak", + "kaltaklar", + "kaltaklara", + "kaltaklarda", + "kaltaklardan", + "kaltakları", + "kaltakların", + "kaltakta", + "kaltaktan", + "orospu", + "orospuda", + "orospudan", + "orospular", + "orospulara", + "orospularda", + "orospulardan", + "orospuları", + "orospuların", + "orospunun", + "orospuya", + "orospuyu", + "otuz birci", + "otuz bircide", + "otuz birciden", + "otuz birciler", + "otuz bircilerde", + "otuz bircilerden", + "otuz bircilere", + "otuz bircileri", + "otuz bircilerin", + "otuz bircinin", + "otuz birciye", + "otuz birciyi", + "saksocu", + "saksocuda", + "saksocudan", + "saksocular", + "saksoculara", + "saksocularda", + "saksoculardan", + "saksocuları", + "saksocuların", + "saksocunun", + "saksocuya", + "saksocuyu", + "sıçmak", + "sik", + "sike", + "siker sikmez", + "siki", + "sikilir sikilmez", + "sikin", + "sikler", + "siklerde", + "siklerden", + "siklere", + "sikleri", + "siklerin", + "sikmek", + "sikmemek", + "sikte", + "sikten", + "siktir", + "siktirir siktirmez", + "taşağa", + "taşağı", + "taşağın", + "taşak", + "taşaklar", + "taşaklara", + "taşaklarda", + "taşaklardan", + "taşakları", + "taşakların", + "taşakta", + "taşaktan", + "yarağa", + "yarağı", + "yarağın", + "yarak", + "yaraklar", + "yaraklara", + "yaraklarda", + "yaraklardan", + "yarakları", + "yarakların", + "yarakta", + "yaraktan" +] \ No newline at end of file diff --git a/lib/swearwords/zh.json b/lib/swearwords/zh.json new file mode 100644 index 0000000..81019c4 --- /dev/null +++ 
b/lib/swearwords/zh.json @@ -0,0 +1,296 @@ +[ + "13.", + "13点", + "三级片", + "下三烂", + "下贱", + "个老子的", + "九游", + "乳", + "乳交", + "乳头", + "乳房", + "乳波臀浪", + "交配", + "仆街", + "他奶奶", + "他奶奶的", + "他奶娘的", + "他妈", + "他妈ㄉ王八蛋", + "他妈地", + "他妈的", + "他娘", + "他马的", + "你个傻比", + "你他马的", + "你全家", + "你奶奶的", + "你她马的", + "你妈", + "你妈的", + "你娘", + "你娘卡好", + "你娘咧", + "你它妈的", + "你它马的", + "你是鸡", + "你是鸭", + "你马的", + "做爱", + "傻比", + "傻逼", + "册那", + "军妓", + "几八", + "几叭", + "几巴", + "几芭", + "刚度", + "刚瘪三", + "包皮", + "十三点", + "卖B", + "卖比", + "卖淫", + "卵", + "卵子", + "双峰微颤", + "口交", + "口肯", + "叫床", + "吃屎", + "后庭", + "吹箫", + "塞你公", + "塞你娘", + "塞你母", + "塞你爸", + "塞你老师", + "塞你老母", + "处女", + "外阴", + "大卵子", + "大卵泡", + "大鸡巴", + "奶", + "奶奶的熊", + "奶子", + "奸", + "奸你", + "她妈地", + "她妈的", + "她马的", + "妈B", + "妈个B", + "妈个比", + "妈个老比", + "妈妈的", + "妈比", + "妈的", + "妈的B", + "妈逼", + "妓", + "妓女", + "妓院", + "妳她妈的", + "妳妈的", + "妳娘的", + "妳老母的", + "妳马的", + "姘头", + "姣西", + "姦", + "娘个比", + "娘的", + "婊子", + "婊子养的", + "嫖娼", + "嫖客", + "它妈地", + "它妈的", + "密洞", + "射你", + "射精", + "小乳头", + "小卵子", + "小卵泡", + "小瘪三", + "小肉粒", + "小骚比", + "小骚货", + "小鸡巴", + "小鸡鸡", + "屁眼", + "屁股", + "屄", + "屌", + "巨乳", + "干x娘", + "干七八", + "干你", + "干你妈", + "干你娘", + "干你老母", + "干你良", + "干妳妈", + "干妳娘", + "干妳老母", + "干妳马", + "干您娘", + "干机掰", + "干死CS", + "干死GM", + "干死你", + "干死客服", + "幹", + "强奸", + "强奸你", + "性", + "性交", + "性器", + "性无能", + "性爱", + "情色", + "想上你", + "懆您妈", + "懆您娘", + "懒8", + "懒八", + "懒叫", + "懒教", + "成人", + "我操你祖宗十八代", + "扒光", + "打炮", + "打飞机", + "抽插", + "招妓", + "插你", + "插死你", + "撒尿", + "操你", + "操你全家", + "操你奶奶", + "操你妈", + "操你娘", + "操你祖宗", + "操你老妈", + "操你老母", + "操妳", + "操妳全家", + "操妳妈", + "操妳娘", + "操妳祖宗", + "操机掰", + "操比", + "操逼", + "放荡", + "日他娘", + "日你", + "日你妈", + "日你老娘", + "日你老母", + "日批", + "月经", + "机八", + "机巴", + "机机歪歪", + "杂种", + "浪叫", + "淫", + "淫乱", + "淫妇", + "淫棍", + "淫水", + "淫秽", + "淫荡", + "淫西", + "湿透的内裤", + "激情", + "灨你娘", + "烂货", + "烂逼", + "爛", + "狗屁", + "狗日", + "狗狼养的", + "玉杵", + "王八蛋", + "瓜娃子", + "瓜婆娘", + "瓜批", + "瘪三", + "白烂", + "白痴", + "白癡", + "祖宗", 
+ "私服", + "笨蛋", + "精子", + "老二", + "老味", + "老母", + "老瘪三", + "老骚比", + "老骚货", + "肉壁", + "肉棍子", + "肉棒", + "肉缝", + "肏", + "肛交", + "肥西", + "色情", + "花柳", + "荡妇", + "賤", + "贝肉", + "贱B", + "贱人", + "贱货", + "贼你妈", + "赛你老母", + "赛妳阿母", + "赣您娘", + "轮奸", + "迷药", + "逼", + "逼样", + "野鸡", + "阳具", + "阳萎", + "阴唇", + "阴户", + "阴核", + "阴毛", + "阴茎", + "阴道", + "阴部", + "雞巴", + "靠北", + "靠母", + "靠爸", + "靠背", + "靠腰", + "驶你公", + "驶你娘", + "驶你母", + "驶你爸", + "驶你老师", + "驶你老母", + "骚比", + "骚货", + "骚逼", + "鬼公", + "鸡8", + "鸡八", + "鸡叭", + "鸡吧", + "鸡奸", + "鸡巴", + "鸡芭", + "鸡鸡", + "龟儿子", + "龟头" +] \ No newline at end of file diff --git a/lib/util.js b/lib/util.js index 1ff1711..d3fb02c 100644 --- a/lib/util.js +++ b/lib/util.js @@ -32,6 +32,11 @@ function eachRecursive (obj, fn, maxDepth, depth, checked) { }); } +function isString (str) { + return typeof str === 'string'; +} + module.exports = { - eachRecursive: eachRecursive + eachRecursive: eachRecursive, + isString: isString }; From 74c0c563e936fd5ef04a824d1c62b879e3ab5eea Mon Sep 17 00:00:00 2001 From: wagoid Date: Mon, 11 Apr 2016 02:23:42 -0300 Subject: [PATCH 02/10] Bug fixes and test additions. 
--- .gitignore | 3 +- lib/profanity.js | 40 ++-- lib/swearwords.json | 452 ----------------------------------------- lib/swearwords/pt.json | 6 +- package.json | 4 +- test/profanity.test.js | 219 +++++++++++++++----- 6 files changed, 197 insertions(+), 527 deletions(-) delete mode 100644 lib/swearwords.json diff --git a/.gitignore b/.gitignore index 0f81d60..27d1445 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ node_modules -**.DS_Store \ No newline at end of file +**.DS_Store +.vscode \ No newline at end of file diff --git a/lib/profanity.js b/lib/profanity.js index ae6bc5f..c8d94ff 100644 --- a/lib/profanity.js +++ b/lib/profanity.js @@ -6,6 +6,7 @@ License: http://opensource.org/licenses/MIT The MIT License (MIT) var util = require('./util'); var fs = require('fs'); var _ = require('underscore'); +var path = require('path'); var DEFAULT_REPLACEMENTS = [ 'bunnies', @@ -22,8 +23,9 @@ var DEFAULT_REPLACEMENTS = [ var DEFAULT_LANGUAGES = ['en']; var AVAILABLE_LANGUAGES = ['ar', 'cs', 'da', 'de', 'en', 'eo', 'es', 'fa', 'fi', 'fr', 'hi', 'hu', 'it', 'ja', 'ko', 'nl', 'no', 'pl', 'pt', 'ru', 'sv', 'th', 'tlh', 'tr', 'zh']; var DEFAULT_FS_OPTIONS = {encoding: 'utf8' }; +var SWEARWORDS_DIR = path.join(__dirname, 'swearwords'); -function promisiFyReadFile(fileName, option) { +function promisiFyReadFile(fileName, options) { return new Promise(function (resolve, reject) { fs.readFile(fileName, options, function (err, data) { if (err) { @@ -39,9 +41,8 @@ function promisiFyReadFile(fileName, option) { function getWordListsPromises (languages) { var promises = []; _.each(languages, function(language) { - promises.push(promisiFyReadFile('./swearwords/' + language, DEFAULT_FS_OPTIONS)); + promises.push(promisiFyReadFile(path.join(SWEARWORDS_DIR, language + '.json'), DEFAULT_FS_OPTIONS)); }); - return promises; } @@ -49,7 +50,7 @@ function getWordListsConcatenated (languages) { var lists = []; _.each(languages, function (language) { - 
lists.concat(JSON.parse(fs.readFileSync('./swearwords/' + DEFAULT_FS_OPTIONS))); + lists = lists.concat(JSON.parse(fs.readFileSync(path.join(SWEARWORDS_DIR, language + '.json'), DEFAULT_FS_OPTIONS))); }); return lists; @@ -60,20 +61,23 @@ function escapeRegexChars (word) { } function getListRegexAsync (list, languages) { - var listRegex = []; if (list) { - listRegex = new Promise(function (resolve, reject) { resolve(_getListRegex(list)); } ); + return Promise(function (resolve, reject) { resolve(_getListRegex(list)); } ); } else { - Promise.all(getWordListsPromises(languages)) + return new Promise(function (resolve, reject) { + Promise.all(getWordListsPromises(languages)) .then(function (listsArray) { + var listRegex = []; _.each(listsArray, function (lst) { - listRegex.concat(_getListRegex(lst)); + listRegex = listRegex.concat(_getListRegex(lst)); }); + resolve(listRegex); }) .catch(function (err) { reject(err); }); + }); } } @@ -86,6 +90,10 @@ function getListRegex (list, languages) { function _getListRegex(list) { // we want to treat all characters in the word as literals, not as regex specials (e.g. shi+) + // var escapedRegexChars = ''; + // _.each(list, function (langWords) { + // escapedRegexChars += langWords.map(escapeRegexChars); + // }); return new RegExp('\\b(' + list.map(escapeRegexChars).join('|') + ')\\b', 'gi'); } @@ -103,10 +111,10 @@ function getDefaultLanguagesValue (languages) { return languages; } -function check (target, forbiddenList, languages) { +function check (target, languages, forbiddenList) { languages = getDefaultLanguagesValue(languages); - var regex = forbiddenList ? getListRegex(forbiddenList) : getListRegex(null, languages); + var regex = forbiddenList ? 
getListRegex(forbiddenList, languages) : getListRegex(null, languages); return _check(target, regex); } @@ -123,11 +131,11 @@ function _check (target, regex) { } }); } - + return targets.join(' ').match(regex) || []; } -function checkAsync (target, forbiddenList, languages) { +function checkAsync (target, languages, forbiddenList) { languages = getDefaultLanguagesValue(languages); return new Promise(function(resolve, reject) { @@ -147,7 +155,6 @@ function purifyString (str, regex, options) { var matches = [], purified, - forbiddenList = options.forbiddenList || null, replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS, replace = options.replace || false, obscureSymbol = options.obscureSymbol || '*'; @@ -177,7 +184,7 @@ function purify (target, options) { var matches = [], fields = options.fields || (target instanceof Object ? Object.keys(target) : []), - result, regex = forbiddenList ? getListRegex(forbiddenList) : getListRegex(null, languages); + result, regex = options.forbiddenList ? 
getListRegex(forbiddenList, options.languages) : getListRegex(null, options.languages); if (typeof target === 'string') { @@ -195,11 +202,6 @@ function purify (target, options) { } else if (typeof target[field] === 'object') { util.eachRecursive(target[field], function(val, key, root) { - - if (fields && fields.indexOf(key) === -1) { - return; - } - if (typeof val === 'string') { result = purifyString(val, regex, options); root[key] = result[0]; diff --git a/lib/swearwords.json b/lib/swearwords.json deleted file mode 100644 index cd948bc..0000000 --- a/lib/swearwords.json +++ /dev/null @@ -1,452 +0,0 @@ -[ - "4r5e", - "5h1t", - "5hit", - "a55", - "anal", - "anus", - "ar5e", - "arrse", - "arse", - "ass", - "ass-fucker", - "asses", - "assfucker", - "assfukka", - "asshole", - "assholes", - "asswhole", - "a_s_s", - "b!tch", - "b00bs", - "b17ch", - "b1tch", - "ballbag", - "ballsack", - "bastard", - "beastial", - "beastiality", - "bellend", - "bestial", - "bestiality", - "bi+ch", - "biatch", - "bitch", - "bitcher", - "bitchers", - "bitches", - "bitchin", - "bitching", - "bloody", - "blow job", - "blowjob", - "blowjobs", - "boiolas", - "bollock", - "bollok", - "boner", - "boob", - "boobs", - "booobs", - "boooobs", - "booooobs", - "booooooobs", - "breasts", - "buceta", - "bugger", - "bum", - "bunny fucker", - "butt", - "butthole", - "buttmuch", - "buttplug", - "c0ck", - "c0cksucker", - "carpet muncher", - "cawk", - "chink", - "cipa", - "cl1t", - "clit", - "clitoris", - "clits", - "cnut", - "cock", - "cock-sucker", - "cockface", - "cockhead", - "cockmunch", - "cockmuncher", - "cocks", - "cocksuck ", - "cocksucked ", - "cocksucker", - "cocksucking", - "cocksucks ", - "cocksuka", - "cocksukka", - "cok", - "cokmuncher", - "coksucka", - "coon", - "crap", - "cum", - "cummer", - "cumming", - "cums", - "cumshot", - "cunilingus", - "cunillingus", - "cunnilingus", - "cunt", - "cuntlick ", - "cuntlicker ", - "cuntlicking ", - "cunts", - "cyalis", - "cyberfuc", - "cyberfuck ", - 
"cyberfucked ", - "cyberfucker", - "cyberfuckers", - "cyberfucking ", - "d1ck", - "damn", - "dick", - "dickhead", - "dildo", - "dildos", - "dink", - "dinks", - "dirsa", - "dlck", - "dog-fucker", - "doggin", - "dogging", - "donkeyribber", - "doosh", - "duche", - "dyke", - "ejaculate", - "ejaculated", - "ejaculates ", - "ejaculating ", - "ejaculatings", - "ejaculation", - "ejakulate", - "f u c k", - "f u c k e r", - "f4nny", - "fag", - "fagging", - "faggitt", - "faggot", - "faggs", - "fagot", - "fagots", - "fags", - "fanny", - "fannyflaps", - "fannyfucker", - "fanyy", - "fatass", - "fcuk", - "fcuker", - "fcuking", - "feck", - "fecker", - "felching", - "fellate", - "fellatio", - "fingerfuck ", - "fingerfucked ", - "fingerfucker ", - "fingerfuckers", - "fingerfucking ", - "fingerfucks ", - "fistfuck", - "fistfucked ", - "fistfucker ", - "fistfuckers ", - "fistfucking ", - "fistfuckings ", - "fistfucks ", - "flange", - "fook", - "fooker", - "fuck", - "fucka", - "fucked", - "fucker", - "fuckers", - "fuckhead", - "fuckheads", - "fuckin", - "fucking", - "fuckings", - "fuckingshitmotherfucker", - "fuckme ", - "fucks", - "fuckwhit", - "fuckwit", - "fudge packer", - "fudgepacker", - "fuk", - "fuker", - "fukker", - "fukkin", - "fuks", - "fukwhit", - "fukwit", - "fux", - "fux0r", - "f_u_c_k", - "gangbang", - "gangbanged ", - "gangbangs ", - "gaylord", - "gaysex", - "goatse", - "God", - "god-dam", - "god-damned", - "goddamn", - "goddamned", - "hardcoresex ", - "hell", - "heshe", - "hoar", - "hoare", - "hoer", - "homo", - "hore", - "horniest", - "horny", - "hotsex", - "jack-off ", - "jackoff", - "jap", - "jerk-off ", - "jism", - "jiz ", - "jizm ", - "jizz", - "kawk", - "knob", - "knobead", - "knobed", - "knobend", - "knobhead", - "knobjocky", - "knobjokey", - "kock", - "kondum", - "kondums", - "kum", - "kummer", - "kumming", - "kums", - "kunilingus", - "l3i+ch", - "l3itch", - "labia", - "lmfao", - "lust", - "lusting", - "m0f0", - "m0fo", - "m45terbate", - "ma5terb8", - 
"ma5terbate", - "masochist", - "master-bate", - "masterb8", - "masterbat*", - "masterbat3", - "masterbate", - "masterbation", - "masterbations", - "masturbate", - "mo-fo", - "mof0", - "mofo", - "mothafuck", - "mothafucka", - "mothafuckas", - "mothafuckaz", - "mothafucked ", - "mothafucker", - "mothafuckers", - "mothafuckin", - "mothafucking ", - "mothafuckings", - "mothafucks", - "mother fucker", - "motherfuck", - "motherfucked", - "motherfucker", - "motherfuckers", - "motherfuckin", - "motherfucking", - "motherfuckings", - "motherfuckka", - "motherfucks", - "muff", - "mutha", - "muthafecker", - "muthafuckker", - "muther", - "mutherfucker", - "n1gga", - "n1gger", - "nazi", - "nigg3r", - "nigg4h", - "nigga", - "niggah", - "niggas", - "niggaz", - "nigger", - "niggers ", - "nob", - "nob jokey", - "nobhead", - "nobjocky", - "nobjokey", - "numbnuts", - "nutsack", - "orgasim ", - "orgasims ", - "orgasm", - "orgasms ", - "p0rn", - "pawn", - "pecker", - "penis", - "penisfucker", - "phonesex", - "phuck", - "phuk", - "phuked", - "phuking", - "phukked", - "phukking", - "phuks", - "phuq", - "pigfucker", - "pimpis", - "piss", - "pissed", - "pisser", - "pissers", - "pisses ", - "pissflaps", - "pissin ", - "pissing", - "pissoff ", - "poop", - "poo", - "porn", - "porno", - "pornography", - "pornos", - "prick", - "pricks ", - "pron", - "pube", - "pusse", - "pussi", - "pussies", - "pussy", - "pussys ", - "rectum", - "retard", - "rimjaw", - "rimming", - "s hit", - "s.o.b.", - "sadist", - "schlong", - "screwing", - "scroat", - "scrote", - "scrotum", - "semen", - "sex", - "sh!+", - "sh!t", - "sh1t", - "shag", - "shagger", - "shaggin", - "shagging", - "shemale", - "shi+", - "shit", - "shitdick", - "shite", - "shited", - "shitey", - "shitfuck", - "shitfull", - "shithead", - "shiting", - "shitings", - "shits", - "shitted", - "shitter", - "shitters ", - "shitting", - "shittings", - "shitty", - "skank", - "slut", - "sluts", - "smegma", - "smut", - "snatch", - "son-of-a-bitch", - "spac", - 
"spunk", - "s_h_i_t", - "t1tt1e5", - "t1tties", - "teets", - "teez", - "testical", - "testicle", - "tit", - "titfuck", - "tits", - "titt", - "tittie5", - "tittiefucker", - "titties", - "tittyfuck", - "tittywank", - "titwank", - "tosser", - "turd", - "tw4t", - "twat", - "twathead", - "twatty", - "twunt", - "twunter", - "v14gra", - "v1gra", - "vagina", - "viagra", - "vulva", - "w00se", - "wang", - "wank", - "wanker", - "wanky", - "whoar", - "whore", - "willies", - "willy", - "xrated", - "xxx" -] \ No newline at end of file diff --git a/lib/swearwords/pt.json b/lib/swearwords/pt.json index 1bc754d..64d8137 100644 --- a/lib/swearwords/pt.json +++ b/lib/swearwords/pt.json @@ -182,6 +182,7 @@ "peidorreira", "peidorreiro", "pemba", + "pepeca", "perereca", "pica", "pilantra", @@ -249,14 +250,13 @@ "vai tomar no cu", "vai-te foder", "veadao", - "veado", - "veadão", + "viado", + "viadão", "viada", "viadao", "viadinha", "viadinho", "viadona", - "viadão", "vibrador", "xana", "xaninha", diff --git a/package.json b/package.json index c1738ad..bf18629 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "underscore": "~1.6.0" }, "devDependencies": { - "mocha": "~1.20.x", - "should": "~3.3.x" + "mocha": "^2.4.5", + "should": "^8.3.0" } } diff --git a/test/profanity.test.js b/test/profanity.test.js index 2b2237f..9242e7b 100644 --- a/test/profanity.test.js +++ b/test/profanity.test.js @@ -5,69 +5,188 @@ License: http://opensource.org/licenses/MIT The MIT License (MIT) var should = require('should'), util = require('./util'), - profanity = require('../lib/profanity'); + profanity = require('../lib/profanity'), + _ = require('underscore'); + +function callTestCheckFunctions(languages, forbiddenList) { + it('Should return null with no swearwords found in string', function () { + should(profanity.check('No swearwords here', languages, forbiddenList)).eql([]); + }); + + it('Should retur array of swearwords found in dirty string', function () { + var results = 
profanity.check('something damn something something poo something', languages, forbiddenList); + + should(results).eql([ + 'damn', + 'poo' + ]); + }); + + it('Should not target substrings', function () { + var detected = profanity.check('foo ass bar', languages, forbiddenList), + notDetected = profanity.check('foo grass bar', languages, forbiddenList); + + should(detected).have.length(1); + should(notDetected).have.length(0); + }); + + it('Should work equally for objects (Recursively) and arrays', function (done) { + var results_obj = profanity.check({ + foo: 'something damn', + bar: { test: 'something poo', bar: 'crap woooh' } + }, languages, forbiddenList), + results_arr = profanity.check([ + 'something damn', + [ 'something poo' ], + { foo: [ { bar: 'something crap' } ] } + ], languages, forbiddenList); + + should(results_obj).eql([ + 'damn', + 'poo', + 'crap' + ]); + + should(results_arr).eql([ + 'damn', + 'poo', + 'crap' + ]); + + done(); + }); +} describe('Profanity module', function () { - describe('.validate(target)', function () { - - it('returns null with no swearwords found in string', function () { - should(profanity.check('No swearwords here')).eql([]); + describe('.check(target)', function () { + callTestCheckFunctions(); + }); + + describe('.check(target) when languages="all"', function () { + callTestCheckFunctions('all', null); + }); + + describe('.check(target) for portuguese', function () { + it('Should not get a portuguese dirty word when there is no language specifided (default is english)', function () { + var notDetected = profanity.check('viados não gostam de pepeca', null); + + should(notDetected).have.length(0); + }); + + it('Should get a dirty word and not get substrings', function () { + var detected = profanity.check('viado não gosta de pepeca', 'pt'); + var notDetected = profanity.check('viados não gosta de pepecas', 'pt'); + + detected.should.eql(['viado', 'pepeca']); + notDetected.should.eql([]); + }); + + it('Should work equally for 
objects (Recursively) and arrays', function (done) { + var results_obj = profanity.check({ foo: 'something perereca', bar: { test: 'something punheta', bar: 'paspalhão woooh' } }, 'pt'), + results_arr = profanity.check(['something porra', [ 'something precheca' ], { foo: [ { bar: 'something puta' } ] }], 'pt'); + + should(results_obj).eql([ + 'perereca', + 'punheta', + 'paspalhão' + ]); + + should(results_arr).eql([ + 'porra', + 'precheca', + 'puta' + ]); + + done(); + }); + }); + + describe('.check(target) for multiple languages', function () { + it('Should get a dirty word and not get substrings', function () { + var detected = profanity.check('viado não gosta de pepeca fucker bocchinara reudig', ['pt', 'it', 'de', 'en']); + var notDetected = profanity.check('viados não gosta de pepecas fuckersras, bocchinarasws reudigs', ['pt', 'it', 'de', 'en']); + + detected.should.eql(['viado', 'pepeca', 'fucker', 'bocchinara', 'reudig']); + notDetected.should.eql([]); + }); + + it('Should work equally for objects (Recursively) and arrays', function (done) { + var results_obj = profanity.check({ foo: 'something perereca fucker ', bar: { test: 'something punheta bocchinara', bar: 'paspalhão reudig woooh' } }, ['pt', 'it', 'de', 'en']), + results_arr = profanity.check(['something porra fucker', [ 'something precheca bocchinara ' ], { foo: [ { bar: 'something puta reudig' } ] }], 'all'); + + should(results_obj).eql([ + 'perereca', + 'fucker', + 'punheta', + 'bocchinara', + 'paspalhão', + 'reudig' + ]); + + should(results_arr).eql([ + 'porra', + 'fucker', + 'precheca', + 'bocchinara', + 'puta', + 'reudig' + ]); + + done(); + }); + }); + + describe('.checkAsync(target)', () => { + it('Returns null with no swearwords found in string', () => { + profanity.checkAsync('No swearwords here') + .then(results => { + should(results).eql([]); }); + }); - it('returns array of swearwords found in dirty string', function () { - var results = profanity.check('something damn something something 
poo something'); - - should(results).eql([ + it('Returns array of swearwords found in dirty string', () => { + profanity.checkAsync('something damn something something poo something') + .then(results => { + should(results).eql([ 'damn', 'poo' - ]); - }); - - it('doesn\'t target substrings', function () { - var detected = profanity.check('foo ass bar'), - notDetected = profanity.check('foo grass bar'); - - should(detected).have.length(1); - should(notDetected).have.length(0); - }); - - it('works equally for objects (Recursively) and arrays', function (done) { - var results_obj = profanity.check({ - foo: 'something damn', - bar: { test: 'something poo', bar: 'crap woooh' } - }), - results_arr = profanity.check([ - 'something damn', - [ 'something poo' ], - { foo: [ { bar: 'something crap' } ] } - ]); - - should(results_obj).eql([ - 'damn', - 'poo', - 'crap' - ]); - - should(results_arr).eql([ - 'damn', - 'poo', - 'crap' - ]); - - done(); - }); - + ]); + }); + }); + + it('Doesn\'t target substrings', function () { + Promise.all([profanity.checkAsync('foo ass bar'), profanity.checkAsync('foo grass bar')]) + .then( results => { + var detected = results[0]; + var notDetected = results[1]; + + detected.should.have.length(1); + notDetected.should.have.length(0); + }); + }); + + it('Works equally for objects (Recursively) and arrays', function () { + var firstTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; + var secondTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; + var thirdTarget = ['something damn', [ 'something poo' ], { foo: [ { bar: 'something crap' } ] }]; + Promise.all([profanity.checkAsync(firstTarget), profanity.checkAsync(secondTarget), profanity.checkAsync(thirdTarget)]) + .then(results => { + results.should.have.length(3); + _.each(results, result => { + result.should.eql(['damn','poo','crap']); + }); + }); + }); }); + + describe('.purify(target)', function () { - it('works in 
obscure (default) mode on a simple string', function (done) { + it('works in obscure (default) mode on a simple string', function () { var result = profanity.purify('boob damn something poo'); result[0].should.equal('b**b d**n something p*o'); result[1].should.eql([ 'boob', 'damn', 'poo' ]); - - - done(); }); it('works in obscure (default) mode recursively with objects', function (done) { From 911d011192f57aebf968722e8a3bc20001e388a4 Mon Sep 17 00:00:00 2001 From: wagoid Date: Wed, 13 Apr 2016 02:53:30 -0300 Subject: [PATCH 03/10] Add purifyAsync, made some changes to util and made 100% coverage --- lib/profanity.js | 111 ++++++++++++---------- lib/util.js | 55 ++++++----- package.json | 4 +- test/.jshintrc | 4 +- test/profanity.test.js | 205 +++++++++++++++++++++++++++++------------ 5 files changed, 241 insertions(+), 138 deletions(-) diff --git a/lib/profanity.js b/lib/profanity.js index c8d94ff..055a721 100644 --- a/lib/profanity.js +++ b/lib/profanity.js @@ -25,23 +25,10 @@ var AVAILABLE_LANGUAGES = ['ar', 'cs', 'da', 'de', 'en', 'eo', 'es', 'fa', 'fi', var DEFAULT_FS_OPTIONS = {encoding: 'utf8' }; var SWEARWORDS_DIR = path.join(__dirname, 'swearwords'); -function promisiFyReadFile(fileName, options) { - return new Promise(function (resolve, reject) { - fs.readFile(fileName, options, function (err, data) { - if (err) { - reject(err); - } else { - var jsonData = JSON.parse(data); - resolve(jsonData); - } - }); - }); -} - function getWordListsPromises (languages) { var promises = []; _.each(languages, function(language) { - promises.push(promisiFyReadFile(path.join(SWEARWORDS_DIR, language + '.json'), DEFAULT_FS_OPTIONS)); + promises.push(util.promisiFyReadJsonFile(path.join(SWEARWORDS_DIR, language + '.json'), DEFAULT_FS_OPTIONS)); }); return promises; } @@ -62,17 +49,19 @@ function escapeRegexChars (word) { function getListRegexAsync (list, languages) { if (list) { - return Promise(function (resolve, reject) { resolve(_getListRegex(list)); } ); + return new 
Promise(function (resolve, reject) { + resolve(_getListRegex(list)); + }); } else { return new Promise(function (resolve, reject) { Promise.all(getWordListsPromises(languages)) .then(function (listsArray) { var listRegex = []; _.each(listsArray, function (lst) { - listRegex = listRegex.concat(_getListRegex(lst)); + listRegex = listRegex.concat(lst); }); - resolve(listRegex); + resolve(_getListRegex(listRegex)); }) .catch(function (err) { reject(err); @@ -85,20 +74,17 @@ function getListRegex (list, languages) { if (!list) { list = getWordListsConcatenated(languages); } + return _getListRegex(list); } function _getListRegex(list) { // we want to treat all characters in the word as literals, not as regex specials (e.g. shi+) - // var escapedRegexChars = ''; - // _.each(list, function (langWords) { - // escapedRegexChars += langWords.map(escapeRegexChars); - // }); return new RegExp('\\b(' + list.map(escapeRegexChars).join('|') + ')\\b', 'gi'); } function getDefaultLanguagesValue (languages) { - if (util.isString(languages)) { + if (_.isString(languages)) { if (languages.toLowerCase() === 'all') { languages = AVAILABLE_LANGUAGES; } else { @@ -114,7 +100,7 @@ function getDefaultLanguagesValue (languages) { function check (target, languages, forbiddenList) { languages = getDefaultLanguagesValue(languages); - var regex = forbiddenList ? 
getListRegex(forbiddenList, languages) : getListRegex(null, languages); + var regex = getListRegex(forbiddenList, languages); return _check(target, regex); } @@ -122,22 +108,22 @@ function check (target, languages, forbiddenList) { function _check (target, regex) { var targets = []; - if (util.isString(target)) { + if (_.isString(target)) { targets.push(target); - } else if (typeof target === 'object') { + /* istanbul ignore else */ + } else if (_.isObject(target)) { util.eachRecursive(target, function(val) { - if (util.isString(val)) { + /* istanbul ignore else */ + if (_.isString(val)) { targets.push(val); } }); } - return targets.join(' ').match(regex) || []; } function checkAsync (target, languages, forbiddenList) { languages = getDefaultLanguagesValue(languages); - return new Promise(function(resolve, reject) { getListRegexAsync(forbiddenList, languages) .then(function (regex) { @@ -151,11 +137,8 @@ function checkAsync (target, languages, forbiddenList) { } function purifyString (str, regex, options) { - options = options || {}; - var matches = [], purified, - replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS, replace = options.replace || false, obscureSymbol = options.obscureSymbol || '*'; @@ -163,7 +146,7 @@ function purifyString (str, regex, options) { matches.push(val); if (replace) { - return replacementsList[Math.floor(Math.random() * replacementsList.length)]; + return options.replacementsList[Math.floor(Math.random() * options.replacementsList.length)]; } var str = val.substr(0, 1); @@ -178,37 +161,35 @@ function purifyString (str, regex, options) { return [purified, matches]; } -function purify (target, options) { - options = options || {}; - options.languages = getDefaultLanguagesValue(options.languages); - - var matches = [], - fields = options.fields || (target instanceof Object ? Object.keys(target) : []), - result, regex = options.forbiddenList ? 
getListRegex(forbiddenList, options.languages) : getListRegex(null, options.languages); - - if (typeof target === 'string') { +function _purify (target, fields, regex, options) { + var matches = []; + var result; + + if (_.isString(target)) { return purifyString(target, regex, options); - - } else if (typeof target === 'object') { - fields.forEach(function(field) { + + /* istanbul ignore else */ + } else if (_.isObject(target)) { + _.each(fields, function(field) { // TODO: Use better recursive checking, make DRYer - if (typeof target[field] === 'string') { + if (_.isString(target[field])) { result = purifyString(target[field], regex, options); target[field] = result[0]; matches = matches.concat(result[1]); - - } else if (typeof target[field] === 'object') { + /* istanbul ignore else */ + } else if (_.isObject(target[field])) { util.eachRecursive(target[field], function(val, key, root) { - if (typeof val === 'string') { + /* istanbul ignore else */ + if (_.isString(val)) { result = purifyString(val, regex, options); root[key] = result[0]; matches = matches.concat(result[1]); } - }); + }, options.maxRecursionDepth); } }); @@ -216,8 +197,38 @@ function purify (target, options) { } } +function purify (target, options) { + options = options || {}; + options.languages = getDefaultLanguagesValue(options.languages); + options.replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS; + + var fields = options.fields || (target instanceof Object ? Object.keys(target) : []), + regex = getListRegex(options.forbiddenList, options.languages); + + return _purify(target, fields, regex, options); +} + +function purifyAsync (target, options) { + options = options || {}; + options.languages = getDefaultLanguagesValue(options.languages); + options.replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS; + + return new Promise(function (resolve, reject) { + var fields = options.fields || (target instanceof Object ? 
Object.keys(target) : []); + + getListRegexAsync(options.forbiddenList, options.languages) + .then(function (regex) { + resolve(_purify(target, fields, regex, options)); + }) + .catch(function (err) { + reject(err); + }); + }); +} + module.exports = { check: check, checkAsync: checkAsync, - purify: purify + purify: purify, + purifyAsync: purifyAsync }; \ No newline at end of file diff --git a/lib/util.js b/lib/util.js index d3fb02c..a163427 100644 --- a/lib/util.js +++ b/lib/util.js @@ -4,39 +4,50 @@ License: http://opensource.org/licenses/MIT The MIT License (MIT) */ var _ = require('underscore'); +var fs = require('fs'); function eachRecursive (obj, fn, maxDepth, depth, checked) { - checked = checked || []; + checked = checked || []; - depth = depth || 0; + depth = depth || 0; + if ((maxDepth && depth > maxDepth) || obj in checked) { + return; + } - if ((maxDepth && depth > maxDepth) || obj in checked) { - return; - } - - _.each(obj, function (val, key) { - checked.push(obj); + _.each(obj, function (val, key) { + checked.push(obj); - if (_.isObject(val)) { - if (val in checked) { - return; - } + if (_.isObject(val)) { + for (var i in checked) { + if (val == checked[i]) { + return; + } + } - checked.push(val); + checked.push(val); - depth += 1; - eachRecursive(val, fn, depth, checked); - } else { - fn(val, key, obj, depth); - } - }); + depth += 1; + eachRecursive(val, fn, maxDepth, depth, checked); + } else { + fn(val, key, obj, depth); + } + }); } -function isString (str) { - return typeof str === 'string'; +function promisiFyReadJsonFile(fileName, options) { + return new Promise(function (resolve, reject) { + fs.readFile(fileName, options, function (err, data) { + if (err) { + reject(err); + } else { + var jsonData = JSON.parse(data); + resolve(jsonData); + } + }); + }); } module.exports = { eachRecursive: eachRecursive, - isString: isString + promisiFyReadJsonFile: promisiFyReadJsonFile }; diff --git a/package.json b/package.json index bf18629..0613c84 100644 
--- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "profanity-util", - "version": "0.0.5", + "version": "1.0.0", "description": "Utility for detection, filtering and replacement / obscuration of forbidden words", "main": "lib/profanity.js", "scripts": { @@ -9,7 +9,7 @@ "author": "Tancredi Trugenberger", "license": "MIT", "dependencies": { - "underscore": "~1.6.0" + "underscore": "^1.8.3" }, "devDependencies": { "mocha": "^2.4.5", diff --git a/test/.jshintrc b/test/.jshintrc index 25245d9..f135188 100644 --- a/test/.jshintrc +++ b/test/.jshintrc @@ -5,12 +5,10 @@ "esnext": true, "globalstrict": false, "expr": true, - "indent": 4, + "indent": 2, "quotmark": true, "smarttabs": true, "trailing": true, - "undef": true, - "unused": true, "globals": { "it": true, "describe": true, diff --git a/test/profanity.test.js b/test/profanity.test.js index 9242e7b..b9374f3 100644 --- a/test/profanity.test.js +++ b/test/profanity.test.js @@ -13,7 +13,7 @@ function callTestCheckFunctions(languages, forbiddenList) { should(profanity.check('No swearwords here', languages, forbiddenList)).eql([]); }); - it('Should retur array of swearwords found in dirty string', function () { + it('Should return array of swearwords found in dirty string', function () { var results = profanity.check('something damn something something poo something', languages, forbiddenList); should(results).eql([ @@ -30,7 +30,7 @@ function callTestCheckFunctions(languages, forbiddenList) { should(notDetected).have.length(0); }); - it('Should work equally for objects (Recursively) and arrays', function (done) { + it('Should work equally for objects (Recursively) and arrays', function () { var results_obj = profanity.check({ foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } @@ -52,8 +52,6 @@ function callTestCheckFunctions(languages, forbiddenList) { 'poo', 'crap' ]); - - done(); }); } @@ -81,7 +79,7 @@ describe('Profanity module', function () { notDetected.should.eql([]); }); - 
it('Should work equally for objects (Recursively) and arrays', function (done) { + it('Should work equally for objects (Recursively) and arrays', function () { var results_obj = profanity.check({ foo: 'something perereca', bar: { test: 'something punheta', bar: 'paspalhão woooh' } }, 'pt'), results_arr = profanity.check(['something porra', [ 'something precheca' ], { foo: [ { bar: 'something puta' } ] }], 'pt'); @@ -96,8 +94,6 @@ describe('Profanity module', function () { 'precheca', 'puta' ]); - - done(); }); }); @@ -110,7 +106,7 @@ describe('Profanity module', function () { notDetected.should.eql([]); }); - it('Should work equally for objects (Recursively) and arrays', function (done) { + it('Should work equally for objects (Recursively) and arrays', function () { var results_obj = profanity.check({ foo: 'something perereca fucker ', bar: { test: 'something punheta bocchinara', bar: 'paspalhão reudig woooh' } }, ['pt', 'it', 'de', 'en']), results_arr = profanity.check(['something porra fucker', [ 'something precheca bocchinara ' ], { foo: [ { bar: 'something puta reudig' } ] }], 'all'); @@ -131,30 +127,46 @@ describe('Profanity module', function () { 'puta', 'reudig' ]); - - done(); }); }); describe('.checkAsync(target)', () => { - it('Returns null with no swearwords found in string', () => { + it('Should return null with no swearwords found in string', (done) => { profanity.checkAsync('No swearwords here') .then(results => { - should(results).eql([]); - }); + should(results).eql([]); + done(); + }); }); - it('Returns array of swearwords found in dirty string', () => { + it('Should return array of swearwords found in dirty string', (done) => { profanity.checkAsync('something damn something something poo something') .then(results => { should(results).eql([ 'damn', 'poo' ]); + + done(); + }) + .catch(err => { + console.log(err); + }); + }); + + it('Should work when passing my own forbidden list', (done) => { + profanity.checkAsync('something damn something 
something poo something', 'en', ['damn', 'poo']) + .then(results => { + should(results).eql([ + 'damn', + 'poo' + ]); + + done(); }); }); - it('Doesn\'t target substrings', function () { + it('Should not target substrings', function () { Promise.all([profanity.checkAsync('foo ass bar'), profanity.checkAsync('foo grass bar')]) .then( results => { var detected = results[0]; @@ -165,7 +177,7 @@ describe('Profanity module', function () { }); }); - it('Works equally for objects (Recursively) and arrays', function () { + it('Should work equally for objects (Recursively) and arrays', function () { var firstTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; var secondTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; var thirdTarget = ['something damn', [ 'something poo' ], { foo: [ { bar: 'something crap' } ] }]; @@ -177,69 +189,140 @@ describe('Profanity module', function () { }); }); }); + + it('Should jump to catch when passing language that is not supported by the tool', () => { + profanity.checkAsync('Anything', ['HUE']) + .catch(err => { + err.code.should.eql('ENOENT'); + }); + }); }); describe('.purify(target)', function () { + it('Should work in obscure (default) mode on a simple string', function () { + var result = profanity.purify('boob damn something poo'); - it('works in obscure (default) mode on a simple string', function () { - var result = profanity.purify('boob damn something poo'); - - result[0].should.equal('b**b d**n something p*o'); - result[1].should.eql([ 'boob', 'damn', 'poo' ]); - }); - - it('works in obscure (default) mode recursively with objects', function (done) { - var result = profanity.purify({ - bar: { foo: 'something boob', bar: { foo: 'test poo' } }, - test: 'something damn' - }); + result[0].should.equal('b**b d**n something p*o'); + result[1].should.eql([ 'boob', 'damn', 'poo' ]); + }); + + it('Should work when passing my own forbidden list', function () { + var 
result = profanity.purify('boob damn something pota', { forbiddenList: ['boob', 'damn', 'pota'] }); - result[0].should.have.keys('bar', 'test'); - result[0].bar.should.have.keys('foo', 'bar'); - result[0].bar.foo.should.equal('something b**b'); - result[0].bar.bar.should.have.keys('foo'); - result[0].bar.bar.foo.should.equal('test p*o'); - result[0].bar.foo.should.equal('something b**b'); - result[0].test.should.equal('something d**n'); + result[0].should.equal('b**b d**n something p**a'); + result[1].should.eql([ 'boob', 'damn', 'pota' ]); + }); - result[1].should.eql([ 'boob', 'poo', 'damn' ]); + it('Should work in obscure (default) mode recursively with objects, with infinite recursion and maxRecursionDepth', function () { + var testObj = { + bar: { foo: 'something boob', bar: { foo: 'test poo', bler: {foo: 'will not enter here'} } }, + test: 'something damn' + }; + testObj.crazy = testObj; + + var result = profanity.purify(testObj, {maxRecursionDepth: 1}); + + result[0].should.have.keys('bar', 'test', 'crazy'); + result[0].bar.should.have.keys('foo', 'bar'); + result[0].bar.foo.should.equal('something b**b'); + result[0].bar.bar.should.have.keys('foo', 'bler'); + result[0].bar.bar.foo.should.equal('test p*o'); + result[0].bar.foo.should.equal('something b**b'); + result[0].test.should.equal('something d**n'); + + result[1].should.eql([ 'boob', 'poo', 'damn' ]); + }); - done(); + it('Should work in replace mode on a simple string', function () { + var result = profanity.purify('boob damn something poo', { + replace: true }); - it('works in replace mode on a simple string', function (done) { - var result = profanity.purify('boob damn something poo', { - replace: true - }); + util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); + result[1].should.eql([ 'boob', 'damn', 'poo' ]); + }); - util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); - result[1].should.eql([ 'boob', 'damn', 'poo' ]); + 
it('Should work in replace mode recursively with objects', function () { + var result = profanity.purify({ + bar: { foo: 'something boob', bar: { foo: 'test poo' } }, + test: 'something damn' + }, { + replace: true + }); + result[0].should.have.keys('bar', 'test'); + result[0].bar.should.have.keys('foo', 'bar'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + result[0].bar.bar.should.have.keys('foo'); + util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + util.testPurified(result[0].test, 'something [ placeholder ]'); - done(); + result[1].should.eql([ 'boob', 'poo', 'damn' ]); + }); + }); + + describe('.purifyAsync(target)', function () { + it('Should work in obscure (default) mode on a simple string', function () { + profanity.purifyAsync('boob damn something poo') + .then(result => { + result[0].should.equal('b**b d**n something p*o'); + result[1].should.eql([ 'boob', 'damn', 'poo' ]); + }); }); - it('works in replace mode recursively with objects', function (done) { - var result = profanity.purify({ - bar: { foo: 'something boob', bar: { foo: 'test poo' } }, - test: 'something damn' - }, { - replace: true + it('Should work in obscure (default) mode recursively with objects', function () { + var objToCheck = { + bar: { foo: 'something boob', bar: { foo: 'test poo' } }, + test: 'something damn' + }; + profanity.purifyAsync(objToCheck) + .then(result => { + result[0].should.have.keys('bar', 'test'); + result[0].bar.should.have.keys('foo', 'bar'); + result[0].bar.foo.should.equal('something b**b'); + result[0].bar.bar.should.have.keys('foo'); + result[0].bar.bar.foo.should.equal('test p*o'); + result[0].bar.foo.should.equal('something b**b'); + result[0].test.should.equal('something d**n'); + + result[1].should.eql([ 'boob', 'poo', 'damn' ]); }); + }); - result[0].should.have.keys('bar', 'test'); - result[0].bar.should.have.keys('foo', 'bar'); - 
util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); - result[0].bar.bar.should.have.keys('foo'); - util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); - util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); - util.testPurified(result[0].test, 'something [ placeholder ]'); - - result[1].should.eql([ 'boob', 'poo', 'damn' ]); - - done(); + it('Should work in replace mode on a simple string', function () { + profanity.purifyAsync('boob damn something poo', { replace: true }) + .then(result => { + util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); + result[1].should.eql([ 'boob', 'damn', 'poo' ]); + }); }); + it('Should work in replace mode recursively with objects', function () { + var objToCheck = { + bar: { foo: 'something boob', bar: { foo: 'test poo' } }, + test: 'something damn' + }; + profanity.purifyAsync(objToCheck, { replace: true }) + .then(result => { + result[0].should.have.keys('bar', 'test'); + result[0].bar.should.have.keys('foo', 'bar'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + result[0].bar.bar.should.have.keys('foo'); + util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + util.testPurified(result[0].test, 'something [ placeholder ]'); + + result[1].should.eql([ 'boob', 'poo', 'damn' ]); + }); + }); + + it('Should jump to catch when passing language that is not supported by the tool', () => { + profanity.purifyAsync('Anything', { languages: ['HUE'] }) + .catch(err => { + err.code.should.eql('ENOENT'); + }); + }); }); }); From cdd6a5778b31580fe7a4ff58cb91ca2a64f33786 Mon Sep 17 00:00:00 2001 From: wagoid Date: Wed, 13 Apr 2016 03:15:15 -0300 Subject: [PATCH 04/10] Add travis CI and coveralls --- README.md | 3 +++ package.json | 5 ++++- travis.yaml | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 travis.yaml diff --git 
a/README.md b/README.md index 507ab0e..4ee6b76 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # Node.js Profanity Utility +[![Build Status](https://travis-ci.org/wagoid/nodejs-profanity-util.svg?branch=master)](https://travis-ci.org/wagoid/nodejs-profanity-util) +[![Coverage Status](https://coveralls.io/repos/github/wagoid/nodejs-profanity-util/badge.svg?branch=master)](https://coveralls.io/github/wagoid/nodejs-profanity-util?branch=master) + > Utility for detection, filtering and replacement / obscuration of forbidden words The original lists of swearwords used by default were taken from [here](https://gist.github.com/jamiew/1112488) and [here](https://github.com/shutterstock/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words). diff --git a/package.json b/package.json index 0613c84..3be52cd 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,8 @@ "description": "Utility for detection, filtering and replacement / obscuration of forbidden words", "main": "lib/profanity.js", "scripts": { - "test": "mocha test" + "test": "./node_modules/istanbul/lib/cli.js cover ./node_modules/.bin/_mocha", + "coveralls": "cat ./coverage/lcov.info | ./node_modules/.bin/coveralls" }, "author": "Tancredi Trugenberger", "license": "MIT", @@ -12,6 +13,8 @@ "underscore": "^1.8.3" }, "devDependencies": { + "coveralls": "^2.11.9", + "istanbul": "^0.4.3", "mocha": "^2.4.5", "should": "^8.3.0" } diff --git a/travis.yaml b/travis.yaml new file mode 100644 index 0000000..2381eaf --- /dev/null +++ b/travis.yaml @@ -0,0 +1,9 @@ +language: node_js +node_js: + - "5" + - "5.1" + - "4" + - "4.2" + - "4.1" + - "4.0" +after_success: 'npm run coveralls' \ No newline at end of file From 48d2b7e1dfc883d6ad822f6a78b25081046dcc11 Mon Sep 17 00:00:00 2001 From: wagoid Date: Wed, 13 Apr 2016 03:16:47 -0300 Subject: [PATCH 05/10] Change travis.yaml to travis.yml --- travis.yaml => travis.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename travis.yaml => travis.yml (100%) diff --git 
a/travis.yaml b/travis.yml similarity index 100% rename from travis.yaml rename to travis.yml From 6f3ea7fe8c7b8197180a6aa9f004d134520e024f Mon Sep 17 00:00:00 2001 From: wagoid Date: Wed, 13 Apr 2016 03:27:26 -0300 Subject: [PATCH 06/10] Finished adding travis CI. --- travis.yml => .travis.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename travis.yml => .travis.yml (100%) diff --git a/travis.yml b/.travis.yml similarity index 100% rename from travis.yml rename to .travis.yml From fec3331d199102fe666e568720375c4cd29df62f Mon Sep 17 00:00:00 2001 From: wagoid Date: Wed, 13 Apr 2016 17:17:34 -0300 Subject: [PATCH 07/10] Change promise engine to bluebird --- .gitignore | 3 +- lib/profanity.js | 53 ++-- lib/util.js | 16 +- package.json | 5 +- test/profanity.test.js | 543 ++++++++++++++++++++--------------------- 5 files changed, 300 insertions(+), 320 deletions(-) diff --git a/.gitignore b/.gitignore index 27d1445..cb2ee4e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ node_modules **.DS_Store -.vscode \ No newline at end of file +.vscode +coverage \ No newline at end of file diff --git a/lib/profanity.js b/lib/profanity.js index 055a721..1f8050c 100644 --- a/lib/profanity.js +++ b/lib/profanity.js @@ -7,6 +7,7 @@ var util = require('./util'); var fs = require('fs'); var _ = require('underscore'); var path = require('path'); +var Promise = require('bluebird'); var DEFAULT_REPLACEMENTS = [ 'bunnies', @@ -30,6 +31,7 @@ function getWordListsPromises (languages) { _.each(languages, function(language) { promises.push(util.promisiFyReadJsonFile(path.join(SWEARWORDS_DIR, language + '.json'), DEFAULT_FS_OPTIONS)); }); + return promises; } @@ -53,19 +55,14 @@ function getListRegexAsync (list, languages) { resolve(_getListRegex(list)); }); } else { - return new Promise(function (resolve, reject) { - Promise.all(getWordListsPromises(languages)) - .then(function (listsArray) { - var listRegex = []; - _.each(listsArray, function (lst) { - listRegex = 
listRegex.concat(lst); - }); - - resolve(_getListRegex(listRegex)); - }) - .catch(function (err) { - reject(err); + return Promise.all(getWordListsPromises(languages)) + .then(function (listsArray) { + var listRegex = []; + _.each(listsArray, function (lst) { + listRegex = listRegex.concat(lst); }); + + return _getListRegex(listRegex); }); } } @@ -124,16 +121,12 @@ function _check (target, regex) { function checkAsync (target, languages, forbiddenList) { languages = getDefaultLanguagesValue(languages); - return new Promise(function(resolve, reject) { - getListRegexAsync(forbiddenList, languages) - .then(function (regex) { - var result = _check(target, regex); - resolve(result); - }) - .catch(function (err) { - reject(err); - }); - }); + + return getListRegexAsync(forbiddenList, languages) + .then(function (regex) { + var result = _check(target, regex); + return result; + }); } function purifyString (str, regex, options) { @@ -213,17 +206,11 @@ function purifyAsync (target, options) { options.languages = getDefaultLanguagesValue(options.languages); options.replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS; - return new Promise(function (resolve, reject) { - var fields = options.fields || (target instanceof Object ? Object.keys(target) : []); - - getListRegexAsync(options.forbiddenList, options.languages) - .then(function (regex) { - resolve(_purify(target, fields, regex, options)); - }) - .catch(function (err) { - reject(err); - }); - }); + var fields = options.fields || (target instanceof Object ? 
Object.keys(target) : []); + return getListRegexAsync(options.forbiddenList, options.languages) + .then(function (regex) { + return _purify(target, fields, regex, options); + }); } module.exports = { diff --git a/lib/util.js b/lib/util.js index a163427..104a01c 100644 --- a/lib/util.js +++ b/lib/util.js @@ -5,6 +5,8 @@ License: http://opensource.org/licenses/MIT The MIT License (MIT) var _ = require('underscore'); var fs = require('fs'); +var Promise = require('bluebird'); +var readFile = Promise.promisify(fs.readFile); function eachRecursive (obj, fn, maxDepth, depth, checked) { checked = checked || []; @@ -35,16 +37,10 @@ function eachRecursive (obj, fn, maxDepth, depth, checked) { } function promisiFyReadJsonFile(fileName, options) { - return new Promise(function (resolve, reject) { - fs.readFile(fileName, options, function (err, data) { - if (err) { - reject(err); - } else { - var jsonData = JSON.parse(data); - resolve(jsonData); - } - }); - }); + return readFile(fileName, options) + .then(data => { + return JSON.parse(data); + }); } module.exports = { diff --git a/package.json b/package.json index 3be52cd..078e747 100644 --- a/package.json +++ b/package.json @@ -4,12 +4,13 @@ "description": "Utility for detection, filtering and replacement / obscuration of forbidden words", "main": "lib/profanity.js", "scripts": { - "test": "./node_modules/istanbul/lib/cli.js cover ./node_modules/.bin/_mocha", - "coveralls": "cat ./coverage/lcov.info | ./node_modules/.bin/coveralls" + "test": "node ./node_modules/istanbul/lib/cli.js cover ./node_modules/.bin/_mocha", + "coveralls": "cat ./coverage/lcov.info | node ./node_modules/.bin/coveralls" }, "author": "Tancredi Trugenberger", "license": "MIT", "dependencies": { + "bluebird": "^3.3.5", "underscore": "^1.8.3" }, "devDependencies": { diff --git a/test/profanity.test.js b/test/profanity.test.js index b9374f3..232d971 100644 --- a/test/profanity.test.js +++ b/test/profanity.test.js @@ -4,325 +4,320 @@ License: 
http://opensource.org/licenses/MIT The MIT License (MIT) */ var should = require('should'), - util = require('./util'), - profanity = require('../lib/profanity'), - _ = require('underscore'); + util = require('./util'), + profanity = require('../lib/profanity'), + _ = require('underscore'); function callTestCheckFunctions(languages, forbiddenList) { - it('Should return null with no swearwords found in string', function () { - should(profanity.check('No swearwords here', languages, forbiddenList)).eql([]); + it('Should return null with no swearwords found in string', () => { + should(profanity.check('No swearwords here', languages, forbiddenList)).eql([]); }); - it('Should return array of swearwords found in dirty string', function () { - var results = profanity.check('something damn something something poo something', languages, forbiddenList); + it('Should return array of swearwords found in dirty string', () => { + var results = profanity.check('something damn something something poo something', languages, forbiddenList); - should(results).eql([ - 'damn', - 'poo' - ]); + should(results).eql([ + 'damn', + 'poo' + ]); }); - it('Should not target substrings', function () { - var detected = profanity.check('foo ass bar', languages, forbiddenList), - notDetected = profanity.check('foo grass bar', languages, forbiddenList); + it('Should not target substrings', () => { + var detected = profanity.check('foo ass bar', languages, forbiddenList), + notDetected = profanity.check('foo grass bar', languages, forbiddenList); - should(detected).have.length(1); - should(notDetected).have.length(0); + should(detected).have.length(1); + should(notDetected).have.length(0); + }); + + it('Should work equally for objects (Recursively) and arrays', () => { + var results_obj = profanity.check({ + foo: 'something damn', + bar: { test: 'something poo', bar: 'crap woooh' } + }, languages, forbiddenList), + results_arr = profanity.check([ + 'something damn', + ['something poo'], + { foo: [{ 
bar: 'something crap' }] } + ], languages, forbiddenList); + + should(results_obj).eql([ + 'damn', + 'poo', + 'crap' + ]); + + should(results_arr).eql([ + 'damn', + 'poo', + 'crap' + ]); }); +} + +describe('Profanity module', () => { + describe('.check(target)', () => { + callTestCheckFunctions(); + }); + + describe('.check(target) when languages="all"', () => { + callTestCheckFunctions('all', null); + }); + + describe('.check(target) for portuguese', () => { + it('Should not get a portuguese dirty word when there is no language specifided (default is english)', () => { + var notDetected = profanity.check('viados não gostam de pepeca', null); + + should(notDetected).have.length(0); + }); + + it('Should get a dirty word and not get substrings', () => { + var detected = profanity.check('viado não gosta de pepeca', 'pt'); + var notDetected = profanity.check('viados não gosta de pepecas', 'pt'); - it('Should work equally for objects (Recursively) and arrays', function () { - var results_obj = profanity.check({ - foo: 'something damn', - bar: { test: 'something poo', bar: 'crap woooh' } - }, languages, forbiddenList), - results_arr = profanity.check([ - 'something damn', - [ 'something poo' ], - { foo: [ { bar: 'something crap' } ] } - ], languages, forbiddenList); + detected.should.eql(['viado', 'pepeca']); + notDetected.should.eql([]); + }); + + it('Should work equally for objects (Recursively) and arrays', () => { + var results_obj = profanity.check({ foo: 'something perereca', bar: { test: 'something punheta', bar: 'paspalhão woooh' } }, 'pt'), + results_arr = profanity.check(['something porra', ['something precheca'], { foo: [{ bar: 'something puta' }] }], 'pt'); should(results_obj).eql([ - 'damn', - 'poo', - 'crap' + 'perereca', + 'punheta', + 'paspalhão' ]); should(results_arr).eql([ - 'damn', - 'poo', - 'crap' + 'porra', + 'precheca', + 'puta' ]); + }); }); -} -describe('Profanity module', function () { - describe('.check(target)', function () { - 
callTestCheckFunctions(); + describe('.check(target) for multiple languages', () => { + it('Should get a dirty word and not get substrings', () => { + var detected = profanity.check('viado não gosta de pepeca fucker bocchinara reudig', ['pt', 'it', 'de', 'en']); + var notDetected = profanity.check('viados não gosta de pepecas fuckersras, bocchinarasws reudigs', ['pt', 'it', 'de', 'en']); + + detected.should.eql(['viado', 'pepeca', 'fucker', 'bocchinara', 'reudig']); + notDetected.should.eql([]); }); - - describe('.check(target) when languages="all"', function () { - callTestCheckFunctions('all', null); + + it('Should work equally for objects (Recursively) and arrays', () => { + var results_obj = profanity.check({ foo: 'something perereca fucker ', bar: { test: 'something punheta bocchinara', bar: 'paspalhão reudig woooh' } }, ['pt', 'it', 'de', 'en']), + results_arr = profanity.check(['something porra fucker', ['something precheca bocchinara '], { foo: [{ bar: 'something puta reudig' }] }], 'all'); + + should(results_obj).eql([ + 'perereca', + 'fucker', + 'punheta', + 'bocchinara', + 'paspalhão', + 'reudig' + ]); + + should(results_arr).eql([ + 'porra', + 'fucker', + 'precheca', + 'bocchinara', + 'puta', + 'reudig' + ]); }); - - describe('.check(target) for portuguese', function () { - it('Should not get a portuguese dirty word when there is no language specifided (default is english)', function () { - var notDetected = profanity.check('viados não gostam de pepeca', null); + }); - should(notDetected).have.length(0); - }); - - it('Should get a dirty word and not get substrings', function () { - var detected = profanity.check('viado não gosta de pepeca', 'pt'); - var notDetected = profanity.check('viados não gosta de pepecas', 'pt'); + describe('.checkAsync(target)', () => { + it('Should return null with no swearwords found in string', () => { + return profanity.checkAsync('No swearwords here') + .then(results => { + should(results).eql([]); + }); + }); - 
detected.should.eql(['viado', 'pepeca']); - notDetected.should.eql([]); - }); - - it('Should work equally for objects (Recursively) and arrays', function () { - var results_obj = profanity.check({ foo: 'something perereca', bar: { test: 'something punheta', bar: 'paspalhão woooh' } }, 'pt'), - results_arr = profanity.check(['something porra', [ 'something precheca' ], { foo: [ { bar: 'something puta' } ] }], 'pt'); - - should(results_obj).eql([ - 'perereca', - 'punheta', - 'paspalhão' - ]); - - should(results_arr).eql([ - 'porra', - 'precheca', - 'puta' - ]); - }); + it('Should return array of swearwords found in dirty string', () => { + return profanity.checkAsync('something damn something something poo something') + .then(results => { + should(results).eql([ + 'damn', + 'poo' + ]); + }); }); - - describe('.check(target) for multiple languages', function () { - it('Should get a dirty word and not get substrings', function () { - var detected = profanity.check('viado não gosta de pepeca fucker bocchinara reudig', ['pt', 'it', 'de', 'en']); - var notDetected = profanity.check('viados não gosta de pepecas fuckersras, bocchinarasws reudigs', ['pt', 'it', 'de', 'en']); - - detected.should.eql(['viado', 'pepeca', 'fucker', 'bocchinara', 'reudig']); - notDetected.should.eql([]); - }); - - it('Should work equally for objects (Recursively) and arrays', function () { - var results_obj = profanity.check({ foo: 'something perereca fucker ', bar: { test: 'something punheta bocchinara', bar: 'paspalhão reudig woooh' } }, ['pt', 'it', 'de', 'en']), - results_arr = profanity.check(['something porra fucker', [ 'something precheca bocchinara ' ], { foo: [ { bar: 'something puta reudig' } ] }], 'all'); - - should(results_obj).eql([ - 'perereca', - 'fucker', - 'punheta', - 'bocchinara', - 'paspalhão', - 'reudig' - ]); - - should(results_arr).eql([ - 'porra', - 'fucker', - 'precheca', - 'bocchinara', - 'puta', - 'reudig' - ]); - }); + + it('Should work when passing my own forbidden 
list', () => { + return profanity.checkAsync('something damn something something poo something', 'en', ['damn', 'poo']) + .then(results => { + should(results).eql([ + 'damn', + 'poo' + ]); + }); }); - - describe('.checkAsync(target)', () => { - it('Should return null with no swearwords found in string', (done) => { - profanity.checkAsync('No swearwords here') - .then(results => { - should(results).eql([]); - done(); - }); - }); - it('Should return array of swearwords found in dirty string', (done) => { - profanity.checkAsync('something damn something something poo something') - .then(results => { - should(results).eql([ - 'damn', - 'poo' - ]); - - done(); - }) - .catch(err => { - console.log(err); - }); - }); - - it('Should work when passing my own forbidden list', (done) => { - profanity.checkAsync('something damn something something poo something', 'en', ['damn', 'poo']) - .then(results => { - should(results).eql([ - 'damn', - 'poo' - ]); - - done(); - }); - }); + it('Should not target substrings', () => { + Promise.all([profanity.checkAsync('foo ass bar'), profanity.checkAsync('foo grass bar')]) + .then(results => { + var detected = results[0]; + var notDetected = results[1]; - it('Should not target substrings', function () { - Promise.all([profanity.checkAsync('foo ass bar'), profanity.checkAsync('foo grass bar')]) - .then( results => { - var detected = results[0]; - var notDetected = results[1]; - - detected.should.have.length(1); - notDetected.should.have.length(0); - }); - }); + detected.should.have.length(1); + notDetected.should.have.length(0); + }); + }); - it('Should work equally for objects (Recursively) and arrays', function () { - var firstTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; - var secondTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; - var thirdTarget = ['something damn', [ 'something poo' ], { foo: [ { bar: 'something crap' } ] }]; - 
Promise.all([profanity.checkAsync(firstTarget), profanity.checkAsync(secondTarget), profanity.checkAsync(thirdTarget)]) - .then(results => { - results.should.have.length(3); - _.each(results, result => { - result.should.eql(['damn','poo','crap']); - }); + it('Should work equally for objects (Recursively) and arrays', () => { + var firstTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; + var secondTarget = { foo: 'something damn', bar: { test: 'something poo', bar: 'crap woooh' } }; + var thirdTarget = ['something damn', ['something poo'], { foo: [{ bar: 'something crap' }] }]; + Promise.all([profanity.checkAsync(firstTarget), profanity.checkAsync(secondTarget), profanity.checkAsync(thirdTarget)]) + .then(results => { + results.should.have.length(3); + _.each(results, result => { + result.should.eql(['damn', 'poo', 'crap']); }); - }); - - it('Should jump to catch when passing language that is not supported by the tool', () => { - profanity.checkAsync('Anything', ['HUE']) - .catch(err => { - err.code.should.eql('ENOENT'); - }); - }); + }); }); - - - describe('.purify(target)', function () { - it('Should work in obscure (default) mode on a simple string', function () { - var result = profanity.purify('boob damn something poo'); - - result[0].should.equal('b**b d**n something p*o'); - result[1].should.eql([ 'boob', 'damn', 'poo' ]); - }); - - it('Should work when passing my own forbidden list', function () { - var result = profanity.purify('boob damn something pota', { forbiddenList: ['boob', 'damn', 'pota'] }); - result[0].should.equal('b**b d**n something p**a'); - result[1].should.eql([ 'boob', 'damn', 'pota' ]); - }); + it('Should jump to catch when passing language that is not supported by the tool', () => { + profanity.checkAsync('Anything', ['HUE']) + .catch(err => { + err.code.should.eql('ENOENT'); + }); + }); + }); - it('Should work in obscure (default) mode recursively with objects, with infinite recursion and 
maxRecursionDepth', function () { - var testObj = { - bar: { foo: 'something boob', bar: { foo: 'test poo', bler: {foo: 'will not enter here'} } }, - test: 'something damn' - }; - testObj.crazy = testObj; - - var result = profanity.purify(testObj, {maxRecursionDepth: 1}); - - result[0].should.have.keys('bar', 'test', 'crazy'); - result[0].bar.should.have.keys('foo', 'bar'); - result[0].bar.foo.should.equal('something b**b'); - result[0].bar.bar.should.have.keys('foo', 'bler'); - result[0].bar.bar.foo.should.equal('test p*o'); - result[0].bar.foo.should.equal('something b**b'); - result[0].test.should.equal('something d**n'); - - result[1].should.eql([ 'boob', 'poo', 'damn' ]); - }); - it('Should work in replace mode on a simple string', function () { - var result = profanity.purify('boob damn something poo', { - replace: true - }); + describe('.purify(target)', () => { + it('Should work in obscure (default) mode on a simple string', () => { + var result = profanity.purify('boob damn something poo'); + + result[0].should.equal('b**b d**n something p*o'); + result[1].should.eql(['boob', 'damn', 'poo']); + }); + + it('Should work when passing my own forbidden list', () => { + var result = profanity.purify('boob damn something pota', { forbiddenList: ['boob', 'damn', 'pota'] }); + + result[0].should.equal('b**b d**n something p**a'); + result[1].should.eql(['boob', 'damn', 'pota']); + }); + + it('Should work in obscure (default) mode recursively with objects, with infinite recursion and maxRecursionDepth', () => { + var testObj = { + bar: { foo: 'something boob', bar: { foo: 'test poo', bler: { foo: 'will not enter here' } } }, + test: 'something damn' + }; + testObj.crazy = testObj; + + var result = profanity.purify(testObj, { maxRecursionDepth: 1 }); - util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); - result[1].should.eql([ 'boob', 'damn', 'poo' ]); + result[0].should.have.keys('bar', 'test', 'crazy'); + 
result[0].bar.should.have.keys('foo', 'bar'); + result[0].bar.foo.should.equal('something b**b'); + result[0].bar.bar.should.have.keys('foo', 'bler'); + result[0].bar.bar.foo.should.equal('test p*o'); + result[0].bar.foo.should.equal('something b**b'); + result[0].test.should.equal('something d**n'); + + result[1].should.eql(['boob', 'poo', 'damn']); + }); + + it('Should work in replace mode on a simple string', () => { + var result = profanity.purify('boob damn something poo', { + replace: true }); - it('Should work in replace mode recursively with objects', function () { - var result = profanity.purify({ - bar: { foo: 'something boob', bar: { foo: 'test poo' } }, - test: 'something damn' - }, { - replace: true + util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); + result[1].should.eql(['boob', 'damn', 'poo']); + }); + + it('Should work in replace mode recursively with objects', () => { + var result = profanity.purify({ + bar: { foo: 'something boob', bar: { foo: 'test poo' } }, + test: 'something damn' + }, { + replace: true }); - result[0].should.have.keys('bar', 'test'); - result[0].bar.should.have.keys('foo', 'bar'); - util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); - result[0].bar.bar.should.have.keys('foo'); - util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); - util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); - util.testPurified(result[0].test, 'something [ placeholder ]'); + result[0].should.have.keys('bar', 'test'); + result[0].bar.should.have.keys('foo', 'bar'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + result[0].bar.bar.should.have.keys('foo'); + util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + util.testPurified(result[0].test, 'something [ placeholder ]'); - result[1].should.eql([ 'boob', 'poo', 'damn' ]); - }); + result[1].should.eql(['boob', 
'poo', 'damn']); }); - - describe('.purifyAsync(target)', function () { - it('Should work in obscure (default) mode on a simple string', function () { - profanity.purifyAsync('boob damn something poo') - .then(result => { - result[0].should.equal('b**b d**n something p*o'); - result[1].should.eql([ 'boob', 'damn', 'poo' ]); - }); + }); + + describe('.purifyAsync(target)', () => { + it('Should work in obscure (default) mode on a simple string', () => { + return profanity.purifyAsync('boob damn something poo') + .then(result => { + result[0].should.equal('b**b d**n something p*o'); + result[1].should.eql(['boob', 'damn', 'poo']); }); + }); - it('Should work in obscure (default) mode recursively with objects', function () { - var objToCheck = { - bar: { foo: 'something boob', bar: { foo: 'test poo' } }, - test: 'something damn' - }; - profanity.purifyAsync(objToCheck) - .then(result => { - result[0].should.have.keys('bar', 'test'); - result[0].bar.should.have.keys('foo', 'bar'); - result[0].bar.foo.should.equal('something b**b'); - result[0].bar.bar.should.have.keys('foo'); - result[0].bar.bar.foo.should.equal('test p*o'); - result[0].bar.foo.should.equal('something b**b'); - result[0].test.should.equal('something d**n'); - - result[1].should.eql([ 'boob', 'poo', 'damn' ]); - }); + it('Should work in obscure (default) mode recursively with objects', () => { + var objToCheck = { + bar: { foo: 'something boob', bar: { foo: 'test poo' } }, + test: 'something damn' + }; + return profanity.purifyAsync(objToCheck) + .then(result => { + result[0].should.have.keys('bar', 'test'); + result[0].bar.should.have.keys('foo', 'bar'); + result[0].bar.foo.should.equal('something b**b'); + result[0].bar.bar.should.have.keys('foo'); + result[0].bar.bar.foo.should.equal('test p*o'); + result[0].bar.foo.should.equal('something b**b'); + result[0].test.should.equal('something d**n'); + + result[1].should.eql(['boob', 'poo', 'damn']); }); + }); - it('Should work in replace mode on a simple 
string', function () { - profanity.purifyAsync('boob damn something poo', { replace: true }) - .then(result => { - util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); - result[1].should.eql([ 'boob', 'damn', 'poo' ]); - }); + it('Should work in replace mode on a simple string', () => { + return profanity.purifyAsync('boob damn something poo', { replace: true }) + .then(result => { + util.testPurified(result[0], '[ placeholder ] [ placeholder ] something [ placeholder ]'); + result[1].should.eql(['boob', 'damn', 'poo']); }); + }); + + it('Should work in replace mode recursively with objects', () => { + var objToCheck = { + bar: { foo: 'something boob', bar: { foo: 'test poo' } }, + test: 'something damn' + }; + + return profanity.purifyAsync(objToCheck, { replace: true }) + .then(result => { + result[0].should.have.keys('bar', 'test'); + result[0].bar.should.have.keys('foo', 'bar'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + result[0].bar.bar.should.have.keys('foo'); + util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); + util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); + util.testPurified(result[0].test, 'something [ placeholder ]'); + + result[1].should.eql(['boob', 'poo', 'damn']); + - it('Should work in replace mode recursively with objects', function () { - var objToCheck = { - bar: { foo: 'something boob', bar: { foo: 'test poo' } }, - test: 'something damn' - }; - profanity.purifyAsync(objToCheck, { replace: true }) - .then(result => { - result[0].should.have.keys('bar', 'test'); - result[0].bar.should.have.keys('foo', 'bar'); - util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); - result[0].bar.bar.should.have.keys('foo'); - util.testPurified(result[0].bar.bar.foo, 'test [ placeholder ]'); - util.testPurified(result[0].bar.foo, 'something [ placeholder ]'); - util.testPurified(result[0].test, 'something [ placeholder ]'); - - 
result[1].should.eql([ 'boob', 'poo', 'damn' ]); - }); }); - - it('Should jump to catch when passing language that is not supported by the tool', () => { - profanity.purifyAsync('Anything', { languages: ['HUE'] }) - .catch(err => { - err.code.should.eql('ENOENT'); - }); - }); }); + + it('Should jump to catch when passing language that is not supported by the tool', () => { + return profanity.purifyAsync('Anything', { languages: ['HUE'] }) + .catch(err => { + err.code.should.eql('ENOENT'); + }); + }); + }); }); From dae6647a3f4fc82d6a675ecb8609c1908829a1a7 Mon Sep 17 00:00:00 2001 From: Wagner Santos Date: Wed, 13 Apr 2016 17:41:22 -0300 Subject: [PATCH 08/10] Add async methods docs to README.md --- README.md | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ee6b76..53f50d7 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,15 @@ The original lists of swearwords used by default were taken from [here](https:// ## API -### `profanity.check(target_string, [ forbidden_list ])` +### `profanity.check(target_string, [languages], [ forbidden_list ])` +#####async version is also available: `profanity.checkAsync(target_string, [languages], [ forbidden_list ])` Returns a list of forbidden words found in a string. **Arguments** * `target_string` - String to search +* `languages` (Optional )- Array of languages to check words from. Accepts a string if just one language is needed. * `forbidden_list` (Optional) - Array containing forbidden terms **Example** @@ -33,7 +35,24 @@ console.log(profanity.check('Lorem ipsum foo bar poop test poop damn dolor sit.. 
// [ 'poop', 'poop', 'damn' ] ``` +Or the async method: + +```javascript +var profanity = require('profanity-util'); + +profanity.checkAsync('Lorem ipsum foo bar poop test poop damn dolor sit..') + .then(function (result) { + console.log(result); + // [ 'poop', 'poop', 'damn' ] + }) + .catch(function (err) { + console.log('Something went wrong."); + }); +``` + + ### `profanity.purify(target, [ options ])` +#####async version is also available: `profanity.purifyAsync(target, [ options ])` Purifies a string or strings contained in a given object or array from forbidden words. @@ -54,9 +73,11 @@ The .purify method will return an Array containing two values: **Options** * `forbiddenList` - Array of forbidden terms to replace or obscure +* `languages` - Array of languages to check words from. Accepts a string if just one language is needed. * `replacementsList` - Array of replacement words (To pick randomly from) * `obscureSymbol` - Symbol used to obscure words if `obscured` is set to true * `replace`- If set to true it will replace forbidden words (E.g. a*****b) instead of obscuring them +* `maxRecursionDepth` - If you are passing objects, a recursive iteration will be performed to find all strings inside it. You can set the maximum depth of the recursion. 
**Examples** @@ -75,6 +96,21 @@ console.log(profanity.purify({ // [ { foo: 'p**p', bar: { nested: 'd**n', arr: [ 'foo', 'p**p' ] } }, [ 'poop', 'damn', 'poop' ] ] ``` +Async version: + +```javascript +var profanity = require('profanity-util'); + +profanity.purifyAsync('lorem ipsum foo damn bar poop') + .then(function (result) { + console.log(result); + // [ 'lorem ipsum foo d**n bar p**p', [ 'damn', 'poop' ] ] + }) + .catch(function (err) { + console.log('Something went wrong.'); + }); +``` + **Obscure mode, custom options** ```javascript From 2b7ac6e367ecd14c256a257a9a66d7d654a3097e Mon Sep 17 00:00:00 2001 From: Wagner Santos Date: Wed, 13 Apr 2016 19:14:37 -0300 Subject: [PATCH 09/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 53f50d7..f9a2250 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ profanity.checkAsync('Lorem ipsum foo bar poop test poop damn dolor sit..') // [ 'poop', 'poop', 'damn' ] }) .catch(function (err) { - console.log('Something went wrong."); + console.log('Something went wrong.'); }); ``` From 1614580e3f7cec4088cd7f420f6e60b0dc5143cc Mon Sep 17 00:00:00 2001 From: wagoid Date: Thu, 14 Apr 2016 18:31:02 -0300 Subject: [PATCH 10/10] Add better replacement list check --- lib/profanity.js | 13 +++++++++++-- test/profanity.test.js | 29 ++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/lib/profanity.js b/lib/profanity.js index 1f8050c..4f71c68 100644 --- a/lib/profanity.js +++ b/lib/profanity.js @@ -190,10 +190,19 @@ function _purify (target, fields, regex, options) { } } +function getReplacementsList (options) { + var replacementsList = options.replacementsList; + if (!_.isArray(replacementsList) || _.isEmpty(replacementsList)) { + replacementsList = DEFAULT_REPLACEMENTS; + } + + return replacementsList; +} + function purify (target, options) { options = options || {}; options.languages = 
getDefaultLanguagesValue(options.languages); - options.replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS; + options.replacementsList = getReplacementsList(options); var fields = options.fields || (target instanceof Object ? Object.keys(target) : []), regex = getListRegex(options.forbiddenList, options.languages); @@ -204,7 +213,7 @@ function purify (target, options) { function purifyAsync (target, options) { options = options || {}; options.languages = getDefaultLanguagesValue(options.languages); - options.replacementsList = options.replacementsList || DEFAULT_REPLACEMENTS; + options.replacementsList = getReplacementsList(options); var fields = options.fields || (target instanceof Object ? Object.keys(target) : []); return getListRegexAsync(options.forbiddenList, options.languages) diff --git a/test/profanity.test.js b/test/profanity.test.js index 232d971..2e88974 100644 --- a/test/profanity.test.js +++ b/test/profanity.test.js @@ -127,7 +127,7 @@ describe('Profanity module', () => { 'puta', 'reudig' ]); - }); + }); }); describe('.checkAsync(target)', () => { @@ -319,5 +319,32 @@ describe('Profanity module', () => { err.code.should.eql('ENOENT'); }); }); + + it('Should work when passing a custom replacements list', () => { + var replacementsList = ['foo', 'bar', 'baz']; + return profanity.purifyAsync('Something poo something fuck', { replace: true, replacementsList}) + .then(result => { + util.testPurified(result[0], 'Something [ placeholder ] something [ placeholder ]', '(foo|bar|baz)'); + result[1].should.eql(['poo', 'fuck']); + }); + }); + + it('Should use the default replacement list when passing an empty replacements list', () => { + var replacementsList = []; + return profanity.purifyAsync('Something poo something fuck', { replace: true, replacementsList}) + .then(result => { + util.testPurified(result[0], 'Something [ placeholder ] something [ placeholder ]'); + result[1].should.eql(['poo', 'fuck']); + }); + }); + + it('Should use the 
default replacement list when passing a replacement list that is not an array', () => { + var replacementsList = 'dasdasd'; + return profanity.purifyAsync('Something poo something fuck', { replace: true, replacementsList}) + .then(result => { + util.testPurified(result[0], 'Something [ placeholder ] something [ placeholder ]'); + result[1].should.eql(['poo', 'fuck']); + }); + }); }); });