-
Notifications
You must be signed in to change notification settings - Fork 36
Fix bugs in cleanText() and wordCount(), add some tests #13
Changes from all commits
db01e80
21c3d51
8053dca
8850d5a
bbbe6d6
31aa398
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| node_modules |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| --reporter nyan | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| var assert = require('assert'); | ||
| var TextStatistics = require('../index.js'); | ||
|
|
||
| describe('TextStatistics', function() { | ||
| // this is called when you "make" a TextStatistics | ||
| describe('#cleanText()', function() { | ||
// Input without a trailing '.' is expected to come back with one appended.
it("should add a final terminator if it's missing", function() {
  assert.equal(TextStatistics('Hello friend').text, 'Hello friend.');
});

// Input that already ends in '.' is expected to come back unchanged.
it("should not add a final terminator if there is a '.'", function() {
  assert.equal(TextStatistics('Hello friend.').text, 'Hello friend.');
});
|
|
||
// Whitespace after the final terminator is expected to be removed.
context('trailing whitespace', function() {
  // Each row: [what gets stripped (used in the test title), raw input].
  var trailingCases = [
    ['spaces', 'Hello friend. '],
    ['newlines', 'Hello friend.\n\n'],
    ['\\r\\n thing', 'Hello friend.\r\n'],
    ['tabs', 'Hello friend.\t']
  ];

  trailingCases.forEach(function(row) {
    it('should strip ' + row[0], function() {
      assert.equal(TextStatistics(row[1]).text, 'Hello friend.');
    });
  });
});
|
|
||
// Whitespace before the first word is expected to be removed.
context('leading whitespace', function() {
  var CLEANED = 'Hello friend.';

  // label completes the test title; raw is the text handed to TextStatistics.
  var leadingCases = [
    { label: 'spaces', raw: ' Hello friend.' },
    { label: 'newlines', raw: '\n\nHello friend.' },
    { label: '\\r\\n thing', raw: '\r\nHello friend.' },
    { label: 'tabs', raw: '\tHello friend.' }
  ];

  leadingCases.forEach(function(testCase) {
    it('should strip ' + testCase.label, function() {
      var cleaned = TextStatistics(testCase.raw).text;
      assert.equal(cleaned, CLEANED);
    });
  });
});
|
|
||
// Runs of spaces between words are expected to collapse to a single space.
// The input must really contain repeated spaces; otherwise it equals the
// expected output and the test cannot detect a regression.
it('should remove multiple spaces between words', function() {
  var ts = TextStatistics('Hello    good   friend.');
  assert.equal(ts.text, 'Hello good friend.');
});
|
|
||
// Repeated terminators ('...', '..') are expected to shrink to a single '.'.
it('should un-duplicate terminators', function() {
  assert.equal(TextStatistics('Hello... Friend..').text, 'Hello. Friend.');
});

// A terminator directly followed by a word is expected to gain a space after it.
it('should pad terminators with a space', function() {
  assert.equal(TextStatistics('Hello.Good.Friend.').text, 'Hello. Good. Friend.');
});
|
|
||
// '!' and '?' are expected to be normalised to '.'.
context('unify terminators', function() {
  // Each row: [test title, raw input]; every row expects the same cleaned text.
  var terminatorCases = [
    ['should replace all !! with ..', 'Hello! Friend!'],
    ['should replace all ?? with ..', 'Hello? Friend?']
  ];

  terminatorCases.forEach(function(row) {
    it(row[0], function() {
      assert.equal(TextStatistics(row[1]).text, 'Hello. Friend.');
    });
  });
});
|
|
||
// Line breaks between list items are expected to become '. ' sentence breaks.
context('replacing newlines with terminators', function() {
  var listItems = ['bulleted list here we go', 'nice dog', 'good dog'];
  var expectedText = 'bulleted list here we go. nice dog. good dog.';

  // Each row: [escaped separator shown in the test title, actual separator].
  var separators = [
    ['\\n', '\n'],
    ['\\r\\n', '\r\n'],
    ['\\r', '\r']
  ];

  separators.forEach(function(sep) {
    it('should replace ' + sep[0], function() {
      var raw = listItems.join(sep[1]);
      assert.equal(TextStatistics(raw).text, expectedText);
    });
  });
});
|
|
||
// Periods inside an email address are expected to be removed so they are not
// treated as sentence terminators; only the appended final '.' remains.
// Each input is a real address whose shape matches its test title (the
// fixtures must not be an obfuscation placeholder, or every case would
// exercise the same non-email string).
context('stripping periods from email addresses', function() {
  it('should replace a single period', function() {
    var ts = TextStatistics('textstatistics@example.com');
    assert.equal(ts.text, 'textstatistics@examplecom.');
  });

  it('should replace a single period in the first part', function() {
    var ts = TextStatistics('text.statistics@example.com');
    assert.equal(ts.text, 'textstatistics@examplecom.');
  });

  it('should replace two periods in the first part', function() {
    var ts = TextStatistics('text.stat.istics@example.com');
    assert.equal(ts.text, 'textstatistics@examplecom.');
  });

  it('should replace periods with a subdomain', function() {
    var ts = TextStatistics('textstatistics@test.example.com');
    assert.equal(ts.text, 'textstatistics@testexamplecom.');
  });

  it('should replace periods with a subdomain and before the @', function() {
    var ts = TextStatistics('text.statistics@test.example.com');
    assert.equal(ts.text, 'textstatistics@testexamplecom.');
  });
});
|
|
||
| context('replacing non-terminator punctuation', function() { | ||
// Each row: [test title, raw input, text expected after cleaning].
var punctuationCases = [
  ['should replace commas with spaces', 'Hello, hi, friend.', 'Hello hi friend.'],
  ['should replace colons with spaces', 'Hello: hi: friend.', 'Hello hi friend.'],
  ['should replace semicolons with spaces', 'Hello; hi; friend.', 'Hello hi friend.'],
  ['should replace parentheses with spaces', '(Hello (hi) friend).', 'Hello hi friend.'],
  ['should replace slashes with spaces', 'Hello/hi/friend.', 'Hello hi friend.'],
  ['should replace double hyphens with spaces', 'Hello--hi--friend.', 'Hello hi friend.'],
  // A single hyphen joins a compound word and is expected to survive cleaning.
  ['should not replace a single dash with spaces', 'Hi-di-ho friend-person!', 'Hi-di-ho friend-person.'],
  ['should replace pluses with spaces', 'Hello + hi+friend.', 'Hello hi friend.'],
  ['should replace ampersands with spaces', 'Hello&hi & friend.', 'Hello hi friend.']
];

punctuationCases.forEach(function(row) {
  it(row[0], function() {
    assert.equal(TextStatistics(row[1]).text, row[2]);
  });
});
|
|
||
| it('should replace em-dash with spaces'); // pending (no callback yet): the fixture still needs a literal em-dash character | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Em dash is usually used as a word terminator in my experience, rather than to create hyphenated pairs, so I don't see why not!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, my comment there is more about figuring out where to get the em dash. I was apparently in too much of a hurry to copy/paste it.
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, cool. Sorry, didn't intend to come across as patronising! Apologies if that was the case. Not sure what OS you use, but I love the em dash, and there's a nice shortcut for it on Mac OS — option+shift+dash. Option+dash will give you an en dash. :)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No worries, I didn't read it as patronizing. My comment wasn't super clear. |
||
| }); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| var assert = require('assert'); | ||
| var TextStatistics = require('../index.js'); | ||
|
|
||
| describe('TextStatistics', function() { | ||
|
|
||
// sentenceCount() is expected to return the number of sentences in the text.
// assert.equal takes (actual, expected); passing them in that order keeps the
// "actual"/"expected" labels in a failure report truthful.
describe('#sentenceCount()', function() {
  it('should count a single sentence', function() {
    var ts = TextStatistics('see spot run.');
    assert.equal(ts.sentenceCount(), 1);
  });

  // A comma is not a terminator, so it must not start a new sentence.
  it('should count a single sentence with a comma', function() {
    var ts = TextStatistics('see, spot runs.');
    assert.equal(ts.sentenceCount(), 1);
  });

  it('should count a few simple sentences', function() {
    var ts = TextStatistics('see spot run. good job spot. have a treat.');
    assert.equal(ts.sentenceCount(), 3);
  });
});
|
|
||
// wordCount() is expected to return the number of words in the text.
// assert.equal takes (actual, expected); passing them in that order keeps the
// "actual"/"expected" labels in a failure report truthful.
describe('#wordCount()', function() {
  // Per the test title, a minimum of one keeps callers from dividing by zero.
  it('a string w/o words should have word count of one, because dividing by zero', function() {
    var ts = TextStatistics('.');
    assert.equal(ts.wordCount(), 1);
  });

  it('should count the number of words in a text', function() {
    var ts = TextStatistics('see spot run');
    assert.equal(ts.wordCount(), 3);
  });

  it('should not count words with an apostrophe as two words', function() {
    var ts = TextStatistics('they\'re');
    assert.equal(ts.wordCount(), 1);
  });

  it('should not count the empty string after a period as a word', function() {
    var ts = TextStatistics('dog.');
    assert.equal(ts.wordCount(), 1);
  });

  // The fixture must be a real address (with '@' and '.') for this case to
  // exercise email handling.
  it('should count an email address as a single word', function() {
    var ts = TextStatistics('textstatistics@example.com');
    assert.equal(ts.wordCount(), 1);
  });

  it('should count words with a dash as a single word', function() {
    var ts = TextStatistics('long-term');
    assert.equal(ts.wordCount(), 1);
  });
});
| }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Heh.