4 require_once('PHPUnit/Framework.php');
// Include the TextStatistics class under test
7 require_once('../TextStatistics.php');
9 class TextStatisticsTest extends PHPUnit_Framework_TestCase {
/**
 * This file contains the more basic tests - short sentences, word counts,
 * sentence counts, and so on. Longer texts are split into their own test
 * files for convenience.
 */
/**
 * Instance of the class under test; assigned in setUp().
 *
 * @var TextStatistics|null
 */
protected $TextStatistics = null;

/**
 * Builds a new TextStatistics object and stores it on the test case so the
 * test methods can reach it through $this->TextStatistics.
 */
public function setUp() {
    $instance = new TextStatistics();
    $this->TextStatistics = $instance;
}
/**
 * Releases the TextStatistics instance created in setUp().
 *
 * The fixture is stored in $this->TextStatistics, so that is the property
 * that has to be cleared. Resetting it to null (its declared default) drops
 * the reference while keeping the declared property in place.
 */
public function tearDown() {
    $this->TextStatistics = null;
}
/* Syllable tests
   -------------------- */
/**
 * Checks syllable_count() against a set of short, everyday ("normal") words.
 */
public function testSyllableCountBasicWords() {
    $stats = $this->TextStatistics;

    self::assertEquals(1, $stats->syllable_count('a'));
    self::assertEquals(1, $stats->syllable_count('was'));
    self::assertEquals(1, $stats->syllable_count('the'));
    self::assertEquals(1, $stats->syllable_count('and'));
    self::assertEquals(2, $stats->syllable_count('foobar'));
    self::assertEquals(2, $stats->syllable_count('hello'));
    self::assertEquals(1, $stats->syllable_count('world'));
    self::assertEquals(3, $stats->syllable_count('wonderful'));
    self::assertEquals(2, $stats->syllable_count('simple'));
    self::assertEquals(2, $stats->syllable_count('easy'));
    self::assertEquals(1, $stats->syllable_count('hard'));
    self::assertEquals(1, $stats->syllable_count('quick'));
    self::assertEquals(1, $stats->syllable_count('brown'));
    self::assertEquals(1, $stats->syllable_count('fox'));
    self::assertEquals(1, $stats->syllable_count('jumped'));
    self::assertEquals(2, $stats->syllable_count('over'));
    self::assertEquals(2, $stats->syllable_count('lazy'));
    self::assertEquals(1, $stats->syllable_count('dog'));
    self::assertEquals(3, $stats->syllable_count('camera'));
}
/**
 * Checks syllable_count() against words with odd syllables, long words and
 * difficult sounds.
 *
 * Each word is asserted exactly once; the list previously checked 'deserve'
 * twice, which added no coverage.
 */
public function testSyllableCountComplexWords() {
    $stats = $this->TextStatistics;

    // Very long words
    self::assertEquals(12, $stats->syllable_count('antidisestablishmentarianism'));
    self::assertEquals(14, $stats->syllable_count('supercalifragilisticexpialidocious'));
    self::assertEquals(8, $stats->syllable_count('chlorofluorocarbonation'));
    self::assertEquals(4, $stats->syllable_count('forethoughtfulness'));
    self::assertEquals(4, $stats->syllable_count('phosphorescent'));
    self::assertEquals(5, $stats->syllable_count('theoretician'));
    self::assertEquals(5, $stats->syllable_count('promiscuity'));
    self::assertEquals(4, $stats->syllable_count('unbutlering'));
    self::assertEquals(5, $stats->syllable_count('continuity'));

    // Long spellings that are still a single syllable
    self::assertEquals(1, $stats->syllable_count('craunched'));
    self::assertEquals(1, $stats->syllable_count('squelched'));
    self::assertEquals(1, $stats->syllable_count('scrounge'));
    self::assertEquals(1, $stats->syllable_count('coughed'));
    self::assertEquals(1, $stats->syllable_count('smile'));

    self::assertEquals(4, $stats->syllable_count('monopoly'));
    self::assertEquals(2, $stats->syllable_count('doughey'));
    self::assertEquals(3, $stats->syllable_count('doughier'));
    self::assertEquals(4, $stats->syllable_count('leguminous'));
    self::assertEquals(3, $stats->syllable_count('thoroughbreds'));
    self::assertEquals(2, $stats->syllable_count('special'));
    self::assertEquals(3, $stats->syllable_count('delicious'));
    self::assertEquals(2, $stats->syllable_count('spatial'));
    self::assertEquals(4, $stats->syllable_count('pacifism'));
    self::assertEquals(4, $stats->syllable_count('coagulant'));
    self::assertEquals(2, $stats->syllable_count('shouldn\'t'));
    self::assertEquals(3, $stats->syllable_count('mcdonald'));
    self::assertEquals(3, $stats->syllable_count('audience'));
    self::assertEquals(2, $stats->syllable_count('finance'));
    self::assertEquals(3, $stats->syllable_count('prevalence'));
    self::assertEquals(5, $stats->syllable_count('impropriety'));
    self::assertEquals(3, $stats->syllable_count('alien'));
    self::assertEquals(2, $stats->syllable_count('dreadnought'));
    self::assertEquals(3, $stats->syllable_count('verandah'));
    self::assertEquals(3, $stats->syllable_count('similar'));
    self::assertEquals(4, $stats->syllable_count('similarly'));
    self::assertEquals(2, $stats->syllable_count('central'));
    self::assertEquals(1, $stats->syllable_count('cyst'));
    self::assertEquals(1, $stats->syllable_count('term'));
    self::assertEquals(2, $stats->syllable_count('order'));
    self::assertEquals(1, $stats->syllable_count('fur'));
    self::assertEquals(2, $stats->syllable_count('sugar'));
    self::assertEquals(2, $stats->syllable_count('paper'));
    self::assertEquals(1, $stats->syllable_count('make'));
    self::assertEquals(1, $stats->syllable_count('gem'));
    self::assertEquals(2, $stats->syllable_count('program'));

    // Base words alongside their suffixed forms
    self::assertEquals(2, $stats->syllable_count('hopeless'));
    self::assertEquals(3, $stats->syllable_count('hopelessly'));
    self::assertEquals(2, $stats->syllable_count('careful'));
    self::assertEquals(3, $stats->syllable_count('carefully'));

    self::assertEquals(2, $stats->syllable_count('stuffy'));
    self::assertEquals(2, $stats->syllable_count('thistle'));
    self::assertEquals(2, $stats->syllable_count('teacher'));
    self::assertEquals(3, $stats->syllable_count('unhappy'));
    self::assertEquals(5, $stats->syllable_count('ambiguity'));
    self::assertEquals(4, $stats->syllable_count('validity'));
    self::assertEquals(4, $stats->syllable_count('ambiguous'));
    self::assertEquals(2, $stats->syllable_count('deserve'));
    self::assertEquals(2, $stats->syllable_count('blooper'));
    self::assertEquals(1, $stats->syllable_count('scooped'));
    self::assertEquals(1, $stats->syllable_count('deal'));
    self::assertEquals(1, $stats->syllable_count('death'));
    self::assertEquals(1, $stats->syllable_count('dearth'));
    self::assertEquals(1, $stats->syllable_count('deign'));
    self::assertEquals(1, $stats->syllable_count('reign'));
    self::assertEquals(2, $stats->syllable_count('bedsore'));
    self::assertEquals(5, $stats->syllable_count('anorexia'));
    self::assertEquals(3, $stats->syllable_count('anymore'));
    self::assertEquals(1, $stats->syllable_count('cored'));
    self::assertEquals(1, $stats->syllable_count('sore'));
    self::assertEquals(2, $stats->syllable_count('foremost'));
    self::assertEquals(2, $stats->syllable_count('restore'));
    self::assertEquals(2, $stats->syllable_count('minute'));
    self::assertEquals(3, $stats->syllable_count('manticores'));
    self::assertEquals(4, $stats->syllable_count('asparagus'));
    self::assertEquals(3, $stats->syllable_count('unexplored'));
    self::assertEquals(4, $stats->syllable_count('unexploded'));

    // Upper-case input
    self::assertEquals(3, $stats->syllable_count('CAPITALS'));
}
// These are fairly common words that are exceptions to the given rules and that cannot
// easily be programmed for. I've added them here for documentation purposes as much
// as anything else. If you find a way to program rules for any of these, move them
// into the section above. Many compound words will end up here.
/**
 * Checks syllable_count() for words that are listed here as known
 * exceptions to the general counting rules.
 */
public function testSyllableCountProgrammedExceptions() {
    $stats = $this->TextStatistics;

    self::assertEquals(3, $stats->syllable_count('simile'));

    // Compound words that have caused problems so far.
    // There are far too many compound words to list them exhaustively.
    self::assertEquals(2, $stats->syllable_count('shoreline'));
    self::assertEquals(3, $stats->syllable_count('forever'));
}
/**
 * Checks average_syllables_per_word() on a phrase of one-syllable words, a
 * phrase of two-syllable words, and the two phrases joined together.
 */
public function testAverageSyllablesPerWord() {
    $stats = $this->TextStatistics;

    $oneSyllableEach = 'and then there was one';
    $twoSyllablesEach = 'because special ducklings deserve rainbows';
    $combined = 'and then there was one because special ducklings deserve rainbows';

    self::assertEquals(1, $stats->average_syllables_per_word($oneSyllableEach));
    self::assertEquals(2, $stats->average_syllables_per_word($twoSyllablesEach));
    self::assertEquals(1.5, $stats->average_syllables_per_word($combined));
}
/* Word and letter tests
   -------------------- */
/**
 * Checks word_count() on a nine-word sentence and a two-word text, each
 * with varying punctuation and surrounding whitespace.
 */
public function testWordCount() {
    $stats = $this->TextStatistics;

    // Nine words, with and without trailing punctuation / padding
    self::assertEquals(9, $stats->word_count('The quick brown fox jumped over the lazy dog'));
    self::assertEquals(9, $stats->word_count('The quick brown fox jumped over the lazy dog.'));
    self::assertEquals(9, $stats->word_count('The quick brown fox jumped over the lazy dog. '));
    self::assertEquals(9, $stats->word_count(' The quick brown fox jumped over the lazy dog. '));
    self::assertEquals(9, $stats->word_count(' The quick brown fox jumped over the lazy dog. '));

    // Two words separated by different punctuation / spacing
    self::assertEquals(2, $stats->word_count('Yes. No.'));
    self::assertEquals(2, $stats->word_count('Yes.No.'));
    self::assertEquals(2, $stats->word_count('Yes.No.'));
    self::assertEquals(2, $stats->word_count('Yes . No.'));
    self::assertEquals(2, $stats->word_count('Yes .No.'));
    self::assertEquals(2, $stats->word_count('Yes - No. '));
}
/**
 * Checks percentage_words_with_three_syllables(), comparing the result
 * after number_format() has rounded it to a whole number.
 *
 * NOTE(review): judging by the fixture sentences, passing false as the
 * second argument excludes capitalised ("proper case") words from the
 * count - confirm against TextStatistics.
 */
public function testCheckPercentageWordsWithThreeSyllables() {
    $stats = $this->TextStatistics;

    self::assertEquals(9, number_format($stats->percentage_words_with_three_syllables('there is just one word with three syllables in this sentence')));
    self::assertEquals(9, number_format($stats->percentage_words_with_three_syllables('there is just one word with three syllables in this sentence', true)));
    self::assertEquals(0, number_format($stats->percentage_words_with_three_syllables('there are no valid words with three Syllables in this sentence', false)));
    self::assertEquals(5, number_format($stats->percentage_words_with_three_syllables('there is one and only one word with three or more syllables in this long boring sentence of twenty words')));
    self::assertEquals(10, number_format($stats->percentage_words_with_three_syllables('there are two and only two words with three or more syllables in this long sentence of exactly twenty words')));
    self::assertEquals(5, number_format($stats->percentage_words_with_three_syllables('there is Actually only one valid word with three or more syllables in this long sentence of Exactly twenty words', false)));
    self::assertEquals(0, number_format($stats->percentage_words_with_three_syllables('no long words in this sentence')));
    self::assertEquals(0, number_format($stats->percentage_words_with_three_syllables('no long valid words in this sentence because the test ignores proper case words like this Behemoth', false)));
}
/**
 * Checks letter_count() on a single letter, on the empty string, and on a
 * sentence that also contains spaces, a comma and digits.
 *
 * The sentence fixture contains 46 letters, which is the expected result;
 * its spaces, comma and digits are therefore expected not to be counted.
 */
public function testTextLengthCheck() {
    $stats = $this->TextStatistics;

    self::assertEquals(1, $stats->letter_count('a'));
    self::assertEquals(0, $stats->letter_count(''));
    self::assertEquals(46, $stats->letter_count('this sentence has 30 characters, not including the digits'));
}
/* Sentence tests
   -------------------- */
/**
 * Checks sentence_count() on one-, two- and three-sentence texts with
 * different terminators, doubled punctuation, line breaks, dashes and
 * parentheses.
 */
public function testSentenceCount() {
    $stats = $this->TextStatistics;

    // A single sentence, whatever the terminator
    self::assertEquals(1, $stats->sentence_count('This is a sentence'));
    self::assertEquals(1, $stats->sentence_count('This is a sentence.'));
    self::assertEquals(1, $stats->sentence_count('This is a sentence!'));
    self::assertEquals(1, $stats->sentence_count('This is a sentence?'));
    self::assertEquals(1, $stats->sentence_count('This is a sentence..'));

    // Several sentences
    self::assertEquals(2, $stats->sentence_count('This is a sentence. So is this.'));
    self::assertEquals(2, $stats->sentence_count("This is a sentence. \n\n So is this, but this is multi-line!"));
    self::assertEquals(2, $stats->sentence_count('This is a sentence,. So is this.'));
    self::assertEquals(2, $stats->sentence_count('This is a sentence!? So is this.'));
    self::assertEquals(3, $stats->sentence_count('This is a sentence. So is this. And this one as well.'));

    // Dashes and parentheses do not split a sentence
    self::assertEquals(1, $stats->sentence_count('This is a sentence - but just one.'));
    self::assertEquals(1, $stats->sentence_count('This is a sentence (but just one).'));
}
/**
 * Checks average_words_per_sentence() on one- and two-sentence texts,
 * including stray punctuation and sentences of unequal length.
 */
public function testAverageWordsPerSentence() {
    $stats = $this->TextStatistics;

    // Four words per sentence
    self::assertEquals(4, $stats->average_words_per_sentence('This is a sentence'));
    self::assertEquals(4, $stats->average_words_per_sentence('This is a sentence.'));
    self::assertEquals(4, $stats->average_words_per_sentence('This is a sentence. '));
    self::assertEquals(4, $stats->average_words_per_sentence('This is a sentence. This is a sentence'));
    self::assertEquals(4, $stats->average_words_per_sentence('This is a sentence. This is a sentence.'));
    self::assertEquals(4, $stats->average_words_per_sentence('This, is - a sentence . This is a sentence. '));

    // Sentences of different lengths
    self::assertEquals(5.5, $stats->average_words_per_sentence('This is a sentence with extra text. This is a sentence. '));
    self::assertEquals(6, $stats->average_words_per_sentence('This is a sentence with some extra text. This is a sentence. '));
}
/* Readability score tests
   -------------------- */
// Please note that the scores for all of these sentences and scoring systems have been calculated by hand and should therefore be accurate.
// All values have been rounded to a single decimal place. PHP can be temperamental when it comes to floats.
/**
 * Checks flesch_kincaid_reading_ease() against expected scores given to one
 * decimal place.
 *
 * The "quick brown fox" sentence is expected to score the same whether it
 * appears once, twice, twice without the final full stop, or twice with a
 * blank line in between.
 */
public function testFleschKincaidReadingEase() {
    $stats = $this->TextStatistics;

    $monosyllables = 'This. Is. A. Nice. Set. Of. Small. Words. Of. One. Part. Each.';
    $fox = 'The quick brown fox jumped over the lazy dog.';
    $foxTwice = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.';
    $foxTwiceUnterminated = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog';
    $foxTwiceMultiLine = "The quick brown fox jumped over the lazy dog. \n\n The quick brown fox jumped over the lazy dog.";
    $complex = 'Now it is time for a more complicated sentence, including several longer words.';

    self::assertEquals(121.2, $stats->flesch_kincaid_reading_ease($monosyllables)); // Best score possible
    self::assertEquals(94.3, $stats->flesch_kincaid_reading_ease($fox));
    self::assertEquals(94.3, $stats->flesch_kincaid_reading_ease($foxTwice));
    self::assertEquals(94.3, $stats->flesch_kincaid_reading_ease($foxTwiceUnterminated));
    self::assertEquals(94.3, $stats->flesch_kincaid_reading_ease($foxTwiceMultiLine));
    self::assertEquals(50.5, $stats->flesch_kincaid_reading_ease($complex));
}
/**
 * Checks flesch_kincaid_grade_level() against expected scores given to one
 * decimal place, using the same fixture texts as the reading-ease test.
 */
public function testFleschKincaidGradeLevel() {
    $stats = $this->TextStatistics;

    $monosyllables = 'This. Is. A. Nice. Set. Of. Small. Words. Of. One. Part. Each.';
    $fox = 'The quick brown fox jumped over the lazy dog.';
    $foxTwice = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.';
    $foxTwiceUnterminated = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog';
    $foxTwiceMultiLine = "The quick brown fox jumped over the lazy dog. \n\n The quick brown fox jumped over the lazy dog.";
    $complex = 'Now it is time for a more complicated sentence, including several longer words.';

    self::assertEquals(-3.4, $stats->flesch_kincaid_grade_level($monosyllables)); // Best score possible
    self::assertEquals(2.3, $stats->flesch_kincaid_grade_level($fox));
    self::assertEquals(2.3, $stats->flesch_kincaid_grade_level($foxTwice));
    self::assertEquals(2.3, $stats->flesch_kincaid_grade_level($foxTwiceUnterminated));
    self::assertEquals(2.3, $stats->flesch_kincaid_grade_level($foxTwiceMultiLine));
    self::assertEquals(9.4, $stats->flesch_kincaid_grade_level($complex));
}
/**
 * Checks gunning_fog_score() against expected scores given to one decimal
 * place, including a variant of the complex sentence in which two of the
 * long words are capitalised.
 */
public function testGunningFogScore() {
    $stats = $this->TextStatistics;

    $monosyllables = 'This. Is. A. Nice. Set. Of. Small. Words. Of. One. Part. Each.';
    $fox = 'The quick brown fox jumped over the lazy dog.';
    $foxTwice = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.';
    $foxTwiceMultiLine = "The quick brown fox jumped over the lazy dog. \n\n The quick brown fox jumped over the lazy dog.";
    $foxTwiceUnterminated = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog';
    $complex = 'Now it is time for a more complicated sentence, including several longer words.';
    $complexCapitalised = 'Now it is time for a more Complicated sentence, including Several longer words.';

    self::assertEquals(0.4, $stats->gunning_fog_score($monosyllables)); // Best possible score
    self::assertEquals(3.6, $stats->gunning_fog_score($fox));
    self::assertEquals(3.6, $stats->gunning_fog_score($foxTwice));
    self::assertEquals(3.6, $stats->gunning_fog_score($foxTwiceMultiLine));
    self::assertEquals(3.6, $stats->gunning_fog_score($foxTwiceUnterminated));
    self::assertEquals(14.4, $stats->gunning_fog_score($complex));
    self::assertEquals(8.3, $stats->gunning_fog_score($complexCapitalised)); // Two proper nouns, ignored
}
/**
 * Checks coleman_liau_index() against expected scores given to one decimal
 * place.
 */
public function testColemanLiauIndex() {
    $stats = $this->TextStatistics;

    $monosyllables = 'This. Is. A. Nice. Set. Of. Small. Words. Of. One. Part. Each.';
    $fox = 'The quick brown fox jumped over the lazy dog.';
    $foxTwice = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.';
    $foxTwiceMultiLine = "The quick brown fox jumped over the lazy dog. \n\n The quick brown fox jumped over the lazy dog.";
    $foxTwiceUnterminated = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog';
    $complex = 'Now it is time for a more complicated sentence, including several longer words.';

    self::assertEquals(3.0, $stats->coleman_liau_index($monosyllables)); // Best possible score would be if all words were 1 character
    self::assertEquals(7.7, $stats->coleman_liau_index($fox));
    self::assertEquals(7.7, $stats->coleman_liau_index($foxTwice));
    self::assertEquals(7.7, $stats->coleman_liau_index($foxTwiceMultiLine));
    self::assertEquals(7.7, $stats->coleman_liau_index($foxTwiceUnterminated));
    self::assertEquals(13.6, $stats->coleman_liau_index($complex));
}
/**
 * Checks smog_index() against expected scores given to one decimal place.
 * Every fixture without a word of three or more syllables is expected to
 * score 1.8.
 */
public function testSMOGIndex() {
    $stats = $this->TextStatistics;

    $monosyllables = 'This. Is. A. Nice. Set. Of. Small. Words. Of. One. Part. Each.';
    $fox = 'The quick brown fox jumped over the lazy dog.';
    $foxTwice = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.';
    $foxTwiceMultiLine = "The quick brown fox jumped over the lazy dog. \n\n The quick brown fox jumped over the lazy dog.";
    $foxTwiceUnterminated = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog';
    $complex = 'Now it is time for a more complicated sentence, including several longer words.';

    self::assertEquals(1.8, $stats->smog_index($monosyllables)); // Should be 1.8 for any text with no words of 3+ syllables
    self::assertEquals(1.8, $stats->smog_index($fox));
    self::assertEquals(1.8, $stats->smog_index($foxTwice));
    self::assertEquals(1.8, $stats->smog_index($foxTwiceMultiLine));
    self::assertEquals(1.8, $stats->smog_index($foxTwiceUnterminated));
    self::assertEquals(10.1, $stats->smog_index($complex));
}
/**
 * Checks automated_readability_index() against expected scores given to one
 * decimal place.
 */
public function testAutomatedReadabilityIndex() {
    $stats = $this->TextStatistics;

    $monosyllables = 'This. Is. A. Nice. Set. Of. Small. Words. Of. One. Part. Each.';
    $fox = 'The quick brown fox jumped over the lazy dog.';
    $foxTwice = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.';
    $foxTwiceMultiLine = "The quick brown fox jumped over the lazy dog. \n\n The quick brown fox jumped over the lazy dog.";
    $foxTwiceUnterminated = 'The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog';
    $complex = 'Now it is time for a more complicated sentence, including several longer words.';

    self::assertEquals(-5.6, $stats->automated_readability_index($monosyllables));
    self::assertEquals(1.9, $stats->automated_readability_index($fox));
    self::assertEquals(1.9, $stats->automated_readability_index($foxTwice));
    self::assertEquals(1.9, $stats->automated_readability_index($foxTwiceMultiLine));
    self::assertEquals(1.9, $stats->automated_readability_index($foxTwiceUnterminated));
    self::assertEquals(8.6, $stats->automated_readability_index($complex));
}