summary | refs | log | tree | commit | diff | stats
path: root/languagetool/src/SentenceSplitterTest.py
diff options
context:
space:
mode:
author: Arno Teigseth <arno@teigseth.no> 2011-01-31 05:34:56 +0000
committer: Arno Teigseth <arno@teigseth.no> 2011-01-31 05:34:56 +0000
commit: 1afa96100bcb613c86533698f8a9d1115e63391e (patch)
tree: 07c754e874bcbc95eeaa21abc35d4bc84158f4fb /languagetool/src/SentenceSplitterTest.py
parent: 635a3c7c275c00748c56736b4eb593b651223edd (diff)
downloadgrammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.gz
grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.bz2
grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.xz
Added very basic pre-beta version of LanguageTool. Builds, though :)
Diffstat (limited to 'languagetool/src/SentenceSplitterTest.py')
-rw-r--r-- languagetool/src/SentenceSplitterTest.py 91
1 files changed, 91 insertions, 0 deletions
diff --git a/languagetool/src/SentenceSplitterTest.py b/languagetool/src/SentenceSplitterTest.py
new file mode 100644
index 0000000..52fe732
--- /dev/null
+++ b/languagetool/src/SentenceSplitterTest.py
@@ -0,0 +1,91 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2003,2004 Daniel Naber <daniel.naber@t-online.de>
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import os
+
+import SentenceSplitter
+import unittest
+
+class SentenceSplitterTestCase(unittest.TestCase):
+    """Unit test for SentenceSplitter.split() using hand-marked sample texts."""
+
+    def testSplit(self):
+        """Split each sample; a '#' in a sample marks an expected boundary."""
+        self.s = SentenceSplitter.SentenceSplitter()
+
+        # A None input must produce an empty result rather than an error:
+        self.assertEqual(len(self.s.split(None)), 0)
+
+        self._doTest("")
+        self._doTest("This is a sentence.")
+        self._doTest("This is a sentence. #And this is another one.")
+        self._doTest("This is a sentence. #Isn't it? #Yes, it is.")
+        self._doTest("This is e.g. Mr. Smith, who talks slowly... #But this is another sentence.")
+        self._doTest("Chanel no. 5 is groovy.")
+        self._doTest("Mrs. Jones gave Peter $4.5, to buy Chanel No 5. #He never came back.")
+        self._doTest("On p. 6 there's nothing. #Another sentence.")
+        self._doTest("Leave me alone!, he yelled. #Another sentence.")
+        self._doTest("\"Leave me alone!\", he yelled.")
+        self._doTest("'Leave me alone!', he yelled. #Another sentence.")
+        self._doTest("'Leave me alone,' he yelled. #Another sentence.")
+        self._doTest("This works on the phrase level, i.e. not on the word level.")
+        self._doTest("Let's meet at 5 p.m. in the main street.")
+        self._doTest("James comes from the U.K. where he worked as a programmer.")
+        self._doTest("Don't split strings like U.S.A. please.")
+        self._doTest("Don't split strings like U. S. A. either.")
+        self._doTest("Don't split... #Well you know. #Here comes more text.")
+        self._doTest("Don't split... well you know. #Here comes more text.")
+        self._doTest('The "." should not be a delimiter in quotes.')
+        self._doTest('"Here he comes!" she said.')
+        self._doTest('"Here he comes!", she said.')
+        self._doTest('"Here he comes." #But this is another sentence.')
+        self._doTest('"Here he comes!". #That\'s what he said.')
+        self._doTest('The sentence ends here. #(Not me.)')
+        self._doTest("He won't. #Really.")
+        self._doTest("He won't say no. #Not really.")
+        self._doTest("He won't say no. 5 is better. #Not really.")
+        self._doTest("They met at 5 p.m. on Thursday.")
+        self._doTest("They met at 5 p.m. #It was Thursday.")
+        self._doTest("This is it: a test.")
+        # known not to work:
+        #self._doTest("This is it: #A final test.")
+        # two returns -> paragraph -> new sentence:
+        self._doTest("He won't\n\n#Really.")
+        # Some people make two spaces after sentence end:
+        self._doTest("This is a sentence.  #And this is another one.")
+        # Missing space after sentence end:
+        self._doTest("James is from the Ireland!#He lives in Spain now.")
+        # From the abbreviation list:
+        self._doTest("Jones Bros. have built a succesful company.")
+        # Doesn't work:
+        #self._doTest("James is from the U.K. #He lives in Spain now.")
+
+    def _doTest(self, s):
+        """Assert that splitting `s` yields the sentences marked in `s`.
+
+        `s` is the sample text with a '#' in front of every sentence that
+        is expected to start a new segment. The markers are removed before
+        the text is handed to the splitter, and the same string cut at the
+        markers is the expected result.
+        """
+
+        text = s.replace("#", "")
+        expected = [part.strip() for part in s.split("#")]
+        # ignore leading/trailing whitespace differences:
+        actual = [part.strip() for part in self.s.split(text)]
+        # Comparing whole lists also catches a wrong number of segments.
+        self.assertEqual(actual, expected)