Added very basic pre-beta version of LanguageTool. Builds, though :)

author: Arno Teigseth <arno@teigseth.no> 2011-01-31 05:34:56 +0000
committer: Arno Teigseth <arno@teigseth.no> 2011-01-31 05:34:56 +0000
commit: 1afa96100bcb613c86533698f8a9d1115e63391e (patch)
tree: 07c754e874bcbc95eeaa21abc35d4bc84158f4fb /languagetool/src/ChunkerTest.py
parent: 635a3c7c275c00748c56736b4eb593b651223edd (diff)
download: grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.gz
grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.bz2
grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.xz
1 files changed, 78 insertions, 0 deletions
diff --git a/languagetool/src/ChunkerTest.py b/languagetool/src/ChunkerTest.py
new file mode 100644
index 0000000..eb8889e
--- /dev/null
+++ b/languagetool/src/ChunkerTest.py
@@ -0,0 +1,78 @@
+# -*- coding: iso-8859-1 -*-
+#
+# LanguageTool -- A Rule-Based Style and Grammar Checker
+# Copyright (C) 2002,2003,2004 Daniel Naber <daniel.naber@t-online.de>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import re
+import unittest
+
+import Chunker
+
+class LocalRules:
+
+	def __init__(self, rule_list):
+		self.rules = rule_list
+		return
+
+class ChunkerTestCase(unittest.TestCase):
+
+	def testChunking(self):
+		c = Chunker.Chunker()
+		r1 = Chunker.Rule("NP1: AT0 NN1 NN1")
+		r2 = Chunker.Rule("NP2: AT0 NN1")
+		rules = LocalRules([r1, r2])
+		c.setRules(rules)
+
+		tagged_text = self._makeList("Blah/XX the/AT0 house/NN1 foo/YY")
+		chunks = c.chunk(tagged_text)
+		self.assertEqual(chunks, [(2, 4, 'NP2')])
+		
+		tagged_text = self._makeList("Blah/XX house/NN1 foo/YY")
+		chunks = c.chunk(tagged_text)
+		self.assertEqual(chunks, [])
+
+		tagged_text = self._makeList("the/AT0 summer/NN1 house/NN1 foo/YY2")
+		chunks = c.chunk(tagged_text)
+		self.assertEqual(chunks, [(0, 4, 'NP1')])
+	
+		# more than one chunk:
+
+		tagged_text = self._makeList("the/AT0 summer/NN1 is/VB a/AT0 hit/NN1")
+		chunks = c.chunk(tagged_text)
+		self.assertEqual(chunks, [(0, 2, 'NP2'), (6, 8, 'NP2')])
+
+		tagged_text = self._makeList("the/AT0 summer/NN1 a/AT0 hit/NN1")
+		chunks = c.chunk(tagged_text)
+		self.assertEqual(chunks, [(0, 2, 'NP2'), (4, 6, 'NP2')])
+
+		return
+
+	def _makeList(self, s):
+		parts = re.split("(\s+)", s)
+		l = []
+		for part in parts:
+			word = None
+			word_norm = None
+			tag = None
+			pair = re.split("/", part)
+			if len(pair) == 2:
+				word, tag = pair
+				word_norm = word
+			else:
+				word = pair[0]
+			l.append((word, word_norm, tag))
+		return l
author	Arno Teigseth <arno@teigseth.no>	2011-01-31 05:34:56 +0000
committer	Arno Teigseth <arno@teigseth.no>	2011-01-31 05:34:56 +0000
commit	1afa96100bcb613c86533698f8a9d1115e63391e (patch)
tree	07c754e874bcbc95eeaa21abc35d4bc84158f4fb /languagetool/src/ChunkerTest.py
parent	635a3c7c275c00748c56736b4eb593b651223edd (diff)
download	grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.gz grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.bz2 grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.xz