diff options
| author | Arno Teigseth <arno@teigseth.no> | 2011-01-31 05:34:56 +0000 |
|---|---|---|
| committer | Arno Teigseth <arno@teigseth.no> | 2011-01-31 05:34:56 +0000 |
| commit | 1afa96100bcb613c86533698f8a9d1115e63391e (patch) | |
| tree | 07c754e874bcbc95eeaa21abc35d4bc84158f4fb /languagetool/src/ChunkerTest.py | |
| parent | 635a3c7c275c00748c56736b4eb593b651223edd (diff) | |
| download | grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.gz grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.bz2 grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.xz | |
Added very basic pre-beta version of LanguageTool. Builds, though :)
Diffstat (limited to 'languagetool/src/ChunkerTest.py')
| -rw-r--r-- | languagetool/src/ChunkerTest.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/languagetool/src/ChunkerTest.py b/languagetool/src/ChunkerTest.py new file mode 100644 index 0000000..eb8889e --- /dev/null +++ b/languagetool/src/ChunkerTest.py @@ -0,0 +1,78 @@ +# -*- coding: iso-8859-1 -*- +# +# LanguageTool -- A Rule-Based Style and Grammar Checker +# Copyright (C) 2002,2003,2004 Daniel Naber <daniel.naber@t-online.de> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import re +import unittest + +import Chunker + +class LocalRules: + + def __init__(self, rule_list): + self.rules = rule_list + return + +class ChunkerTestCase(unittest.TestCase): + + def testChunking(self): + c = Chunker.Chunker() + r1 = Chunker.Rule("NP1: AT0 NN1 NN1") + r2 = Chunker.Rule("NP2: AT0 NN1") + rules = LocalRules([r1, r2]) + c.setRules(rules) + + tagged_text = self._makeList("Blah/XX the/AT0 house/NN1 foo/YY") + chunks = c.chunk(tagged_text) + self.assertEqual(chunks, [(2, 4, 'NP2')]) + + tagged_text = self._makeList("Blah/XX house/NN1 foo/YY") + chunks = c.chunk(tagged_text) + self.assertEqual(chunks, []) + + tagged_text = self._makeList("the/AT0 summer/NN1 house/NN1 foo/YY2") + chunks = c.chunk(tagged_text) + self.assertEqual(chunks, [(0, 4, 'NP1')]) + + # more than one chunk: + + tagged_text = self._makeList("the/AT0 summer/NN1 is/VB a/AT0 hit/NN1") + chunks = c.chunk(tagged_text) + self.assertEqual(chunks, [(0, 2, 'NP2'), (6, 8, 'NP2')]) + + tagged_text = self._makeList("the/AT0 summer/NN1 a/AT0 hit/NN1") + chunks = c.chunk(tagged_text) + self.assertEqual(chunks, [(0, 2, 'NP2'), (4, 6, 'NP2')]) + + return + + def _makeList(self, s): + parts = re.split("(\s+)", s) + l = [] + for part in parts: + word = None + word_norm = None + tag = None + pair = re.split("/", part) + if len(pair) == 2: + word, tag = pair + word_norm = word + else: + word = pair[0] + l.append((word, word_norm, tag)) + return l |
