diff options
| author | Arno Teigseth <arno@teigseth.no> | 2011-01-31 05:34:56 +0000 |
|---|---|---|
| committer | Arno Teigseth <arno@teigseth.no> | 2011-01-31 05:34:56 +0000 |
| commit | 1afa96100bcb613c86533698f8a9d1115e63391e (patch) | |
| tree | 07c754e874bcbc95eeaa21abc35d4bc84158f4fb /languagetool/TextCheckerCGI.py | |
| parent | 635a3c7c275c00748c56736b4eb593b651223edd (diff) | |
| download | grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.gz grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.bz2 grammar-norwegian-1afa96100bcb613c86533698f8a9d1115e63391e.tar.xz | |
Added very basic pre-beta version of LanguageTool. Builds, though :)
Diffstat (limited to 'languagetool/TextCheckerCGI.py')
| -rw-r--r-- | languagetool/TextCheckerCGI.py | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/languagetool/TextCheckerCGI.py b/languagetool/TextCheckerCGI.py new file mode 100644 index 0000000..750d2ec --- /dev/null +++ b/languagetool/TextCheckerCGI.py @@ -0,0 +1,210 @@ +#!/usr/bin/python +# -*- coding: iso-8859-1 -*- +# +# LanguageTool -- A Rule-Based Style and Grammar Checker +# Copyright (C) 2002,2003,2004 Daniel Naber <daniel.naber@t-online.de> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import cgi +import string +import sys +import os + +os.chdir(sys.path[0]) +sys.path.append(sys.path[0]) +sys.path.append(os.path.join(sys.path[0], "src")) + +import TagInfo +import TextChecker + +# FIXME: for debugging only +import cgitb +cgitb.enable() + +def main(): + form = cgi.FieldStorage() + if form.getvalue("text"): + check(form) + elif form.getvalue("explain"): + displayExplanation(form) + else: + displayForm(form) + return + +def displayExplanation(form): + print "Content-Type: text/html\n\n" + tag = cgi.escape(form.getvalue("explain")) # security: anti XSS + taginfo = TagInfo.TagInfo(form.getvalue("lang")) + print """<html><head> + <title>LanguageTool: Tag explanation for %s</title> + </head> + <body> + <h1>Tag explanation for %s</h1> + + <p><strong>%s:</strong> %s</p> + + </body></html>""" % (tag, tag, tag, taginfo.getExp(form.getvalue("explain"))) + return + +def displayForm(form): + print "Content-Type: text/html\n\n" + print """<html><head> + <title>LanguageTool Web Interface</title> + <style rel='stylesheet'> + <!-- + .error { font-weight: bold; color: red; } + --> + </style> + </head> + <body> + <h1>LanguageTool Web Interface</h1> + Enter text here:<br /> + <form action="/cgi-bin/languagetool/TextCheckerCGI.py" method="post"> + <textarea name="text" rows="8" cols="80"></textarea><br /> + <select name="lang"> + <option value="en">English</option> + <option value="de">German</option> + <option value="hu">Hungarian (experimental)</option> + </select><br /> + <input type="checkbox" name="tags" checked="checked"> Show part-of-speech tags + <input type="checkbox" name="german_ff" checked="checked"> Check for false friends (for German native speakers)<br /> + <input type="checkbox" name="style"> Check for some style issues (e.g. <em>don't</em> instead of <em>do not</em>)<br /> + <input type="checkbox" name="sentencelength"> Complain about long sentences (more than 30 words) + <br /> + <input type="submit" value="Check Text"> + </form> + <p><strong>Suggested test text</strong><br> + The incorrect words are <span class="error">red</span> (copy and paste + into text field to try it):</p> + + <p>Then he <span class="error">look</span> at the building.<br> + I definitely think <span class="error">is</span> should be less than four years.<br> + This allows to provide a powerful <span class="error">a</span> help system.<br> + His house is as big <span class="error">like</span> mine.<br> + His car is larger <span class="error">then</span> mine. + </p> + <hr> + <a href="http://www.danielnaber.de/languagetool/">LanguageTool Homepage</a> + </body></html>""" + return + +def check(form): + print "Content-Type: text/html\n\n" + text = form.getvalue("text").decode('latin1') + if not text: + text = "" + # TODO: put options for alle these in the web page? too confusing... + grammar = None + falsefriends = None + words = None + builtin = None + textlanguage = "en" + if not form.getvalue("style"): + words = ["__NONE"] + mothertongue = "de" + if not form.getvalue("german_ff"): + mothertongue = None + if form.getvalue("lang"): + textlanguage = form.getvalue("lang") + max_sentence_length = 0 + if form.getvalue("sentencelength"): + max_sentence_length = None + checker = TextChecker.TextChecker(grammar, falsefriends, words, builtin, \ + textlanguage, mothertongue, max_sentence_length) + + print """<html><head> + <title>Check result</title> + <style rel='stylesheet'> + <!-- + .tag { color: grey; } + .error { font-weight: bold; color: red; } + .expl { color: blue; } + .repl { font-weight: bold; } + --> + </style> + <script language="javascript"> + <!-- + var data = new Array(); + """ + taginfo = TagInfo.TagInfo(textlanguage) + print taginfo.getJavascriptCode() + print """ + function info(s) { + alert(s + ": " + data[s]); + } + //--> + </script> + </head> + <body> + <h1>Result</h1>""" + + (rule_matches, res, tags) = checker.check(text) + # TODO: add an option to print the complete checker XML response: + #print "XML reply:<br><pre>%s</pre>" % cgi.escape(res) + #print "<pre>%s</pre>" % tags + taglist = [] + char_count = 0 + list_count = 0 + text_list = [] + for tag_triple in tags: + tag_str = "" + if form.getvalue("tags") and tag_triple[2]: + w = tag_triple[2] + tag_str = '<span class="tag"><a href="TextCheckerCGI.py?explain=%s&lang=%s" \ + onclick="info(\'%s\');return false;">[%s]</a></span>' \ + % (w, textlanguage, w, w) + if tag_triple[2] == 'SENT_END': + tag_str = '%s<br>\n' % tag_str + word = cgi.escape(tag_triple[0]) + text_list.append(word) + text_list.append(tag_str) + char_count = char_count + len(word) + # guarantee that the rule_matches are ordered by their position: + rule_matches.sort() + rule_matches.reverse() # add messages from end of list to avoid count confusion + for rule_match in rule_matches: + # TODO: this produces invalid code if the rule ranges are nested! + ct = 0 + i = 0 + start_found = 0 + end_found = 0 + for el in text_list: + if not el.startswith("<span"): + from_pos = ct + to_pos = ct + len(el) + if rule_match.to_pos <= to_pos and rule_match.to_pos >= from_pos and not end_found: + text_list[i] = '%s</span><span class="expl">[%s (%s)]</span>' % (text_list[i], \ + rule_match.message, rule_match.id) + end_found = 1 + elif rule_match.from_pos <= to_pos and rule_match.from_pos >= from_pos and not start_found: + text_list[i] = '<span class="error">%s' % text_list[i] + start_found = 1 + ct = ct + len(el) + if end_found and start_found: + break + i = i + 1 + text = string.join(text_list, '') + print text.encode('latin1') + + if len(rule_matches) == 1: + print "<p>%d possible error found.</p>" % len(rule_matches) + else: + print "<p>%d possible errors found.</p>" % len(rule_matches) + + #print "<p>" + cgi.escape(res) + print "</body></html>" + +main() |
