aboutsummaryrefslogtreecommitdiffstats
path: root/inorsk-hyphenmaybe
diff options
context:
space:
mode:
authorPetter Reinholdtsen <pere@debian.org>2004-12-07 23:39:38 +0000
committerPetter Reinholdtsen <pere@debian.org>2004-12-07 23:39:38 +0000
commit89c451a8d15749af8eeaba489570ad75d15b8c2c (patch)
treeb9e984f1c1a5311c22d70e1feed286c3ad87319a /inorsk-hyphenmaybe
downloadhomepage-89c451a8d15749af8eeaba489570ad75d15b8c2c.tar.gz
homepage-89c451a8d15749af8eeaba489570ad75d15b8c2c.tar.bz2
homepage-89c451a8d15749af8eeaba489570ad75d15b8c2c.tar.xz
Import todays web pages. Slightly edited to get relative links to local documents.copy_20041208
Diffstat (limited to 'inorsk-hyphenmaybe')
-rw-r--r--inorsk-hyphenmaybe229
1 files changed, 229 insertions, 0 deletions
diff --git a/inorsk-hyphenmaybe b/inorsk-hyphenmaybe
new file mode 100644
index 0000000..dd66f89
--- /dev/null
+++ b/inorsk-hyphenmaybe
@@ -0,0 +1,229 @@
+: Use /bin/bash
+
+# Copyright: Rune Kleveland (2000) <runekl@math.uio.no>
+# License : GPL
+# Version : 0.9
+
+# This script takes a (TeX) file as argument and prints every word not
+# in the Norwegian dictionary hyphenated by TeX. The parsing of the
+# file is done by ispell.
+
+# It can be used to find out which words the Norwegian patterns
+# hyphenates incorrectly, because the Norwegian patterns I have
+# generated should hyphenate every word in the norsk and nynorsk
+# dictionaries correctly. The incorrecly hyphenated words should be
+# included in a \hyphenation command. If there already are
+# hyphenation commands in the TeX document, that is taken into
+# consideration unless the -noparse option is given.
+
+# If you tell me about incorrectly hyphenated common words, I might
+# fix it in a future version.
+
+# If multi level hyphenation is available, it is used. Unfortunately
+# this requires an experimental TeX today, and few people seem to have
+# that.
+
+# nohyphinsecure [-p patterns] [-l language] [-e] [-h] [file]
+#
+# -p patterns Choose the patterns to hyphenate with.
+# The default is norsk, in which case TeX
+# executes \language=\l@norsk.
+#
+# -l language Choose the ispell dictionary. The default is norsk.
+#
+# -ll language Filter through extra dictionary. The default is nynorsk.
+# Use false to avoid filtering.
+#
+# -e Throw away all english words.
+#
+# -h Print only words that does contain a hyphen
+#
+# -nosort Do not sort the words, but output in the order they appear
+#
+# -nroff Ispell parsing keyword. Overrride default TeX.
+#
+# -noparse Don't try to find hyphenation commands in the input file.
+
+
+TMP=/tmp
+ISPELLMODE=tex
+LATEX=latex
+LANGUAGE=norsk
+LLANGUAGE=nynorsk
+CH=a-zæøåéèêôóòçA-ZÆØÅÉÈÊÔÓÒÇ
+PATTERNS=${LANGUAGE}
+IGNOREENGLISH=false
+FORMAT=-t
+ONLYHYPHEN=false
+PARSEFORHYPH=true
+SORTING=true
+
+while [ $# != 0 ]
+do
+ case "$1" in
+ -p)
+ PATTERNS=$2
+ shift
+ ;;
+ -l)
+ LANGUAGE=$2
+ shift
+ ;;
+ -ll)
+ LLANGUAGE=$2
+ shift
+ ;;
+ -e)
+ IGNOREENGLISH=true
+ ;;
+ -h)
+ ONLYHYPHEN=true
+ ;;
+ -nroff)
+ FORMAT=-n
+ ;;
+ -noparse)
+ PARSEFORHYPH=false
+ ;;
+ -nosort)
+ SORTING=false
+ ;;
+ -)
+ break
+ ;;
+ -*)
+ echo 'Usage: nohyphinsecure [-p patterns] [-l dictionary] [-ll dictionary] [-e] [-h] [file] ...' \
+ 1>&2
+ exit 2
+ ;;
+ *)
+ break
+ ;;
+ esac
+ shift
+done
+
+
+# Parse for \hyphenation command. Assumes you use TeX gently.
+
+if [ ${PARSEFORHYPH} = true ]
+then
+ cat $@ > ${TMP}/hyphen0.tmp
+ sed -e 's/%.*//' \
+ -e '/\\hyphenation[ ]*{.*}/ p' \
+ -e '/\\hyphenation[ ]*{[^}]*$/,/}/! D' ${TMP}/hyphen0.tmp \
+ | sed -e 's/^.\+\(\\hyphenation[ ]*{\)/\1/' \
+ > ${TMP}/hyphen1.tmp
+ cat ${TMP}/hyphen0.tmp
+else
+ rm -f ${TMP}/hyphen1.tmp
+ cat $@
+fi | \
+if [ ${LANGUAGE} = false ]
+then
+ tr -cs ${CH} '\n'
+elif [ ${LLANGUAGE} = false ]
+then
+ tr '.,;:' ' ' \
+ | ispell -B -l -d ${LANGUAGE} ${FORMAT}
+else
+ tr '.,;:' ' ' \
+ | ispell -B -l -d ${LANGUAGE} ${FORMAT} \
+ | ispell -B -l -d ${LLANGUAGE} ${FORMAT}
+fi \
+ > ${TMP}/hyphen2.tmp
+
+cd ${TMP}
+
+if [ "${IGNOREENGLISH}" = true ]
+then
+ (grep -v '[^a-zA-Z]' hyphen2.tmp | ispell -l -d english; \
+ grep '[^a-zA-Z]' hyphen2.tmp)
+else
+ cat hyphen2.tmp
+fi | \
+if [ $SORTING = true ]
+then
+ sort \
+ | uniq -c \
+ | sort -n -r -s
+else
+ cat
+fi \
+ | sed -e '1 i \
+\\writelog{' \
+ -e '1000~1000 a \
+}\
+\\writelog{' \
+ -e '$ a \
+}' \
+ > hyphen3.tmp
+
+TEXFILE='\nonstopmode
+\documentclass{minimal}
+\usepackage{t1enc}
+\makeatletter
+\language=\l@'${PATTERNS}'\lefthyphenmin=2\righthyphenmin=2
+\ifx\gendiscretionary\@undefined\else
+\hyphenclassesstate=1\hyphenclasses=5\exhyphenclass=4
+\fi
+\InputIfFileExists{./hyphen1.tmp}{}
+\makeatother
+\def\writelog#1{\setbox0=\vbox{\parfillskip0pt \hsize16383.99999pt
+\pretolerance=-1 \tolerance=-1 \hbadness=0 \showboxdepth=0 \ #1}}
+\begin{document}
+\input{hyphen3.tmp}
+\typeout{----------}
+\end{document}'
+
+
+${LATEX} ${TEXFILE} 2&>/dev/null
+
+rm -f hyphen[0123].tmp
+
+# Parse the log file
+
+sed -e '1,/(hyphen3.tmp/ D' \
+ -e '/\\hbox/ D' \
+ -e '/^ *$/ D' \
+ -e 's/^\(\[\]\)\? *\\T1[^ ]* /*/' \
+ -e '/^----------/,$ c \
+ ' minimal.log \
+ | tr -d '\n)' \
+ | tr -s ' ' '\n' \
+ | sed -e 's/-\*//' \
+ -e '1 s/\*//' \
+ -e 's/\*/\
+/' -e '/^ *$/ D' \
+ -e 's/\^\^c5/Å/g' \
+ -e 's/\^\^c6/Æ/g' \
+ -e 's/\^\^d8/Ø/g' \
+ -e 's/\^\^c7/Ç/g' \
+ -e 's/\^\^c8/È/g' \
+ -e 's/\^\^c9/É/g' \
+ -e 's/\^\^d2/Ò/g' \
+ -e 's/\^\^d3/Ó/g' \
+ -e 's/\^\^d4/Ô/g' \
+ -e 's/\^\^e5/å/g' \
+ -e 's/\^\^e6/æ/g' \
+ -e 's/\^\^f8/ø/g' \
+ -e 's/\^\^e7/ç/g' \
+ -e 's/\^\^e8/è/g' \
+ -e 's/\^\^e9/é/g' \
+ -e 's/\^\^f2/ò/g' \
+ -e 's/\^\^f3/ó/g' \
+ -e 's/\^\^f4/ô/g' | \
+if [ $SORTING = true ]
+then
+ sed -e N -e 's/\n/ /'
+else
+ cat
+fi | \
+if [ "${ONLYHYPHEN}" = true ]
+then
+ grep '\-'
+else
+ cat
+fi
+
+rm -f minimal.log minimal.aux minimal.dvi