diff options
| author | Petter Reinholdtsen <pere@debian.org> | 2004-12-07 23:39:38 +0000 | 
|---|---|---|
| committer | Petter Reinholdtsen <pere@debian.org> | 2004-12-07 23:39:38 +0000 | 
| commit | 89c451a8d15749af8eeaba489570ad75d15b8c2c (patch) | |
| tree | b9e984f1c1a5311c22d70e1feed286c3ad87319a /inorsk-hyphenmaybe | |
| download | homepage-89c451a8d15749af8eeaba489570ad75d15b8c2c.tar.gz homepage-89c451a8d15749af8eeaba489570ad75d15b8c2c.tar.bz2 homepage-89c451a8d15749af8eeaba489570ad75d15b8c2c.tar.xz | |
Import todays web pages.  Slightly edited to get relative links to local documents.copy_20041208
Diffstat (limited to 'inorsk-hyphenmaybe')
| -rw-r--r-- | inorsk-hyphenmaybe | 229 | 
1 files changed, 229 insertions, 0 deletions
| diff --git a/inorsk-hyphenmaybe b/inorsk-hyphenmaybe new file mode 100644 index 0000000..dd66f89 --- /dev/null +++ b/inorsk-hyphenmaybe @@ -0,0 +1,229 @@ +: Use /bin/bash + +# Copyright: Rune Kleveland (2000) <runekl@math.uio.no> +# License  : GPL +# Version  : 0.9 + +# This script takes a (TeX) file as argument and prints every word not +# in the Norwegian dictionary hyphenated by TeX.  The parsing of the +# file is done by ispell. + +# It can be used to find out which words the Norwegian patterns +# hyphenates incorrectly, because the Norwegian patterns I have +# generated should hyphenate every word in the norsk and nynorsk +# dictionaries correctly.  The incorrecly hyphenated words should be +# included in a \hyphenation command.  If there already are +# hyphenation commands in the TeX document, that is taken into +# consideration unless the -noparse option is given. + +# If you tell me about incorrectly hyphenated common words, I might +# fix it in a future version. + +# If multi level hyphenation is available, it is used.  Unfortunately +# this requires an experimental TeX today, and few people seem to have +# that. + +# nohyphinsecure [-p patterns] [-l language] [-e] [-h] [file] +# +#    -p patterns  Choose the patterns to hyphenate with. +#                 The default is norsk, in which case TeX +#                 executes \language=\l@norsk. +# +#    -l language  Choose the ispell dictionary.  The default is norsk. +# +#    -ll language Filter through extra dictionary.  The default is nynorsk. +#                 Use false to avoid filtering. +# +#    -e           Throw away all english words. +# +#    -h           Print only words that does contain a hyphen +# +#    -nosort      Do not sort the words, but output in the order they appear +# +#    -nroff       Ispell parsing keyword.  Overrride default TeX. +# +#    -noparse     Don't try to find hyphenation commands in the input file. + + +TMP=/tmp +ISPELLMODE=tex +LATEX=latex +LANGUAGE=norsk +LLANGUAGE=nynorsk +CH=a-zæøåéèêôóòçA-ZÆØÅÉÈÊÔÓÒÇ +PATTERNS=${LANGUAGE} +IGNOREENGLISH=false +FORMAT=-t +ONLYHYPHEN=false +PARSEFORHYPH=true +SORTING=true + +while [ $# != 0 ] +do +    case "$1" in +	-p) +	    PATTERNS=$2 +	    shift +	    ;; +	-l) +	    LANGUAGE=$2 +	    shift +	    ;; +	-ll) +	    LLANGUAGE=$2 +	    shift +	    ;; +	-e) +	    IGNOREENGLISH=true +	    ;; +	-h) +	    ONLYHYPHEN=true +	    ;; +	-nroff) +	    FORMAT=-n +	    ;; +	-noparse) +	    PARSEFORHYPH=false +	    ;; +	-nosort) +	    SORTING=false +	    ;; +	-) +	    break +	    ;; +	-*) +	    echo 'Usage: nohyphinsecure [-p patterns] [-l dictionary] [-ll dictionary] [-e] [-h] [file] ...' \ +	      1>&2 +	    exit 2 +	    ;; +	*) +	    break +	    ;; +    esac +    shift +done + + +# Parse for \hyphenation command.  Assumes you use TeX gently. + +if [ ${PARSEFORHYPH} = true ] +then +  cat $@ > ${TMP}/hyphen0.tmp +  sed -e 's/%.*//' \ +      -e '/\\hyphenation[ 	]*{.*}/ p' \ +      -e '/\\hyphenation[ 	]*{[^}]*$/,/}/! D' ${TMP}/hyphen0.tmp \ +    | sed -e 's/^.\+\(\\hyphenation[ 	]*{\)/\1/' \ +    > ${TMP}/hyphen1.tmp +  cat ${TMP}/hyphen0.tmp +else +  rm -f ${TMP}/hyphen1.tmp +  cat $@ +fi | \ +if [ ${LANGUAGE} = false ] +then +  tr -cs ${CH} '\n' +elif [ ${LLANGUAGE} = false ] +then +  tr '.,;:' '    ' \ +    | ispell -B -l -d ${LANGUAGE}  ${FORMAT} +else +  tr '.,;:' '    ' \ +    | ispell -B -l -d ${LANGUAGE}  ${FORMAT} \ +    | ispell -B -l -d ${LLANGUAGE} ${FORMAT} +fi \ +  > ${TMP}/hyphen2.tmp + +cd ${TMP} + +if [ "${IGNOREENGLISH}" = true ] +then +  (grep -v '[^a-zA-Z]' hyphen2.tmp | ispell -l -d english; \ +   grep '[^a-zA-Z]' hyphen2.tmp) +else +  cat hyphen2.tmp +fi | \ +if [ $SORTING = true ] +then +  sort \ +  | uniq -c \ +  | sort -n -r -s +else +  cat +fi \ +  | sed -e '1 i \ +\\writelog{' \ +      -e '1000~1000 a \ +}\ +\\writelog{' \ +      -e '$ a \ +}' \ +  > hyphen3.tmp + +TEXFILE='\nonstopmode +\documentclass{minimal} +\usepackage{t1enc} +\makeatletter +\language=\l@'${PATTERNS}'\lefthyphenmin=2\righthyphenmin=2 +\ifx\gendiscretionary\@undefined\else +\hyphenclassesstate=1\hyphenclasses=5\exhyphenclass=4 +\fi +\InputIfFileExists{./hyphen1.tmp}{} +\makeatother +\def\writelog#1{\setbox0=\vbox{\parfillskip0pt \hsize16383.99999pt +\pretolerance=-1 \tolerance=-1 \hbadness=0 \showboxdepth=0 \ #1}} +\begin{document} +\input{hyphen3.tmp} +\typeout{----------} +\end{document}' + + +${LATEX} ${TEXFILE} 2&>/dev/null + +rm -f hyphen[0123].tmp + +# Parse the log file + +sed -e '1,/(hyphen3.tmp/ D' \ +    -e '/\\hbox/ D' \ +    -e '/^ *$/ D' \ +    -e 's/^\(\[\]\)\? *\\T1[^ ]* /*/' \ +    -e '/^----------/,$ c \ + '  minimal.log \ +  | tr -d '\n)' \ +  | tr -s ' ' '\n' \ +  | sed -e 's/-\*//' \ +        -e '1 s/\*//' \ +        -e 's/\*/\ +/'      -e '/^ *$/ D' \ +	-e 's/\^\^c5/Å/g' \ +	-e 's/\^\^c6/Æ/g' \ +	-e 's/\^\^d8/Ø/g' \ +	-e 's/\^\^c7/Ç/g' \ +	-e 's/\^\^c8/È/g' \ +	-e 's/\^\^c9/É/g' \ +	-e 's/\^\^d2/Ò/g' \ +	-e 's/\^\^d3/Ó/g' \ +	-e 's/\^\^d4/Ô/g' \ +	-e 's/\^\^e5/å/g' \ +	-e 's/\^\^e6/æ/g' \ +	-e 's/\^\^f8/ø/g' \ +	-e 's/\^\^e7/ç/g' \ +	-e 's/\^\^e8/è/g' \ +	-e 's/\^\^e9/é/g' \ +	-e 's/\^\^f2/ò/g' \ +	-e 's/\^\^f3/ó/g' \ +	-e 's/\^\^f4/ô/g' | \ +if [ $SORTING = true ] +then +  sed -e N -e 's/\n/ /' +else +  cat +fi | \ +if [ "${ONLYHYPHEN}" = true ] +then +  grep '\-' +else +  cat +fi + +rm -f minimal.log minimal.aux minimal.dvi | 
