strip_html now replaces non-ASCII characters (entities like &eacute;) with

their UTF-8 versions instead of Latin1. Also added &[aeiou]uml; entities to the list. However, I still don't know if this is really important anyway...
author: Wilmer van der Gaast <wilmer@gaast.net> 2006-01-03 19:30:54 +0100
committer: Wilmer van der Gaast <wilmer@gaast.net> 2006-01-03 19:30:54 +0100
commit: 39cc341b8f6299fbf8a62b243d278d1e48c8def7 (patch)
tree: ca0dbb4a395f054b2af5f324ceacbd153e86cf4e
parent: a252c1ad43823eb935148a5578ee0d666902b2f1 (diff)
1 files changed, 33 insertions, 24 deletions
diff --git a/util.c b/util.c
index 57ee0522..3fad6314 100644
--- a/util.c
+++ b/util.c
@@ -180,34 +180,39 @@ time_t get_time(int year, int month, int day, int hour, int min, int sec)
 typedef struct htmlentity
 {
 	char code[8];
-	char is;
+	char is[4];
 } htmlentity_t;
 
 /* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. */
 
-static htmlentity_t ent[] =
+static const htmlentity_t ent[] =
 {
-	{ "lt",     '<' },
-	{ "gt",     '>' },
-	{ "amp",    '&' },
-	{ "quot",   '"' },
-	{ "aacute", 'á' },
-	{ "eacute", 'é' },
-	{ "iacute", 'é' },
-	{ "oacute", 'ó' },
-	{ "uacute", 'ú' },
-	{ "agrave", 'à' },
-	{ "egrave", 'è' },
-	{ "igrave", 'ì' },
-	{ "ograve", 'ò' },
-	{ "ugrave", 'ù' },
-	{ "acirc",  'â' },
-	{ "ecirc",  'ê' },
-	{ "icirc",  'î' },
-	{ "ocirc",  'ô' },
-	{ "ucirc",  'û' },
-	{ "nbsp",   ' ' },
-	{ "",        0  }
+	{ "lt",     "<" },
+	{ "gt",     ">" },
+	{ "amp",    "&" },
+	{ "quot",   "\"" },
+	{ "aacute", "á" },
+	{ "eacute", "é" },
+	{ "iacute", "é" },
+	{ "oacute", "ó" },
+	{ "uacute", "ú" },
+	{ "agrave", "à" },
+	{ "egrave", "è" },
+	{ "igrave", "ì" },
+	{ "ograve", "ò" },
+	{ "ugrave", "ù" },
+	{ "acirc",  "â" },
+	{ "ecirc",  "ê" },
+	{ "icirc",  "î" },
+	{ "ocirc",  "ô" },
+	{ "ucirc",  "û" },
+	{ "auml",   "ä" },
+	{ "euml",   "ë" },
+	{ "iuml",   "ï" },
+	{ "ouml",   "ö" },
+	{ "uuml",   "ü" },
+	{ "nbsp",   " " },
+	{ "",        ""  }
 };
 
 void strip_html( char *in )
@@ -256,7 +261,11 @@ void strip_html( char *in )
 			for( i = 0; *ent[i].code; i ++ )
 				if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 )
 				{
-					*(s++) = ent[i].is;
+					int j;
+					
+					for( j = 0; ent[i].is[j]; j ++ )
+						*(s++) = ent[i].is[j];
+					
 					matched = 1;
 					break;
 				}
author	Wilmer van der Gaast <wilmer@gaast.net>	2006-01-03 19:30:54 +0100
committer	Wilmer van der Gaast <wilmer@gaast.net>	2006-01-03 19:30:54 +0100
commit	39cc341b8f6299fbf8a62b243d278d1e48c8def7 (patch)
tree	ca0dbb4a395f054b2af5f324ceacbd153e86cf4e
parent	a252c1ad43823eb935148a5578ee0d666902b2f1 (diff)