diff options
| author | Wilmer van der Gaast <wilmer@gaast.net> | 2006-01-03 19:30:54 +0100 | 
|---|---|---|
| committer | Wilmer van der Gaast <wilmer@gaast.net> | 2006-01-03 19:30:54 +0100 | 
| commit | 39cc341b8f6299fbf8a62b243d278d1e48c8def7 (patch) | |
| tree | ca0dbb4a395f054b2af5f324ceacbd153e86cf4e | |
| parent | a252c1ad43823eb935148a5578ee0d666902b2f1 (diff) | |
strip_html now replaces HTML entities for non-ASCII characters (such as &eacute;) with
their UTF-8 encodings instead of Latin-1. Also added the &[aeiou]uml; entities
to the list. However, I still don't know if this is really important anyway...
| -rw-r--r-- | util.c | 57 | 
1 files changed, 33 insertions, 24 deletions
| @@ -180,34 +180,39 @@ time_t get_time(int year, int month, int day, int hour, int min, int sec)  typedef struct htmlentity  {  	char code[8]; -	char is; +	char is[4];  } htmlentity_t;  /* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. */ -static htmlentity_t ent[] = +static const htmlentity_t ent[] =  { -	{ "lt",     '<' }, -	{ "gt",     '>' }, -	{ "amp",    '&' }, -	{ "quot",   '"' }, -	{ "aacute", 'á' }, -	{ "eacute", 'é' }, -	{ "iacute", 'é' }, -	{ "oacute", 'ó' }, -	{ "uacute", 'ú' }, -	{ "agrave", 'à' }, -	{ "egrave", 'è' }, -	{ "igrave", 'ì' }, -	{ "ograve", 'ò' }, -	{ "ugrave", 'ù' }, -	{ "acirc",  'â' }, -	{ "ecirc",  'ê' }, -	{ "icirc",  'î' }, -	{ "ocirc",  'ô' }, -	{ "ucirc",  'û' }, -	{ "nbsp",   ' ' }, -	{ "",        0  } +	{ "lt",     "<" }, +	{ "gt",     ">" }, +	{ "amp",    "&" }, +	{ "quot",   "\"" }, +	{ "aacute", "á" }, +	{ "eacute", "é" }, +	{ "iacute", "é" }, +	{ "oacute", "ó" }, +	{ "uacute", "ú" }, +	{ "agrave", "à" }, +	{ "egrave", "è" }, +	{ "igrave", "ì" }, +	{ "ograve", "ò" }, +	{ "ugrave", "ù" }, +	{ "acirc",  "â" }, +	{ "ecirc",  "ê" }, +	{ "icirc",  "î" }, +	{ "ocirc",  "ô" }, +	{ "ucirc",  "û" }, +	{ "auml",   "ä" }, +	{ "euml",   "ë" }, +	{ "iuml",   "ï" }, +	{ "ouml",   "ö" }, +	{ "uuml",   "ü" }, +	{ "nbsp",   " " }, +	{ "",        ""  }  };  void strip_html( char *in ) @@ -256,7 +261,11 @@ void strip_html( char *in )  			for( i = 0; *ent[i].code; i ++ )  				if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 )  				{ -					*(s++) = ent[i].is; +					int j; +					 +					for( j = 0; ent[i].is[j]; j ++ ) +						*(s++) = ent[i].is[j]; +					  					matched = 1;  					break;  				} | 
