GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method/***************************************************************************\
* *
* BitlBee - An IRC to IM gateway *
* Simple XML (stream) parse tree handling code (Jabber/XMPP, mainly) *
* *
* Copyright 2006 Wilmer van der Gaast <wilmer@gaast.net> *
* *
* This library is free software; you can redistribute it and/or *
* modify it under the terms of the GNU Lesser General Public *
* License as published by the Free Software Foundation, version *
* 2.1. *
* *
* This library is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this library; if not, write to the Free Software Foundation, *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *
* *
****************************************************************************/
#include <glib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <stdio.h>
#include "xmltree.h"
#define g_strcasecmp g_ascii_strcasecmp
#define g_strncasecmp g_ascii_strncasecmp
static void xt_start_element( GMarkupParseContext *ctx, const gchar *element_name, const gchar **attr_names, const gchar **attr_values, gpointer data, GError **error )
{
struct xt_parser *xt = data;
struct xt_node *node = g_new0( struct xt_node, 1 ), *nt;
int i;
node->parent = xt->cur;
node->name = g_strdup( element_name );
/* First count the number of attributes */
for( i = 0; attr_names[i]; i ++ );
/* Then allocate a NULL-terminated array. */
node->attr = g_new0( struct xt_attr, i + 1 );
/* And fill it, saving one variable by starting at the end. */
for( i --; i >= 0; i -- )
{
node->attr[i].key = g_strdup( attr_names[i] );
node->attr[i].value = g_strdup( attr_values[i] );
}
/* Add it to the linked list of children nodes, if we have a current
node yet. */
if( xt->cur )
{
if( xt->cur->children )
{
for( nt = xt->cur->children; nt->next; nt = nt->next );
nt->next = node;
}
else
{
xt->cur->children = node;
}
}
else if( xt->root )
{
/* ERROR situation: A second root-element??? */
}
/* Now this node will be the new current node. */
xt->cur = node;
/* And maybe this is the root? */
if( xt->root == NULL )
xt->root = node;
}
static void xt_text( GMarkupParseContext *ctx, const gchar *text, gsize text_len, gpointer data, GError **error )
{
struct xt_parser *xt = data;
struct xt_node *node = xt->cur;
if( node == NULL )
return;
/* FIXME: Does g_renew also OFFICIALLY accept NULL arguments? */
node->text = g_renew( char, node->text, node->text_len + text_len + 1 );
memcpy( node->text + node->text_len, text, text_len );
node->text_len += text_len;
/* Zero termination is always nice to have. */
node->text[node->text_len] = 0;
}
static void xt_end_element( GMarkupParseContext *ctx, const gchar *element_name, gpointer data, GError **error )
{
struct xt_parser *xt = data;
xt->cur->flags |= XT_COMPLETE;
xt->cur = xt->cur->parent;
}
GMarkupParser xt_parser_funcs =
{
xt_start_element,
xt_end_element,
xt_text,
NULL,
NULL
};
struct xt_parser *xt_new( const struct xt_handler_entry *handlers, gpointer data )
{
struct xt_parser *xt = g_new0( struct xt_parser, 1 );
xt->data = data;
xt->handlers = handlers;
xt_reset( xt );
return xt;
}
/* Reset the parser, flush everything we have so far. For example, we need
this for XMPP when doing TLS/SASL to restart the stream. */
void xt_reset( struct xt_parser *xt )
{
if( xt->parser )
g_markup_parse_context_free( xt->parser );
xt->parser = g_markup_parse_context_new( &xt_parser_funcs, 0, xt, NULL );
if( xt->root )
{
xt_free_node( xt->root );
xt->root = NULL;
xt->cur = NULL;
}
}
/* Feed the parser, don't execute any handler. Returns -1 on errors, 0 on
end-of-stream and 1 otherwise. */
int xt_feed( struct xt_parser *xt, const char *text, int text_len )
{
if( !g_markup_parse_context_parse( xt->parser, text, text_len, &xt->gerr ) )
{
return -1;
}
return !( xt->root && xt->root->flags & XT_COMPLETE );
}
/* Find completed nodes and see if a handler has to be called. Passing
a node isn't necessary if you want to start at the root, just pass
NULL. This second argument is needed for recursive calls. */
int xt_handle( struct xt_parser *xt, struct xt_node *node, int depth )
{
struct xt_node *c;
xt_status st;
int i;
/* Just in case someone likes infinite loops... */
if( xt->root == NULL )
return 0;
if( node == NULL )
return xt_handle( xt, xt->root, depth );
if( depth != 0 )
for( c = node->children; c; c = c->next )
if( !xt_handle( xt, c, depth > 0 ? depth - 1 : depth ) )
return 0;
if( node->flags & XT_COMPLETE && !( node->flags & XT_SEEN ) )
{
if( xt->handlers ) for( i = 0; xt->handlers[i].func; i ++ )
{
/* This one is fun! \o/ */
/* If handler.name == NULL it means it should always match. */
if( ( xt->handlers[i].name == NULL ||
/* If it's not, compare. There should always be a name. */
g_strcasecmp( xt->handlers[i].name, node->name ) == 0 ) &&
/* If handler.parent == NULL, it's a match. */
( xt->handlers[i].parent == NULL ||
/* If there's a parent node, see if the name matches. */
( node->parent ? g_strcasecmp( xt->handlers[i].parent, node->parent->name ) == 0 :
/* If there's no parent, the handler should mention <root> as a parent. */
strcmp( xt->handlers[i].parent, "<root>" ) == 0 ) ) )
{
st = xt->handlers[i].func( node, xt->data );
if( st == XT_ABORT )
return 0;
else if( st != XT_NEXT )
break;
}
}
node->flags |= XT_SEEN;
}
return 1;
}
/* Garbage collection: Cleans up all nodes that are handled. Useful for
streams because there's no reason to keep a complete packet history
in memory. */
void xt_cleanup( struct xt_parser *xt, struct xt_node *node, int depth )
{
struct xt_node *c, *prev;
if( !xt || !xt->root )
return;
if( node == NULL )
{
xt_cleanup( xt, xt->root, depth );
return;
}
if( node->flags & XT_SEEN && node == xt->root )
{
xt_free_node( xt->root );
xt->root = xt->cur = NULL;
/* xt->cur should be NULL already, BTW... */
return;
}
/* c contains the current node, prev the previous node (or NULL).
I admit, this one's pretty horrible. */
for( c = node->children, prev = NULL; c; prev = c, c = c ? c->next : node->children )
{
if( c->flags & XT_SEEN )
{
/* Remove the node from the linked list. */
if( prev )
prev->next = c->next;
else
node->children = c->next;
xt_free_node( c );
/* Since the for loop wants to get c->next, make sure
c points at something that exists (and that c->next
will actually be the next item we should check). c
can be NULL now, if we just removed the first item.
That explains the ? thing in for(). */
c = prev;
}
else
{
/* This node can't be cleaned up yet, but maybe a
subnode can. */
if( depth != 0 )
xt_cleanup( xt, c, depth > 0 ? depth - 1 : depth );
}
}
}
struct xt_node *xt_from_string( const char *in )
{
struct xt_parser *parser;
struct xt_node *ret;
parser = xt_new( NULL, NULL );
xt_feed( parser, in, strlen( in ) );
ret = parser->root;
parser->root = NULL;
xt_free( parser );
return ret;
}
static void xt_to_string_real( struct xt_node *node, GString *str )
{
char *buf;
struct xt_node *c;
int i;
g_string_append_printf( str, "<%s", node->name );
for( i = 0; node->attr[i].key; i ++ )
{
buf = g_markup_printf_escaped( " %s=\"%s\"", node->attr[i].key, node->attr[i].value );
g_string_append( str, buf );
g_free( buf );
}
if( node->text == NULL && node->children == NULL )
{
g_string_append( str, "/>" );
return;
}
g_string_append( str, ">" );
if( node->text_len > 0 )
{
buf = g_markup_escape_text( node->text, node->text_len );
g_string_append( str, buf );
g_free( buf );
}
for( c = node->children; c; c = c->next )
xt_to_string_real( c, str );
g_string_append_printf( str, "</%s>", node->name );
}
char *xt_to_string( struct xt_node *node )
{
GString *ret;
char *real;
ret = g_string_new( "" );
xt_to_string_real( node, ret );
real = ret->str;
g_string_free( ret, FALSE );
return real;
}
void xt_print( struct xt_node *node )
{
int i;
struct xt_node *c;
/* Indentation */
for( c = node; c->parent; c = c->parent )
fprintf( stderr, " " );
/* Start the tag */
fprintf( stderr, "<%s", node->name );
/* Print the attributes */
for( i = 0; node->attr[i].key; i ++ )
{
char *v = g_markup_escape_text( node->attr[i].value, -1 );
fprintf( stderr, " %s=\"%s\"", node->attr[i].key, v );
g_free( v );
}
/* /> in case there's really *nothing* inside this tag, otherwise
just >. */
/* If this tag doesn't have any content at all... */
if( node->text == NULL && node->children == NULL )
{
fprintf( stderr, "/>\n" );
return;
/* Then we're finished! */
}
/* Otherwise... */
fprintf( stderr, ">" );
/* Only print the text if it contains more than whitespace (TEST). */
if( node->text_len > 0 )
{
for( i = 0; node->text[i] && isspace( node->text[i] ); i ++ );
if( node->text[i] )
{
char *v = g_markup_escape_text( node->text, -1 );
fprintf( stderr, "%s", v );
g_free( v );
}
}
if( node->children )
fprintf( stderr, "\n" );
for( c = node->children; c; c = c->next )
xt_print( c );
if( node->children )
for( c = node; c->parent; c = c->parent )
fprintf( stderr, " " );
/* Non-empty tag is now finished. */
fprintf( stderr, "</%s>\n", node->name );
}
struct xt_node *xt_dup( struct xt_node *node )
{
struct xt_node *dup = g_new0( struct xt_node, 1 );
struct xt_node *c, *dc = NULL;
int i;
/* Let's NOT copy the parent element here BTW! Only do it for children. */
dup->name = g_strdup( node->name );
dup->flags = node->flags;
if( node->text )
{
dup->text = g_memdup( node->text, node->text_len + 1 );
dup->text_len = node->text_len;
}
/* Count the number of attributes and allocate the new array. */
for( i = 0; node->attr[i].key; i ++ );
dup->attr = g_new0( struct xt_attr, i + 1 );
/* Copy them all! */
for( i --; i >= 0; i -- )
{
dup->attr[i].key = g_strdup( node->attr[i].key );
dup->attr[i].value = g_strdup( node->attr[i].value );
}
/* This nice mysterious loop takes care of the children. */
for( c = node->children; c; c = c->next )
{
if( dc == NULL )
dc = dup->children = xt_dup( c );
else
dc = ( dc->next = xt_dup( c ) );
dc->parent = dup;
}
return dup;
}
/* Frees a node. This doesn't clean up references to itself from parents! */
void xt_free_node( struct xt_node *node )
{
int i;
if( !node )
return;
g_free( node->name );
g_free( node->text );
for( i = 0; node->attr[i].key; i ++ )
{
g_free( node->attr[i].key );
g_free( node->attr[i].value );
}
g_free( node->attr );
while( node->children )
{
struct xt_node *next = node->children->next;
xt_free_node( node->children );
node->children = next;
}
g_free( node );
}
void xt_free( struct xt_parser *xt )
{
if( !xt )
return;
if( xt->root )
xt_free_node( xt->root );
g_markup_parse_context_free( xt->parser );
g_free( xt );
}
/* To find a node's child with a specific name, pass the node's children
list, not the node itself! The reason you have to do this by hand: So
that you can also use this function as a find-next. */
struct xt_node *xt_find_node( struct xt_node *node, const char *name )
{
while( node )
{
char *colon;
if( g_strcasecmp( node->name, name ) == 0 ||
( ( colon = strchr( node->name, ':' ) ) &&
g_strcasecmp( colon + 1, name ) == 0 ) )
break;
node = node->next;
}
return node;
}
/* More advanced than the one above, understands something like
../foo/bar to find a subnode bar of a node foo which is a child
of node's parent. Pass the node directly, not its list of children. */
struct xt_node *xt_find_path( struct xt_node *node, const char *name )
{
while( name && *name && node )
{
char *colon, *slash;
int n;
if( ( slash = strchr( name, '/' ) ) )
n = slash - name;
else
n = strlen( name );
if( strncmp( name, "..", n ) == 0 )
{
node = node->parent;
}
else
{
node = node->children;
while( node )
{
if( g_strncasecmp( node->name, name, n ) == 0 ||
( ( colon = strchr( node->name, ':' ) ) &&
g_strncasecmp( colon + 1, name, n ) == 0 ) )
break;
node = node->next;
}
}
name = slash ? slash + 1 : NULL;
}
return node;
}
char *xt_find_attr( struct xt_node *node, const char *key )
{
int i;
char *colon;
if( !node )
return NULL;
for( i = 0; node->attr[i].key; i ++ )
if( g_strcasecmp( node->attr[i].key, key ) == 0 )
break;
/* This is an awful hack that only takes care of namespace prefixes
inside a tag. Since IMHO excessive namespace usage in XMPP is
massive overkill anyway (this code exists for almost four years
now and never really missed it): Meh. */
if( !node->attr[i].key && strcmp( key, "xmlns" ) == 0 &&
( colon = strchr( node->name, ':' ) ) )
{
*colon = '\0';
for( i = 0; node->attr[i].key; i ++ )
if( strncmp( node->attr[i].key, "xmlns:", 6 ) == 0 &&
strcmp( node->attr[i].key + 6, node->name ) == 0 )
break;
*colon = ':';
}
return node->attr[i].value;
}
/* Strip a few non-printable characters that aren't allowed in XML streams
(and upset some XMPP servers for example). */
void xt_strip_text( char *in )
{
char *out = in;
static const char nonprint[32] = {
0, 0, 0, 0, 0, 0, 0, 0, /* 0..7 */
0, 1, 1, 0, 0, 1, 0, 0, /* 9 (tab), 10 (\n), 13 (\r) */
};
if( !in )
return;
while( *in )
{
if( (unsigned int) *in >= ' ' || nonprint[(unsigned int) *in] )
*out ++ = *in;
in ++;
}
*out = *in;
}
struct xt_node *xt_new_node( char *name, const char *text, struct xt_node *children )
{
struct xt_node *node, *c;
node = g_new0( struct xt_node, 1 );
node->name = g_strdup( name );
node->children = children;
node->attr = g_new0( struct xt_attr, 1 );
if( text )
{
node->text = g_strdup( text );
xt_strip_text( node->text );
node->text_len = strlen( node->text );
}
for( c = children; c; c = c->next )
{
if( c->parent != NULL )
{
/* ERROR CONDITION: They seem to have a parent already??? */
}
c->parent = node;
}
return node;
}
void xt_add_child( struct xt_node *parent, struct xt_node *child )
{
struct xt_node *node;
/* This function can actually be used to add more than one child, so
do handle this properly. */
for( node = child; node; node = node->next )
{
if( node->parent != NULL )
{
/* ERROR CONDITION: They seem to have a parent already??? */
}
node->parent = parent;
}
if( parent->children == NULL )
{
parent->children = child;
}
else
{
for( node = parent->children; node->next; node = node->next );
node->next = child;
}
}
/* Same, but at the beginning. */
void xt_insert_child( struct xt_node *parent, struct xt_node *child )
{
struct xt_node *node, *last = NULL;
if( child == NULL )
return; /* BUG */
for( node = child; node; node = node->next )
{
if( node->parent != NULL )
{
/* ERROR CONDITION: They seem to have a parent already??? */
}
node->parent = parent;
last = node;
}
last->next = parent->children;
parent->children = child;
}
void xt_add_attr( struct xt_node *node, const char *key, const char *value )
{
int i;
/* Now actually it'd be nice if we can also change existing attributes
(which actually means this function doesn't have the right name).
So let's find out if we have this attribute already... */
for( i = 0; node->attr[i].key; i ++ )
if( strcmp( node->attr[i].key, key ) == 0 )
break;
if( node->attr[i].key == NULL )
{
/* If not, allocate space for a new attribute. */
node->attr = g_renew( struct xt_attr, node->attr, i + 2 );
node->attr[i].key = g_strdup( key );
node->attr[i+1].key = NULL;
}
else
{
/* Otherwise, free the old value before setting the new one. */
g_free( node->attr[i].value );
}
node->attr[i].value = g_strdup( value );
}
int xt_remove_attr( struct xt_node *node, const char *key )
{
int i, last;
for( i = 0; node->attr[i].key; i ++ )
if( strcmp( node->attr[i].key, key ) == 0 )
break;
/* If we didn't find the attribute... */
if( node->attr[i].key == NULL )
return 0;
g_free( node->attr[i].key );
g_free( node->attr[i].value );
/* If it's the last, this is easy: */
if( node->attr[i+1].key == NULL )
{
node->attr[i].key = node->attr[i].value = NULL;
}
else /* It's also pretty easy, actually. */
{
/* Find the last item. */
for( last = i + 1; node->attr[last+1].key; last ++ );
node->attr[i] = node->attr[last];
node->attr[last].key = NULL;
node->attr[last].value = NULL;
}
/* Let's not bother with reallocating memory here. It takes time and
most packets don't stay in memory for long anyway. */
return 1;
}