2 * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
25 #include <curl/curl.h>
/*
 * Install the format-specific Expat start/end element handlers on `parser`
 * for the feed format identified by `type`.
 *
 * NOTE(review): the case label guarding the feed_parser_rdf_start
 * registration and that call's end handler are not visible here --
 * presumably FEED_TYPE_RDF paired with an RDF end handler; confirm.
 */
46 static void _handler_set(XML_Parser parser, guint type)
/* RSS 2.0 feeds use the rss20 handler pair. */
52 case FEED_TYPE_RSS_20:
53 XML_SetElementHandler(parser,
54 feed_parser_rss20_start,
55 feed_parser_rss20_end);
/* RDF start handler registration. */
59 XML_SetElementHandler(parser,
60 feed_parser_rdf_start,
/* Atom 1.0 feeds use the atom10 handler pair. */
64 case FEED_TYPE_ATOM_10:
65 XML_SetElementHandler(parser,
66 feed_parser_atom10_start,
67 feed_parser_atom10_end);
/*
 * Start-element handler that detects the feed format from the document's
 * outermost element and then hands that format to _handler_set().
 *
 * data: the FeedParserCtx registered as Expat user data.
 * el:   name of the element being opened.
 * attr: Expat attribute array for the element (name/value pairs).
 *
 * Detection only happens while ctx->depth is 0:
 *   "rss"     -> FEED_TYPE_RSS_20
 *   "rdf:RDF" -> FEED_TYPE_RDF
 *   "feed"    -> Atom; the "xmlns" attribute selects the version.
 * feedtype starts out as FEED_TYPE_NONE.
 */
72 static void _elparse_start_chooser(void *data,
73 const gchar *el, const gchar **attr)
75 FeedParserCtx *ctx = (FeedParserCtx *)data;
76 guint feedtype = FEED_TYPE_NONE;
79 if( ctx->depth == 0 ) {
81 /* RSS 2.0 detected */
82 if( !strcmp(el, "rss") ) {
83 feedtype = FEED_TYPE_RSS_20;
84 } else if( !strcmp(el, "rdf:RDF") ) {
85 feedtype = FEED_TYPE_RDF;
86 } else if( !strcmp(el, "feed") ) {
88 /* ATOM feed detected, let's check version */
89 version = feed_parser_get_attribute_value(attr, "xmlns");
/* Atom 1.0 is recognised by its namespace URI, in http or https form. */
90 if( version != NULL &&
91 (!strcmp(version, "http://www.w3.org/2005/Atom") ||
92 !strcmp(version, "https://www.w3.org/2005/Atom")) )
93 feedtype = FEED_TYPE_ATOM_10;
/* NOTE(review): presumably the else branch (missing or different xmlns);
 * the `else` line itself is not visible here -- confirm. */
95 feedtype = FEED_TYPE_ATOM_03;
/* Switch the parser over to the handlers for the detected format. */
99 _handler_set(ctx->parser, feedtype);
/*
 * End-element handler that discards any character data accumulated in
 * ctx->str.
 *
 * data: the FeedParserCtx registered as Expat user data.
 * el:   name of the element being closed; not referenced in the visible lines.
 */
104 static void _elparse_end_dummy(void *data, const gchar *el)
106 FeedParserCtx *ctx = (FeedParserCtx *)data;
108 if( ctx->str != NULL ) {
/* TRUE asks GLib to free the character buffer together with the GString. */
109 g_string_free(ctx->str, TRUE);
/*
 * Expat character-data handler: accumulates element text in ctx->str.
 *
 * data: the FeedParserCtx registered as Expat user data.
 * s:    character data from Expat; per the Expat documentation it is NOT
 *       NUL-terminated, so only `len` bytes are valid.
 * len:  number of bytes available at `s`.
 *
 * The chunk is copied into `buf`, checked for being whitespace-only, and
 * appended to ctx->str, which is created on demand.
 */
116 void libfeed_expat_chparse(void *data, const gchar *s, gint len)
118 FeedParserCtx *ctx = (FeedParserCtx *)data;
/* NOTE(review): strncpy() does not NUL-terminate when `s` has no NUL within
 * `len` bytes; the strlen() and g_string_append() calls below rely on buf
 * being terminated by a line not visible here -- confirm. */
123 strncpy(buf, s, len);
126 /* check if the string is blank, ... */
/* NOTE(review): strlen(buf) is re-evaluated on every iteration, and
 * isspace() receives a plain char, which is undefined for negative values
 * (bytes >= 0x80 where char is signed); consider hoisting the length and
 * casting to unsigned char. */
127 for( i = 0; i < strlen(buf); i++ )
128 if( !isspace(buf[i]) )
131 /* ...because we do not want the blanks if we're just starting new GString */
132 if( xblank > 0 && ctx->str == NULL ) {
/* Create the accumulator on first use, pre-sized for this chunk. */
137 if( ctx->str == NULL ) {
138 ctx->str = g_string_sized_new(len + 1);
141 g_string_append(ctx->str, buf);
/*
 * Register the initial set of Expat callbacks on ctx->parser.
 *
 * ctx: parser context; it is also installed as the Expat user data, so each
 *      callback receives it as its `data` argument.
 */
146 void feed_parser_set_expat_handlers(FeedParserCtx *ctx)
148 XML_SetUserData(ctx->parser, (void *)ctx);
/* Start with the format-detecting start handler.
 * NOTE(review): the end-handler argument is not visible here. */
150 XML_SetElementHandler(ctx->parser,
151 _elparse_start_chooser,
/* Character data is accumulated by libfeed_expat_chparse. */
154 XML_SetCharacterDataHandler(ctx->parser,
155 libfeed_expat_chparse);
/* Encodings Expat does not support natively are handled through g_iconv.
 * NOTE(review): the third argument (handler data) is not visible here. */
157 XML_SetUnknownEncodingHandler(ctx->parser, feed_parser_unknown_encoding_handler,
/*
 * Feed a chunk of downloaded data to the Expat parser.
 *
 * ptr:   start of the received data.
 * size:  size of one data item.
 * nmemb: number of items; the chunk is size * nmemb bytes long.
 * data:  the FeedParserCtx owning the parser.
 *
 * NOTE(review): the signature matches libcurl's CURLOPT_WRITEFUNCTION
 * callback; presumably it is registered as one -- confirm at the call site.
 * The return statement is not visible here.
 */
161 size_t feed_writefunc(void *ptr, size_t size, size_t nmemb, void *data)
/* NOTE(review): the size_t product is narrowed to gint; verify that chunk
 * sizes can never exceed the range of gint. */
163 gint len = size * nmemb;
164 FeedParserCtx *ctx = (FeedParserCtx *)data;
/* FALSE: this is not the final chunk of the document. */
167 status = XML_Parse(ctx->parser, ptr, len, FALSE);
169 if( status == XML_STATUS_ERROR ) {
170 err = XML_GetErrorCode(ctx->parser);
/* NOTE(review): the parse error is reported on stdout via printf(). */
171 printf("\nExpat: --- %s\n\n", XML_ErrorString(err));
/*
 * Look up an attribute value in an Expat-style attribute array.
 *
 * attr: NULL-terminated array of alternating name and value strings.
 * name: attribute name to search for (exact, case-sensitive strcmp match).
 *
 * Returns the value paired with the first matching name. The pointer refers
 * to the entry inside `attr` (const is cast away); it is not a copy.
 *
 * NOTE(review): the return statements for the NULL-argument and not-found
 * paths are not visible here -- presumably NULL, which is what the caller
 * in _elparse_start_chooser checks for; confirm.
 */
177 gchar *feed_parser_get_attribute_value(const gchar **attr, const gchar *name)
181 if( attr == NULL || name == NULL )
/* Step two entries at a time: attr[i] is a name, attr[i+1] its value. */
184 for( i = 0; attr[i] != NULL && attr[i+1] != NULL; i += 2 ) {
185 if( !strcmp( attr[i], name) )
186 return (gchar *)attr[i+1];
189 /* We haven't found anything. */
/* Size in bytes of the output buffer that receives one converted UTF-32
 * character. */
193 #define CHARSIZEUTF32 4
/*
 * Convert `insize` bytes at `inbuf` through the conversion descriptor `cd`
 * into a CHARSIZEUTF32-byte buffer, then fold those bytes, most significant
 * first, into a single integer value.
 *
 * Returns one of the LEP_ICONV_* codes. Callers in this file pass a pointer
 * to a `value` variable as the fourth argument and compare the result with
 * LEP_ICONV_OK and LEP_ICONV_INVAL.
 *
 * NOTE(review): the rest of the parameter list, the conditions selecting
 * ILSEQ/INVAL/UNKNOWN (presumably errno after a failed g_iconv()) and the
 * condition for the final LEP_ICONV_FAILED are not visible here -- confirm.
 */
203 static gint giconv_utf32_char(GIConv cd, const gchar *inbuf, size_t insize,
208 guchar outbuf[CHARSIZEUTF32];
212 outsize = sizeof(outbuf);
213 outbufp = (gchar *)outbuf;
/* The two g_iconv() calls differ only in the cast applied to &inbuf. */
214 #ifdef HAVE_ICONV_PROTO_CONST
215 r = g_iconv(cd, (const gchar **)&inbuf, &insize,
218 r = g_iconv(cd, (gchar **)&inbuf, &insize,
/* NULL buffer arguments; per iconv semantics this resets the conversion
 * state of `cd`. */
222 g_iconv(cd, 0, 0, 0, 0);
225 return LEP_ICONV_ILSEQ;
227 return LEP_ICONV_INVAL;
229 return LEP_ICONV_UNKNOWN;
/* Fail if any input bytes or any output space are left over. */
235 if( (insize > 0) || (outsize > 0) )
236 return LEP_ICONV_FAILED;
/* Assemble the output bytes into one integer, first byte most significant. */
239 for( i = 0; i < sizeof(outbuf); i++ ) {
240 value = (value << 8) + outbuf[i];
246 return LEP_ICONV_FAILED;
/*
 * Populate info->map for `charset` by probing a charset -> UTF-32BE
 * converter with every possible leading byte.
 *
 * For each byte value i in 0..255 a one-byte conversion is attempted; on
 * LEP_ICONV_OK the resulting value is stored in info->map[i]. Nested loops
 * over j and k repeat the probe with two- and three-byte inputs.
 *
 * NOTE(review): the rest of the parameter list, the branch bodies around
 * the nested loops, what is stored for multi-byte sequences and the return
 * values are not visible here -- confirm against the full function.
 */
250 static gint feed_parser_setup_unknown_encoding(const gchar *charset,
259 cd = g_iconv_open("UTF-32BE", charset);
/* g_iconv_open() reports failure as (GIConv) -1. */
260 if( cd == (GIConv) -1 )
264 for( i = 0; i < 256; i++ ) {
/* Probe: is byte i a complete character on its own? */
268 r = giconv_utf32_char(cd, buf, 1, &value);
269 if( r == LEP_ICONV_OK) {
270 info->map[i] = value;
271 } else if( r != LEP_ICONV_INVAL ) {
/* Probe two-byte inputs. */
273 for( j = 0; j < 256; j++ ) {
276 r = giconv_utf32_char(cd, buf, 2, &value);
277 if( r == LEP_ICONV_OK ) {
280 } else if( r != LEP_ICONV_INVAL ) {
/* Probe three-byte inputs. */
282 for( k = 0; k < 256; k++ ) {
285 r = giconv_utf32_char(cd, buf, 3, &value);
286 if( r == LEP_ICONV_OK) {
/*
 * State used by the unknown-encoding conversion callbacks.
 * NOTE(review): the member list is not visible here; code in this file
 * accesses ->charset (set from strdup(), released with free()) and ->cd
 * (a GIConv opened with g_iconv_open(), closed with g_iconv_close()).
 */
300 struct FeedParserUnknownEncoding {
/*
 * Conversion callback installed as info->convert by
 * feed_parser_unknown_encoding_handler(): converts the bytes at `s` using
 * the converter held in the FeedParserUnknownEncoding state.
 *
 * NOTE(review): how enc_data is derived from `data`, how `insize` is
 * determined, and both return statements are not visible here -- confirm.
 */
305 static gint feed_parser_unknown_encoding_convert(void *data, const gchar *s)
308 struct FeedParserUnknownEncoding *enc_data;
318 r = giconv_utf32_char(enc_data->cd, s, insize, &value);
/* Any result other than LEP_ICONV_OK is handled as a failed conversion. */
319 if( r != LEP_ICONV_OK )
/*
 * Release callback installed as info->release by
 * feed_parser_unknown_encoding_handler(): frees the duplicated charset name
 * and closes the conversion descriptor.
 *
 * NOTE(review): whether the enc_data structure itself is freed is not
 * visible here -- confirm, since the handler allocates it with malloc().
 */
325 static void feed_parser_unknown_encoding_data_free(void *data)
327 struct FeedParserUnknownEncoding *enc_data;
/* charset comes from strdup(), so plain free() is the matching release. */
330 free(enc_data->charset);
331 g_iconv_close(enc_data->cd);
/*
 * Expat unknown-encoding handler: prepares `info` so Expat can decode a
 * document whose encoding `name` it does not support natively.
 *
 * First fills info->map via feed_parser_setup_unknown_encoding(). On one
 * result no callbacks are needed, so convert/release are cleared and
 * XML_STATUS_OK is returned. Otherwise a name -> UTF-32BE converter and a
 * FeedParserUnknownEncoding are created and the convert/release callbacks
 * are installed.
 *
 * Returns XML_STATUS_OK on success and XML_STATUS_ERROR on failure.
 *
 * NOTE(review): the rest of the parameter list, the conditions tested on
 * `result`, and the cleanup on the error paths (closing `cd`, freeing
 * `data`) are not visible here -- confirm nothing leaks.
 */
335 int feed_parser_unknown_encoding_handler(void *encdata, const XML_Char *name,
339 struct FeedParserUnknownEncoding *data;
342 result = feed_parser_setup_unknown_encoding(name, info);
/* No conversion callbacks are installed on this path. */
345 info->convert = NULL;
346 info->release = NULL;
347 return XML_STATUS_OK;
350 cd = g_iconv_open("UTF-32BE", name);
/* g_iconv_open() reports failure as (GIConv)-1. */
351 if( cd == (GIConv)-1 )
352 return XML_STATUS_ERROR;
354 data = malloc( sizeof(*data) );
/* NOTE(review): presumably the malloc() failure path. */
357 return XML_STATUS_ERROR;
360 data->charset = strdup(name);
361 if( data->charset == NULL ) {
364 return XML_STATUS_ERROR;
/* Expat calls convert for multi-byte sequences and release when the
 * encoding data is no longer needed. */
369 info->convert = feed_parser_unknown_encoding_convert;
370 info->release = feed_parser_unknown_encoding_data_free;
372 return XML_STATUS_OK;