/*
 * Claws Mail -- a GTK based, lightweight, and fast e-mail client
 * Copyright (C) 2006-2023 the Claws Mail Team and Andrej Kacian <andrej@kacian.sk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
24 #include <curl/curl.h>
/*
 * Points the parser's start/end element callbacks at the implementation for
 * one feed type: FEED_TYPE_RSS_20 gets feed_parser_rss20_start/_end and
 * FEED_TYPE_ATOM_10 gets feed_parser_atom10_start/_end.
 *
 * NOTE(review): the case label and the end handler that belong to the
 * feed_parser_rdf_start registration are not visible in this excerpt
 * (presumably FEED_TYPE_RDF and feed_parser_rdf_end) - confirm against the
 * full file.
 */
43 static void _handler_set(XML_Parser parser, guint type)
49 case FEED_TYPE_RSS_20:
50 XML_SetElementHandler(parser,
51 feed_parser_rss20_start,
52 feed_parser_rss20_end);
56 XML_SetElementHandler(parser,
57 feed_parser_rdf_start,
61 case FEED_TYPE_ATOM_10:
62 XML_SetElementHandler(parser,
63 feed_parser_atom10_start,
64 feed_parser_atom10_end);
/*
 * Start-element callback used before the feed type is known. While
 * ctx->depth is 0 it classifies the feed by the element name and then hands
 * the chosen type to _handler_set().
 *
 * data: the FeedParserCtx that was registered as parser user data.
 * el:   element name.
 * attr: attribute array, searched here with
 *       feed_parser_get_attribute_value().
 *
 * NOTE(review): feedtype can be FEED_TYPE_ATOM_03 (or stay FEED_TYPE_NONE)
 * when _handler_set() is called, and no case for either is visible in
 * _handler_set() - confirm what is intended for those feeds.
 */
69 static void _elparse_start_chooser(void *data,
70 const gchar *el, const gchar **attr)
72 FeedParserCtx *ctx = (FeedParserCtx *)data;
73 guint feedtype = FEED_TYPE_NONE;
/* Only the outermost element is used for detection. */
76 if( ctx->depth == 0 ) {
78 /* RSS 2.0 detected */
79 if( !strcmp(el, "rss") ) {
80 feedtype = FEED_TYPE_RSS_20;
81 } else if( !strcmp(el, "rdf:RDF") ) {
82 feedtype = FEED_TYPE_RDF;
83 } else if( !strcmp(el, "feed") ) {
85 /* ATOM feed detected, let's check version */
86 version = feed_parser_get_attribute_value(attr, "xmlns");
/* The Atom 1.0 namespace is accepted with either URL scheme. */
87 if( version != NULL &&
88 (!strcmp(version, "http://www.w3.org/2005/Atom") ||
89 !strcmp(version, "https://www.w3.org/2005/Atom")) )
90 feedtype = FEED_TYPE_ATOM_10;
/* Presumably the fallback for any other (or missing) xmlns value; the
 * branch keyword is not visible in this excerpt. */
92 feedtype = FEED_TYPE_ATOM_03;
94 /* Not a known feed type */
95 ctx->feed->is_valid = FALSE;
99 _handler_set(ctx->parser, feedtype);
/*
 * End-element callback: releases the text buffer ctx->str, if there is one,
 * including its character data (g_string_free() with TRUE).
 *
 * NOTE(review): the rest of the body is not visible in this excerpt; confirm
 * that ctx->str is reset to NULL after being freed, since
 * libfeed_expat_chparse() tests it against NULL.
 */
104 static void _elparse_end_dummy(void *data, const gchar *el)
106 FeedParserCtx *ctx = (FeedParserCtx *)data;
108 if( ctx->str != NULL ) {
109 g_string_free(ctx->str, TRUE);
116 void libfeed_expat_chparse(void *data, const gchar *s, gint len)
118 FeedParserCtx *ctx = (FeedParserCtx *)data;
122 buf = g_strndup(s, len);
124 /* check if the string is blank, ... */
125 for( i = 0; i < strlen(buf); i++ )
126 if( !isspace(buf[i]) )
129 /* ...because we do not want the blanks if we're just starting new GString */
130 if( xblank > 0 && ctx->str == NULL ) {
135 if( ctx->str == NULL ) {
136 ctx->str = g_string_sized_new(len + 1);
139 g_string_append(ctx->str, buf);
/*
 * Wires a freshly created parser to this module: stores ctx as the parser's
 * user data (so every callback receives it as its first argument), installs
 * the feed-type detecting start handler, the character-data collector and
 * the unknown-encoding handler.
 *
 * NOTE(review): the end-element argument of XML_SetElementHandler() and the
 * last argument of XML_SetUnknownEncodingHandler() are not visible in this
 * excerpt.
 */
144 void feed_parser_set_expat_handlers(FeedParserCtx *ctx)
146 XML_SetUserData(ctx->parser, (void *)ctx);
148 XML_SetElementHandler(ctx->parser,
149 _elparse_start_chooser,
152 XML_SetCharacterDataHandler(ctx->parser,
153 libfeed_expat_chparse);
155 XML_SetUnknownEncodingHandler(ctx->parser, feed_parser_unknown_encoding_handler,
159 size_t feed_writefunc(void *ptr, size_t size, size_t nmemb, void *data)
161 gint len = size * nmemb;
162 FeedParserCtx *ctx = (FeedParserCtx *)data;
165 if (!ctx->feed->is_valid) {
166 /* We already know that the feed is not valid, so we won't
167 * try parsing it. Just return correct number so libcurl is
172 status = XML_Parse(ctx->parser, ptr, len, FALSE);
174 if( status == XML_STATUS_ERROR ) {
175 err = XML_GetErrorCode(ctx->parser);
176 printf("\nExpat: --- %s\n\n", XML_ErrorString(err));
177 ctx->feed->is_valid = FALSE;
183 gchar *feed_parser_get_attribute_value(const gchar **attr, const gchar *name)
187 if( attr == NULL || name == NULL )
190 for( i = 0; attr[i] != NULL && attr[i+1] != NULL; i += 2 ) {
191 if( !strcmp( attr[i], name) )
192 return (gchar *)attr[i+1];
195 /* We haven't found anything. */
/* Size in bytes of the output buffer that receives one converted character. */
199 #define CHARSIZEUTF32 4
/*
 * Converts the insize bytes at inbuf through the conversion descriptor cd
 * into a CHARSIZEUTF32-byte buffer and combines those bytes, most
 * significant first, into a single integer.
 *
 * Returns one of the LEP_ICONV_* codes; LEP_ICONV_ILSEQ, LEP_ICONV_INVAL,
 * LEP_ICONV_UNKNOWN and LEP_ICONV_FAILED are the failure codes visible here.
 *
 * NOTE(review): the remaining parameters, the test that selects between the
 * ILSEQ/INVAL/UNKNOWN returns (presumably errno after g_iconv() fails) and
 * the condition guarding the final LEP_ICONV_FAILED are not visible in this
 * excerpt.
 */
209 static gint giconv_utf32_char(GIConv cd, const gchar *inbuf, size_t insize,
214 guchar outbuf[CHARSIZEUTF32];
218 outsize = sizeof(outbuf);
219 outbufp = (gchar *)outbuf;
/* The two g_iconv() calls below differ only in the cast applied to &inbuf;
 * presumably they are the two arms of this preprocessor conditional. */
220 #ifdef HAVE_ICONV_PROTO_CONST
221 r = g_iconv(cd, (const gchar **)&inbuf, &insize,
224 r = g_iconv(cd, (gchar **)&inbuf, &insize,
/* Call with all-null arguments: resets the descriptor's conversion state. */
228 g_iconv(cd, 0, 0, 0, 0);
231 return LEP_ICONV_ILSEQ;
233 return LEP_ICONV_INVAL;
235 return LEP_ICONV_UNKNOWN;
/* Accept the result only if all input was consumed and the output buffer
 * was filled completely. */
241 if( (insize > 0) || (outsize > 0) )
242 return LEP_ICONV_FAILED;
/* Fold the output bytes into one value, first byte most significant. */
245 for( i = 0; i < sizeof(outbuf); i++ ) {
246 value = (value << 8) + outbuf[i];
252 return LEP_ICONV_FAILED;
/*
 * Fills info->map for the character set named by charset by converting
 * candidate byte sequences to UTF-32BE with giconv_utf32_char(): every
 * possible first byte is tried on its own, and two- and three-byte sequences
 * are probed in nested loops.
 *
 * NOTE(review): most of the branch structure is not visible in this excerpt,
 * including what is stored in info->map when a conversion does not succeed,
 * which branch the nested loops belong to, and the return values. Confirm
 * against the full file before relying on details.
 */
256 static gint feed_parser_setup_unknown_encoding(const gchar *charset,
265 cd = g_iconv_open("UTF-32BE", charset);
/* g_iconv_open() reports failure as (GIConv)-1. */
266 if( cd == (GIConv) -1 )
/* One pass per possible first byte value. */
270 for( i = 0; i < 256; i++ ) {
274 r = giconv_utf32_char(cd, buf, 1, &value);
275 if( r == LEP_ICONV_OK) {
/* The single byte converts on its own: record its code point. */
276 info->map[i] = value;
277 } else if( r != LEP_ICONV_INVAL ) {
/* Probe two-byte sequences. */
279 for( j = 0; j < 256; j++ ) {
282 r = giconv_utf32_char(cd, buf, 2, &value);
283 if( r == LEP_ICONV_OK ) {
286 } else if( r != LEP_ICONV_INVAL ) {
/* Probe three-byte sequences. */
288 for( k = 0; k < 256; k++ ) {
291 r = giconv_utf32_char(cd, buf, 3, &value);
292 if( r == LEP_ICONV_OK) {
/*
 * Per-encoding state used by the unknown-encoding convert and release
 * callbacks. The fields are not visible in this excerpt; the code in this
 * file accesses a charset string (released with g_free()) and a GIConv
 * descriptor named cd (closed with g_iconv_close()).
 */
306 struct FeedParserUnknownEncoding {
/*
 * Convert callback installed by feed_parser_unknown_encoding_handler():
 * passes the byte sequence at s to giconv_utf32_char() using the iconv
 * descriptor kept in the FeedParserUnknownEncoding state.
 *
 * NOTE(review): how insize is determined and what is returned on success and
 * on failure are not visible in this excerpt.
 */
311 static gint feed_parser_unknown_encoding_convert(void *data, const gchar *s)
314 struct FeedParserUnknownEncoding *enc_data;
324 r = giconv_utf32_char(enc_data->cd, s, insize, &value);
/* Anything other than LEP_ICONV_OK is treated as a failed conversion. */
325 if( r != LEP_ICONV_OK )
/*
 * Release callback installed by feed_parser_unknown_encoding_handler():
 * frees the charset string and closes the iconv descriptor held in the
 * FeedParserUnknownEncoding state.
 *
 * NOTE(review): whether the state structure itself is freed as well is not
 * visible in this excerpt.
 */
331 static void feed_parser_unknown_encoding_data_free(void *data)
333 struct FeedParserUnknownEncoding *enc_data;
336 g_free(enc_data->charset);
337 g_iconv_close(enc_data->cd);
/*
 * Unknown-encoding handler registered by feed_parser_set_expat_handlers().
 * It first lets feed_parser_setup_unknown_encoding() fill info for the
 * encoding called name. One outcome of that call leaves the convert and
 * release callbacks unset; the other path opens an iconv descriptor to
 * UTF-32BE, allocates per-encoding state and installs
 * feed_parser_unknown_encoding_convert /
 * feed_parser_unknown_encoding_data_free.
 *
 * Returns XML_STATUS_OK when the encoding can be handled and
 * XML_STATUS_ERROR otherwise.
 *
 * NOTE(review): the conditions guarding several returns and any cleanup on
 * the error paths are not visible in this excerpt. Confirm that cd is closed
 * and data freed on every XML_STATUS_ERROR return that follows their
 * creation.
 */
341 int feed_parser_unknown_encoding_handler(void *encdata, const XML_Char *name,
345 struct FeedParserUnknownEncoding *data;
348 result = feed_parser_setup_unknown_encoding(name, info);
/* No convert/release callbacks are installed on this path. */
351 info->convert = NULL;
352 info->release = NULL;
353 return XML_STATUS_OK;
356 cd = g_iconv_open("UTF-32BE", name);
357 if( cd == (GIConv)-1 )
358 return XML_STATUS_ERROR;
/* NOTE(review): g_malloc() terminates the program when allocation fails
 * instead of returning NULL, so if the return below is guarded by a NULL
 * test on data, that test cannot fire. */
360 data = g_malloc( sizeof(*data) );
363 return XML_STATUS_ERROR;
/* NOTE(review): g_strdup() returns NULL only when its argument is NULL. */
366 data->charset = g_strdup(name);
367 if( data->charset == NULL ) {
370 return XML_STATUS_ERROR;
375 info->convert = feed_parser_unknown_encoding_convert;
376 info->release = feed_parser_unknown_encoding_data_free;
378 return XML_STATUS_OK;