8f9f4d6bb79ab10d3bb55fed4c8297efb771c6dc
[claws.git] / src / plugins / rssyl / libfeed / parser_atom10.c
1 /*
2  * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19 #define __USE_GNU
20
21 #include <glib.h>
22 #include <expat.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include "feed.h"
27 #include "feeditem.h"
28 #include "date.h"
29 #include "parser.h"
30 #include "parser_atom10.h"
31
/* Locations inside an Atom 1.0 document that the handlers below tell apart.
 * The current one is kept in ctx->location.
 *
 * This is a typedef, so that FeedAtom10Locations names the enumeration type
 * instead of defining an unused global variable with external linkage. */
typedef enum {
        FEED_LOC_ATOM10_NONE,           /* not inside any tracked element */
        FEED_LOC_ATOM10_ENTRY,          /* inside <entry> */
        FEED_LOC_ATOM10_AUTHOR,         /* inside <author> */
        FEED_LOC_ATOM10_SOURCE,         /* inside <source> */
        FEED_LOC_ATOM10_CONTENT         /* inside <content type="xhtml"> */
} FeedAtom10Locations;
39
40 void feed_parser_atom10_start(void *data, const gchar *el, const gchar **attr)
41 {
42         FeedParserCtx *ctx = (FeedParserCtx *)data;
43         gchar *a = NULL;
44
45         if( ctx->depth == 1 ) {
46
47                 if( !strcmp(el, "entry") ) {
48                         /* Start of new feed item found.
49                          * Create a new FeedItem, freeing the one we already have, if any. */
50                         if( ctx->curitem != NULL )
51                                 feed_item_free(ctx->curitem);
52                         ctx->curitem = feed_item_new(ctx->feed);
53                         ctx->location = FEED_LOC_ATOM10_ENTRY;
54                 } else if( !strcmp(el, "author") ) {
55                         /* Start of author info for the feed found.
56                          * Set correct location. */
57                         ctx->location = FEED_LOC_ATOM10_AUTHOR;
58                 } else if( !strcmp(el, "link") ) {
59                         if (!feed_parser_get_attribute_value(attr, "rel")) {
60                                 /* Link tag for the feed */
61                                 g_free(ctx->feed->link);
62                                 ctx->feed->link =
63                                         g_strdup(feed_parser_get_attribute_value(attr, "href"));
64                         }
65                 } else ctx->location = FEED_LOC_ATOM10_NONE;
66
67         } else if( ctx->depth == 2 ) {
68
69                 /* Make sure we are in one of known locations within the XML structure.
70                  * This condition should never be true on a valid Atom feed. */
71                 if (ctx->location != FEED_LOC_ATOM10_AUTHOR &&
72                                 ctx->location != FEED_LOC_ATOM10_ENTRY) {
73                         ctx->depth++;
74                         return;
75                 }
76
77                 if( !strcmp(el, "author") ) {
78                         /* Start of author info for current feed item.
79                          * Set correct location. */
80                         ctx->location = FEED_LOC_ATOM10_AUTHOR;
81                 } else if( !strcmp(el, "link") ) {
82                         /* Capture item URL, from the "url" XML attribute. */
83                         if (ctx->curitem && ctx->location == FEED_LOC_ATOM10_ENTRY)
84                   ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
85                 } else if( !strcmp(el, "source") ) {
86                         ctx->location = FEED_LOC_ATOM10_SOURCE;
87                 } else ctx->location = FEED_LOC_ATOM10_ENTRY;
88
89                 if( !strcmp(el, "title") ) {
90                         a = feed_parser_get_attribute_value(attr, "type");
91                         if( !a || !strcmp(a, "text") )
92                                 ctx->curitem->title_format = FEED_ITEM_TITLE_TEXT;
93                         else if( !strcmp(a, "html") )
94                                 ctx->curitem->title_format = FEED_ITEM_TITLE_HTML;
95                         else if( !strcmp(a, "xhtml") )
96                                 ctx->curitem->title_format = FEED_ITEM_TITLE_XHTML;
97                         else
98                                 ctx->curitem->title_format = FEED_ITEM_TITLE_UNKNOWN;
99                 } else if (!strcmp(el, "content") ) {
100                         a = feed_parser_get_attribute_value(attr, "type");
101                         if (a && !strcmp(a, "xhtml")) {
102                                 ctx->curitem->xhtml_content = TRUE;
103                                 ctx->location = FEED_LOC_ATOM10_CONTENT;
104                         }
105                 }
106         }
107
108         ctx->depth++;
109 }
110
111 void feed_parser_atom10_end(void *data, const gchar *el)
112 {
113         FeedParserCtx *ctx = (FeedParserCtx *)data;
114         Feed *feed = ctx->feed;
115         gchar *text = NULL, *tmp;
116
117         if( ctx->str != NULL )
118                 text = g_strstrip(g_strdup(ctx->str->str));
119         else
120                 text = "";
121
122         switch( ctx->depth ) {
123
124                 case 0:
125                         /* Just in case. */
126                         break;
127
128                 case 1:
129
130                         if( !strcmp(el, "feed") ) {
131                                 /* We have finished parsing the feed, reverse the list
132                                  * so it's not upside down. */
133                                 feed->items = g_slist_reverse(ctx->feed->items);
134                         }
135
136                         break;
137
138                 case 2:
139
140                         /* decide if we just received </entry>, so we can
141                          * add a complete item to feed */
142                         if( !strcmp(el, "entry") ) {
143
144                                 /* Fix up URL, if it is relative */
145                                 if (ctx->curitem->url != NULL &&
146                                                 !strstr(ctx->curitem->url, "://") &&
147                                                 ctx->feed->link != NULL) {
148                                         tmp = g_strconcat(ctx->feed->link,
149                                                         (ctx->curitem->url[0] == '/' ? "" : "/"),
150                                                         ctx->curitem->url, NULL);
151                                         feed_item_set_url(ctx->curitem, tmp);
152                                         g_free(tmp);
153                                 }
154
155                                 /* append the complete feed item */
156                                 if( ctx->curitem->id && ctx->curitem->title
157                                                 && ctx->curitem->date_modified ) {
158                                         feed->items = 
159                                                 g_slist_prepend(feed->items, (gpointer)ctx->curitem);
160                                 }
161                                 
162                                 /* since it's in the linked list, lose this pointer */
163                                 ctx->curitem = NULL;
164
165                         } else if( !strcmp(el, "title") ) {     /* so it wasn't end of item */
166                                 FILL(feed->title)
167                         } else if( !strcmp(el, "summary" ) ) {
168                                 FILL(feed->description)
169                         } else if( !strcmp(el, "updated" ) ) {
170                                 feed->date = parseISO8601Date(text);
171                         }
172                         /* FIXME: add more later */
173
174                         break;
175
176                 case 3:
177
178                         if( ctx->curitem == NULL )
179                                 break;
180
181                         switch(ctx->location) {
182
183                                 /* We're in feed/entry */
184                                 case FEED_LOC_ATOM10_ENTRY:
185                                         if( !strcmp(el, "title") ) {
186                                                 FILL(ctx->curitem->title)
187                                         } else if( !strcmp(el, "summary") ) {
188                                                 FILL(ctx->curitem->summary)
189                                         } else if( !strcmp(el, "content") ) {
190                                                 if (!ctx->curitem->xhtml_content)
191                                                         FILL(ctx->curitem->text)
192                                         } else if( !strcmp(el, "id") ) {
193                                                 FILL(ctx->curitem->id)
194                                                 feed_item_set_id_permalink(ctx->curitem, TRUE);
195                                         } else if( !strcmp(el, "published") ) {
196                                                 ctx->curitem->date_published = parseISO8601Date(text);
197                                         } else if( !strcmp(el, "updated") ) {
198                                                 ctx->curitem->date_modified = parseISO8601Date(text);
199                                         }
200
201                                         break;
202
203                                 /* We're in feed/author or about to leave feed/entry/author */
204                                 case FEED_LOC_ATOM10_AUTHOR:
205                                         if( !strcmp(el, "author" ) ) {
206                                                 /* We just finished parsing <author> */
207                                                 ctx->curitem->author = g_strdup_printf("%s%s%s%s%s",
208                                                                 ctx->name ? ctx->name : "",
209                                                                 ctx->name && ctx->mail ? " <" : ctx->mail ? "<" : "",
210                                                                 ctx->mail ? ctx->mail : "",
211                                                                 ctx->mail ? ">" : "",
212                                                                 !ctx->name && !ctx->mail ? "N/A" : "");
213                                                 ctx->location = FEED_LOC_ATOM10_ENTRY;
214                                         } else if( !strcmp(el, "name") ) {
215                                                 FILL(feed->author)
216                                         }
217
218                                         break;
219                         }
220
221                         break;
222
223                 case 4:
224
225                         if( ctx->curitem == NULL )
226                                 break;
227
228                         switch(ctx->location) {
229
230                                 /* We're in feed/entry/author */
231                                 case FEED_LOC_ATOM10_AUTHOR:
232                                         if( !strcmp(el, "name") ) {
233                                                 FILL(ctx->name)
234                                         } else if( !strcmp(el, "email") ) {
235                                                 FILL(ctx->mail)
236                                         }
237
238                                         break;
239
240                                 /* We're in feed/entry/source */
241                                 case FEED_LOC_ATOM10_SOURCE:
242                                         if( !strcmp(el, "title" ) ) {
243                                                 FILL(ctx->curitem->sourcetitle)
244                                         } else if( !strcmp(el, "id" ) ) {
245                                                 FILL(ctx->curitem->sourceid)
246                                         } else if( !strcmp(el, "updated" ) ) {
247                                                 ctx->curitem->sourcedate = parseISO8601Date(text);
248                                         }
249
250                                         break;
251
252                                 case FEED_LOC_ATOM10_CONTENT:
253                                         if (!strcmp(el, "div") && ctx->curitem->xhtml_content)
254                                                 FILL(ctx->curitem->text)
255                                         break;
256
257                                 }
258
259
260                         break;
261         }
262
263         if( ctx->str != NULL ) {
264                 g_free(text);
265                 g_string_free(ctx->str, TRUE);
266                 ctx->str = NULL;
267         }
268         ctx->str = NULL;
269
270         ctx->depth--;
271 }