fix CID 1596595: Resource leaks, and CID 1596594: (CHECKED_RETURN)
[claws.git] / src / common / xml.c
1 /*
2  * Claws Mail -- a GTK based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  * 
18  */
19
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #include "claws-features.h"
23 #endif
24
25 #include <glib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29
30 #include "xml.h"
31 #include "utils.h"
32 #include "codeconv.h"
33 #include "file-utils.h"
34
35 #define SPARSE_MEMORY
36 /* if this is defined all attr.names and tag.names are stored
37  * in a hash table */
38 #if defined(SPARSE_MEMORY)
39 #include "stringtable.h" 
40
41 static StringTable *xml_string_table;
42 static XMLTag  *xml_copy_tag            (XMLTag         *tag);
43 static XMLAttr *xml_copy_attr           (XMLAttr        *attr);
44 static void xml_free_node               (XMLNode        *node);
45 static void xml_free_tag                (XMLTag         *tag);
46 static void xml_pop_tag         (XMLFile        *file);
47 static void xml_push_tag                (XMLFile        *file,
48                                  XMLTag         *tag);
49 static gint xml_read_line               (XMLFile        *file);
50 static void xml_truncate_buf            (XMLFile        *file);
51 static gint xml_unescape_str            (gchar          *str);
52
53 static void xml_string_table_create(void)
54 {
55         if (xml_string_table == NULL)
56                 xml_string_table = string_table_new();
57 }
58 #define XML_STRING_ADD(str) \
59         string_table_insert_string(xml_string_table, (str))
60 #define XML_STRING_FREE(str) \
61         string_table_free_string(xml_string_table, (str))
62
63 #define XML_STRING_TABLE_CREATE() \
64         xml_string_table_create()
65
66 #else /* !SPARSE_MEMORY */
67
68 #define XML_STRING_ADD(str) \
69         g_strdup(str)
70 #define XML_STRING_FREE(str) \
71         g_free(str)
72
73 #define XML_STRING_TABLE_CREATE()
74
75 #endif /* SPARSE_MEMORY */
76
77 static gint xml_get_parenthesis (XMLFile        *file,
78                                  gchar          *buf,
79                                  gint            len);
80
81 XMLFile *xml_open_file(const gchar *path)
82 {
83         XMLFile *newfile;
84
85         cm_return_val_if_fail(path != NULL, NULL);
86
87         newfile = g_new(XMLFile, 1);
88
89         newfile->fp = claws_fopen(path, "rb");
90         if (!newfile->fp) {
91                 FILE_OP_ERROR(path, "fopen");
92                 g_free(newfile);
93                 return NULL;
94         }
95
96         XML_STRING_TABLE_CREATE();
97
98         newfile->buf = g_string_new(NULL);
99         newfile->bufp = newfile->buf->str;
100
101         newfile->dtd = NULL;
102         newfile->encoding = NULL;
103         newfile->tag_stack = NULL;
104         newfile->level = 0;
105         newfile->is_empty_element = FALSE;
106
107         newfile->path = g_strdup(path);
108
109         return newfile;
110 }
111
112 void xml_close_file(XMLFile *file)
113 {
114         cm_return_if_fail(file != NULL);
115
116         if (file->fp) claws_fclose(file->fp);
117
118         g_string_free(file->buf, TRUE);
119
120         g_free(file->dtd);
121         g_free(file->encoding);
122         g_free(file->path);
123
124         while (file->tag_stack != NULL)
125                 xml_pop_tag(file);
126
127         g_free(file);
128 }
129
130 static GNode *xml_build_tree(XMLFile *file, GNode *parent, guint level)
131 {
132         GNode *node = NULL;
133         XMLNode *xmlnode;
134         XMLTag *tag;
135
136         while (xml_parse_next_tag(file) == 0) {
137                 if (file->level < level) break;
138                 if (file->level == level) {
139                         g_warning("xml_build_tree(): parse error in %s", file->path);
140                         break;
141                 }
142
143                 tag = xml_get_current_tag(file);
144                 if (!tag) break;
145                 xmlnode = xml_node_new(xml_copy_tag(tag), NULL);
146                 xmlnode->element = xml_get_element(file);
147                 if (!parent)
148                         node = g_node_new(xmlnode);
149                 else
150                         node = g_node_append_data(parent, xmlnode);
151
152                 xml_build_tree(file, node, file->level);
153                 if (file->level == 0) break;
154         }
155
156         return node;
157 }
158
159 GNode *xml_parse_file(const gchar *path)
160 {
161         XMLFile *file;
162         GNode *node;
163
164         file = xml_open_file(path);
165         if (file == NULL)
166                 return NULL;
167
168         xml_get_dtd(file);
169
170         node = xml_build_tree(file, NULL, file->level);
171
172         xml_close_file(file);
173
174 #if defined(SPARSE_MEMORY)
175         if (debug_get_mode())
176                 string_table_get_stats(xml_string_table);
177 #endif
178
179         return node;
180 }
181
182 gint xml_get_dtd(XMLFile *file)
183 {
184         gchar buf[XMLBUFSIZE];
185         gchar *bufp = buf;
186
187         if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) return -1;
188
189         if ((*bufp++ == '?') &&
190             (bufp = strcasestr(bufp, "xml")) &&
191             (bufp = strcasestr(bufp + 3, "version")) &&
192             (bufp = strchr(bufp + 7, '?'))) {
193                 file->dtd = g_strdup(buf);
194                 if ((bufp = strcasestr(buf, "encoding=\""))) {
195                         bufp += 9;
196                         extract_quote(bufp, '"');
197                         file->encoding = g_strdup(bufp);
198                         file->need_codeconv =
199                                 g_strcmp0(bufp, CS_INTERNAL);
200                 } else {
201                         file->encoding = g_strdup(CS_INTERNAL);
202                         file->need_codeconv = FALSE;
203                 }
204         } else {
205                 g_warning("can't get XML DTD in %s", file->path);
206                 return -1;
207         }
208
209         return 0;
210 }
211
212 gint xml_parse_next_tag(XMLFile *file)
213 {
214         gchar buf[XMLBUFSIZE];
215         gchar *bufp = buf;
216         gchar *tag_str;
217         XMLTag *tag;
218         gint len;
219
220 next:
221         if (file->is_empty_element == TRUE) {
222                 file->is_empty_element = FALSE;
223                 xml_pop_tag(file);
224                 return 0;
225         }
226
227         if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) {
228                 g_warning("xml_parse_next_tag(): can't parse next tag  in %s", file->path);
229                 return -1;
230         }
231
232         len = strlen(buf);
233
234         /* end-tag */
235         if (buf[0] == '/') {
236                 if (strcmp(xml_get_current_tag(file)->tag, buf + 1) != 0) {
237                         g_warning("xml_parse_next_tag(): tag name mismatch in %s : %s (%s)", file->path, buf, xml_get_current_tag(file)->tag);
238                         return -1;
239                 }
240                 xml_pop_tag(file);
241                 return 0;
242         }
243
244         if (len >= 7 && !strncmp(buf, "!-- ", 4) && !strncmp(buf+len-3, " --", 3)) {
245                 /* skip comment */
246                 goto next;
247         }
248
249         tag = xml_tag_new(NULL);
250         xml_push_tag(file, tag);
251
252         if (len > 0 && buf[len - 1] == '/') {
253                 file->is_empty_element = TRUE;
254                 buf[len - 1] = '\0';
255                 g_strchomp(buf);
256         }
257         
258         if (strlen(buf) == 0) {
259                 g_warning("xml_parse_next_tag(): tag name is empty in %s", file->path);
260                 return -1;
261         }
262
263         while (*bufp != '\0' && !g_ascii_isspace(*bufp)) bufp++;
264         if (*bufp == '\0') {
265                 if (file->need_codeconv) {
266                         tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
267                         if (tag_str) {
268                                 tag->tag = XML_STRING_ADD(tag_str);
269                                 g_free(tag_str);
270                         } else
271                                 tag->tag = XML_STRING_ADD(buf);
272                 } else
273                         tag->tag = XML_STRING_ADD(buf);
274                 return 0;
275         } else {
276                 *bufp++ = '\0';
277                 if (file->need_codeconv) {
278                         tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
279                         if (tag_str) {
280                                 tag->tag = XML_STRING_ADD(tag_str);
281                                 g_free(tag_str);
282                         } else
283                                 tag->tag = XML_STRING_ADD(buf);
284                 } else
285                         tag->tag = XML_STRING_ADD(buf);
286         }
287
288         /* parse attributes ( name=value ) */
289         while (*bufp) {
290                 XMLAttr *attr;
291                 gchar *attr_name;
292                 gchar *attr_value;
293                 gchar *utf8_attr_name;
294                 gchar *utf8_attr_value;
295                 gchar *p;
296                 gchar quote;
297
298                 while (g_ascii_isspace(*bufp)) bufp++;
299                 attr_name = bufp;
300                 if ((p = strchr(attr_name, '=')) == NULL) {
301                         g_warning("xml_parse_next_tag(): syntax error in %s, tag (a) %s", file->path, attr_name);
302                         return -1;
303                 }
304                 bufp = p;
305                 *bufp++ = '\0';
306                 while (g_ascii_isspace(*bufp)) bufp++;
307
308                 if (*bufp != '"' && *bufp != '\'') {
309                         g_warning("xml_parse_next_tag(): syntax error in %s, tag (b) %s", file->path, bufp);
310                         return -1;
311                 }
312                 quote = *bufp;
313                 bufp++;
314                 attr_value = bufp;
315                 if ((p = strchr(attr_value, quote)) == NULL) {
316                         g_warning("xml_parse_next_tag(): syntax error in %s, tag (c) %s", file->path, attr_value);
317                         return -1;
318                 }
319                 bufp = p;
320                 *bufp++ = '\0';
321
322                 g_strchomp(attr_name);
323                 xml_unescape_str(attr_value);
324                 if (file->need_codeconv) {
325                         utf8_attr_name = conv_codeset_strdup
326                                 (attr_name, file->encoding, CS_INTERNAL);
327                         utf8_attr_value = conv_codeset_strdup
328                                 (attr_value, file->encoding, CS_INTERNAL);
329                         if (!utf8_attr_name)
330                                 utf8_attr_name = g_strdup(attr_name);
331                         if (!utf8_attr_value)
332                                 utf8_attr_value = g_strdup(attr_value);
333
334                         attr = xml_attr_new(utf8_attr_name, utf8_attr_value);
335                         g_free(utf8_attr_value);
336                         g_free(utf8_attr_name);
337                 } else {
338                         attr = xml_attr_new(attr_name, attr_value);
339                 }
340                 xml_tag_add_attr(tag, attr);
341
342         }
343         tag->attr = g_list_reverse(tag->attr);
344
345         return 0;
346 }
347
348 static void xml_push_tag(XMLFile *file, XMLTag *tag)
349 {
350         cm_return_if_fail(tag != NULL);
351
352         file->tag_stack = g_list_prepend(file->tag_stack, tag);
353         file->level++;
354 }
355
356 static void xml_pop_tag(XMLFile *file)
357 {
358         XMLTag *tag;
359
360         if (!file->tag_stack) return;
361
362         tag = (XMLTag *)file->tag_stack->data;
363
364         file->tag_stack = g_list_remove(file->tag_stack, tag);
365         xml_free_tag(tag);
366         file->level--;
367 }
368
369 XMLTag *xml_get_current_tag(XMLFile *file)
370 {
371         if (file->tag_stack)
372                 return (XMLTag *)file->tag_stack->data;
373         else
374                 return NULL;
375 }
376
377 GList *xml_get_current_tag_attr(XMLFile *file)
378 {
379         XMLTag *tag;
380
381         tag = xml_get_current_tag(file);
382         if (!tag) return NULL;
383
384         return tag->attr;
385 }
386
387 gchar *xml_get_element(XMLFile *file)
388 {
389         gchar *str;
390         gchar *new_str;
391         gchar *end;
392
393         while ((end = strchr(file->bufp, '<')) == NULL)
394                 if (xml_read_line(file) < 0) return NULL;
395
396         if (end == file->bufp)
397                 return NULL;
398
399         str = g_strndup(file->bufp, end - file->bufp);
400         /* this is not XML1.0 strict */
401         g_strstrip(str);
402         xml_unescape_str(str);
403
404         file->bufp = end;
405         xml_truncate_buf(file);
406
407         if (str[0] == '\0') {
408                 g_free(str);
409                 return NULL;
410         }
411
412         if (!file->need_codeconv)
413                 return str;
414
415         new_str = conv_codeset_strdup(str, file->encoding, CS_INTERNAL);
416         if (!new_str)
417                 new_str = g_strdup(str);
418         g_free(str);
419
420         return new_str;
421 }
422
423 static gint xml_read_line(XMLFile *file)
424 {
425         gchar buf[XMLBUFSIZE];
426         gint index;
427
428         if (claws_fgets(buf, sizeof(buf), file->fp) == NULL)
429                 return -1;
430
431         index = file->bufp - file->buf->str;
432
433         g_string_append(file->buf, buf);
434
435         file->bufp = file->buf->str + index;
436
437         return 0;
438 }
439
440 static void xml_truncate_buf(XMLFile *file)
441 {
442         gint len;
443
444         len = file->bufp - file->buf->str;
445         if (len > 0) {
446                 g_string_erase(file->buf, 0, len);
447                 file->bufp = file->buf->str;
448         }
449 }
450
451 gboolean xml_compare_tag(XMLFile *file, const gchar *name)
452 {
453         XMLTag *tag;
454
455         tag = xml_get_current_tag(file);
456
457         if (tag && strcmp(tag->tag, name) == 0)
458                 return TRUE;
459         else
460                 return FALSE;
461 }
462
463 XMLNode *xml_node_new(XMLTag *tag, const gchar *text)
464 {
465         XMLNode *node;
466
467         node = g_new(XMLNode, 1);
468         node->tag = tag;
469         node->element = g_strdup(text);
470
471         return node;
472 }
473
474 XMLTag *xml_tag_new(const gchar *tag)
475 {
476         XMLTag *new_tag;
477  
478         new_tag = g_new(XMLTag, 1);
479         if (tag)
480                 new_tag->tag = XML_STRING_ADD(tag);
481         else
482                 new_tag->tag = NULL;
483         new_tag->attr = NULL;
484  
485         return new_tag;
486 }
487
488 XMLAttr *xml_attr_new(const gchar *name, const gchar *value)
489 {
490         XMLAttr *new_attr;
491  
492         new_attr = g_new(XMLAttr, 1);
493         new_attr->name = XML_STRING_ADD(name);
494         new_attr->value = g_strdup(value);
495  
496         return new_attr;
497 }
498
499 XMLAttr *xml_attr_new_int(const gchar *name, const gint value)
500 {
501         XMLAttr *new_attr;
502         gchar *valuestr;
503
504         valuestr = g_strdup_printf("%d", value);
505
506         new_attr = g_new(XMLAttr, 1);
507         new_attr->name = XML_STRING_ADD(name);
508         new_attr->value = valuestr;
509  
510         return new_attr;
511 }
512
513 void xml_tag_add_attr(XMLTag *tag, XMLAttr *attr)
514 {
515         tag->attr = g_list_prepend(tag->attr, attr);
516 }
517
518 static XMLTag *xml_copy_tag(XMLTag *tag)
519 {
520         XMLTag *new_tag;
521         XMLAttr *attr;
522         GList *list;
523
524         new_tag = xml_tag_new(tag->tag);
525         for (list = tag->attr; list != NULL; list = list->next) {
526                 attr = xml_copy_attr((XMLAttr *)list->data);
527                 xml_tag_add_attr(new_tag, attr);
528         }
529         tag->attr = g_list_reverse(tag->attr);
530
531         return new_tag;
532 }
533
534 static XMLAttr *xml_copy_attr(XMLAttr *attr)
535 {
536         return xml_attr_new(attr->name, attr->value);
537 }
538
539 static gint xml_unescape_str(gchar *str)
540 {
541         gchar *start;
542         gchar *end;
543         gchar *p = str;
544         gchar *esc_str;
545         gchar ch;
546         gint len;
547
548         while ((start = strchr(p, '&')) != NULL) {
549                 if ((end = strchr(start + 1, ';')) == NULL) {
550                         g_warning("unescaped '&' appeared");
551                         p = start + 1;
552                         continue;
553                 }
554                 len = end - start + 1;
555                 if (len < 3) {
556                         p = end + 1;
557                         continue;
558                 }
559
560                 Xstrndup_a(esc_str, start, len, return -1);
561                 if (!strcmp(esc_str, "&lt;"))
562                         ch = '<';
563                 else if (!strcmp(esc_str, "&gt;"))
564                         ch = '>';
565                 else if (!strcmp(esc_str, "&amp;"))
566                         ch = '&';
567                 else if (!strcmp(esc_str, "&apos;"))
568                         ch = '\'';
569                 else if (!strcmp(esc_str, "&quot;"))
570                         ch = '\"';
571                 else {
572                         p = end + 1;
573                         continue;
574                 }
575
576                 *start = ch;
577                 memmove(start + 1, end + 1, strlen(end + 1) + 1);
578                 p = start + 1;
579         }
580
581         return 0;
582 }
583
584 gint xml_file_put_escape_str(FILE *fp, const gchar *str)
585 {
586         const gchar *p;
587         int result = 0;
588         cm_return_val_if_fail(fp != NULL, -1);
589
590         if (!str) return 0;
591
592         for (p = str; *p != '\0'; p++) {
593                 switch (*p) {
594                 case '<':
595                         result = claws_fputs("&lt;", fp);
596                         break;
597                 case '>':
598                         result = claws_fputs("&gt;", fp);
599                         break;
600                 case '&':
601                         result = claws_fputs("&amp;", fp);
602                         break;
603                 case '\'':
604                         result = claws_fputs("&apos;", fp);
605                         break;
606                 case '\"':
607                         result = claws_fputs("&quot;", fp);
608                         break;
609                 default:
610                         result = claws_fputc(*p, fp);
611                 }
612         }
613
614         return (result == EOF ? -1 : 0);
615 }
616
617 gint xml_file_put_xml_decl(FILE *fp)
618 {
619         cm_return_val_if_fail(fp != NULL, -1);
620         XML_STRING_TABLE_CREATE();
621
622         return fprintf(fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", CS_INTERNAL);
623 }
624
625 static void xml_free_node(XMLNode *node)
626 {
627         if (!node) return;
628
629         xml_free_tag(node->tag);
630         g_free(node->element);
631         g_free(node);
632 }
633
634 static gboolean xml_free_func(GNode *node, gpointer data)
635 {
636         XMLNode *xmlnode = node->data;
637
638         xml_free_node(xmlnode);
639         return FALSE;
640 }
641
642 void xml_free_tree(GNode *node)
643 {
644         cm_return_if_fail(node != NULL);
645
646         g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, xml_free_func,
647                         NULL);
648
649         g_node_destroy(node);
650 }
651
652 static void xml_free_tag(XMLTag *tag)
653 {
654         if (!tag) return;
655
656         XML_STRING_FREE(tag->tag);
657         while (tag->attr != NULL) {
658                 XMLAttr *attr = (XMLAttr *)tag->attr->data;
659                 tag->attr = g_list_remove(tag->attr, tag->attr->data);
660                 XML_STRING_FREE(attr->name);
661                 g_free(attr->value); /* __not__ XML_STRING_FREE */
662                 g_free(attr);
663         }
664         g_free(tag);
665 }
666
667 static gint xml_get_parenthesis(XMLFile *file, gchar *buf, gint len)
668 {
669         gchar *start;
670         gchar *end;
671
672         buf[0] = '\0';
673
674         while ((start = strchr(file->bufp, '<')) == NULL)
675                 if (xml_read_line(file) < 0) return -1;
676
677         start++;
678         file->bufp = start;
679
680         while ((end = strchr(file->bufp, '>')) == NULL)
681                 if (xml_read_line(file) < 0) return -1;
682
683         strncpy2(buf, file->bufp, MIN(end - file->bufp + 1, len));
684         g_strstrip(buf);
685         file->bufp = end + 1;
686         xml_truncate_buf(file);
687
688         return 0;
689 }
690
691 #define TRY(func) \
692 if (!(func)) \
693 { \
694         g_warning("failed to write part of XML tree"); \
695         return -1; \
696 } \
697
698 static int xml_write_tree_recursive(GNode *node, FILE *fp)
699 {
700         gint i, depth;
701         XMLTag *tag;
702         GList *cur;
703
704         cm_return_val_if_fail(node != NULL, -1);
705         cm_return_val_if_fail(fp != NULL, -1);
706
707         depth = g_node_depth(node) - 1;
708         for (i = 0; i < depth; i++)
709                 TRY(claws_fputs("    ", fp) != EOF);
710
711         tag = ((XMLNode *) node->data)->tag;
712
713         TRY(fprintf(fp, "<%s", tag->tag) > 0);
714
715         for (cur = tag->attr; cur != NULL; cur = g_list_next(cur)) {
716                 XMLAttr *attr = (XMLAttr *) cur->data;
717
718                 TRY(fprintf(fp, " %s=\"", attr->name) > 0);
719                 TRY(xml_file_put_escape_str(fp, attr->value) == 0);
720                 TRY(claws_fputs("\"", fp) != EOF);
721                 
722         }
723
724         if (node->children) {
725                 GNode *child;
726                 TRY(claws_fputs(">\n", fp) != EOF);
727
728                 child = node->children;
729                 while (child) {
730                         GNode *cur;
731
732                         cur = child;
733                         child = cur->next;
734                         TRY(xml_write_tree_recursive(cur, fp) == 0);
735                 }
736
737                 for (i = 0; i < depth; i++)
738                         TRY(claws_fputs("    ", fp) != EOF);
739                 TRY(fprintf(fp, "</%s>\n", tag->tag) > 0);
740         } else
741                 TRY(claws_fputs(" />\n", fp) != EOF);
742         
743         return 0;
744 }
745
746 #undef TRY
747
748 int xml_write_tree(GNode *node, FILE *fp)
749 {
750         return xml_write_tree_recursive(node, fp);
751 }
752
753 static gpointer copy_node_func(gpointer nodedata, gpointer data)
754 {
755         XMLNode *xmlnode = (XMLNode *) nodedata;
756         XMLNode *newxmlnode;
757         
758         newxmlnode = g_new0(XMLNode, 1);
759         newxmlnode->tag = xml_copy_tag(xmlnode->tag);
760         newxmlnode->element = g_strdup(xmlnode->element);
761
762         return newxmlnode;
763 }
764
765 GNode *xml_copy_tree(GNode *node)
766 {
767         return g_node_map(node, copy_node_func, NULL);
768 }