addritem_person_get_picture() now returns something useful: a copy
[claws.git] / src / entity.c
1 /*
2  * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 2017 Ricardo Mones and the Claws Mail team
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #include "claws-features.h"
21 #endif
22
23 #include "defs.h"
24 #include "utils.h"
25 #include "entity.h"
26
/*
 * Size of the scratch buffer that receives an entity body (the text between
 * '&' / "&#" / "&#x" and ';'), INCLUDING the terminating NUL.  The longest
 * name in the symbol table is "thetasym" (8 characters), so the buffer must
 * hold at least 9 bytes; with the previous value of 8 that entity could
 * never be extracted.
 */
#define ENTITY_MAX_LEN 9
/*
 * Maximum number of bytes g_unichar_to_utf8() may write for one character;
 * decoded results are allocated with one extra byte for the terminator.
 */
#define DECODED_MAX_LEN 6
29
30 static GHashTable *symbol_table = NULL;
31
32 typedef struct _EntitySymbol EntitySymbol;
33
34 struct _EntitySymbol
35 {
36         gchar *const key;
37         gchar *const value;
38 };
39
40 /* in alphabetical order with upper-case version first */
41 static EntitySymbol symbolic_entities[] = {
42         /* A */
43         {"Aacute", "Á"},
44         {"aacute", "á"},
45         {"Acirc", "Â"},
46         {"acirc", "â"},
47         {"acute", "´"},
48         {"AElig", "Æ"},
49         {"aelig", "æ"},
50         {"Agrave", "À"},
51         {"agrave", "à"},
52         {"alefsym", "ℵ"},
53         {"Alpha", "Α"},
54         {"alpha", "α"},
55         {"amp", "&"},
56         {"and", "∧"},
57         {"ang", "∠"},
58         {"apos", "'"},
59         {"Aring", "Å"},
60         {"aring", "å"},
61         {"asymp", "≈"},
62         {"Atilde", "Ã"},
63         {"atilde", "ã"},
64         {"Auml", "Ä"},
65         {"auml", "ä"},
66         /* B */
67         {"bdquo", "„"},
68         {"Beta", "Β"},
69         {"beta", "β"},
70         {"brvbar", "¦"},
71         {"bull", "•"},
72         /* C */
73         {"cap", "∩"},
74         {"Ccedil", "Ç"},
75         {"ccedil", "ç"},
76         {"cedil", "¸"},
77         {"cent", "¢"},
78         {"Chi", "Χ"},
79         {"chi", "χ"},
80         {"circ", "ˆ"},
81         {"clubs", "♣"},
82         {"cong", "≅"},
83         {"copy", "©"},
84         {"crarr", "↵"},
85         {"cup", "∪"},
86         {"curren", "¤"},
87         /* D */
88         {"dagger", "†"},
89         {"Dagger", "‡"},
90         {"dArr", "⇓"},
91         {"darr", "↓"},
92         {"deg", "°"},
93         {"Delta", "Δ"},
94         {"delta", "δ"},
95         {"diams", "♦"},
96         {"divide", "÷"},
97         /* E */
98         {"Eacute", "É"},
99         {"eacute", "é"},
100         {"Ecirc", "Ê"},
101         {"ecirc", "ê"},
102         {"Egrave", "È"},
103         {"egrave", "è"},
104         {"empty", "∅"},
105         {"emsp", "\xE2\x80\x83"},
106         {"ensp", "\xE2\x80\x82"},
107         {"Epsilon", "Ε"},
108         {"epsilon", "ε"},
109         {"equiv", "≡"},
110         {"Eta", "Η"},
111         {"eta", "η"},
112         {"ETH", "Ð"},
113         {"eth", "ð"},
114         {"Euml", "Ë"},
115         {"euml", "ë"},
116         {"euro", "€"},
117         {"exist", "∃"},
118         /* F */
119         {"fnof", "ƒ"},
120         {"forall", "∀"},
121         {"frac12", "½"},
122         {"frac14", "¼"},
123         {"frac34", "¾"},
124         {"frasl", "⁄"},
125         /* G */
126         {"Gamma", "Γ"},
127         {"gamma", "γ"},
128         {"ge", "≥"},
129         {"gt", ">"},
130         /* H */
131         {"hArr", "⇔"},
132         {"harr", "↔"},
133         {"hearts", "♥"},
134         {"hellip", "…"},
135         /* I */
136         {"Iacute", "Í"},
137         {"iacute", "í"},
138         {"IArr", "⇐"},
139         {"Icirc", "Î"},
140         {"icirc", "î"},
141         {"iexcl", "¡"},
142         {"Igrave", "Ì"},
143         {"igrave", "ì"},
144         {"image", "ℑ"},
145         {"infin", "∞"},
146         {"int", "∫"},
147         {"Iota", "Ι"},
148         {"iota", "ι"},
149         {"iquest", "¿"},
150         {"isin", "∈"},
151         {"Iuml", "Ï"},
152         {"iuml", "ï"},
153         /* K */
154         {"Kappa", "Κ"},
155         {"kappa", "κ"},
156         /* L */
157         {"Lambda", "Λ"},
158         {"lambda", "λ"},
159         {"lang", "〈"},
160         {"laquo", "«"},
161         {"larr", "←"},
162         {"lceil", "⌈"},
163         {"ldquo", "“"},
164         {"le", "≤"},
165         {"lfloor", "⌊"},
166         {"lowast", "∗"},
167         {"loz", "◊"},
168         {"lrm", "\xE2\x80\x8E"},
169         {"lsaquo", "‹"},
170         {"lsquo", "‘"},
171         {"lt", "<"},
172         /* M */
173         {"macr", "¯"},
174         {"mdash", "—"},
175         {"micro", "µ"},
176         {"middot", "·"},
177         {"minus", "−"},
178         {"Mu", "Μ"},
179         {"mu", "μ"},
180         /* N */
181         {"nabla", "∇"},
182         {"nbsp", "\xC2\xA0"},
183         {"ndash", "–"},
184         {"ne", "≠"},
185         {"ni", "∋"},
186         {"not", "¬"},
187         {"notin", "∉"},
188         {"nsub", "⊄"},
189         {"Ntilde", "Ñ"},
190         {"ntilde", "ñ"},
191         {"Nu", "Ν"},
192         {"nu", "ν"},
193         /* O */
194         {"Oacute", "Ó"},
195         {"oacute", "ó"},
196         {"Ocirc", "Ô"},
197         {"ocirc", "ô"},
198         {"OElig", "Œ"},
199         {"oelig", "œ"},
200         {"Ograve", "Ò"},
201         {"ograve", "ò"},
202         {"oline", "‾"},
203         {"Omega", "Ω"},
204         {"omega", "ω"},
205         {"Omicron", "Ο"},
206         {"omicron", "ο"},
207         {"oplus", "⊕"},
208         {"or", "∨"},
209         {"ordf", "ª"},
210         {"ordm", "º"},
211         {"Oslash", "Ø"},
212         {"oslash", "ø"},
213         {"Otilde", "Õ"},
214         {"otilde", "õ"},
215         {"otimes", "⊗"},
216         {"Ouml", "Ö"},
217         {"ouml", "ö"},
218         /* P */
219         {"para", "¶"},
220         {"part", "∂"},
221         {"permil", "‰"},
222         {"perp", "⊥"},
223         {"Phi", "Φ"},
224         {"phi", "φ"},
225         {"Pi", "Π"},
226         {"pi", "π"},
227         {"piv", "ϖ"},
228         {"plusmn", "±"},
229         {"pound", "£"},
230         {"Prime", "″"},
231         {"prime", "′"},
232         {"prod", "∏"},
233         {"prop", "∝"},
234         {"Psi", "Ψ"},
235         {"psi", "ψ"},
236         /* Q */
237         {"quot", "\""},
238         /* R */
239         {"radic", "√"},
240         {"rang", "〉"},
241         {"raquo", "»"},
242         {"rArr", "⇒"},
243         {"rarr", "→"},
244         {"rceil", "⌉"},
245         {"rdquo", "”"},
246         {"real", "ℜ"},
247         {"reg", "®"},
248         {"rfloor", "⌋"},
249         {"Rho", "Ρ"},
250         {"rho", "ρ"},
251         {"rlm", "\xE2\x80\x8F"},
252         {"rsaquo", "›"},
253         {"rsquo", "’"},
254         /* S */
255         {"sbquo", "‚"},
256         {"Scaron", "Š"},
257         {"scaron", "š"},
258         {"sdot", "⋅"},
259         {"sect", "§"},
260         {"shy", "\xC2\xAD"},
261         {"Sigma", "Σ"},
262         {"sigma", "σ"},
263         {"sigmaf", "ς"},
264         {"sim", "∼"},
265         {"spades", "♠"},
266         {"sub", "⊂"},
267         {"sube", "⊆"},
268         {"sum", "∑"},
269         {"sup", "⊃"},
270         {"sup1", "¹"},
271         {"sup2", "²"},
272         {"sup3", "³"},
273         {"supe", "⊇"},
274         {"szlig", "ß"},
275         /* T */
276         {"Tau", "Τ"},
277         {"tau", "τ"},
278         {"there4", "∴"},
279         {"Theta", "Θ"},
280         {"theta", "θ"},
281         {"thetasym", "ϑ"},
282         {"thinsp", "\xE2\x80\x89"},
283         {"THORN", "Þ"},
284         {"thorn", "þ"},
285         {"tilde", "˜"},
286         {"times", "×"},
287         {"trade", "™"},
288         /* U */
289         {"Uacute", "Ú"},
290         {"uacute", "ú"},
291         {"uArr", "⇑"},
292         {"uarr", "↑"},
293         {"Ucirc", "Û"},
294         {"ucirc", "û"},
295         {"Ugrave", "Ù"},
296         {"ugrave", "ù"},
297         {"uml", "¨"},
298         {"upsih", "ϒ"},
299         {"Upsilon", "Υ"},
300         {"upsilon", "υ"},
301         {"Uuml", "Ü"},
302         {"uuml", "ü"},
303         /* W */
304         {"weierp", "℘"},
305         /* X */
306         {"Xi", "Ξ"},
307         {"xi", "ξ"},
308         /* Y */
309         {"Yacute", "Ý"},
310         {"yacute", "ý"},
311         {"yen", "¥"},
312         {"Yuml", "Ÿ"},
313         {"yuml", "ÿ"},
314         /* Z */
315         {"Zeta", "Ζ"},
316         {"zeta", "ζ"},
317         {"zwj", "\xE2\x80\x8D"},
318         {"zwnj", "\xE2\x80\x8C"},
319         {NULL, NULL}
320 };
321
322 static gchar* entity_extract_to_buffer(gchar *p, gchar b[])
323 {
324         gint i = 0;
325
326         while (*p != '\0' && *p != ';' && i < ENTITY_MAX_LEN) {
327                 b[i] = *p;
328                 ++i, ++p;
329         }
330         if (*p != ';' || i == ENTITY_MAX_LEN)
331                 return NULL;
332         b[i] = '\0';
333
334         return b;
335 }
336
337 static gchar *entity_decode_numeric(gchar *str)
338 {
339         gchar b[ENTITY_MAX_LEN];
340         gchar *p = str, *res;
341         gboolean hex = FALSE;
342         gunichar c;
343
344         ++p;
345         if (*p == '\0')
346                 return NULL;
347
348         if (*p == 'x') {
349                 hex = TRUE;
350                 ++p;
351                 if (*p == '\0')
352                         return NULL;
353         }
354
355         if (entity_extract_to_buffer (p, b) == NULL)
356                 return NULL;
357
358         c = g_ascii_strtoll (b, NULL, (hex? 16: 10));
359         res = g_malloc0 (DECODED_MAX_LEN + 1);
360         g_unichar_to_utf8 (c, res);
361
362         return res;
363 }
364
365 static gchar *entity_decode_symbol(gchar *str)
366 {
367         gchar b[ENTITY_MAX_LEN];
368         gchar *decoded;
369
370         if (entity_extract_to_buffer (str, b) == NULL)
371                 return NULL;
372
373         if (symbol_table == NULL) {
374                 gint i;
375
376                 symbol_table = g_hash_table_new (g_str_hash, g_str_equal);
377                 for (i = 0; symbolic_entities[i].key != NULL; ++i) {
378                         g_hash_table_insert (symbol_table,
379                                 symbolic_entities[i].key, symbolic_entities[i].value);
380                 }
381                 debug_print("initialized entities table with %d symbols\n", i);
382         }
383
384         decoded = g_hash_table_lookup (symbol_table, b);
385         if (decoded != NULL)
386                 return g_strdup (decoded);
387
388         return NULL;
389 }
390
391 gchar *entity_decode(gchar *str)
392 {
393         gchar *p = str;
394         if (p == NULL || *p != '&')
395                 return NULL;
396         ++p;
397         if (*p == '\0')
398                 return NULL;
399         if (*p == '#')
400                 return entity_decode_numeric(p);
401         else
402                 return entity_decode_symbol(p);
403 }