gchar b[ENTITY_MAX_LEN];
gchar *p = str, *res;
gboolean hex = FALSE;
- gunichar c;
+ gunichar c = -1;
+ gint ret;
++p;
if (*p == '\0')
if (entity_extract_to_buffer (p, b) == NULL)
return NULL;
- c = g_ascii_strtoll (b, NULL, (hex? 16: 10));
+ if (strlen(b) > 0)
+ c = g_ascii_strtoll (b, NULL, (hex ? 16 : 10));
+
+ if (c < 0) {
+ /* Obviously invalid */
+ debug_print("Numeric reference '&#%s;' is invalid\n", b);
+ return NULL;
+ } else if (c >= 0 && c <= 31) {
+ /* An unprintable character; return the Unicode replacement symbol */
+ return g_strdup("\xef\xbf\xbd");
+ } else if (c > 0x10ffff) {
+ /* Make sure the character falls within the Unicode codespace
+ * (0x0 - 0x10ffff) */
+ debug_print("Numeric reference '&#%s;' is invalid, outside of Unicode codespace\n", b);
+ return NULL;
+ }
+
res = g_malloc0 (DECODED_MAX_LEN + 1);
- g_unichar_to_utf8 (c, res);
+ ret = g_unichar_to_utf8 (c, res);
+ if (ret == 0) {
+ debug_print("Failed to convert unicode character %u to UTF-8\n", c);
+ g_free(res);
+ res = NULL;
+ }
return res;
}
#include <glib.h>
+#include <stdio.h>
+
#include "mock_debug_print.h"
#include "entity.h"
static void
-test_entity(void)
+test_entity_invalid(void)
{
gchar *result;
+ /* Invalid entity strings */
result = entity_decode(NULL);
g_assert_null(result);
-
+ result = entity_decode("");
+ g_assert_null(result);
result = entity_decode("foo");
g_assert_null(result);
+ result = entity_decode("&");
+ g_assert_null(result);
+ result = entity_decode("&;");
+ g_assert_null(result);
+ result = entity_decode("&#");
+ g_assert_null(result);
+ result = entity_decode("&#;");
+ g_assert_null(result);
+
+ /* Valid entity string, but with missing semicolon */
+ result = entity_decode("Á");
+ g_assert_null(result);
+ result = entity_decode("{");
+ g_assert_null(result);
+}
+
+static void
+test_entity_toolong(void)
+{
+ gchar *result;
+
+ /* Largest unicode code point is 0x10ffff, let's test that,
+ * and one past it */
+ result = entity_decode("");
+ g_assert_nonnull(result);
+ g_free(result);
+ result = entity_decode("�");
+ g_assert_null(result);
+
+ /* ENTITY_MAX_LEN is 8, test 8- and 9-char entity strings
+ * for possible buffer overflows */
+ result = entity_decode("�");
+ g_assert_null(result);
+ result = entity_decode("�");
+ g_assert_null(result);
+}
+
+static void
+test_entity_unprintable(void)
+{
+ gchar *result, numstr[6]; /* "&#XX;" */
+ gint i;
+
+ for (i = 0; i < 32; i++) {
+ sprintf(numstr, "&#%d;", i);
+ result = entity_decode(numstr);
+ g_assert_nonnull(result);
+ g_assert_cmpstr(result, ==, "\xef\xbf\xbd");
+ g_free(result);
+ }
+}
+
+static void
+test_entity_valid(void)
+{
+ gchar *result;
result = entity_decode("Á");
g_assert_nonnull(result);
g_printerr("result '%s'\n", result);
g_assert_cmpstr(result, ==, "{");
g_free(result);
+
}
int
{
g_test_init(&argc, &argv, NULL);
- g_test_add_func("/core/entity", test_entity);
+ g_test_add_func("/core/entity/invalid", test_entity_invalid);
+ g_test_add_func("/core/entity/toolong", test_entity_toolong);
+ g_test_add_func("/core/entity/unprintable", test_entity_unprintable);
+ g_test_add_func("/core/entity/valid", test_entity_valid);
return g_test_run();
}