1 /* Copyright (C) 1991-1993, 1996-2000, 2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 3 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library. If not, see <http://www.gnu.org/licenses/>.
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int FCT (const CHAR *pattern, const CHAR *string,
22 const CHAR *string_end, int no_leading_period, int flags)
24 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
25 const CHAR *string_end, int no_leading_period, int flags)
27 static const CHAR *END (const CHAR *patternp) internal_function;
30 #define __builtin_expect(op,val) ((op) == (val))
35 FCT (pattern, string, string_end, no_leading_period, flags)
38 const CHAR *string_end;
39 int no_leading_period;
42 register const CHAR *p = pattern, *n = string;
45 # if WIDE_CHAR_VERSION
46 const char *collseq = (const char *)
47 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
49 const UCHAR *collseq = (const UCHAR *)
50 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
54 while ((c = *p++) != L('\0'))
56 int new_no_leading_period = 0;
62 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
66 res = EXT (c, p, n, string_end, no_leading_period,
74 else if (*n == L('/') && (flags & FNM_FILE_NAME))
76 else if (*n == L('.') && no_leading_period)
81 if (!(flags & FNM_NOESCAPE))
85 /* Trailing \ loses. */
89 if (n == string_end || FOLD ((UCHAR) *n) != c)
94 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
98 res = EXT (c, p, n, string_end, no_leading_period,
104 if (n != string_end && *n == L('.') && no_leading_period)
107 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
109 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
111 const CHAR *endp = END (p);
114 /* This is a pattern. Skip over it. */
122 /* A ? needs to match one character. */
124 /* There isn't another character; no match. */
126 else if (*n == L('/')
127 && __builtin_expect (flags & FNM_FILE_NAME, 0))
128 /* A slash does not match a wildcard under
132 /* One character of the string is consumed in matching
133 this ? wildcard, so *??? won't match if there are
134 less than three characters. */
140 /* The wildcard(s) is/are the last element of the pattern.
141 If the name is a file name and contains another slash
142 this means it cannot match, unless the FNM_LEADING_DIR
145 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
147 if (flags & FNM_FILE_NAME)
149 if (flags & FNM_LEADING_DIR)
153 if (MEMCHR (n, L('/'), string_end - n) == NULL)
164 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
170 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
171 && (c == L('@') || c == L('+') || c == L('!'))
174 int flags2 = ((flags & FNM_FILE_NAME)
175 ? flags : (flags & ~FNM_PERIOD));
176 int no_leading_period2 = no_leading_period;
178 for (--p; n < endp; ++n, no_leading_period2 = 0)
179 if (FCT (p, n, string_end, no_leading_period2, flags2)
183 else if (c == L('/') && (flags & FNM_FILE_NAME))
185 while (n < string_end && *n != L('/'))
187 if (n < string_end && *n == L('/')
188 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
194 int flags2 = ((flags & FNM_FILE_NAME)
195 ? flags : (flags & ~FNM_PERIOD));
196 int no_leading_period2 = no_leading_period;
198 if (c == L('\\') && !(flags & FNM_NOESCAPE))
201 for (--p; n < endp; ++n, no_leading_period2 = 0)
202 if (FOLD ((UCHAR) *n) == c
203 && (FCT (p, n, string_end, no_leading_period2, flags2)
209 /* If we come here no match is possible with the wildcard. */
214 /* Nonzero if the sense of the character class is inverted. */
219 if (posixly_correct == 0)
220 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
225 if (*n == L('.') && no_leading_period)
228 if (*n == L('/') && (flags & FNM_FILE_NAME))
229 /* `/' cannot be matched. */
232 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
236 fn = FOLD ((UCHAR) *n);
241 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
245 c = FOLD ((UCHAR) *p);
251 else if (c == L('[') && *p == L(':'))
253 /* Leave room for the null. */
254 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
256 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
259 const CHAR *startp = p;
263 if (c1 == CHAR_CLASS_MAX_LENGTH)
264 /* The name is too long and therefore the pattern
269 if (c == L(':') && p[1] == L(']'))
274 if (c < L('a') || c >= L('z'))
276 /* This cannot possibly be a character class name.
277 Match it as a normal range. */
286 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
287 wt = IS_CHAR_CLASS (str);
289 /* Invalid character class name. */
292 # if defined _LIBC && ! WIDE_CHAR_VERSION
293 /* The following code is glibc specific but does
294 there a good job in speeding up the code since
295 we can avoid the btowc() call. */
296 if (_ISCTYPE ((UCHAR) *n, wt))
299 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
303 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
304 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
305 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
306 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
307 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
308 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
309 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
310 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
311 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
312 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
313 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
314 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
320 else if (c == L('[') && *p == L('='))
324 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
325 const CHAR *startp = p;
337 if (c != L('=') || p[1] != L(']'))
347 if ((UCHAR) *n == str[0])
352 const int32_t *table;
353 # if WIDE_CHAR_VERSION
354 const int32_t *weights;
355 const int32_t *extra;
357 const unsigned char *weights;
358 const unsigned char *extra;
360 const int32_t *indirect;
362 const UCHAR *cp = (const UCHAR *) str;
364 /* This #include defines a local function! */
365 # if WIDE_CHAR_VERSION
366 # include <locale/weightwc.h>
368 # include <locale/weight.h>
371 # if WIDE_CHAR_VERSION
372 table = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
374 weights = (const int32_t *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
376 extra = (const int32_t *)
377 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
378 indirect = (const int32_t *)
379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
381 table = (const int32_t *)
382 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
383 weights = (const unsigned char *)
384 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
385 extra = (const unsigned char *)
386 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
387 indirect = (const int32_t *)
388 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
394 /* We found a table entry. Now see whether the
395 character we are currently at has the same
396 equivalance class value. */
397 int len = weights[idx];
399 const UCHAR *np = (const UCHAR *) n;
401 idx2 = findidx (&np);
402 if (idx2 != 0 && len == weights[idx2])
407 && (weights[idx + 1 + cnt]
408 == weights[idx2 + 1 + cnt]))
420 else if (c == L('\0'))
421 /* [ (unterminated) loses. */
430 if (c == L('[') && *p == L('.'))
433 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
434 const CHAR *startp = p;
440 if (c == L('.') && p[1] == L(']'))
450 /* We have to handling the symbols differently in
451 ranges since then the collation sequence is
453 is_range = *p == L('-') && p[1] != L('\0');
457 /* There are no names defined in the collation
458 data. Therefore we only accept the trivial
459 names consisting of the character itself. */
463 if (!is_range && *n == startp[1])
472 const int32_t *symb_table;
473 # ifdef WIDE_CHAR_VERSION
477 # define str (startp + 1)
479 const unsigned char *extra;
485 # ifdef WIDE_CHAR_VERSION
486 /* We have to convert the name to a single-byte
487 string. This is possible since the names
488 consist of ASCII characters and the internal
489 representation is UCS4. */
490 for (strcnt = 0; strcnt < c1; ++strcnt)
491 str[strcnt] = startp[1 + strcnt];
495 _NL_CURRENT_WORD (LC_COLLATE,
496 _NL_COLLATE_SYMB_HASH_SIZEMB);
497 symb_table = (const int32_t *)
498 _NL_CURRENT (LC_COLLATE,
499 _NL_COLLATE_SYMB_TABLEMB);
500 extra = (const unsigned char *)
501 _NL_CURRENT (LC_COLLATE,
502 _NL_COLLATE_SYMB_EXTRAMB);
504 /* Locate the character in the hashing table. */
505 hash = elem_hash (str, c1);
508 elem = hash % table_size;
509 second = hash % (table_size - 2);
510 while (symb_table[2 * elem] != 0)
512 /* First compare the hashing value. */
513 if (symb_table[2 * elem] == hash
514 && c1 == extra[symb_table[2 * elem + 1]]
516 &extra[symb_table[2 * elem + 1]
519 /* Yep, this is the entry. */
520 idx = symb_table[2 * elem + 1];
521 idx += 1 + extra[idx];
529 if (symb_table[2 * elem] != 0)
531 /* Compare the byte sequence but only if
532 this is not part of a range. */
533 # ifdef WIDE_CHAR_VERSION
536 idx += 1 + extra[idx];
537 /* Adjust for the alignment. */
538 idx = (idx + 3) & ~3;
540 wextra = (int32_t *) &extra[idx + 4];
545 # ifdef WIDE_CHAR_VERSION
546 for (c1 = 0; c1 < wextra[idx]; ++c1)
547 if (n[c1] != wextra[1 + c1])
550 if (c1 == wextra[idx])
553 for (c1 = 0; c1 < extra[idx]; ++c1)
554 if (n[c1] != extra[1 + c1])
557 if (c1 == extra[idx])
562 /* Get the collation sequence value. */
564 # ifdef WIDE_CHAR_VERSION
565 cold = wextra[1 + wextra[idx]];
567 /* Adjust for the alignment. */
568 idx += 1 + extra[idx];
569 idx = (idx + 3) & ~4;
570 cold = *((int32_t *) &extra[idx]);
577 /* No valid character. Match it as a
579 if (!is_range && *n == str[0])
596 /* We have to handling the symbols differently in
597 ranges since then the collation sequence is
599 is_range = *p == L('-') && p[1] != L('\0');
601 if (!is_range && c == fn)
608 if (c == L('-') && *p != L(']'))
611 /* We have to find the collation sequence
612 value for C. Collation sequence is nothing
613 we can regularly access. The sequence
614 value is defined by the order in which the
615 definitions of the collation values for the
616 various characters appear in the source
617 file. A strange concept, nowhere
623 # ifdef WIDE_CHAR_VERSION
624 /* Search in the `names' array for the characters. */
625 fcollseq = collseq_table_lookup (collseq, fn);
626 if (fcollseq == ~((uint32_t) 0))
627 /* XXX We don't know anything about the character
628 we are supposed to match. This means we are
630 goto range_not_matched;
635 lcollseq = collseq_table_lookup (collseq, cold);
637 fcollseq = collseq[fn];
638 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
642 if (cend == L('[') && *p == L('.'))
645 _NL_CURRENT_WORD (LC_COLLATE,
647 const CHAR *startp = p;
653 if (c == L('.') && p[1] == L(']'))
665 /* There are no names defined in the
666 collation data. Therefore we only
667 accept the trivial names consisting
668 of the character itself. */
677 const int32_t *symb_table;
678 # ifdef WIDE_CHAR_VERSION
682 # define str (startp + 1)
684 const unsigned char *extra;
690 # ifdef WIDE_CHAR_VERSION
691 /* We have to convert the name to a single-byte
692 string. This is possible since the names
693 consist of ASCII characters and the internal
694 representation is UCS4. */
695 for (strcnt = 0; strcnt < c1; ++strcnt)
696 str[strcnt] = startp[1 + strcnt];
700 _NL_CURRENT_WORD (LC_COLLATE,
701 _NL_COLLATE_SYMB_HASH_SIZEMB);
702 symb_table = (const int32_t *)
703 _NL_CURRENT (LC_COLLATE,
704 _NL_COLLATE_SYMB_TABLEMB);
705 extra = (const unsigned char *)
706 _NL_CURRENT (LC_COLLATE,
707 _NL_COLLATE_SYMB_EXTRAMB);
709 /* Locate the character in the hashing
711 hash = elem_hash (str, c1);
714 elem = hash % table_size;
715 second = hash % (table_size - 2);
716 while (symb_table[2 * elem] != 0)
718 /* First compare the hashing value. */
719 if (symb_table[2 * elem] == hash
721 == extra[symb_table[2 * elem + 1]])
723 &extra[symb_table[2 * elem + 1]
726 /* Yep, this is the entry. */
727 idx = symb_table[2 * elem + 1];
728 idx += 1 + extra[idx];
736 if (symb_table[2 * elem] != 0)
738 /* Compare the byte sequence but only if
739 this is not part of a range. */
740 # ifdef WIDE_CHAR_VERSION
743 idx += 1 + extra[idx];
744 /* Adjust for the alignment. */
745 idx = (idx + 3) & ~4;
747 wextra = (int32_t *) &extra[idx + 4];
749 /* Get the collation sequence value. */
751 # ifdef WIDE_CHAR_VERSION
752 cend = wextra[1 + wextra[idx]];
754 /* Adjust for the alignment. */
755 idx += 1 + extra[idx];
756 idx = (idx + 3) & ~4;
757 cend = *((int32_t *) &extra[idx]);
760 else if (symb_table[2 * elem] != 0 && c1 == 1)
772 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
779 /* XXX It is not entirely clear to me how to handle
780 characters which are not mentioned in the
781 collation specification. */
783 # ifdef WIDE_CHAR_VERSION
784 lcollseq == 0xffffffff ||
786 lcollseq <= fcollseq)
788 /* We have to look at the upper bound. */
795 # ifdef WIDE_CHAR_VERSION
797 collseq_table_lookup (collseq, cend);
798 if (hcollseq == ~((uint32_t) 0))
800 /* Hum, no information about the upper
801 bound. The matching succeeds if the
802 lower bound is matched exactly. */
803 if (lcollseq != fcollseq)
804 goto range_not_matched;
809 hcollseq = collseq[cend];
813 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
816 # ifdef WIDE_CHAR_VERSION
820 /* We use a boring value comparison of the character
821 values. This is better than comparing using
822 `strcoll' since the latter would have surprising
823 and sometimes fatal consequences. */
826 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
832 if (cold <= fn && fn <= cend)
849 /* Skip the rest of the [...] that already matched. */
856 /* [... (unterminated) loses. */
859 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
863 /* XXX 1003.2d11 is unclear if this is right. */
866 else if (c == L('[') && *p == L(':'))
869 const CHAR *startp = p;
874 if (++c1 == CHAR_CLASS_MAX_LENGTH)
877 if (*p == L(':') && p[1] == L(']'))
880 if (c < L('a') || c >= L('z'))
889 else if (c == L('[') && *p == L('='))
895 if (c != L('=') || p[1] != L(']'))
900 else if (c == L('[') && *p == L('.'))
909 if (*p == L('.') && p[1] == L(']'))
925 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
929 res = EXT (c, p, n, string_end, no_leading_period, flags);
936 if (NO_LEADING_PERIOD (flags))
938 if (n == string_end || c != *n)
941 new_no_leading_period = 1;
947 if (n == string_end || c != FOLD ((UCHAR) *n))
951 no_leading_period = new_no_leading_period;
958 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
959 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
968 END (const CHAR *pattern)
970 const CHAR *p = pattern;
974 /* This is an invalid pattern. */
976 else if (*p == L('['))
978 /* Handle brackets special. */
979 if (posixly_correct == 0)
980 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
982 /* Skip the not sign. We have to recognize it because of a possibly
984 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
986 /* A leading ']' is recognized as such. */
989 /* Skip over all characters of the list. */
992 /* This is no valid pattern. */
995 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
996 || *p == L('!')) && p[1] == L('('))
998 else if (*p == L(')'))
1007 struct patternlist *next;
1011 #define xalloca malloc
1013 void free_xalloca(struct patternlist **top){
1014 struct patternlist *p, *next;
1028 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1029 int no_leading_period, int flags)
1034 struct patternlist {
1035 struct patternlist *next;
1039 struct patternlist *list = NULL;
1041 struct patternlist **lastp = &list;
1042 size_t pattern_len = STRLEN (pattern);
1046 static struct patternlist *xalloca_top = NULL;
1049 /* Parse the pattern. Store the individual parts in the list. */
1051 for (startp = p = pattern + 1; level >= 0; ++p)
1052 if (*p == L('\0')) {
1053 /* This is an invalid pattern. */
1054 free_xalloca(&xalloca_top);
1056 } else if (*p == L('[')) {
1057 /* Handle brackets special. */
1058 if (posixly_correct == 0)
1059 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1061 /* Skip the not sign. We have to recognize it because of a possibly
1063 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1065 /* A leading ']' is recognized as such. */
1068 /* Skip over all characters of the list. */
1069 while (*p != L(']'))
1070 if (*p++ == L('\0')) {
1071 /* This is no valid pattern. */
1072 free_xalloca(&xalloca_top);
1076 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1077 || *p == L('!')) && p[1] == L('('))
1078 /* Remember the nesting level. */
1080 else if (*p == L(')'))
1084 /* This means we found the end of the pattern. */
1085 #define NEW_PATTERN \
1086 struct patternlist *newp; \
1088 if (opt == L('?') || opt == L('@')) \
1089 newp = xalloca (sizeof (struct patternlist) \
1090 + (pattern_len * sizeof (CHAR))); \
1092 newp = xalloca (sizeof (struct patternlist) \
1093 + ((p - startp + 1) * sizeof (CHAR))); \
1094 if (!xalloca_top) xalloca_top = newp; \
1095 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1096 newp->next = NULL; \
1102 else if (*p == L('|'))
1110 assert (list != NULL);
1111 assert (p[-1] == L(')'));
1117 if (FCT (p, string, string_end, no_leading_period, flags) == 0) {
1118 free_xalloca(&xalloca_top);
1126 for (rs = string; rs <= string_end; ++rs)
1127 /* First match the prefix with the current pattern with the
1129 if (FCT (list->str, string, rs, no_leading_period,
1130 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1131 /* This was successful. Now match the rest with the rest
1133 && (FCT (p, rs, string_end,
1136 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1137 flags & FNM_FILE_NAME
1138 ? flags : flags & ~FNM_PERIOD) == 0
1139 /* This didn't work. Try the whole pattern. */
1141 && FCT (pattern - 1, rs, string_end,
1144 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1146 flags & FNM_FILE_NAME
1147 ? flags : flags & ~FNM_PERIOD) == 0)))
1148 /* It worked. Signal success. */
1149 free_xalloca(&xalloca_top);
1152 while ((list = list->next) != NULL);
1154 /* None of the patterns lead to a match. */
1155 free_xalloca(&xalloca_top);
1159 if (FCT (p, string, string_end, no_leading_period, flags) == 0) {
1160 free_xalloca(&xalloca_top);
1167 /* I cannot believe it but `strcat' is actually acceptable
1168 here. Match the entire string with the prefix from the
1169 pattern list and the rest of the pattern following the
1171 if (FCT (STRCAT (list->str, p), string, string_end,
1173 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) {
1174 /* It worked. Signal success. */
1175 free_xalloca(&xalloca_top);
1178 while ((list = list->next) != NULL);
1180 /* None of the patterns lead to a match. */
1181 free_xalloca(&xalloca_top);
1185 for (rs = string; rs <= string_end; ++rs)
1187 struct patternlist *runp;
1189 for (runp = list; runp != NULL; runp = runp->next)
1190 if (FCT (runp->str, string, rs, no_leading_period,
1191 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1194 /* If none of the patterns matched see whether the rest does. */
1196 && (FCT (p, rs, string_end,
1199 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1200 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1202 /* This is successful. */
1203 free_xalloca(&xalloca_top);
1208 /* None of the patterns together with the rest of the pattern
1210 free_xalloca(&xalloca_top);
1214 assert (! "Invalid extended matching operator");
1218 free_xalloca(&xalloca_top);