2007-07-11 [colin] 2.10.0cvs16
[claws.git] / src / common / fnmatch_loop.c
1 /* Copyright (C) 1991-1993, 1996-2000, 2001 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 3 of the License, or (at your option) any later version.
8
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library. If not, see <http://www.gnu.org/licenses/>.
16  */
17
18
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20    it matches, nonzero if not.  */
21 static int FCT (const CHAR *pattern, const CHAR *string,
22                 const CHAR *string_end, int no_leading_period, int flags)
23                 internal_function;
24 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
25                 const CHAR *string_end, int no_leading_period, int flags)
26      internal_function;
27 static const CHAR *END (const CHAR *patternp) internal_function;
28
#ifdef WIN32
/* Substitute for GCC's __builtin_expect in WIN32 builds.  The builtin
   evaluates to OP itself; VAL is only a branch-prediction hint.  The
   substitute therefore has to expand to OP unchanged: comparing OP
   with VAL would invert every `__builtin_expect (flags & FNM_..., 0)'
   test in this file.  */
# define __builtin_expect(op,val) (op)
#endif
32
33 static int
34 internal_function
35 FCT (pattern, string, string_end, no_leading_period, flags)
36      const CHAR *pattern;
37      const CHAR *string;
38      const CHAR *string_end;
39      int no_leading_period;
40      int flags;
41 {
42   register const CHAR *p = pattern, *n = string;
43   register UCHAR c;
44 #ifdef _LIBC
45 # if WIDE_CHAR_VERSION
46   const char *collseq = (const char *)
47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
48 # else
49   const UCHAR *collseq = (const UCHAR *)
50     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
51 # endif
52 #endif
53
54   while ((c = *p++) != L('\0'))
55     {
56       int new_no_leading_period = 0;
57       c = FOLD (c);
58
59       switch (c)
60         {
61         case L('?'):
62           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
63             {
64               int res;
65
66               res = EXT (c, p, n, string_end, no_leading_period,
67                          flags);
68               if (res != -1)
69                 return res;
70             }
71
72           if (n == string_end)
73             return FNM_NOMATCH;
74           else if (*n == L('/') && (flags & FNM_FILE_NAME))
75             return FNM_NOMATCH;
76           else if (*n == L('.') && no_leading_period)
77             return FNM_NOMATCH;
78           break;
79
80         case L('\\'):
81           if (!(flags & FNM_NOESCAPE))
82             {
83               c = *p++;
84               if (c == L('\0'))
85                 /* Trailing \ loses.  */
86                 return FNM_NOMATCH;
87               c = FOLD (c);
88             }
89           if (n == string_end || FOLD ((UCHAR) *n) != c)
90             return FNM_NOMATCH;
91           break;
92
93         case L('*'):
94           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
95             {
96               int res;
97
98               res = EXT (c, p, n, string_end, no_leading_period,
99                          flags);
100               if (res != -1)
101                 return res;
102             }
103
104           if (n != string_end && *n == L('.') && no_leading_period)
105             return FNM_NOMATCH;
106
107           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
108             {
109               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
110                 {
111                   const CHAR *endp = END (p);
112                   if (endp != p)
113                     {
114                       /* This is a pattern.  Skip over it.  */
115                       p = endp;
116                       continue;
117                     }
118                 }
119
120               if (c == L('?'))
121                 {
122                   /* A ? needs to match one character.  */
123                   if (n == string_end)
124                     /* There isn't another character; no match.  */
125                     return FNM_NOMATCH;
126                   else if (*n == L('/')
127                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
128                     /* A slash does not match a wildcard under
129                        FNM_FILE_NAME.  */
130                     return FNM_NOMATCH;
131                   else
132                     /* One character of the string is consumed in matching
133                        this ? wildcard, so *??? won't match if there are
134                        less than three characters.  */
135                     ++n;
136                 }
137             }
138
139           if (c == L('\0'))
140             /* The wildcard(s) is/are the last element of the pattern.
141                If the name is a file name and contains another slash
142                this means it cannot match, unless the FNM_LEADING_DIR
143                flag is set.  */
144             {
145               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
146
147               if (flags & FNM_FILE_NAME)
148                 {
149                   if (flags & FNM_LEADING_DIR)
150                     result = 0;
151                   else
152                     {
153                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
154                         result = 0;
155                     }
156                 }
157
158               return result;
159             }
160           else
161             {
162               const CHAR *endp;
163
164               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
165                              string_end - n);
166               if (endp == NULL)
167                 endp = string_end;
168
169               if (c == L('[')
170                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
171                       && (c == L('@') || c == L('+') || c == L('!'))
172                       && *p == L('(')))
173                 {
174                   int flags2 = ((flags & FNM_FILE_NAME)
175                                 ? flags : (flags & ~FNM_PERIOD));
176                   int no_leading_period2 = no_leading_period;
177
178                   for (--p; n < endp; ++n, no_leading_period2 = 0)
179                     if (FCT (p, n, string_end, no_leading_period2, flags2)
180                         == 0)
181                       return 0;
182                 }
183               else if (c == L('/') && (flags & FNM_FILE_NAME))
184                 {
185                   while (n < string_end && *n != L('/'))
186                     ++n;
187                   if (n < string_end && *n == L('/')
188                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
189                           == 0))
190                     return 0;
191                 }
192               else
193                 {
194                   int flags2 = ((flags & FNM_FILE_NAME)
195                                 ? flags : (flags & ~FNM_PERIOD));
196                   int no_leading_period2 = no_leading_period;
197
198                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
199                     c = *p;
200                   c = FOLD (c);
201                   for (--p; n < endp; ++n, no_leading_period2 = 0)
202                     if (FOLD ((UCHAR) *n) == c
203                         && (FCT (p, n, string_end, no_leading_period2, flags2)
204                             == 0))
205                       return 0;
206                 }
207             }
208
209           /* If we come here no match is possible with the wildcard.  */
210           return FNM_NOMATCH;
211
212         case L('['):
213           {
214             /* Nonzero if the sense of the character class is inverted.  */
215             register int not;
216             CHAR cold;
217             UCHAR fn;
218
219             if (posixly_correct == 0)
220               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
221
222             if (n == string_end)
223               return FNM_NOMATCH;
224
225             if (*n == L('.') && no_leading_period)
226               return FNM_NOMATCH;
227
228             if (*n == L('/') && (flags & FNM_FILE_NAME))
229               /* `/' cannot be matched.  */
230               return FNM_NOMATCH;
231
232             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
233             if (not)
234               ++p;
235
236             fn = FOLD ((UCHAR) *n);
237
238             c = *p++;
239             for (;;)
240               {
241                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
242                   {
243                     if (*p == L('\0'))
244                       return FNM_NOMATCH;
245                     c = FOLD ((UCHAR) *p);
246                     ++p;
247
248                     if (c == fn)
249                       goto matched;
250                   }
251                 else if (c == L('[') && *p == L(':'))
252                   {
253                     /* Leave room for the null.  */
254                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
255                     size_t c1 = 0;
256 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
257                     wctype_t wt;
258 #endif
259                     const CHAR *startp = p;
260
261                     for (;;)
262                       {
263                         if (c1 == CHAR_CLASS_MAX_LENGTH)
264                           /* The name is too long and therefore the pattern
265                              is ill-formed.  */
266                           return FNM_NOMATCH;
267
268                         c = *++p;
269                         if (c == L(':') && p[1] == L(']'))
270                           {
271                             p += 2;
272                             break;
273                           }
274                         if (c < L('a') || c >= L('z'))
275                           {
276                             /* This cannot possibly be a character class name.
277                                Match it as a normal range.  */
278                             p = startp;
279                             c = L('[');
280                             goto normal_bracket;
281                           }
282                         str[c1++] = c;
283                       }
284                     str[c1] = L('\0');
285
286 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
287                     wt = IS_CHAR_CLASS (str);
288                     if (wt == 0)
289                       /* Invalid character class name.  */
290                       return FNM_NOMATCH;
291
292 # if defined _LIBC && ! WIDE_CHAR_VERSION
293                     /* The following code is glibc specific but does
294                        there a good job in speeding up the code since
295                        we can avoid the btowc() call.  */
296                     if (_ISCTYPE ((UCHAR) *n, wt))
297                       goto matched;
298 # else
299                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
300                       goto matched;
301 # endif
302 #else
303                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
304                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
305                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
306                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
307                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
308                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
309                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
310                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
311                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
312                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
313                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
314                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
315                       goto matched;
316 #endif
317                     c = *p++;
318                   }
319 #ifdef _LIBC
320                 else if (c == L('[') && *p == L('='))
321                   {
322                     UCHAR str[1];
323                     uint32_t nrules =
324                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
325                     const CHAR *startp = p;
326
327                     c = *++p;
328                     if (c == L('\0'))
329                       {
330                         p = startp;
331                         c = L('[');
332                         goto normal_bracket;
333                       }
334                     str[0] = c;
335
336                     c = *++p;
337                     if (c != L('=') || p[1] != L(']'))
338                       {
339                         p = startp;
340                         c = L('[');
341                         goto normal_bracket;
342                       }
343                     p += 2;
344
345                     if (nrules == 0)
346                       {
347                         if ((UCHAR) *n == str[0])
348                           goto matched;
349                       }
350                     else
351                       {
352                         const int32_t *table;
353 # if WIDE_CHAR_VERSION
354                         const int32_t *weights;
355                         const int32_t *extra;
356 # else
357                         const unsigned char *weights;
358                         const unsigned char *extra;
359 # endif
360                         const int32_t *indirect;
361                         int32_t idx;
362                         const UCHAR *cp = (const UCHAR *) str;
363
364                         /* This #include defines a local function!  */
365 # if WIDE_CHAR_VERSION
366 #  include <locale/weightwc.h>
367 # else
368 #  include <locale/weight.h>
369 # endif
370
371 # if WIDE_CHAR_VERSION
372                         table = (const int32_t *)
373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
374                         weights = (const int32_t *)
375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
376                         extra = (const int32_t *)
377                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
378                         indirect = (const int32_t *)
379                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
380 # else
381                         table = (const int32_t *)
382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
383                         weights = (const unsigned char *)
384                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
385                         extra = (const unsigned char *)
386                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
387                         indirect = (const int32_t *)
388                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
389 # endif
390
391                         idx = findidx (&cp);
392                         if (idx != 0)
393                           {
394                             /* We found a table entry.  Now see whether the
395                                character we are currently at has the same
396                                equivalance class value.  */
397                             int len = weights[idx];
398                             int32_t idx2;
399                             const UCHAR *np = (const UCHAR *) n;
400
401                             idx2 = findidx (&np);
402                             if (idx2 != 0 && len == weights[idx2])
403                               {
404                                 int cnt = 0;
405
406                                 while (cnt < len
407                                        && (weights[idx + 1 + cnt]
408                                            == weights[idx2 + 1 + cnt]))
409                                   ++cnt;
410
411                                 if (cnt == len)
412                                   goto matched;
413                               }
414                           }
415                       }
416
417                     c = *p++;
418                   }
419 #endif
420                 else if (c == L('\0'))
421                   /* [ (unterminated) loses.  */
422                   return FNM_NOMATCH;
423                 else
424                   {
425                     int is_range = 0;
426
427 #ifdef _LIBC
428                     int is_seqval = 0;
429
430                     if (c == L('[') && *p == L('.'))
431                       {
432                         uint32_t nrules =
433                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
434                         const CHAR *startp = p;
435                         size_t c1 = 0;
436
437                         while (1)
438                           {
439                             c = *++p;
440                             if (c == L('.') && p[1] == L(']'))
441                               {
442                                 p += 2;
443                                 break;
444                               }
445                             if (c == '\0')
446                               return FNM_NOMATCH;
447                             ++c1;
448                           }
449
450                         /* We have to handling the symbols differently in
451                            ranges since then the collation sequence is
452                            important.  */
453                         is_range = *p == L('-') && p[1] != L('\0');
454
455                         if (nrules == 0)
456                           {
457                             /* There are no names defined in the collation
458                                data.  Therefore we only accept the trivial
459                                names consisting of the character itself.  */
460                             if (c1 != 1)
461                               return FNM_NOMATCH;
462
463                             if (!is_range && *n == startp[1])
464                               goto matched;
465
466                             cold = startp[1];
467                             c = *p++;
468                           }
469                         else
470                           {
471                             int32_t table_size;
472                             const int32_t *symb_table;
473 # ifdef WIDE_CHAR_VERSION
474                             char str[c1];
475                             unsigned int strcnt;
476 # else
477 #  define str (startp + 1)
478 # endif
479                             const unsigned char *extra;
480                             int32_t idx;
481                             int32_t elem;
482                             int32_t second;
483                             int32_t hash;
484
485 # ifdef WIDE_CHAR_VERSION
486                             /* We have to convert the name to a single-byte
487                                string.  This is possible since the names
488                                consist of ASCII characters and the internal
489                                representation is UCS4.  */
490                             for (strcnt = 0; strcnt < c1; ++strcnt)
491                               str[strcnt] = startp[1 + strcnt];
492 #endif
493
494                             table_size =
495                               _NL_CURRENT_WORD (LC_COLLATE,
496                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
497                             symb_table = (const int32_t *)
498                               _NL_CURRENT (LC_COLLATE,
499                                            _NL_COLLATE_SYMB_TABLEMB);
500                             extra = (const unsigned char *)
501                               _NL_CURRENT (LC_COLLATE,
502                                            _NL_COLLATE_SYMB_EXTRAMB);
503
504                             /* Locate the character in the hashing table.  */
505                             hash = elem_hash (str, c1);
506
507                             idx = 0;
508                             elem = hash % table_size;
509                             second = hash % (table_size - 2);
510                             while (symb_table[2 * elem] != 0)
511                               {
512                                 /* First compare the hashing value.  */
513                                 if (symb_table[2 * elem] == hash
514                                     && c1 == extra[symb_table[2 * elem + 1]]
515                                     && memcmp (str,
516                                                &extra[symb_table[2 * elem + 1]
517                                                      + 1], c1) == 0)
518                                   {
519                                     /* Yep, this is the entry.  */
520                                     idx = symb_table[2 * elem + 1];
521                                     idx += 1 + extra[idx];
522                                     break;
523                                   }
524
525                                 /* Next entry.  */
526                                 elem += second;
527                               }
528
529                             if (symb_table[2 * elem] != 0)
530                               {
531                                 /* Compare the byte sequence but only if
532                                    this is not part of a range.  */
533 # ifdef WIDE_CHAR_VERSION
534                                 int32_t *wextra;
535
536                                 idx += 1 + extra[idx];
537                                 /* Adjust for the alignment.  */
538                                 idx = (idx + 3) & ~3;
539
540                                 wextra = (int32_t *) &extra[idx + 4];
541 # endif
542
543                                 if (! is_range)
544                                   {
545 # ifdef WIDE_CHAR_VERSION
546                                     for (c1 = 0; c1 < wextra[idx]; ++c1)
547                                       if (n[c1] != wextra[1 + c1])
548                                         break;
549
550                                     if (c1 == wextra[idx])
551                                       goto matched;
552 # else
553                                     for (c1 = 0; c1 < extra[idx]; ++c1)
554                                       if (n[c1] != extra[1 + c1])
555                                         break;
556
557                                     if (c1 == extra[idx])
558                                       goto matched;
559 # endif
560                                   }
561
562                                 /* Get the collation sequence value.  */
563                                 is_seqval = 1;
564 # ifdef WIDE_CHAR_VERSION
565                                 cold = wextra[1 + wextra[idx]];
566 # else
567                                 /* Adjust for the alignment.  */
568                                 idx += 1 + extra[idx];
569                                 idx = (idx + 3) & ~4;
570                                 cold = *((int32_t *) &extra[idx]);
571 # endif
572
573                                 c = *p++;
574                               }
575                             else if (c1 == 1)
576                               {
577                                 /* No valid character.  Match it as a
578                                    single byte.  */
579                                 if (!is_range && *n == str[0])
580                                   goto matched;
581
582                                 cold = str[0];
583                                 c = *p++;
584                               }
585                             else
586                               return FNM_NOMATCH;
587                           }
588                       }
589                     else
590 # undef str
591 #endif
592                       {
593                         c = FOLD (c);
594                       normal_bracket:
595
596                         /* We have to handling the symbols differently in
597                            ranges since then the collation sequence is
598                            important.  */
599                         is_range = *p == L('-') && p[1] != L('\0');
600
601                         if (!is_range && c == fn)
602                           goto matched;
603
604                         cold = c;
605                         c = *p++;
606                       }
607
608                     if (c == L('-') && *p != L(']'))
609                       {
610 #if _LIBC
611                         /* We have to find the collation sequence
612                            value for C.  Collation sequence is nothing
613                            we can regularly access.  The sequence
614                            value is defined by the order in which the
615                            definitions of the collation values for the
616                            various characters appear in the source
617                            file.  A strange concept, nowhere
618                            documented.  */
619                         uint32_t fcollseq;
620                         uint32_t lcollseq;
621                         UCHAR cend = *p++;
622
623 # ifdef WIDE_CHAR_VERSION
624                         /* Search in the `names' array for the characters.  */
625                         fcollseq = collseq_table_lookup (collseq, fn);
626                         if (fcollseq == ~((uint32_t) 0))
627                           /* XXX We don't know anything about the character
628                              we are supposed to match.  This means we are
629                              failing.  */
630                           goto range_not_matched;
631
632                         if (is_seqval)
633                           lcollseq = cold;
634                         else
635                           lcollseq = collseq_table_lookup (collseq, cold);
636 # else
637                         fcollseq = collseq[fn];
638                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
639 # endif
640
641                         is_seqval = 0;
642                         if (cend == L('[') && *p == L('.'))
643                           {
644                             uint32_t nrules =
645                               _NL_CURRENT_WORD (LC_COLLATE,
646                                                 _NL_COLLATE_NRULES);
647                             const CHAR *startp = p;
648                             size_t c1 = 0;
649
650                             while (1)
651                               {
652                                 c = *++p;
653                                 if (c == L('.') && p[1] == L(']'))
654                                   {
655                                     p += 2;
656                                     break;
657                                   }
658                                 if (c == '\0')
659                                   return FNM_NOMATCH;
660                                 ++c1;
661                               }
662
663                             if (nrules == 0)
664                               {
665                                 /* There are no names defined in the
666                                    collation data.  Therefore we only
667                                    accept the trivial names consisting
668                                    of the character itself.  */
669                                 if (c1 != 1)
670                                   return FNM_NOMATCH;
671
672                                 cend = startp[1];
673                               }
674                             else
675                               {
676                                 int32_t table_size;
677                                 const int32_t *symb_table;
678 # ifdef WIDE_CHAR_VERSION
679                                 char str[c1];
680                                 unsigned int strcnt;
681 # else
682 #  define str (startp + 1)
683 # endif
684                                 const unsigned char *extra;
685                                 int32_t idx;
686                                 int32_t elem;
687                                 int32_t second;
688                                 int32_t hash;
689
690 # ifdef WIDE_CHAR_VERSION
691                                 /* We have to convert the name to a single-byte
692                                    string.  This is possible since the names
693                                    consist of ASCII characters and the internal
694                                    representation is UCS4.  */
695                                 for (strcnt = 0; strcnt < c1; ++strcnt)
696                                   str[strcnt] = startp[1 + strcnt];
697 # endif
698
699                                 table_size =
700                                   _NL_CURRENT_WORD (LC_COLLATE,
701                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
702                                 symb_table = (const int32_t *)
703                                   _NL_CURRENT (LC_COLLATE,
704                                                _NL_COLLATE_SYMB_TABLEMB);
705                                 extra = (const unsigned char *)
706                                   _NL_CURRENT (LC_COLLATE,
707                                                _NL_COLLATE_SYMB_EXTRAMB);
708
709                                 /* Locate the character in the hashing
710                                    table.  */
711                                 hash = elem_hash (str, c1);
712
713                                 idx = 0;
714                                 elem = hash % table_size;
715                                 second = hash % (table_size - 2);
716                                 while (symb_table[2 * elem] != 0)
717                                   {
718                                 /* First compare the hashing value.  */
719                                     if (symb_table[2 * elem] == hash
720                                         && (c1
721                                             == extra[symb_table[2 * elem + 1]])
722                                         && memcmp (str,
723                                                    &extra[symb_table[2 * elem + 1]
724                                                          + 1], c1) == 0)
725                                       {
726                                         /* Yep, this is the entry.  */
727                                         idx = symb_table[2 * elem + 1];
728                                         idx += 1 + extra[idx];
729                                         break;
730                                       }
731
732                                     /* Next entry.  */
733                                     elem += second;
734                                   }
735
736                                 if (symb_table[2 * elem] != 0)
737                                   {
738                                     /* Compare the byte sequence but only if
739                                        this is not part of a range.  */
740 # ifdef WIDE_CHAR_VERSION
741                                     int32_t *wextra;
742
743                                     idx += 1 + extra[idx];
744                                     /* Adjust for the alignment.  */
745                                     idx = (idx + 3) & ~4;
746
747                                     wextra = (int32_t *) &extra[idx + 4];
748 # endif
749                                     /* Get the collation sequence value.  */
750                                     is_seqval = 1;
751 # ifdef WIDE_CHAR_VERSION
752                                     cend = wextra[1 + wextra[idx]];
753 # else
754                                     /* Adjust for the alignment.  */
755                                     idx += 1 + extra[idx];
756                                     idx = (idx + 3) & ~4;
757                                     cend = *((int32_t *) &extra[idx]);
758 # endif
759                                   }
760                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
761                                   {
762                                     cend = str[0];
763                                     c = *p++;
764                                   }
765                                 else
766                                   return FNM_NOMATCH;
767                               }
768 # undef str
769                           }
770                         else
771                           {
772                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
773                               cend = *p++;
774                             if (cend == L('\0'))
775                               return FNM_NOMATCH;
776                             cend = FOLD (cend);
777                           }
778
779                         /* XXX It is not entirely clear to me how to handle
780                            characters which are not mentioned in the
781                            collation specification.  */
782                         if (
783 # ifdef WIDE_CHAR_VERSION
784                             lcollseq == 0xffffffff ||
785 # endif
786                             lcollseq <= fcollseq)
787                           {
788                             /* We have to look at the upper bound.  */
789                             uint32_t hcollseq;
790
791                             if (is_seqval)
792                               hcollseq = cend;
793                             else
794                               {
795 # ifdef WIDE_CHAR_VERSION
796                                 hcollseq =
797                                   collseq_table_lookup (collseq, cend);
798                                 if (hcollseq == ~((uint32_t) 0))
799                                   {
800                                     /* Hum, no information about the upper
801                                        bound.  The matching succeeds if the
802                                        lower bound is matched exactly.  */
803                                     if (lcollseq != fcollseq)
804                                       goto range_not_matched;
805
806                                     goto matched;
807                                   }
808 # else
809                                 hcollseq = collseq[cend];
810 # endif
811                               }
812
813                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
814                               goto matched;
815                           }
816 # ifdef WIDE_CHAR_VERSION
817                       range_not_matched:
818 # endif
819 #else
820                         /* We use a boring value comparison of the character
821                            values.  This is better than comparing using
822                            `strcoll' since the latter would have surprising
823                            and sometimes fatal consequences.  */
824                         UCHAR cend = *p++;
825
826                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
827                           cend = *p++;
828                         if (cend == L('\0'))
829                           return FNM_NOMATCH;
830
831                         /* It is a range.  */
832                         if (cold <= fn && fn <= cend)
833                           goto matched;
834 #endif
835
836                         c = *p++;
837                       }
838                   }
839
840                 if (c == L(']'))
841                   break;
842               }
843
844             if (!not)
845               return FNM_NOMATCH;
846             break;
847
848           matched:
849             /* Skip the rest of the [...] that already matched.  */
850             do
851               {
852               ignore_next:
853                 c = *p++;
854
855                 if (c == L('\0'))
856                   /* [... (unterminated) loses.  */
857                   return FNM_NOMATCH;
858
859                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
860                   {
861                     if (*p == L('\0'))
862                       return FNM_NOMATCH;
863                     /* XXX 1003.2d11 is unclear if this is right.  */
864                     ++p;
865                   }
866                 else if (c == L('[') && *p == L(':'))
867                   {
868                     int c1 = 0;
869                     const CHAR *startp = p;
870
871                     while (1)
872                       {
873                         c = *++p;
874                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
875                           return FNM_NOMATCH;
876
877                         if (*p == L(':') && p[1] == L(']'))
878                           break;
879
880                         if (c < L('a') || c >= L('z'))
881                           {
882                             p = startp;
883                             goto ignore_next;
884                           }
885                       }
886                     p += 2;
887                     c = *p++;
888                   }
889                 else if (c == L('[') && *p == L('='))
890                   {
891                     c = *++p;
892                     if (c == L('\0'))
893                       return FNM_NOMATCH;
894                     c = *++p;
895                     if (c != L('=') || p[1] != L(']'))
896                       return FNM_NOMATCH;
897                     p += 2;
898                     c = *p++;
899                   }
900                 else if (c == L('[') && *p == L('.'))
901                   {
902                     ++p;
903                     while (1)
904                       {
905                         c = *++p;
906                         if (c == '\0')
907                           return FNM_NOMATCH;
908
909                         if (*p == L('.') && p[1] == L(']'))
910                           break;
911                       }
912                     p += 2;
913                     c = *p++;
914                   }
915               }
916             while (c != L(']'));
917             if (not)
918               return FNM_NOMATCH;
919           }
920           break;
921
922         case L('+'):
923         case L('@'):
924         case L('!'):
925           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
926             {
927               int res;
928
929               res = EXT (c, p, n, string_end, no_leading_period, flags);
930               if (res != -1)
931                 return res;
932             }
933           goto normal_match;
934
935         case L('/'):
936           if (NO_LEADING_PERIOD (flags))
937             {
938               if (n == string_end || c != *n)
939                 return FNM_NOMATCH;
940
941               new_no_leading_period = 1;
942               break;
943             }
944           /* FALLTHROUGH */
945         default:
946         normal_match:
947           if (n == string_end || c != FOLD ((UCHAR) *n))
948             return FNM_NOMATCH;
949         }
950
951       no_leading_period = new_no_leading_period;
952       ++n;
953     }
954
955   if (n == string_end)
956     return 0;
957
958   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
959     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
960     return 0;
961
962   return FNM_NOMATCH;
963 }
964
965
966 static const CHAR *
967 internal_function
968 END (const CHAR *pattern)
969 {
970   const CHAR *p = pattern;
971
972   while (1)
973     if (*++p == L('\0'))
974       /* This is an invalid pattern.  */
975       return pattern;
976     else if (*p == L('['))
977       {
978         /* Handle brackets special.  */
979         if (posixly_correct == 0)
980           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
981
982         /* Skip the not sign.  We have to recognize it because of a possibly
983            following ']'.  */
984         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
985           ++p;
986         /* A leading ']' is recognized as such.  */
987         if (*p == L(']'))
988           ++p;
989         /* Skip over all characters of the list.  */
990         while (*p != L(']'))
991           if (*p++ == L('\0'))
992             /* This is no valid pattern.  */
993             return pattern;
994       }
995     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
996               || *p == L('!')) && p[1] == L('('))
997       p = END (p + 1);
998     else if (*p == L(')'))
999       break;
1000
1001   return p + 1;
1002 }
1003
#ifdef WIN32
/* One alternative of an extended-match group: a singly linked node with
   the alternative's text stored directly behind the link pointer.  */
struct patternlist
{
  struct patternlist *next;
  CHAR str[0];
};

/* In this configuration the "alloca" helpers are plain heap calls, so
   every list built with xalloca has to be released explicitly.  */
#define xalloca malloc
#define xfree   free

/* Release every node of the list headed by *TOP and reset *TOP to NULL.
   TOP itself must not be NULL; an empty list (*TOP == NULL) is fine.  */
void free_xalloca(struct patternlist **top)
{
  struct patternlist *node;
  struct patternlist *following;

  for (node = *top; node != NULL; node = following)
    {
      /* Fetch the successor before the node's memory goes away.  */
      following = node->next;
      xfree (node);
    }

  *top = NULL;
}
#endif
1025
1026 static int
1027 internal_function
1028 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1029      int no_leading_period, int flags)
1030 {
1031   const CHAR *startp;
1032   int level;
1033 #ifndef WIN32
1034   struct patternlist {
1035     struct patternlist *next;
1036     CHAR str[0];
1037   } *list = NULL;
1038 #else
1039   struct patternlist *list = NULL;
1040 #endif
1041   struct patternlist **lastp = &list;
1042   size_t pattern_len = STRLEN (pattern);
1043   const CHAR *p;
1044   const CHAR *rs;
1045 #ifdef WIN32
1046   static struct patternlist *xalloca_top = NULL;
1047 #endif
1048
1049   /* Parse the pattern.  Store the individual parts in the list.  */
1050   level = 0;
1051   for (startp = p = pattern + 1; level >= 0; ++p)
1052     if (*p == L('\0')) {
1053       /* This is an invalid pattern.  */
1054       free_xalloca(&xalloca_top);
1055       return -1;
1056     } else if (*p == L('[')) {
1057       /* Handle brackets special.  */
1058       if (posixly_correct == 0)
1059         posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1060
1061       /* Skip the not sign.  We have to recognize it because of a possibly
1062            following ']'.  */
1063       if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1064         ++p;
1065       /* A leading ']' is recognized as such.  */
1066       if (*p == L(']'))
1067         ++p;
1068       /* Skip over all characters of the list.  */
1069       while (*p != L(']'))
1070         if (*p++ == L('\0')) {
1071           /* This is no valid pattern.  */
1072           free_xalloca(&xalloca_top);
1073           return -1;
1074         }
1075     }
1076     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1077               || *p == L('!')) && p[1] == L('('))
1078       /* Remember the nesting level.  */
1079       ++level;
1080     else if (*p == L(')'))
1081       {
1082         if (level-- == 0)
1083           {
1084             /* This means we found the end of the pattern.  */
1085 #define NEW_PATTERN \
1086         struct patternlist *newp;                                             \
1087                                                                               \
1088             if (opt == L('?') || opt == L('@'))                               \
1089               newp = xalloca (sizeof (struct patternlist)                     \
1090                              + (pattern_len * sizeof (CHAR)));                \
1091             else                                                              \
1092               newp = xalloca (sizeof (struct patternlist)                     \
1093                              + ((p - startp + 1) * sizeof (CHAR)));           \
1094                 if (!xalloca_top) xalloca_top = newp;                         \
1095             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1096             newp->next = NULL;                                                \
1097             *lastp = newp;                                                    \
1098             lastp = &newp->next
1099             NEW_PATTERN;
1100           }
1101       }
1102     else if (*p == L('|'))
1103       {
1104         if (level == 0)
1105           {
1106             NEW_PATTERN;
1107             startp = p + 1;
1108           }
1109       }
1110   assert (list != NULL);
1111   assert (p[-1] == L(')'));
1112 #undef NEW_PATTERN
1113
1114   switch (opt)
1115     {
1116     case L('*'):
1117                 if (FCT (p, string, string_end, no_leading_period, flags) == 0) {
1118         free_xalloca(&xalloca_top);
1119         return 0;
1120       }
1121       /* FALLTHROUGH */
1122
1123     case L('+'):
1124       do
1125         {
1126           for (rs = string; rs <= string_end; ++rs)
1127             /* First match the prefix with the current pattern with the
1128                current pattern.  */
1129             if (FCT (list->str, string, rs, no_leading_period,
1130                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1131                 /* This was successful.  Now match the rest with the rest
1132                    of the pattern.  */
1133                 && (FCT (p, rs, string_end,
1134                          rs == string
1135                          ? no_leading_period
1136                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1137                          flags & FNM_FILE_NAME
1138                          ? flags : flags & ~FNM_PERIOD) == 0
1139                     /* This didn't work.  Try the whole pattern.  */
1140                     || (rs != string
1141                         && FCT (pattern - 1, rs, string_end,
1142                                 rs == string
1143                                 ? no_leading_period
1144                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1145                                    ? 1 : 0),
1146                                 flags & FNM_FILE_NAME
1147                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1148               /* It worked.  Signal success.  */
1149               free_xalloca(&xalloca_top);
1150               return 0;
1151         }
1152       while ((list = list->next) != NULL);
1153
1154       /* None of the patterns lead to a match.  */
1155       free_xalloca(&xalloca_top);
1156       return FNM_NOMATCH;
1157
1158     case L('?'):
1159       if (FCT (p, string, string_end, no_leading_period, flags) == 0) {
1160         free_xalloca(&xalloca_top);
1161         return 0;
1162       }
1163       /* FALLTHROUGH */
1164
1165     case L('@'):
1166       do
1167         /* I cannot believe it but `strcat' is actually acceptable
1168            here.  Match the entire string with the prefix from the
1169            pattern list and the rest of the pattern following the
1170            pattern list.  */
1171         if (FCT (STRCAT (list->str, p), string, string_end,
1172                  no_leading_period,
1173                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) {
1174           /* It worked.  Signal success.  */
1175           free_xalloca(&xalloca_top);
1176           return 0;
1177         }
1178       while ((list = list->next) != NULL);
1179
1180       /* None of the patterns lead to a match.  */
1181       free_xalloca(&xalloca_top);
1182       return FNM_NOMATCH;
1183
1184     case L('!'):
1185       for (rs = string; rs <= string_end; ++rs)
1186         {
1187           struct patternlist *runp;
1188
1189           for (runp = list; runp != NULL; runp = runp->next)
1190             if (FCT (runp->str, string, rs,  no_leading_period,
1191                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1192               break;
1193
1194           /* If none of the patterns matched see whether the rest does.  */
1195           if (runp == NULL
1196               && (FCT (p, rs, string_end,
1197                        rs == string
1198                        ? no_leading_period
1199                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1200                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1201                   == 0)){
1202             /* This is successful.  */
1203             free_xalloca(&xalloca_top);
1204             return 0;
1205           }
1206         }
1207
1208       /* None of the patterns together with the rest of the pattern
1209          lead to a match.  */
1210       free_xalloca(&xalloca_top);
1211       return FNM_NOMATCH;
1212
1213     default:
1214       assert (! "Invalid extended matching operator");
1215       break;
1216     }
1217
1218   free_xalloca(&xalloca_top);
1219   return -1;
1220 }
1221
1222
1223 #undef FOLD
1224 #undef CHAR
1225 #undef UCHAR
1226 #undef INT
1227 #undef FCT
1228 #undef EXT
1229 #undef END
1230 #undef MEMPCPY
1231 #undef MEMCHR
1232 #undef STRCOLL
1233 #undef STRLEN
1234 #undef STRCAT
1235 #undef L
1236 #undef BTOWC