src/unmime.c

   1 /*
   2  * MIME mail decoding.
   3  *
   4  * This module contains decoding routines for converting
   5  * quoted-printable data into pure 8-bit data, in MIME
   6  * formatted messages.
   7  *
   8  * By Henrik Storner <storner@image.dk>
   9  *
  10  * Configuration file support for fetchmail 4.3.8 by
  11  * Frank Damgaard <frda@post3.tele.dk>
  12  *
  13  * Modified by Hiroyuki Yamamoto <hiro-y@kcn.ne.jp>
  14  *
  15  */
  16
  17 #ifdef HAVE_CONFIG_H
  18 #  include "config.h"
  19 #endif
  20
  21 #include <string.h>
  22 #include <stdlib.h>
  23 #include <stdio.h>
  24 #include <ctype.h>
  25 #if HAVE_ALLOCA_H
  26 #  include <alloca.h>
  27 #endif
  28 #if HAVE_LIBJCONV
  29 #  include <jconv.h>
  30 #  include "prefs_common.h"
  31 #endif
  32
  33 #include "unmime.h"
  34 #include "base64.h"
  35 #include "rfc822.h"
  36 #include "utils.h"
  37
  38 #ifndef TRUE
  39 #define FALSE   0
  40 #define TRUE    1
  41 #endif /* TRUE */
  42
  43 #define xalloca(ptr, t, n)      if (!(ptr = (t) alloca(n))) \
  44                 {fprintf(stderr, "alloca failed"); exit(1);}
  45
  46 static unsigned char unhex(unsigned char c)
  47 {
  48   if ((c >= '0') && (c <= '9'))
  49     return (c - '0');
  50   else if ((c >= 'A') && (c <= 'F'))
  51     return (c - 'A' + 10);
  52   else if ((c >= 'a') && (c <= 'f'))
  53     return (c - 'a' + 10);
  54   else
  55     return c;
  56 }
  57
  58 static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
  59 {
  60   c1 = unhex(c1);
  61   c2 = unhex(c2);
  62
  63   if ((c1 > 15) || (c2 > 15))
  64     return 1;
  65   else {
  66     *c_out = 16*c1+c2;
  67     return 0;
  68   }
  69 }
  70
  71
  72 /*
  73  * Routines to decode MIME QP-encoded headers, as per RFC 2047.
  74  */
  75
  76 /* States of the decoding state machine */
  77 #define S_COPY_PLAIN        0   /* Just copy, but watch for the QP flag */
  78 #define S_SKIP_MIMEINIT     1   /* Get the encoding, and skip header */
  79 #define S_COPY_MIME         2   /* Decode a sequence of coded characters */
  80
  81 static const char MIMEHDR_INIT[]  = "=?";       /* Start of coded sequence */
  82 static const char MIMEHDR_END[]   = "?=";       /* End of coded sequence */
  83
  84 #if !HAVE_LIBJCONV
  85 void UnMimeHeader(unsigned char *hdr)
  86 {
  87   /* Decode a buffer containing data encoded according to RFC
  88    * 2047. This only handles content-transfer-encoding; conversion
  89    * between character sets is not implemented.  In other words: We
  90    * assume the charsets used can be displayed by your mail program
  91    * without problems.
  92    */
  93
  94   /* Note: Decoding is done "in-situ", i.e. without using an
  95    * additional buffer for temp. storage. This is possible, since the
  96    * decoded string will always be shorter than the encoded string,
  97    * due to the en- coding scheme.
  98    */
  99
 100   int  state = S_COPY_PLAIN;
 101   unsigned char *p_in, *p_out, *p;
 102   unsigned char enc = '\0';             /* initialization pacifies -Wall */
 103   int  i;
 104
 105   /* Speed up in case this is not a MIME-encoded header */
 106   p = strstr(hdr, MIMEHDR_INIT);
 107   if (p == NULL)
 108     return;   /* No MIME header */
 109
 110   /* Loop through the buffer.
 111    *  p_in : Next char to be processed.
 112    *  p_out: Where to put the next processed char
 113    *  enc  : Encoding used (usually, 'q' = quoted-printable)
 114    */
 115   for (p_out = p_in = hdr; (*p_in); ) {
 116     switch (state) {
 117     case S_COPY_PLAIN:
 118       p = strstr(p_in, MIMEHDR_INIT);
 119       if (p == NULL) {
 120         /*
 121          * No more coded data in buffer,
 122          * just move remainder into place.
 123          */
 124         i = strlen(p_in);   /* How much left */
 125         memmove(p_out, p_in, i);
 126         p_in += i; p_out += i;
 127       }
 128       else {
 129         /* MIME header init found at location p */
 130         if (p > p_in) {
 131           /* There are some uncoded chars at the beginning. */
 132           i = (p - p_in);
 133           memmove(p_out, p_in, i);
 134           p_out += i;
 135         }
 136         p_in = (p + 2);
 137         state = S_SKIP_MIMEINIT;
 138       }
 139       break;
 140
 141     case S_SKIP_MIMEINIT:
 142       /* Mime type definition: "charset?encoding?" */
 143       p = strchr(p_in, '?');
 144       if (p != NULL) {
 145         /* p_in .. (p-1) holds the charset */
 146
 147         /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */
 148         if (*(p+2) == '?') {
 149           enc = tolower(*(p+1));
 150           p_in = p+3;
 151           state = S_COPY_MIME;
 152         }
 153         else
 154           state = S_COPY_PLAIN;
 155       }
 156       else
 157         state = S_COPY_PLAIN;   /* Invalid data */
 158       break;
 159
 160     case S_COPY_MIME:
 161       p = strstr(p_in, MIMEHDR_END);  /* Find end of coded data */
 162       if (p == NULL) p = p_in + strlen(p_in);
 163       for (; (p_in < p); ) {
 164         /* Decode all encoded data */
 165         if (enc == 'q') {
 166           if (*p_in == '=') {
 167             /* Decode one char qp-coded at (p_in+1) and (p_in+2) */
 168             if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)
 169               p_in += 3;
 170             else {
 171               /* Invalid QP data - pass through unchanged. */
 172               *p_out = *p_in;
 173               p_in++;
 174             }
 175           }
 176           else if (*p_in == '_') {
 177             /*
 178              * RFC 2047: '_' inside encoded word represents 0x20.
 179              * NOT a space - always the value 0x20.
 180              */
 181             *p_out = 0x20;
 182             p_in++;
 183           }
 184           else {
 185             /* Copy unchanged */
 186             *p_out = *p_in;
 187             p_in++;
 188           }
 189           p_out++;
 190         }
 191         else if (enc == 'b') {
 192           /* Decode base64 encoded data */
 193           char delimsave;
 194           int decoded_count;
 195
 196           delimsave = *p; *p = '\r';
 197           decoded_count = from64tobits(p_out, p_in);
 198           *p = delimsave;
 199           if (decoded_count > 0)
 200             p_out += decoded_count;
 201           p_in = p;
 202         }
 203         else {
 204           /* Copy unchanged */
 205           *p_out = *p_in;
 206           p_in++;
 207           p_out++;
 208         }
 209       }
 210       if (*p_in)
 211         p_in += 2;   /* Skip the MIMEHDR_END delimiter */
 212
 213       /*
 214        * We've completed decoding one encoded sequence. But another
 215        * may follow immediately, in which case whitespace before the
 216        * new MIMEHDR_INIT delimiter must be discarded.
 217        * See if that is the case
 218        */
 219       p = strstr(p_in, MIMEHDR_INIT);
 220       state = S_COPY_PLAIN;
 221       if (p != NULL) {
 222         /*
 223          * There is more MIME data later on. Is there
 224          * whitespace  only before the delimiter?
 225          */
 226         unsigned char *q;
 227         int  wsp_only = 1;
 228
 229         for (q=p_in; (wsp_only && (q < p)); q++)
 230           wsp_only = isspace(*q);
 231
 232         if (wsp_only) {
 233           /*
 234            * Whitespace-only before the MIME delimiter. OK,
 235            * just advance p_in to past the new MIMEHDR_INIT,
 236            * and prepare to process the new MIME charset/encoding
 237            * header.
 238            */
 239           p_in = p + strlen(MIMEHDR_INIT);
 240           state = S_SKIP_MIMEINIT;
 241         }
 242       }
 243       break;
 244     }
 245   }
 246
 247   *p_out = '\0';
 248 }
 249 #else /* HAVE_LIBJCONV */
 250 void UnMimeHeaderConv(unsigned char *hdr, unsigned char *conv_r, int conv_len)
 251 {
 252   int  state = S_COPY_PLAIN;
 253   unsigned char *p_in, *p_out, *p;
 254   unsigned char enc = '\0';             /* initialization pacifies -Wall */
 255   int  i;
 256   unsigned char *p_mimestart = NULL;
 257   iconv_t cd = (iconv_t)-1;
 258
 259   if (conv_r && conv_len < 0) {
 260     conv_len = 0;
 261     conv_r = NULL;
 262   }
 263   else
 264     conv_len--;         /* reserve for terminating NULL character */
 265
 266   /* Speed up in case this is not a MIME-encoded header */
 267   p = strstr(hdr, MIMEHDR_INIT);
 268   if (p == NULL) {
 269     /* No MIME header */
 270     const char *const *codesets;
 271     int n_codesets, actual_codeset, r;
 272     char *newstr;
 273     size_t newlen = 0;
 274
 275     if (prefs_common.force_charset) {
 276       codesets = (const char **)&prefs_common.force_charset;
 277       n_codesets = 1;
 278     }
 279     else
 280       codesets = jconv_info_get_pref_codesets(&n_codesets);
 281
 282     r = jconv_alloc_conv(hdr, strlen(hdr), &newstr, &newlen,
 283                          codesets, n_codesets, &actual_codeset,
 284                          jconv_info_get_current_codeset());
 285     if (r == 0) {
 286       if (newlen > conv_len) newlen = conv_len;
 287       strncpy(conv_r, newstr, newlen);
 288       conv_r[newlen] = '\0';
 289       free(newstr);
 290     }
 291     else {
 292       strncpy(conv_r, hdr, conv_len);
 293       conv_r[conv_len] = '\0';
 294     }
 295
 296     return;
 297   }
 298
 299   /* Loop through the buffer.
 300    *  p_in : Next char to be processed.
 301    *  p_out: Where to put the next processed char
 302    *  enc  : Encoding used (usually, 'q' = quoted-printable)
 303    */
 304   for (p_out = p_in = hdr; (*p_in); ) {
 305     switch (state) {
 306     case S_COPY_PLAIN:
 307       p = strstr(p_in, MIMEHDR_INIT);
 308       if (p == NULL) {
 309         /*
 310          * No more coded data in buffer,
 311          * just move remainder into place.
 312          */
 313         i = strlen(p_in);   /* How much left */
 314         if (conv_r) {
 315           int len;
 316
 317           len = conv_len > i ? i : conv_len;
 318           memcpy(conv_r, p_in, len);
 319           conv_r += len;
 320           conv_len -= len;
 321         }
 322         memmove(p_out, p_in, i);
 323         p_in += i; p_out += i;
 324       }
 325       else {
 326         /* MIME header init found at location p */
 327         if (p > p_in) {
 328           /* There are some uncoded chars at the beginning. */
 329           i = (p - p_in);
 330           if (conv_r) {
 331             int len;
 332
 333             len = conv_len > i ? i : conv_len;
 334             memcpy(conv_r, p_in, len);
 335             conv_r += len;
 336             conv_len -= len;
 337           }
 338           memmove(p_out, p_in, i);
 339           p_out += i;
 340         }
 341         p_in = (p + 2);
 342         state = S_SKIP_MIMEINIT;
 343       }
 344       break;
 345
 346     case S_SKIP_MIMEINIT:
 347       /* Mime type definition: "charset?encoding?" */
 348       p = strchr(p_in, '?');
 349       if (p != NULL) {
 350         /* p_in .. (p-1) holds the charset */
 351         char *charset;
 352
 353         charset = malloc(p - p_in + 1);
 354         memcpy(charset, p_in, p - p_in);
 355         charset[p - p_in] = '\0';
 356         if (cd != (iconv_t)-1) iconv_close(cd);
 357         cd = iconv_open(jconv_info_get_current_codeset(), charset);
 358         free(charset);
 359
 360         /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */
 361         if (*(p+2) == '?') {
 362           enc = tolower(*(p+1));
 363           p_in = p+3;
 364           state = S_COPY_MIME;
 365         }
 366         else
 367           state = S_COPY_PLAIN;
 368       }
 369       else
 370         state = S_COPY_PLAIN;   /* Invalid data */
 371       break;
 372
 373     case S_COPY_MIME:
 374       p_mimestart = p_out;
 375       p = strstr(p_in, MIMEHDR_END);  /* Find end of coded data */
 376       if (p == NULL) p = p_in + strlen(p_in);
 377       for (; (p_in < p); ) {
 378         /* Decode all encoded data */
 379         if (enc == 'q') {
 380           if (*p_in == '=') {
 381             /* Decode one char qp-coded at (p_in+1) and (p_in+2) */
 382             if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)
 383               p_in += 3;
 384             else {
 385               /* Invalid QP data - pass through unchanged. */
 386               *p_out = *p_in;
 387               p_in++;
 388             }
 389           }
 390           else if (*p_in == '_') {
 391             /*
 392              * RFC 2047: '_' inside encoded word represents 0x20.
 393              * NOT a space - always the value 0x20.
 394              */
 395             *p_out = 0x20;
 396             p_in++;
 397           }
 398           else {
 399             /* Copy unchanged */
 400             *p_out = *p_in;
 401             p_in++;
 402           }
 403           p_out++;
 404         }
 405         else if (enc == 'b') {
 406           /* Decode base64 encoded data */
 407           char delimsave;
 408           int decoded_count;
 409
 410           delimsave = *p; *p = '\r';
 411           decoded_count = from64tobits(p_out, p_in);
 412           *p = delimsave;
 413           if (decoded_count > 0)
 414             p_out += decoded_count;
 415           p_in = p;
 416         }
 417         else {
 418           /* Copy unchanged */
 419           *p_out = *p_in;
 420           p_in++;
 421           p_out++;
 422         }
 423       }
 424       if (conv_r && cd != (iconv_t)-1) {
 425         size_t inleft;
 426
 427         inleft = p_out - p_mimestart;
 428         iconv(cd, (char **)&p_mimestart, &inleft, (char **)&conv_r, &conv_len);
 429       }
 430       if (*p_in)
 431         p_in += 2;   /* Skip the MIMEHDR_END delimiter */
 432
 433       /*
 434        * We've completed decoding one encoded sequence. But another
 435        * may follow immediately, in which case whitespace before the
 436        * new MIMEHDR_INIT delimiter must be discarded.
 437        * See if that is the case
 438        */
 439       p = strstr(p_in, MIMEHDR_INIT);
 440       state = S_COPY_PLAIN;
 441       if (p != NULL) {
 442         /*
 443          * There is more MIME data later on. Is there
 444          * whitespace  only before the delimiter?
 445          */
 446         unsigned char *q;
 447         int  wsp_only = 1;
 448
 449         for (q=p_in; (wsp_only && (q < p)); q++)
 450           wsp_only = isspace(*q);
 451
 452         if (wsp_only) {
 453           /*
 454            * Whitespace-only before the MIME delimiter. OK,
 455            * just advance p_in to past the new MIMEHDR_INIT,
 456            * and prepare to process the new MIME charset/encoding
 457            * header.
 458            */
 459           p_in = p + strlen(MIMEHDR_INIT);
 460           state = S_SKIP_MIMEINIT;
 461         }
 462       }
 463       break;
 464     }
 465   }
 466
 467   *p_out = '\0';
 468   if (conv_r)
 469     *conv_r = '\0';
 470   if (cd != (iconv_t)-1) iconv_close(cd);
 471 }
 472
 473 #endif /* !HAVE_LIBJCONV */
 474
 475
/*
 * Routines for decoding body-parts of a message.
 *
 * Since the "fetch" part of fetchmail gets a message body
 * one line at a time, we need to maintain some state variables
 * across multiple invocations of the UnMimeBodyline() routine.
 * The driver routine should call MimeBodyType() when all
 * headers have been received, and then UnMimeBodyline() for
 * every line in the message body.
 *
 */
 487 #define S_BODY_DATA 0
 488 #define S_BODY_HDR  1
 489
 490 /*
 491  * Flag indicating if we are currently processing
 492  * the headers or the body of a (multipart) message.
 493  */
 494 static int  BodyState = S_BODY_DATA;
 495
 496 /*
 497  * Flag indicating if we are in the process of decoding
 498  * a quoted-printable body part.
 499  */
 500 static int  CurrEncodingIsQP = 0;
 501 static int  CurrTypeNeedsDecode = 0;
 502
 503 /*
 504  * Delimiter for multipart messages. RFC 2046 states that this must
 505  * NEVER be longer than 70 characters. Add 3 for the two hyphens
 506  * at the beginning, and a terminating null.
 507  */
 508 #define MAX_DELIM_LEN 70
 509 static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];
 510
 511
 512 /* This string replaces the "Content-Transfer-Encoding: quoted-printable"
 513  * string in all headers, including those in body-parts. The replacement
 514  * must be no longer than the original string.
 515  */
 516 static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";
 517 static void SetEncoding8bit(unsigned char *XferEncOfs)
 518 {
 519   unsigned char *p;
 520
 521   if (XferEncOfs != NULL) {
 522      memcpy(XferEncOfs, ENC8BIT, strlen(ENC8BIT));
 523
 524      /* If anything left, in this header, replace with whitespace */
 525      for (p=XferEncOfs+strlen(ENC8BIT); (*p >= ' '); p++) *p=' ';
 526   }
 527 }
 528
 529 static char *GetBoundary(char *CntType)
 530 {
 531   char *p1, *p2;
 532   int flag;
 533
 534   /* Find the "boundary" delimiter. It must be preceded with a ';'
 535    * and optionally some whitespace.
 536    */
 537   p1 = CntType;
 538   do {
 539     p2 = strchr(p1, ';');
 540     if (p2)
 541       for (p2++; isspace(*p2); p2++);
 542
 543     p1 = p2;
 544   } while ((p1) && (strncasecmp(p1, "boundary", 8) != 0));
 545
 546   if (p1 == NULL)
 547     /* No boundary delimiter */
 548     return NULL;
 549
 550   /* Skip "boundary", whitespace and '='; check that we do have a '=' */
 551   for (p1+=8, flag=0; (isspace(*p1) || (*p1 == '=')); p1++)
 552     flag |= (*p1 == '=');
 553   if (!flag)
 554     return NULL;
 555
 556   /* Find end of boundary delimiter string */
 557   if (*p1 == '\"') {
 558     /* The delimiter is inside quotes */
 559     p1++;
 560     p2 = strchr(p1, '\"');
 561     if (p2 == NULL)
 562       return NULL;  /* No closing '"' !?! */
 563   }
 564   else {
 565     /* There might be more text after the "boundary" string. */
 566     p2 = strchr(p1, ';');  /* Safe - delimiter with ';' must be in quotes */
 567   }
 568
 569   /* Zero-terminate the boundary string */
 570   if (p2 != NULL)
 571     *p2 = '\0';
 572
 573   return (p1 && strlen(p1)) ? p1 : NULL;
 574 }
 575
 576
 577 int CheckContentType(char *CntType)
 578 {
 579   /*
 580    * Static array of Content-Type's for which we will do
 581    * quoted-printable decoding, if requested.
 582    * It is probably wise to do this only on known text-only types;
 583    * be really careful if you change this.
 584    */
 585
 586   static char *DecodedTypes[] = {
 587     "text/",        /* Will match ALL content-type's starting with 'text/' */
 588     "message/rfc822",
 589     NULL
 590   };
 591
 592   char *p = CntType;
 593   int i;
 594
 595   /* If no Content-Type header, it isn't MIME - don't touch it */
 596   if (CntType == NULL) return 0;
 597
 598   /* Skip whitespace, if any */
 599   for (; isspace(*p); p++) ;
 600
 601   for (i=0;
 602        (DecodedTypes[i] &&
 603         (strncasecmp(p, DecodedTypes[i], strlen(DecodedTypes[i]))));
 604        i++) ;
 605
 606   return (DecodedTypes[i] != NULL);
 607 }
 608
 609
 610 /*
 611  * This routine does three things:
 612  * 1) It determines - based on the message headers - whether the
 613  *    message body is a MIME message that may hold 8 bit data.
 614  *    - A message that has a "quoted-printable" or "8bit" transfer
 615  *      encoding is assumed to contain 8-bit data (when decoded).
 616  *    - A multipart message is assumed to contain 8-bit data
 617  *      when decoded (there might be quoted-printable body-parts).
 618  *    - All other messages are assumed NOT to include 8-bit data.
 619  * 2) It determines the delimiter-string used in multi-part message
 620  *    bodies.
 621  * 3) It sets the initial values of the CurrEncodingIsQP,
 622  *    CurrTypeNeedsDecode, and BodyState variables, from the header
 623  *    contents.
 624  *
 625  * The return value is a bitmask.
 626  */
 627 int MimeBodyType(unsigned char *hdrs, int WantDecode)
 628 {
 629   unsigned char *NxtHdr = hdrs;
 630   unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
 631   int  HdrsFound = 0;     /* We only look for three headers */
 632   int  BodyType;          /* Return value */
 633
 634   /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
 635   MultipartDelimiter[0] = '\0';
 636   CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
 637   BodyState = S_BODY_DATA;
 638   BodyType = 0;
 639
 640   /* Just in case ... */
 641   if (hdrs == NULL)
 642     return BodyType;
 643
 644   XferEnc = XferEncOfs = CntType = MimeVer = NULL;
 645
 646   do {
 647     if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
 648       XferEncOfs = NxtHdr;
 649       p = nxtaddr(NxtHdr);
 650       if (p != NULL) {
 651         xalloca(XferEnc, char *, strlen(p) + 1);
 652         strcpy(XferEnc, p);
 653         HdrsFound++;
 654       }
 655     }
 656     else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
 657       /*
 658        * This one is difficult. We cannot use the standard
 659        * nxtaddr() routine, since the boundary-delimiter is
 660        * (probably) enclosed in quotes - and thus appears
 661        * as an rfc822 comment, and nxtaddr() "eats" up any
 662        * spaces in the delimiter. So, we have to do this
 663        * by hand.
 664        */
 665
 666       /* Skip the "Content-Type:" part and whitespace after it */
 667       for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++);
 668
 669       /*
 670        * Get the full value of the Content-Type header;
 671        * it might span multiple lines. So search for
 672        * a newline char, but ignore those that have a
 673        * have a TAB or space just after the NL (continued
 674        * lines).
 675        */
 676       p = NxtHdr-1;
 677       do {
 678         p=strchr((p+1),'\n');
 679       } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) );
 680       if (p == NULL) p = NxtHdr + strlen(NxtHdr);
 681
 682       xalloca(CntType, char *, p-NxtHdr+2);
 683       strncpy(CntType, NxtHdr, (p-NxtHdr));
 684       *(CntType+(p-NxtHdr)) = '\0';
 685       HdrsFound++;
 686     }
 687     else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
 688       p = nxtaddr(NxtHdr);
 689       if (p != NULL) {
 690         xalloca(MimeVer, char *, strlen(p) + 1);
 691         strcpy(MimeVer, p);
 692         HdrsFound++;
 693       }
 694     }
 695
 696     NxtHdr = (strchr(NxtHdr, '\n'));
 697     if (NxtHdr != NULL) NxtHdr++;
 698   } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));
 699
 700
 701   /* Done looking through the headers, now check what they say */
 702   if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) {
 703
 704     CurrTypeNeedsDecode = CheckContentType(CntType);
 705
 706     /* Check Content-Type to see if this is a multipart message */
 707     if ( (CntType != NULL) &&
 708          ((strncasecmp(CntType, "multipart/mixed", 16) == 0) ||
 709           (strncasecmp(CntType, "message/", 8) == 0)) ) {
 710
 711       char *p1 = GetBoundary(CntType);
 712
 713       if (p1 != NULL) {
 714         /* The actual delimiter is "--" followed by
 715            the boundary string */
 716         strcpy(MultipartDelimiter, "--");
 717         strncat(MultipartDelimiter, p1, MAX_DELIM_LEN);
 718         BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
 719       }
 720     }
 721
 722     /*
 723      * Check Content-Transfer-Encoding, but
 724      * ONLY for non-multipart messages (BodyType == 0).
 725      */
 726     if ((XferEnc != NULL) && (BodyType == 0)) {
 727       if (strcasecmp(XferEnc, "quoted-printable") == 0) {
 728         CurrEncodingIsQP = 1;
 729         BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
 730         if (WantDecode && CurrTypeNeedsDecode) {
 731            SetEncoding8bit(XferEncOfs);
 732         }
 733       }
 734       else if (strcasecmp(XferEnc, "7bit") == 0) {
 735         CurrEncodingIsQP = 0;
 736         BodyType = (MSG_IS_7BIT);
 737       }
 738       else if (strcasecmp(XferEnc, "8bit") == 0) {
 739         CurrEncodingIsQP = 0;
 740         BodyType = (MSG_IS_8BIT);
 741       }
 742     }
 743
 744   }
 745
 746   return BodyType;
 747 }
 748
 749
 750 /*
 751  * Decode one line of data containing QP data.
 752  * Return flag set if this line ends with a soft line-break.
 753  * 'bufp' is modified to point to the end of the output buffer.
 754  */
 755 int DoOneQPLine(unsigned char **bufp, flag delimited, flag issoftline)
 756 {
 757   unsigned char *buf = *bufp;
 758   unsigned char *p_in, *p_out, *p;
 759   int n;
 760   int ret = 0;
 761
 762   /*
 763    * Special case: line consists of a single =2E and messages are
 764    * dot-terminated.  Line has to be dot-stuffed after decoding.
 765    */
 766   if (delimited && !issoftline && buf[0]=='=' && !strncmp(*bufp, "=2E\n", 4))
 767   {
 768       strcpy(buf, "..\n");
 769       *bufp += 4;
 770       return(FALSE);
 771   }
 772
 773   p_in = buf;
 774   if (delimited && issoftline && (strncmp(buf, "..", 2) == 0))
 775     p_in++;
 776
 777   for (p_out = buf; (*p_in); ) {
 778     p = strchr(p_in, '=');
 779     if (p == NULL) {
 780       /* No more QP data, just move remainder into place */
 781       n = strlen(p_in);
 782       memmove(p_out, p_in, n);
 783       p_in += n; p_out += n;
 784     }
 785     else {
 786       if (p > p_in) {
 787         /* There are some uncoded chars at the beginning. */
 788         n = (p - p_in);
 789         memmove(p_out, p_in, n);
 790         p_out += n;
 791       }
 792
 793       switch (*(p+1)) {
 794       case '\0': case '\r': case '\n':
 795         /* Soft line break, skip '=' */
 796         p_in = p+1;
 797         if (*p_in == '\r') p_in++;
 798         if (*p_in == '\n') p_in++;
 799         ret = 1;
 800         break;
 801
 802       default:
 803         /* There is a QP encoded byte */
 804         if (qp_char(*(p+1), *(p+2), p_out) == 0) {
 805           p_in = p+3;
 806         }
 807         else {
 808           /* Invalid QP data - pass through unchanged. */
 809           *p_out = '=';
 810           p_in = p+1;
 811         }
 812         p_out++;
 813         break;
 814       }
 815     }
 816   }
 817
 818   *p_out = '\0';
 819   *bufp = p_out;
 820   return ret;
 821 }
 822
 823
/* This is called once per line in the message body.  We need to scan
 * all lines in the message body for the multipart delimiter string,
 * and handle any body-part headers in such messages (these can toggle
 * qp-decoding on and off).
 *
 * Note: Messages that are NOT multipart-messages go through this
 * routine quickly, since BodyState will always be S_BODY_DATA,
 * and MultipartDelimiter is an empty string.
 *
 * Return flag set if this line ends with a soft line-break.
 * 'bufp' is modified to point to the end of the output buffer.
 */
 835  */
 836
 837 #if 0
 838 int UnMimeBodyline(unsigned char **bufp, flag delimited, flag softline)
 839 {
 840   unsigned char *buf = *bufp;
 841   int ret = 0;
 842
 843   switch (BodyState) {
 844   case S_BODY_HDR:
 845     UnMimeHeader(buf);   /* Headers in body-parts can be encoded, too! */
 846     if ((*buf == '\0') || (*buf == '\n') || (strcmp(buf, "\r\n") == 0)) {
 847       BodyState = S_BODY_DATA;
 848     }
 849     else if (strncasecmp("Content-Transfer-Encoding:", buf, 26) == 0) {
 850       char *XferEnc;
 851
 852       XferEnc = nxtaddr(buf);
 853       if ((XferEnc != NULL) && (strcasecmp(XferEnc, "quoted-printable") == 0)) {
 854         CurrEncodingIsQP = 1;
 855
 856         /*
 857          * Hmm ... we cannot be really sure that CurrTypeNeedsDecode
 858          * has been set - we may not have seen the Content-Type header
 859          * yet. But *usually* the Content-Type header comes first, so
 860          * this will work. And there is really no way of doing it
 861          * "right" as long as we stick with the line-by-line processing.
 862          */
 863         if (CurrTypeNeedsDecode)
 864             SetEncoding8bit(buf);
 865       }
 866     }
 867     else if (strncasecmp("Content-Type:", buf, 13) == 0) {
 868       CurrTypeNeedsDecode = CheckContentType(nxtaddr(buf));
 869     }
 870
 871     *bufp = (buf + strlen(buf));
 872     break;
 873
 874   case S_BODY_DATA:
 875     if ((*MultipartDelimiter) &&
 876         (strncmp(buf, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
 877       BodyState = S_BODY_HDR;
 878       CurrEncodingIsQP = CurrTypeNeedsDecode = 0;
 879     }
 880
 881     if (CurrEncodingIsQP && CurrTypeNeedsDecode)
 882       ret = DoOneQPLine(bufp, delimited, softline);
 883     else
 884      *bufp = (buf + strlen(buf));
 885     break;
 886   }
 887
 888   return ret;
 889 }
 890 #endif /* 0 */
 891
 892
 893 #ifdef STANDALONE
 894 #include <stdio.h>
 895 #include <unistd.h>
 896
 897 char *program_name = "unmime";
 898 int outlevel = 0;
 899
 900 #define BUFSIZE_INCREMENT 4096
 901
 902 #ifdef DEBUG
 903 #define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
 904 #else
 905 #define DBG_FWRITE(B,L,BS,FD)
 906 #endif
 907
 908 int main(int argc, char *argv[])
 909 {
 910   unsigned int BufSize;
 911   unsigned char *buffer, *buf_p;
 912   int nl_count, i, bodytype;
 913
 914 #ifdef DEBUG
 915   pid_t pid;
 916   FILE *fd_orig, *fd_conv;
 917   char fnam[100];
 918
 919   pid = getpid();
 920   sprintf(fnam, "/tmp/i_unmime.%x", pid);
 921   fd_orig = fopen(fnam, "w");
 922   sprintf(fnam, "/tmp/o_unmime.%x", pid);
 923   fd_conv = fopen(fnam, "w");
 924 #endif
 925
 926   BufSize = BUFSIZE_INCREMENT;    /* Initial size of buffer */
 927   buf_p = buffer = (unsigned char *) xmalloc(BufSize);
 928   nl_count = 0;
 929
 930   do {
 931     i = fread(buf_p, 1, 1, stdin);
 932     switch (*buf_p) {
 933      case '\n':
 934        nl_count++;
 935        break;
 936
 937      case '\r':
 938        break;
 939
 940      default:
 941        nl_count = 0;
 942        break;
 943     }
 944
 945     buf_p++;
 946     if ((buf_p - buffer) == BufSize) {
 947        /* Buffer is full! Get more room. */
 948        buffer = xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
 949        buf_p = buffer + BufSize;
 950        BufSize += BUFSIZE_INCREMENT;
 951     }
 952   } while ((i > 0) && (nl_count < 2));
 953
 954   *buf_p = '\0';
 955   DBG_FWRITE(buffer, strlen(buffer), 1, fd_orig);
 956
 957   UnMimeHeader(buffer);
 958   bodytype = MimeBodyType(buffer, 1);
 959
 960   i = strlen(buffer);
 961   fwrite(buffer, i, 1, stdout);
 962   DBG_FWRITE(buffer, i, 1, fd_conv);
 963
 964   do {
 965      buf_p = (buffer - 1);
 966      do {
 967         buf_p++;
 968         i = fread(buf_p, 1, 1, stdin);
 969      } while ((i == 1) && (*buf_p != '\n'));
 970      if (i == 1) buf_p++;
 971      *buf_p = '\0';
 972      DBG_FWRITE(buf, (buf_p - buffer), 1, fd_orig);
 973
 974      if (buf_p > buffer) {
 975         if (bodytype & MSG_NEEDS_DECODE) {
 976            buf_p = buffer;
 977            UnMimeBodyline(&buf_p, 0);
 978         }
 979         fwrite(buffer, (buf_p - buffer), 1, stdout);
 980         DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_conv);
 981      }
 982   } while (buf_p > buffer);
 983
 984   free(buffer);
 985   fflush(stdout);
 986
 987 #ifdef DEBUG
 988   fclose(fd_orig);
 989   fclose(fd_conv);
 990 #endif
 991
 992   return 0;
 993 }
 994 #endif
 995