Changeset 88 for NEWT0/trunk

Show
Ignore:
Timestamp:
03/26/07 05:05:58 (20 months ago)
Author:
matthiasm
Message:

Fixed the Win32 "iconv" code from the ground up.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • NEWT0/trunk/src/newt_core/NewtIconv.c

    r74 r88  
    7272 
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <winnls.h>
     76 
     77/** 
     78 * This is a minimalistic "iconv" for VC6. 
     79 * This version simply maps into WIN32 native calls and is pretty limited 
     80 * but likely to be sufficient within NEWT/0. 
     81 *  
     82 * Requested conversions in NEWT/0 as of 03/24/2007 are: 
     83 *  UTF-16BE to current code page and back 
     84 *  MACROMAN to current code page and back 
     85 * 
     86 * So we define the iconv_t bits ...00ss00dd where ss is the source format and 
     87 * dd is the destination format with: 
     88 *   00 = current code page 
     89 *   01 = Mac Roman 
     90 *   10 = UTF-16BE, Newton Format 
     91 *   11 = UTF-16LE., MSWindows Format 
     92 * iconv_t is -1 on error or unsupported conversion 
     93 * 
     94 * \todo move this into the VC6 directories 
     95 */ 
     96iconv_t iconv_open(const char *tocode, const char *fromcode) 
     97{ 
     98  iconv_t mode = 0; 
     99  // avoid a crash if the user does not privide encodings 
     100  if (!tocode || !fromcode) 
     101    return -1; 
     102 
     103  // determine the source text format 
     104  // if we can't identify the string, we assume the current codepage 
     105  if (strcmp(fromcode, "MACROMAN")==0) 
     106    mode |= 0x10; 
     107  else if (strcmp(fromcode, "UTF-16BE")==0) 
     108    mode |= 0x20; 
     109  else if (strcmp(fromcode, "UTF-16LE")==0) 
     110    mode |= 0x30; 
     111 
     112  // determine the destination text format of the text source 
     113  if (strcmp(tocode, "MACROMAN")==0) 
     114    mode |= 0x01; 
     115  else if (strcmp(tocode, "UTF-16BE")==0) 
     116    mode |= 0x02; 
     117  else if (strcmp(tocode, "UTF-16LE")==0) 
     118    mode |= 0x03; 
     119 
     120  return mode; 
     121} 
    74122 
    75123/* 
    76  * This is a minimalistic "iconv" version which simply maps into WIN32 
    77  * native calls (not even at this point!).  
    78  *  
    79  * Requested conversions are: 
    80  *  UTF-16BE to UTF-8 and back 
    81  *  MACROMAN to UTF-8 and back 
    82  *  where UTF-8 is defined at runtime and could be something else... 
    83  */ 
    84 iconv_t iconv_open(const char *tocode, const char *fromcode) 
    85 { 
    86   iconv_t mode = 0; 
    87   if (!tocode || !fromcode) 
    88     return mode; 
    89   if (strcmp(fromcode, "UTF-16BE")==0) 
    90     mode |= 1; 
    91   if (strcmp(tocode, "UTF-16BE")==0) 
    92     mode |= 2; 
    93   //printf("Requested iconv form '%s' to '%s' (our code: %d)\n", fromcode, tocode, mode); 
    94   return mode; 
    95 } 
    96  
     124 * Flip the endianness of a 16 bit per char string 
     125 */ 
     126static void ic_flip_endian(void *dst, const void *src, int n) { 
     127  unsigned char *d = (unsigned char *)dst; 
     128  unsigned char *s = (unsigned char *)src; 
     129  for ( ; n>0; n--) { 
     130    unsigned char c = *s++; 
     131    *d++ = *s++; 
     132    *d++ = c; 
     133  } 
     134} 
     135 
     136/** 
     137 * Convert a string of text from one encoding to another. 
     138 * 
     139 * \param cd[in] conversion descriptor, see iconv_open() 
     140 * \param inbuf[inout] source buffer, will be incremented for each converted character; this 
     141 *        value may be incorrect if the output buffer is too small. 
     142 * \param inbytesleft[inout] number of bytes in inbuffer, will be decremented for each conversion 
     143 * \param outbuf[inout] destination buffer, will be incremented 
     144 * \param outbytesleft[inout] number of bytes still free in buffer, will be decremented 
     145 * \return number of characters converted that can not be converted back (not implemented); 
     146 *         we return 0 for a complete conversion, and -1 for any error 
     147 */ 
    97148size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) 
    98149{ 
    99   int i, n; 
    100   const char *s; 
    101   char *d; 
    102  
    103   if (!inbuf || !*inbuf || !outbuf || !*outbuf)  
     150  // reusable buffer for temporary conversion 
     151  static unsigned short *wbuf = 0L; 
     152  static int NWbuf = 0; 
     153# define MAKE_ROOM(n) if (NWbuf<(n)) { NWbuf=(n)+32; wbuf = realloc(wbuf, NWbuf); } 
     154 
     155  // addresses require by WIN32 calls 
     156  static char *dflt = "."; 
     157  BOOL dfltUsed; 
     158 
     159  // some variables 
     160  const char *src; 
     161  char *dst; 
     162  unsigned short *tmp; 
     163  int sn, dn, tn; 
     164  size_t ret = 0; 
     165 
     166  // handle the special cases first 
     167  if (inbuf==0L || *inbuf==0L) { 
     168    if (outbuf==0L || *outbuf==0L) { 
     169      // sepcial case: initialize converter 
     170      // (nothing to do here) 
     171      return 0; 
     172    } else { 
     173      // special case: write a format indicator 
     174      // (not implemented) 
     175      return 0; 
     176    } 
     177  } 
     178 
     179  // catch faulty parameters 
     180  if (!inbytesleft || !outbytesleft || outbuf==0L || *outbuf==0L) 
     181    return -1; 
     182 
     183  src = *inbuf; dst = *outbuf; sn = *inbytesleft; dn = *outbytesleft; 
     184  if (sn==0)  
    104185    return 0; 
    105186 
    106   n = *inbytesleft; 
    107   s = *inbuf; 
    108   d = *outbuf; 
    109  
    110   switch (cd) { 
    111   case 0: 
    112   case 3: 
    113     memmove(*outbuf, *inbuf, n); 
    114     *inbytesleft -= n; 
    115     *inbuf += n; 
    116     *outbytesleft -= n; 
    117     *outbuf += n; 
    118     break; 
    119   case 1: // from 16bit to 8bit 
    120     for (i=0; i<n; i++) { 
    121       s++; 
    122       *d++ = *s++; 
    123     } 
    124     *inbytesleft -= 2*n; 
    125     *inbuf += 2*n; 
    126     *outbytesleft -= n; 
    127     *outbuf += n; 
    128     break; 
    129   case 2: // from 8bit to 16bit 
    130     for (i=0; i<n; i++) { 
    131       *d++ = 0; 
    132       *d++ = *s++; 
    133     } 
    134     *inbytesleft -= n; 
    135     *inbuf += n; 
    136     *outbytesleft -= 2*n; 
    137     *outbuf += 2*n; 
    138     break; 
    139   } 
     187  // take care of all cases without any conversion 
     188  if ( (cd&0x3)==((cd>>4)&0x3) ) { 
     189    if (sn<=dn) { 
     190      dn = sn; 
     191    } else { 
     192      sn = dn; 
     193      ret = -1; 
     194    } 
     195    memcpy(dst, src, sn); 
     196    goto fixup_return_values; 
     197  } 
     198 
     199  // now, the conversion on WIN32 is always a two-step process 
     200  // because WIN32 can only convert to and from UTF-16LE 
     201 
     202  // convert from old format to UTF-16LE 
     203  switch (cd & 0x30) { 
     204  case 0x00: // from local code page, WIN32 does that 
     205    MAKE_ROOM(sn*2); 
     206    tn = 2 * MultiByteToWideChar(CP_THREAD_ACP, MB_PRECOMPOSED, src, sn, wbuf, sn); 
     207    tmp = wbuf; 
     208    break; 
     209  case 0x10: // from Mac Roman, WIN32 does that 
     210    MAKE_ROOM(sn*2); 
     211    tn = 2 * MultiByteToWideChar(CP_MACCP, MB_PRECOMPOSED, src, sn, wbuf, sn); 
     212    tmp = wbuf; 
     213    break; 
     214  case 0x20: // from UTF-16BE, flip the byte order 
     215    MAKE_ROOM(sn); 
     216    ic_flip_endian(wbuf, src, sn/2); 
     217    tmp = wbuf; tn = sn; 
     218    break; 
     219  case 0x30: // from UTF-16LE, make the source buffer the temp buffer 
     220    tmp = (unsigned short*)src; tn = sn; 
     221    break; 
     222  } 
     223 
     224  // convert from UTF-16LE to new format 
     225  switch (cd & 0x03) { 
     226  case 0x00: // to local code page 
     227    dn = WideCharToMultiByte(CP_THREAD_ACP, 0, tmp, tn/2, dst, dn, dflt, &dfltUsed); 
     228    if (dn==0)  
     229      ret = -1; 
     230    break; 
     231  case 0x01: // to Mac Roman 
     232    dn = WideCharToMultiByte(CP_MACCP, 0, tmp, tn/2, dst, dn, dflt, &dfltUsed); 
     233    if (dn==0)  
     234      ret = -1; 
     235    break; 
     236  case 0x02: // to UTF-16BE 
     237    if (tn<=dn) { 
     238      dn = tn; 
     239    } else { 
     240      tn = dn; 
     241      ret = -1; 
     242    } 
     243    ic_flip_endian(dst, tmp, tn/2); 
     244    break; 
     245  case 0x03: // to UTF-16LE 
     246    if (tn<=dn) { 
     247      dn = tn; 
     248    } else { 
     249      tn = dn; 
     250      ret = -1; 
     251    } 
     252    memcpy(dst, tmp, tn); 
     253    break; 
     254  } 
     255 
     256fixup_return_values: 
     257  *inbuf  += sn; *inbytesleft  -= sn; 
     258  *outbuf += dn; *outbytesleft -= dn; 
     259  if (ret==-1)  
     260    errno = 7; 
     261  return ret; 
     262 
     263#undef MAKE_ROOM 
     264} 
     265 
     266int iconv_close(iconv_t type) 
     267{ 
    140268  return 0; 
    141269} 
    142270 
    143 int iconv_close(iconv_t type) 
    144 { 
    145   return 0; 
    146 } 
    147  
    148271#endif 
    149272