| | 74 | #include <windows.h> |
| | 75 | #include <winnls.h> |
| | 76 | |
| | 77 | /** |
| | 78 | * This is a minimalistic "iconv" for VC6. |
| | 79 | * This version simply maps into WIN32 native calls and is pretty limited |
| | 80 | * but likely to be sufficient within NEWT/0. |
| | 81 | * |
| | 82 | * Requested conversions in NEWT/0 as of 03/24/2007 are: |
| | 83 | * UTF-16BE to current code page and back |
| | 84 | * MACROMAN to current code page and back |
| | 85 | * |
| | 86 | * So we define the iconv_t bits ...00ss00dd where ss is the source format and |
| | 87 | * dd is the destination format with: |
| | 88 | * 00 = current code page |
| | 89 | * 01 = Mac Roman |
| | 90 | * 10 = UTF-16BE, Newton Format |
| | 91 | * 11 = UTF-16LE, MS Windows format |
| | 92 | * iconv_t is -1 on error or unsupported conversion |
| | 93 | * |
| | 94 | * \todo move this into the VC6 directories |
| | 95 | */ |
| | 96 | iconv_t iconv_open(const char *tocode, const char *fromcode) |
| | 97 | { |
| | 98 | iconv_t mode = 0; |
| | 99 | // avoid a crash if the user does not privide encodings |
| | 100 | if (!tocode || !fromcode) |
| | 101 | return -1; |
| | 102 | |
| | 103 | // determine the source text format |
| | 104 | // if we can't identify the string, we assume the current codepage |
| | 105 | if (strcmp(fromcode, "MACROMAN")==0) |
| | 106 | mode |= 0x10; |
| | 107 | else if (strcmp(fromcode, "UTF-16BE")==0) |
| | 108 | mode |= 0x20; |
| | 109 | else if (strcmp(fromcode, "UTF-16LE")==0) |
| | 110 | mode |= 0x30; |
| | 111 | |
| | 112 | // determine the destination text format of the text source |
| | 113 | if (strcmp(tocode, "MACROMAN")==0) |
| | 114 | mode |= 0x01; |
| | 115 | else if (strcmp(tocode, "UTF-16BE")==0) |
| | 116 | mode |= 0x02; |
| | 117 | else if (strcmp(tocode, "UTF-16LE")==0) |
| | 118 | mode |= 0x03; |
| | 119 | |
| | 120 | return mode; |
| | 121 | } |
| 76 | | * This is a minimalistic "iconv" version which simply maps into WIN32 |
| 77 | | * native calls (not even at this point!). |
| 78 | | * |
| 79 | | * Requested conversions are: |
| 80 | | * UTF-16BE to UTF-8 and back |
| 81 | | * MACROMAN to UTF-8 and back |
| 82 | | * where UTF-8 is defined at runtime and could be something else... |
| 83 | | */ |
| 84 | | iconv_t iconv_open(const char *tocode, const char *fromcode) |
| 85 | | { |
| 86 | | iconv_t mode = 0; |
| 87 | | if (!tocode || !fromcode) |
| 88 | | return mode; |
| 89 | | if (strcmp(fromcode, "UTF-16BE")==0) |
| 90 | | mode |= 1; |
| 91 | | if (strcmp(tocode, "UTF-16BE")==0) |
| 92 | | mode |= 2; |
| 93 | | //printf("Requested iconv form '%s' to '%s' (our code: %d)\n", fromcode, tocode, mode); |
| 94 | | return mode; |
| 95 | | } |
| 96 | | |
| | 124 | * Flip the endianness of a 16 bit per char string |
| | 125 | */ |
static void ic_flip_endian(void *dst, const void *src, int n) {
    // n counts 16 bit units, so 2*n bytes are read from src and written to dst
    const unsigned char *in = (const unsigned char *)src;
    unsigned char *out = (unsigned char *)dst;
    int i;

    for (i = 0; i < n; i++) {
        // fetch both bytes of the unit before storing anything, so the swap
        // also works when dst and src are the same buffer
        unsigned char first = in[2*i];
        unsigned char second = in[2*i + 1];
        out[2*i] = second;
        out[2*i + 1] = first;
    }
}
| | 135 | |
| | 136 | /** |
| | 137 | * Convert a string of text from one encoding to another. |
| | 138 | * |
| | 139 | * \param cd[in] conversion descriptor, see iconv_open() |
| | 140 | * \param inbuf[inout] source buffer, will be incremented for each converted character; this |
| | 141 | * value may be incorrect if the output buffer is too small. |
| | 142 | * \param inbytesleft[inout] number of bytes in inbuffer, will be decremented for each conversion |
| | 143 | * \param outbuf[inout] destination buffer, will be incremented |
| | 144 | * \param outbytesleft[inout] number of bytes still free in buffer, will be decremented |
| | 145 | * \return number of characters converted that can not be converted back (not implemented); |
| | 146 | * we return 0 for a complete conversion, and -1 for any error |
| | 147 | */ |
| 99 | | int i, n; |
| 100 | | const char *s; |
| 101 | | char *d; |
| 102 | | |
| 103 | | if (!inbuf || !*inbuf || !outbuf || !*outbuf) |
| | 150 | // reusable buffer for temporary conversion |
| | 151 | static unsigned short *wbuf = 0L; |
| | 152 | static int NWbuf = 0; |
| | 153 | # define MAKE_ROOM(n) if (NWbuf<(n)) { NWbuf=(n)+32; wbuf = realloc(wbuf, NWbuf); } |
| | 154 | |
| | 155 | // addresses required by WIN32 calls |
| | 156 | static char *dflt = "."; |
| | 157 | BOOL dfltUsed; |
| | 158 | |
| | 159 | // some variables |
| | 160 | const char *src; |
| | 161 | char *dst; |
| | 162 | unsigned short *tmp; |
| | 163 | int sn, dn, tn; |
| | 164 | size_t ret = 0; |
| | 165 | |
| | 166 | // handle the special cases first |
| | 167 | if (inbuf==0L || *inbuf==0L) { |
| | 168 | if (outbuf==0L || *outbuf==0L) { |
| | 169 | // special case: initialize converter |
| | 170 | // (nothing to do here) |
| | 171 | return 0; |
| | 172 | } else { |
| | 173 | // special case: write a format indicator |
| | 174 | // (not implemented) |
| | 175 | return 0; |
| | 176 | } |
| | 177 | } |
| | 178 | |
| | 179 | // catch faulty parameters |
| | 180 | if (!inbytesleft || !outbytesleft || outbuf==0L || *outbuf==0L) |
| | 181 | return -1; |
| | 182 | |
| | 183 | src = *inbuf; dst = *outbuf; sn = *inbytesleft; dn = *outbytesleft; |
| | 184 | if (sn==0) |
| 106 | | n = *inbytesleft; |
| 107 | | s = *inbuf; |
| 108 | | d = *outbuf; |
| 109 | | |
| 110 | | switch (cd) { |
| 111 | | case 0: |
| 112 | | case 3: |
| 113 | | memmove(*outbuf, *inbuf, n); |
| 114 | | *inbytesleft -= n; |
| 115 | | *inbuf += n; |
| 116 | | *outbytesleft -= n; |
| 117 | | *outbuf += n; |
| 118 | | break; |
| 119 | | case 1: // from 16bit to 8bit |
| 120 | | for (i=0; i<n; i++) { |
| 121 | | s++; |
| 122 | | *d++ = *s++; |
| 123 | | } |
| 124 | | *inbytesleft -= 2*n; |
| 125 | | *inbuf += 2*n; |
| 126 | | *outbytesleft -= n; |
| 127 | | *outbuf += n; |
| 128 | | break; |
| 129 | | case 2: // from 8bit to 16bit |
| 130 | | for (i=0; i<n; i++) { |
| 131 | | *d++ = 0; |
| 132 | | *d++ = *s++; |
| 133 | | } |
| 134 | | *inbytesleft -= n; |
| 135 | | *inbuf += n; |
| 136 | | *outbytesleft -= 2*n; |
| 137 | | *outbuf += 2*n; |
| 138 | | break; |
| 139 | | } |
| | 187 | // take care of all cases without any conversion |
| | 188 | if ( (cd&0x3)==((cd>>4)&0x3) ) { |
| | 189 | if (sn<=dn) { |
| | 190 | dn = sn; |
| | 191 | } else { |
| | 192 | sn = dn; |
| | 193 | ret = -1; |
| | 194 | } |
| | 195 | memcpy(dst, src, sn); |
| | 196 | goto fixup_return_values; |
| | 197 | } |
| | 198 | |
| | 199 | // now, the conversion on WIN32 is always a two-step process |
| | 200 | // because WIN32 can only convert to and from UTF-16LE |
| | 201 | |
| | 202 | // convert from old format to UTF-16LE |
| | 203 | switch (cd & 0x30) { |
| | 204 | case 0x00: // from local code page, WIN32 does that |
| | 205 | MAKE_ROOM(sn*2); |
| | 206 | tn = 2 * MultiByteToWideChar(CP_THREAD_ACP, MB_PRECOMPOSED, src, sn, wbuf, sn); |
| | 207 | tmp = wbuf; |
| | 208 | break; |
| | 209 | case 0x10: // from Mac Roman, WIN32 does that |
| | 210 | MAKE_ROOM(sn*2); |
| | 211 | tn = 2 * MultiByteToWideChar(CP_MACCP, MB_PRECOMPOSED, src, sn, wbuf, sn); |
| | 212 | tmp = wbuf; |
| | 213 | break; |
| | 214 | case 0x20: // from UTF-16BE, flip the byte order |
| | 215 | MAKE_ROOM(sn); |
| | 216 | ic_flip_endian(wbuf, src, sn/2); |
| | 217 | tmp = wbuf; tn = sn; |
| | 218 | break; |
| | 219 | case 0x30: // from UTF-16LE, make the source buffer the temp buffer |
| | 220 | tmp = (unsigned short*)src; tn = sn; |
| | 221 | break; |
| | 222 | } |
| | 223 | |
| | 224 | // convert from UTF-16LE to new format |
| | 225 | switch (cd & 0x03) { |
| | 226 | case 0x00: // to local code page |
| | 227 | dn = WideCharToMultiByte(CP_THREAD_ACP, 0, tmp, tn/2, dst, dn, dflt, &dfltUsed); |
| | 228 | if (dn==0) |
| | 229 | ret = -1; |
| | 230 | break; |
| | 231 | case 0x01: // to Mac Roman |
| | 232 | dn = WideCharToMultiByte(CP_MACCP, 0, tmp, tn/2, dst, dn, dflt, &dfltUsed); |
| | 233 | if (dn==0) |
| | 234 | ret = -1; |
| | 235 | break; |
| | 236 | case 0x02: // to UTF-16BE |
| | 237 | if (tn<=dn) { |
| | 238 | dn = tn; |
| | 239 | } else { |
| | 240 | tn = dn; |
| | 241 | ret = -1; |
| | 242 | } |
| | 243 | ic_flip_endian(dst, tmp, tn/2); |
| | 244 | break; |
| | 245 | case 0x03: // to UTF-16LE |
| | 246 | if (tn<=dn) { |
| | 247 | dn = tn; |
| | 248 | } else { |
| | 249 | tn = dn; |
| | 250 | ret = -1; |
| | 251 | } |
| | 252 | memcpy(dst, tmp, tn); |
| | 253 | break; |
| | 254 | } |
| | 255 | |
| | 256 | fixup_return_values: |
| | 257 | *inbuf += sn; *inbytesleft -= sn; |
| | 258 | *outbuf += dn; *outbytesleft -= dn; |
| | 259 | if (ret==-1) |
| | 260 | errno = 7; |
| | 261 | return ret; |
| | 262 | |
| | 263 | #undef MAKE_ROOM |
| | 264 | } |
| | 265 | |
| | 266 | int iconv_close(iconv_t type) |
| | 267 | { |