root/NEWT0/trunk/src/newt_core/NewtIconv.c

Revision 88, 7.0 kB (checked in by matthiasm, 22 months ago)

Fixed the Win32 "iconv" code from the ground up.

  • Property svn:executable set to *
Line 
1/**
2 * @file    NewtIconv.c
3 * @brief   文字コード処理(libiconv使用)
4 *
5 * @author  M.Nukui
6 * @date    2005-07-17
7 *
8 * Copyright (C) 2005 M.Nukui All rights reserved.
9 */
10
11
12/* ヘッダファイル */
13#include "NewtIconv.h"
14
15
16#ifdef HAVE_LIBICONV
/*------------------------------------------------------------------------*/
/** Convert a byte string from one character encoding to another with iconv
 *
 * @param cd        [in] iconv conversion descriptor
 * @param src       [in] string to convert
 * @param srclen    [in] length of the string to convert, in bytes
 * @param dstlenp   [out]length of the converted string, in bytes (may be NULL)
 *
 * @return          the converted string, or NULL if the descriptor is
 *                  invalid, memory could not be allocated, or the
 *                  conversion failed
 *
 * @note            the converted string must be freed by the caller;
 *                  it is NOT NUL-terminated, use *dstlenp for its length
 * @note            an empty input yields a valid (non-NULL) buffer with
 *                  *dstlenp set to 0, so NULL always means failure
 */

char * NewtIconv(iconv_t cd, char * src, size_t srclen, size_t* dstlenp)
{
    char *  dst = NULL;
    size_t  dstlen = 0;

    // reject an invalid descriptor, and any length whose tripled size
    // would wrap around size_t and under-allocate the output buffer
    if (cd != (iconv_t)-1 && srclen <= (size_t)-1 / 3)
    {
        size_t  bufflen = srclen * 3;

        // never ask for zero bytes: malloc(0) is allowed to return NULL,
        // which would be indistinguishable from an allocation failure
        dst = malloc(bufflen ? bufflen : 1);

        if (dst)
        {
            const char *    inbuf_p = src;
            char *  outbuf_p = dst;
            size_t  inbytesleft = srclen;
            size_t  outbytesleft = bufflen;
            size_t  status;

            // put the converter back into its initial shift state
            iconv(cd, NULL, NULL, NULL, NULL);

            // iconv() takes "char **" on POSIX systems but "const char **"
            // in older libiconv builds and in the Win32 shim of this file;
            // passing the address through void * satisfies both prototypes
            status = iconv(cd, (void *)&inbuf_p, &inbytesleft, &outbuf_p, &outbytesleft);

            if (status == (size_t)-1)
            {   // the conversion failed, release the buffer
                free(dst);
                dst = NULL;
            }
            else
            {   // trim the unused tail of the buffer
                char *  shrunk;

                dstlen = bufflen - outbytesleft;
                shrunk = realloc(dst, dstlen ? dstlen : 1);

                // if shrinking fails the larger buffer is still valid
                if (shrunk)
                    dst = shrunk;
            }
        }
    }

    if (dstlenp) *dstlenp = dstlen;

    return dst;
}
70
71#if _MSC_VER
72
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <winnls.h>
76
77/**
78 * This is a minimalistic "iconv" for VC6.
79 * This version simply maps into WIN32 native calls and is pretty limited
80 * but likely to be sufficient within NEWT/0.
81 *
82 * Requested conversions in NEWT/0 as of 03/24/2007 are:
83 *  UTF-16BE to current code page and back
84 *  MACROMAN to current code page and back
85 *
86 * So we define the iconv_t bits ...00ss00dd where ss is the source format and
87 * dd is the destination format with:
88 *   00 = current code page
89 *   01 = Mac Roman
90 *   10 = UTF-16BE, Newton Format
91 *   11 = UTF-16LE., MSWindows Format
92 * iconv_t is -1 on error or unsupported conversion
93 *
94 * \todo move this into the VC6 directories
95 */
96iconv_t iconv_open(const char *tocode, const char *fromcode)
97{
98  iconv_t mode = 0;
99  // avoid a crash if the user does not privide encodings
100  if (!tocode || !fromcode)
101    return -1;
102
103  // determine the source text format
104  // if we can't identify the string, we assume the current codepage
105  if (strcmp(fromcode, "MACROMAN")==0)
106    mode |= 0x10;
107  else if (strcmp(fromcode, "UTF-16BE")==0)
108    mode |= 0x20;
109  else if (strcmp(fromcode, "UTF-16LE")==0)
110    mode |= 0x30;
111
112  // determine the destination text format of the text source
113  if (strcmp(tocode, "MACROMAN")==0)
114    mode |= 0x01;
115  else if (strcmp(tocode, "UTF-16BE")==0)
116    mode |= 0x02;
117  else if (strcmp(tocode, "UTF-16LE")==0)
118    mode |= 0x03;
119
120  return mode;
121}
122
/*
 * Flip the endianness of a 16 bit per char string.
 *
 * dst receives n byte-swapped 16 bit units read from src; nothing is done
 * when n is zero or negative. Both bytes of a unit are read before either
 * is written, so dst and src may be the same buffer.
 */
static void ic_flip_endian(void *dst, const void *src, int n) {
  unsigned char *out = (unsigned char *)dst;
  const unsigned char *in = (const unsigned char *)src;
  int i;
  for (i = 0; i < n; i++) {
    unsigned char first = in[2*i];
    out[2*i] = in[2*i + 1];
    out[2*i + 1] = first;
  }
}
135
136/**
137 * Convert a string of text from one encoding to another.
138 *
139 * \param cd[in] conversion descriptor, see iconv_open()
140 * \param inbuf[inout] source buffer, will be incremented for each converted character; this
141 *        value may be incorrect if the output buffer is too small.
142 * \param inbytesleft[inout] number of bytes in inbuffer, will be decremented for each conversion
143 * \param outbuf[inout] destination buffer, will be incremented
144 * \param outbytesleft[inout] number of bytes still free in buffer, will be decremented
145 * \return number of characters converted that can not be converted back (not implemented);
146 *         we return 0 for a complete conversion, and -1 for any error
147 */
148size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
149{
150  // reusable buffer for temporary conversion
151  static unsigned short *wbuf = 0L;
152  static int NWbuf = 0;
153# define MAKE_ROOM(n) if (NWbuf<(n)) { NWbuf=(n)+32; wbuf = realloc(wbuf, NWbuf); }
154
155  // addresses require by WIN32 calls
156  static char *dflt = ".";
157  BOOL dfltUsed;
158
159  // some variables
160  const char *src;
161  char *dst;
162  unsigned short *tmp;
163  int sn, dn, tn;
164  size_t ret = 0;
165
166  // handle the special cases first
167  if (inbuf==0L || *inbuf==0L) {
168    if (outbuf==0L || *outbuf==0L) {
169      // sepcial case: initialize converter
170      // (nothing to do here)
171      return 0;
172    } else {
173      // special case: write a format indicator
174      // (not implemented)
175      return 0;
176    }
177  }
178
179  // catch faulty parameters
180  if (!inbytesleft || !outbytesleft || outbuf==0L || *outbuf==0L)
181    return -1;
182
183  src = *inbuf; dst = *outbuf; sn = *inbytesleft; dn = *outbytesleft;
184  if (sn==0)
185    return 0;
186
187  // take care of all cases without any conversion
188  if ( (cd&0x3)==((cd>>4)&0x3) ) {
189    if (sn<=dn) {
190      dn = sn;
191    } else {
192      sn = dn;
193      ret = -1;
194    }
195    memcpy(dst, src, sn);
196    goto fixup_return_values;
197  }
198
199  // now, the conversion on WIN32 is always a two-step process
200  // because WIN32 can only convert to and from UTF-16LE
201
202  // convert from old format to UTF-16LE
203  switch (cd & 0x30) {
204  case 0x00: // from local code page, WIN32 does that
205    MAKE_ROOM(sn*2);
206    tn = 2 * MultiByteToWideChar(CP_THREAD_ACP, MB_PRECOMPOSED, src, sn, wbuf, sn);
207    tmp = wbuf;
208    break;
209  case 0x10: // from Mac Roman, WIN32 does that
210    MAKE_ROOM(sn*2);
211    tn = 2 * MultiByteToWideChar(CP_MACCP, MB_PRECOMPOSED, src, sn, wbuf, sn);
212    tmp = wbuf;
213    break;
214  case 0x20: // from UTF-16BE, flip the byte order
215    MAKE_ROOM(sn);
216    ic_flip_endian(wbuf, src, sn/2);
217    tmp = wbuf; tn = sn;
218    break;
219  case 0x30: // from UTF-16LE, make the source buffer the temp buffer
220    tmp = (unsigned short*)src; tn = sn;
221    break;
222  }
223
224  // convert from UTF-16LE to new format
225  switch (cd & 0x03) {
226  case 0x00: // to local code page
227    dn = WideCharToMultiByte(CP_THREAD_ACP, 0, tmp, tn/2, dst, dn, dflt, &dfltUsed);
228    if (dn==0)
229      ret = -1;
230    break;
231  case 0x01: // to Mac Roman
232    dn = WideCharToMultiByte(CP_MACCP, 0, tmp, tn/2, dst, dn, dflt, &dfltUsed);
233    if (dn==0)
234      ret = -1;
235    break;
236  case 0x02: // to UTF-16BE
237    if (tn<=dn) {
238      dn = tn;
239    } else {
240      tn = dn;
241      ret = -1;
242    }
243    ic_flip_endian(dst, tmp, tn/2);
244    break;
245  case 0x03: // to UTF-16LE
246    if (tn<=dn) {
247      dn = tn;
248    } else {
249      tn = dn;
250      ret = -1;
251    }
252    memcpy(dst, tmp, tn);
253    break;
254  }
255
256fixup_return_values:
257  *inbuf  += sn; *inbytesleft  -= sn;
258  *outbuf += dn; *outbytesleft -= dn;
259  if (ret==-1)
260    errno = 7;
261  return ret;
262
263#undef MAKE_ROOM
264}
265
266int iconv_close(iconv_t type)
267{
268  return 0;
269}
270
271#endif
272
273
274
275#endif /* HAVE_LIBICONV */
Note: See TracBrowser for help on using the browser.