在网上找到了GB2312转化为utf-8的算法:

C/C++ code
void CChineseCodeLib::UTF_8ToUnicode(WCHAR* pOut,char *pText) { char* uchar = (char *)pOut; uchar[1] = ((pText[0] & 0×0F) << 4) + ((pText[1] >> 2) & 0×0F); uchar[0] = ((pText[1] & 0×03) << 6) + (pText[2] & 0×3F); return; } void CChineseCodeLib::UnicodeToGB2312(char* pOut,WCHAR *uData) { WideCharToMultiByte(CP_ACP,NULL,uData,1,pOut,sizeof(WCHAR),NULL,NULL); return; } void CChineseCodeLib::Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer) { ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1); return; } void CChineseCodeLib::UnicodeToUTF_8(char* pOut,WCHAR* pText) { // 注意 WCHAR高低字的顺序,低字节在前,高字节在后 char* pchar = (char *)pText; pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4)); pOut[1] = (0×80 | ((pchar[1] & 0×0F) << 2)) + ((pchar[1] & 0xC0) >> 6); pOut[2] = (0×80 | (pchar[0] & 0×3F)); return; } void CChineseCodeLib::GB2312ToUTF_8(char *pOut,char *pText, int pLen) { char buf[4]; char* rst = new char[pLen + (pLen >> 2) + 2]; memset(buf,0,4); memset(rst,0,pLen + (pLen >> 2) + 2); int i = 0; int j = 0; while(i < pLen) { //如果是英文直接复制就可以 if( *(pText + i) >= 0) { rst[j++] = pText[i++]; } else { WCHAR pbuffer; Gb2312ToUnicode(&pbuffer,pText+i); UnicodeToUTF_8(buf,&pbuffer); unsigned short int tmp = 0; tmp = rst[j] = buf[0]; tmp = rst[j+1] = buf[1]; tmp = rst[j+2] = buf[2]; j += 3; i += 2; } } rst[j] = \0; //返回结果 strcpy(pOut,rst); delete []rst; return; } void CChineseCodeLib::UTF_8ToGB2312(string &pOut, char *pText, int pLen) { char * newBuf = new char[pLen]; char Ctemp[4]; memset(Ctemp,0,4); int i =0; int j = 0; while(i < pLen) { if(pText[i] > 0) { newBuf[j++] = pText[i++]; } else { WCHAR Wtemp; UTF_8ToUnicode(&Wtemp,pText + i); UnicodeToGB2312(Ctemp,&Wtemp); newBuf[j] = Ctemp[0]; newBuf[j + 1] = Ctemp[1]; i += 3; j += 2; } } newBuf[j] = \0; pOut = newBuf; delete []newBuf; return; }

用下面的代码段调用

C/C++ code
// Converts a GB2312 literal to UTF-8 and writes it to e:/test.txt.
AnsiString srcStr = "中国加油,奥运加油";
int len = srcStr.Length();

// Every two-byte GB2312 character expands to three UTF-8 bytes, so the
// destination needs up to 1.5x the source length plus the terminator.
// Allocating only `len` bytes overflowed the heap buffer.
char *dest = new char[len + len / 2 + 2];

CChineseCodeLib *codeLib = new CChineseCodeLib();
codeLib->GB2312ToUTF_8(dest, srcStr.c_str(), len);

TStringList *tSqlList = new TStringList();
tSqlList->Add(dest);
tSqlList->SaveToFile("e:/test.txt");

delete []dest;
delete codeLib;
delete tSqlList;

最后得到的结果为乱码:

涔櫧鍓犳惫锟屽ゥ杈愬墵姹\xB9

是怎么回事呢?是调用不正确还是算法本身有问题?
小弟刚刚学C++,请高手们指点!!

你应该用ansi转UTF-8,不是GB

mark

BCB 有直接的API使用。

Unit

System

Category

character set conversions

extern PACKAGE UTF8String __fastcall AnsiToUtf8(const AnsiString S);

Description

AnsiToUtf8 converts the string specified by S, which uses the ANSI encoding system, to UTF-8.

LZ的做法是错误的;

utf8是针对unicode的另外一种存储方法;

我有现成的代码,已经经过实际使用了,绝对没有问题;

大体的方法是:

GB2312转到unicode,unicode再转utf8;
GB2312转unicode的办法,在CB里面,简单的要死:

AnsiString gb_str = "xxxx";
WideString un_str = gb_str;

// pwstr is now the Unicode string: two bytes per character, zero-terminated.
// WideString exposes its buffer through c_bstr(); b_cstr() does not exist.
wchar_t * pwstr = un_str.c_bstr();

unicode转到utf8,我是借用的linux的NLS_UTF8的内核代码,高效可靠;

如果你需要,我可以发一份给你;