网络程序经常需要进行编码转换,比如下载utf8编码的网页之后,需要先转为unicode才能正确显示。windows自带的编码转换API比较难用,所以这里对其进行简单的封装。代码分为SDK和MFC两个版本,SDK版本如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | #pragma once #ifndef __HCODEC_HPP__ #define __HCODEC_HPP__ #include <windows .h> #include <string> class hCodec { //不可实例化 hCodec () = delete; ~hCodec () = delete; static bool hCodec::_conv_Down (std::wstring& _old, std::string& _new, UINT ToType) { int lenOld = lstrlenW (_old.c_str ()); int lenNew = ::WideCharToMultiByte (ToType, 0, _old.c_str (), lenOld, NULL, 0, NULL, NULL); std::string s; s.resize (lenNew); bool bRet = ::WideCharToMultiByte (ToType, 0, _old.c_str (), lenOld, const_cast<char *>(s.c_str ()), lenNew, NULL, NULL); _new.clear (); _new = s.c_str (); return bRet; } static bool hCodec::_conv_Up (std::string& _old, std::wstring& _new, UINT ToType) { int lenOld = lstrlenA (_old.c_str ()); int lenNew = ::MultiByteToWideChar (ToType, 0, _old.c_str (), lenOld, NULL, 0); std::wstring s; s.resize (lenNew); bool bRet = ::MultiByteToWideChar (ToType, 0, _old.c_str (), lenOld, const_cast<wchar_t *>(s.c_str ()), lenNew); _new.clear (); _new = s.c_str (); return bRet; } public: static bool hCodec::AnsiToUnicode (std::string& _old, std::wstring& _new) { return hCodec::_conv_Up (_old, _new, CP_ACP); } static bool hCodec::UnicodeToAnsi (std::wstring& _old, std::string& _new) { return hCodec::_conv_Down (_old, _new, CP_ACP); } static bool hCodec::Utf8ToUnicode (std::string& _old, std::wstring& _new) { return hCodec::_conv_Up (_old, _new, CP_UTF8); } static bool hCodec::UnicodeToUtf8 (std::wstring& _old, std::string& _new) { return hCodec::_conv_Down (_old, _new, CP_UTF8); } static bool hCodec::AnsiToUtf8 (std::string& _old, std::string& _new) { std::wstring t; if (!hCodec::AnsiToUnicode (_old, t)) return false; return hCodec::UnicodeToUtf8 (t, _new); } static bool hCodec::Utf8ToAnsi (std::string& _old, std::string& _new) { std::wstring t; if (!hCodec::Utf8ToUnicode (_old, t)) return false; return 
hCodec::UnicodeToAnsi (t, _new); } }; #endif //__HCODEC_HPP__</wchar_t></char></string></windows> |
MFC版本与其类似,不过专用于MFC环境,调用方式相同:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | #pragma once #ifndef __HCODEC_HPP__ #define __HCODEC_HPP__ #include <afxstr.h> class hCodec { //不可实例化 hCodec () = delete; ~hCodec () = delete; static bool hCodec::_conv_Down (CStringW& _old, CStringA& _new, UINT ToType) { int lenOld = lstrlenW (_old.GetBuffer ()); int lenNew = ::WideCharToMultiByte (ToType, 0, _old.GetBuffer (), lenOld, NULL, 0, NULL, NULL); CStringA s; s.GetBufferSetLength (lenNew); bool bRet = ::WideCharToMultiByte (ToType, 0, _old.GetBuffer (), lenOld, const_cast<char*>(s.GetBuffer ()), lenNew, NULL, NULL); _new = s.GetBuffer (); return bRet; } static bool hCodec::_conv_Up (CStringA& _old, CStringW& _new, UINT ToType) { int lenOld = lstrlenA (_old.GetBuffer ()); int lenNew = ::MultiByteToWideChar (ToType, 0, _old.GetBuffer (), lenOld, NULL, 0); CStringW s; s.GetBufferSetLength (lenNew); bool bRet = ::MultiByteToWideChar (ToType, 0, _old.GetBuffer (), lenOld, const_cast<wchar_t*>(s.GetBuffer ()), lenNew); _new = s.GetBuffer (); return bRet; } public: static bool hCodec::AnsiToUnicode (CStringA& _old, CStringW& _new) { return hCodec::_conv_Up (_old, _new, CP_ACP); } static bool hCodec::UnicodeToAnsi (CStringW& _old, CStringA& _new) { return hCodec::_conv_Down (_old, _new, CP_ACP); } static bool hCodec::Utf8ToUnicode (CStringA& _old, CStringW& _new) { return hCodec::_conv_Up (_old, _new, CP_UTF8); } static bool hCodec::UnicodeToUtf8 (CStringW& _old, CStringA& _new) { return hCodec::_conv_Down (_old, _new, CP_UTF8); } static bool hCodec::AnsiToUtf8 (CStringA& _old, CStringA& _new) { CStringW t; if (!hCodec::AnsiToUnicode (_old, t)) return false; return hCodec::UnicodeToUtf8 (t, _new); } static bool hCodec::Utf8ToAnsi (CStringA& _old, CStringA& _new) { CStringW t; if (!hCodec::Utf8ToUnicode (_old, t)) return false; return hCodec::UnicodeToAnsi (t, _new); } }; #endif //__HCODEC_HPP__ |
调用方式非常简单,比如SDK版如下
1 2 3 4 5 | //gb2312编码到unicode编码的转换 std::string ansi = "hello world!"; std::wstring unicode; hCodec::AnsiToUnicode (ansi, unicode); //然后unicode这儿就是转换后的文本了 |
除了这个之外,另外推荐一种ATL的编码转换方式,感觉这种方式还是挺好用的,首先引入头文件
1 | #include <atlconv.h> |
然后,在函数内部,如果这个函数里面的代码涉及到编码转换,那么在函数第一行加上
1 | USES_CONVERSION; |
然后就可以自由转换了。转换API比较多,通常使用下面几个:A2T、A2W、T2A、T2W、W2A、W2T等等,使用方式如下:
1 | MessageBox (NULL, A2T ("test"), A2T ("test"), MB_OK); |
像调用普通的C函数一样直接使用就可以了。需要注意的是,这些宏转换出来的字符串是在栈上分配的临时缓冲区,函数返回后即失效,所以不要把转换结果的指针返回或保存起来,也不要在循环里大量调用,否则可能造成栈溢出。
另外说个小窍门,对于Visual Studio环境,有时候代码中大量的字符串需要初始化为utf-8编码,那么可以在首行加入如下代码
1 | #pragma execution_character_set("utf-8") |
加上之后,当前源文件中以"xxx"形式书写的普通(窄)字符串字面量全部默认以utf-8方式进行编码。