Windows下编码转换

对于网络程序来说,经常需要用到编码转换,比如下载UTF-8编码的网页之后,需要转为Unicode(UTF-16)进行显示。Windows自带的编码转换API比较难用,所以对其进行简单的封装。代码分SDK和MFC两个版本,SDK版本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#pragma once
#ifndef __HCODEC_HPP__
#define __HCODEC_HPP__
 
#include <windows.h>
#include <string>
 
class hCodec {
    //不可实例化
    hCodec () = delete;
    ~hCodec () = delete;
 
    static bool hCodec::_conv_Down (std::wstring& _old, std::string& _new, UINT ToType) {
        int lenOld = lstrlenW (_old.c_str ());
        int lenNew = ::WideCharToMultiByte (ToType, 0, _old.c_str (), lenOld, NULL, 0, NULL, NULL);
        std::string s;
        s.resize (lenNew);
        bool bRet = ::WideCharToMultiByte (ToType, 0, _old.c_str (), lenOld, const_cast<char *>(s.c_str ()), lenNew, NULL, NULL);
        _new.clear ();
        _new = s.c_str ();
        return bRet;
    }
    static bool hCodec::_conv_Up (std::string& _old, std::wstring& _new, UINT ToType) {
        int lenOld = lstrlenA (_old.c_str ());
        int lenNew = ::MultiByteToWideChar (ToType, 0, _old.c_str (), lenOld, NULL, 0);
        std::wstring s;
        s.resize (lenNew);
        bool bRet = ::MultiByteToWideChar (ToType, 0, _old.c_str (), lenOld, const_cast<wchar_t *>(s.c_str ()), lenNew);
        _new.clear ();
        _new = s.c_str ();
        return bRet;
    }
 
public:
    static bool hCodec::AnsiToUnicode (std::string& _old, std::wstring& _new) {
        return hCodec::_conv_Up (_old, _new, CP_ACP);
    }
    static bool hCodec::UnicodeToAnsi (std::wstring& _old, std::string& _new) {
        return hCodec::_conv_Down (_old, _new, CP_ACP);
    }
    static bool hCodec::Utf8ToUnicode (std::string& _old, std::wstring& _new) {
        return hCodec::_conv_Up (_old, _new, CP_UTF8);
    }
    static bool hCodec::UnicodeToUtf8 (std::wstring& _old, std::string& _new) {
        return hCodec::_conv_Down (_old, _new, CP_UTF8);
    }
    static bool hCodec::AnsiToUtf8 (std::string& _old, std::string& _new) {
        std::wstring t;
        if (!hCodec::AnsiToUnicode (_old, t)) return false;
        return hCodec::UnicodeToUtf8 (t, _new);
    }
    static bool hCodec::Utf8ToAnsi (std::string& _old, std::string& _new) {
        std::wstring t;
        if (!hCodec::Utf8ToUnicode (_old, t)) return false;
        return hCodec::UnicodeToAnsi (t, _new);
    }
};
 
#endif //__HCODEC_HPP__


MFC版本与其类似,不过专用于MFC环境,调用方式相同

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#pragma once
#ifndef __HCODEC_HPP__
#define __HCODEC_HPP__
 
#include <afxstr.h>
 
class hCodec {
    //不可实例化
    hCodec () = delete;
    ~hCodec () = delete;
 
    static bool hCodec::_conv_Down (CStringW& _old, CStringA& _new, UINT ToType) {
        int lenOld = lstrlenW (_old.GetBuffer ());
        int lenNew = ::WideCharToMultiByte (ToType, 0, _old.GetBuffer (), lenOld, NULL, 0, NULL, NULL);
        CStringA s;
        s.GetBufferSetLength (lenNew);
        bool bRet = ::WideCharToMultiByte (ToType, 0, _old.GetBuffer (), lenOld, const_cast<char*>(s.GetBuffer ()), lenNew, NULL, NULL);
        _new = s.GetBuffer ();
        return bRet;
    }
    static bool hCodec::_conv_Up (CStringA& _old, CStringW& _new, UINT ToType) {
        int lenOld = lstrlenA (_old.GetBuffer ());
        int lenNew = ::MultiByteToWideChar (ToType, 0, _old.GetBuffer (), lenOld, NULL, 0);
        CStringW s;
        s.GetBufferSetLength (lenNew);
        bool bRet = ::MultiByteToWideChar (ToType, 0, _old.GetBuffer (), lenOld, const_cast<wchar_t*>(s.GetBuffer ()), lenNew);
        _new = s.GetBuffer ();
        return bRet;
    }
 
public:
    static bool hCodec::AnsiToUnicode (CStringA& _old, CStringW& _new) {
        return hCodec::_conv_Up (_old, _new, CP_ACP);
    }
    static bool hCodec::UnicodeToAnsi (CStringW& _old, CStringA& _new) {
        return hCodec::_conv_Down (_old, _new, CP_ACP);
    }
    static bool hCodec::Utf8ToUnicode (CStringA& _old, CStringW& _new) {
        return hCodec::_conv_Up (_old, _new, CP_UTF8);
    }
    static bool hCodec::UnicodeToUtf8 (CStringW& _old, CStringA& _new) {
        return hCodec::_conv_Down (_old, _new, CP_UTF8);
    }
    static bool hCodec::AnsiToUtf8 (CStringA& _old, CStringA& _new) {
        CStringW t;
        if (!hCodec::AnsiToUnicode (_old, t)) return false;
        return hCodec::UnicodeToUtf8 (t, _new);
    }
    static bool hCodec::Utf8ToAnsi (CStringA& _old, CStringA& _new) {
        CStringW t;
        if (!hCodec::Utf8ToUnicode (_old, t)) return false;
        return hCodec::UnicodeToAnsi (t, _new);
    }
};
 
#endif //__HCODEC_HPP__

调用方式非常简单,比如SDK版如下

1
2
3
4
5
// Convert a string in the system ANSI code page (CP_ACP) to Unicode.
// NOTE(review): the original comment called this "gb2312"; that presumably
// assumes a Simplified-Chinese system locale - confirm.
std::wstring unicode;
std::string ansi ("hello world!");
hCodec::AnsiToUnicode (ansi, unicode);
// `unicode` now holds the converted text.

除了这个之外,另外推荐一种ATL的编码转换方式,感觉这种方式还是挺好用的,首先引入头文件

1
#include <atlconv.h>

然后,在函数内部,如果这个函数里面的代码涉及到编码转换,那么在函数第一行加上

1
// Put this on the first line of any function that uses the ATL conversion macros (A2T, W2A, ...).
USES_CONVERSION;

然后就可以自由转换了。转换API比较多,通常使用下面几个:A2T、A2W、T2A、T2W、W2A、W2T等等,使用方式如下:

1
// Each narrow literal goes through the A2T conversion macro before being passed to MessageBox.
MessageBox (NULL, A2T ("test"), A2T ("test"), MB_OK);

直接使用类似C语言那样的访问就可以了。

另外说个小窍门,对于Visual Studio环境,有时候代码中大量的字符串需要初始化为utf-8编码,那么可以在首行加入如下代码

1
#pragma execution_character_set("utf-8")

加上之后,该源文件中位于这行之后的"xxx"风格的窄字符串字面量,全部默认以UTF-8方式进行编码。

Published by

fawdlstty

又一只萌萌哒程序猿~~

发表评论

电子邮件地址不会被公开。 必填项已用*标注