| 发表于:2007-05-09 15:11:462楼 得分:10 |
模块 module_utf8 option explicit public m_bisnt as boolean public declare function widechartomultibyte lib "kernel32 " (byval codepage as long, byval dwflags as long, byval lpwidecharstr as long, byval cchwidechar as long, byref lpmultibytestr as any, byval cchmultibyte as long, byval lpdefaultchar as string, byval lpuseddefaultchar as long) as long public declare function multibytetowidechar lib "kernel32 " (byval codepage as long, byval dwflags as long, byref lpmultibytestr as any, byval cchmultibyte as long, byval lpwidecharstr as long, byval cchwidechar as long) as long public declare sub copymemory lib "kernel32 " alias "rtlmovememory " (lpvdest as any, lpvsource as any, byval cbcopy as long) public const cp_utf8 = 65001 'purpose:convert utf8 to unicode public function utf8_decode(byval sutf8 as string) as string dim lngutf8size as long dim strbuffer as string dim lngbuffersize as long dim lngresult as long dim bytutf8() as byte dim n as long if lenb(sutf8) = 0 then exit function m_bisnt = true ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' add ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' if m_bisnt then on error goto endfunction bytutf8 = strconv(sutf8, vbfromunicode) lngutf8size = ubound(bytutf8) + 1 on error goto 0 'set buffer for longest possible string i.e. each byte is 'ansi, thus 1 unicode(2 bytes)for every utf-8 character. lngbuffersize = lngutf8size * 2 strbuffer = string$(lngbuffersize, vbnullchar) 'translate using code page 65001(utf-8) lngresult = multibytetowidechar(cp_utf8, 0, bytutf8(0), _ lngutf8size, strptr(strbuffer), lngbuffersize) 'trim result to actual length if lngresult then utf8_decode = left$(strbuffer, lngresult) end if else dim i as long dim topindex as long dim twobytes(1) as byte dim threebytes(2) as byte dim abyte as byte dim tstr as string dim barray() as byte 'resume on error in case someone inputs text with accents 'that should have been encoded as utf-8 on error resume next topindex = len(sutf8) ' number of bytes equal topindex+1 if topindex = 0 then exit function ' get out if there 's nothing to convert barray = strconv(sutf8, vbfromunicode) i = 0 ' initialise pointer topindex = topindex - 1 ' iterate through the byte array do while i <= topindex abyte = barray(i) if abyte < &h80 then ' normal ansi character - use it as is tstr = tstr & chr$(abyte): i = i + 1 ' increment byte array index elseif abyte > = &he0 then 'was = &he1 then ' start of 3 byte utf-8 group for a character ' copy 3 byte to threebytes threebytes(0) = barray(i): i = i + 1 threebytes(1) = barray(i): i = i + 1 threebytes(2) = barray(i): i = i + 1 ' convert byte array to utf-16 then unicode tstr = tstr & chrw$((threebytes(0) and &hf) * &h1000 + (threebytes(1) and &h3f) * &h40 + (threebytes(2) and &h3f)) elseif (abyte > = &hc2) and (abyte <= &hdb) then ' start of 2 byte utf-8 group for a character twobytes(0) = barray(i): i = i + 1 twobytes(1) = barray(i): i = i + 1 ' convert byte array to utf-16 then unicode tstr = tstr & chrw$((twobytes(0) and &h1f) * &h40 + (twobytes(1) and &h3f)) else ' normal ansi character - use it as is tstr = tstr & chr$(abyte): i = i + 1 ' increment byte array index end if loop utf8_decode = tstr ' return the resultant string erase barray end if endfunction: end function | | |
|