| 发表于:2008-01-20 15:38:443楼 得分:0 |
function unistrtoutf8(unistring) as byte() ' convert a unicode string to a byte stream of utf-8 dim barray() as byte dim tempb() as byte dim i as long dim k as long dim tlen as long dim b1 as byte dim b2 as byte dim utf16 as long dim j tlen = len(unistring) ' obtain length of unicode input string if tlen = 0 then exit function ' get out if there's nothing to convert k = 0 for i = 1 to tlen ' work out the utf16 value of the unicode character copymemory b1, byval strptr(unistring) + ((i - 1) * 2), 1 copymemory b2, byval strptr(unistring) + ((i - 1) * 2) + 1, 1 ' combine the 2 bytes into the unicode utf-16 utf16 = b2 ' assign b2 to utf16 before multiplying by 256 to avoid overflow utf16 = utf16 * 256 + b1 ' convert utf-16 to 2 or 3 bytes of utf-8 tempb = toutf8(utf16) ' copy the resultant bytes to barray for j = 0 to ubound(tempb) redim preserve barray(k) barray(k) = tempb(j): k = k + 1 next redim tempb(0) next unistrtoutf8 = barray ' return the resultant utf-8 byte array end function function utf8tounistr(barray) as string ' convert a byte stream of utf-8 to unicode string dim i as long dim topindex as long dim twobytes(1) as byte dim threebytes(2) as byte dim abyte as byte dim tstr as string topindex = ubound(barray) ' number of bytes equal topindex+1 if topindex = 0 then exit function ' get out if there's nothing to convert i = 0 ' initialise pointer ' iterate through the byte array do while i <= topindex abyte = barray(i) ' fetch a byte if abyte = &he1 then ' start of 3 byte utf-8 group for a character ' copy 3 byte to threebytes threebytes(0) = barray(i): i = i + 1 threebytes(1) = barray(i): i = i + 1 threebytes(2) = barray(i): i = i + 1 ' convert byte array to utf-16 then unicode tstr = tstr & chrw(toutf16(threebytes)) elseif (abyte > = &hc3) and (abyte <= &hc6) then ' start of 2 byte utf-8 group for a character twobytes(0) = barray(i): i = i + 1 twobytes(1) = barray(i): i = i + 1 ' convert byte array to utf-16 then unicode tstr = tstr & chrw(toutf16(twobytes)) else ' normal ansi character - use it as is tstr = tstr & chr(abyte): i = i + 1 ' increment byte array index end if loop utf8tounistr = tstr ' return the resultant string end function function hexdisplayoffile(tfilename) as string ' display the content of a text file in hex format like: ' ff fe 54 00 b0 01 db 1e 63 00 dim text1, mychar, filenum filenum = freefile ' obtain a file handle from the os open tfilename for binary as #filenum ' open given text file as binary ' read all characters in the file. do while not eof(filenum) mychar = input(1, #filenum) ' read a character as raw binary if mychar <> "" then ' convert byte to hex like 0a, 6b etc.. text1 = text1 & hexof(asc(mychar)) & " " end if loop close #filenum ' close file hexdisplayoffile = text1 ' return the hex display string end function function getfileencoding(tfilename) as coencoding ' return the type of text file : utf16le, utf-8 or ansi dim b1, filenum on error resume next ' ignore error filenum = freefile ' obtain a file handle from the os open tfilename for binary as #filenum ' open given textfile as binary ' read all characters in the file. b1 = input(1, #filenum) ' read the first character. if asc(b1) = &hff then getfileencoding = counicode ' utf-16le elseif asc(b1) = &hef then getfileencoding = coutf8 ' utf-8 else getfileencoding = coansi ' normal ansi end if close #filenum ' close the file end function function tounidecimal(unistring as string) as string ' return the html equivalent string of a unicode string dim i as integer ' must declare as integer for copymemory to work dim tlen, tstr dim b1 as byte dim b2 as byte dim utf16 as long tlen = len(unistring) ' get length of input unicode string if tlen = 0 then exit function ' get out if null string ' iterate through each character in the string for i = 1 to tlen if isunichar(mid(unistring, i, 1)) then ' cast the string character to 2 bytes copymemory b1, byval strptr(unistring) + ((i - 1) * 2), 1 copymemory b2, byval strptr(unistring) + ((i - 1) * 2) + 1, 1 ' combine the 2 bytes into the unicode utf-16 utf16 = b2 ' assign b2 to utf16 before multiplying by 256 to avoid overflow utf16 = utf16 * 256 + b1 ' convert utf-16 to format 𘚟 for html tstr = tstr & "&#" & trim(cstr(utf16)) & ";" else ' get here if it;s an ansi character tstr = tstr & mid(unistring, i, 1) end if next tounidecimal = tstr ' return the html string end function function hexof(byval ascnum as integer) as string ' return the 2 character hex string of ascnum, prefix extra "0" if necessary dim tstr if ascnum > 255 then ascnum = ascnum mod 256 tstr = hex(ascnum) ' convert to hex if len(tstr) = 1 then ' attach "0" on the left tstr = "0" & tstr end if hexof = tstr ' return the 2 character hex string end function sub saveutf8(tstr) ' save given text string in utf-8 format dim a(2) as byte dim barray() as byte dim filenum ' place bom of utf-8 in first 3 bytes a(0) = &hef a(1) = &hbb a(2) = &hbf ' delete output file if it exists 'if dir(commondialog1.filename) <> "" then ' kill commondialog1.filename 'end if filenum = freefile ' obtain a file handle from the os open app.path & "\xml\c.xml" for binary as #filenum put #filenum, , a ' write bom bytes ' convert the unicode string to utf-8 byte array barray = unistrtoutf8(tstr) put #filenum, , barray ' write byte array to file close #filenum ' close the file end sub sub saveutf82(tstr) ' save given text string in utf-8 format dim a(2) as byte dim barray() as byte dim filenum ' place bom of utf-8 in first 3 bytes a(0) = &hef a(1) = &hbb a(2) = &hbf filenum = freefile ' obtain a file handle from the os open app.path & "\xml\userquery.temp" for binary as #filenum put #filenum, , a ' write bom bytes ' convert the unicode string to utf-8 byte array barray = unistrtoutf8(tstr) put #filenum, , barray ' write byte array to file close #filenum ' close the file end sub sub saveutf16(tstr) ' save given text string in utf-16le format dim i as long, ab() as byte dim tlen, filenum ' work out number of bytes required tlen = len(tstr) * 2 redim ab(tlen + 1) ' prepare dimension of byte array ' place bom of utf-16le in first 2 bytes ab(0) = &hff ab(1) = &hfe ' copy unicode string to byte array, 1 byte at a time for i = 0 to tlen - 1 copymemory ab(i + 2), byval strptr(tstr) + i, 1 next ' delete output file if it exists filenum = freefile ' obtain a file handle from the os open app.path & "\xml\userquery.temp" for binary as #filenum ' open output file in binary put #filenum, , ab ' write byte array to file close #filenum ' close the file end sub | | |
|