您的位置:程序门 -> vb -> 基础类



如果将文件保存为utf-8(无bom)


[收藏此页] [打印本页]选择字色:背景色:字体:[][][]


如果将文件保存为utf-8(无bom)
发表于:2008-01-19 13:17:12 楼主
我知道如果将文件保存为utf-8但却不知道如何将其保存为utf-8(无bom类型)表面看起来这两种格式没有区别,但如果用ue打开之后通过二进制格式看就会发现在utf-8前面他会加一个ff   fe,各位大侠有没有办法
发表于:2008-01-19 13:35:291楼 得分:0
帮顶
发表于:2008-01-19 13:43:522楼 得分:0
使用dos下copy   con   filename生成空文件
然后调用shell   (type> filename)添加文件内容(我记不清楚> 还是> > 是添加到文件末尾了,自己试验换换看。)。
看看最后可以么?
发表于:2008-01-20 15:38:443楼 得分:0
function   unistrtoutf8(unistring)   as   byte()
'   convert   a   unicode   string   to   a   byte   stream   of   utf-8
dim   barray()   as   byte
dim   tempb()   as   byte
dim   i   as   long
dim   k   as   long
dim   tlen   as   long
dim   b1   as   byte
dim   b2   as   byte
dim   utf16   as   long
dim   j
      tlen   =   len(unistring)   '   obtain   length   of   unicode   input   string
      if   tlen   =   0   then   exit   function   '   get   out   if   there's   nothing   to   convert
      k   =   0
      for   i   =   1   to   tlen
            '   work   out   the   utf16   value   of   the   unicode   character
            copymemory   b1,   byval   strptr(unistring)   +   ((i   -   1)   *   2),   1
            copymemory   b2,   byval   strptr(unistring)   +   ((i   -   1)   *   2)   +   1,   1
        '   combine   the   2   bytes   into   the   unicode   utf-16
            utf16   =   b2   '   assign   b2   to   utf16   before   multiplying   by   256   to   avoid   overflow
            utf16   =   utf16   *   256   +   b1
            '   convert   utf-16   to   2   or   3   bytes   of   utf-8
            tempb   =   toutf8(utf16)
            '   copy   the   resultant   bytes   to   barray
            for   j   =   0   to   ubound(tempb)
                redim   preserve   barray(k)
                barray(k)   =   tempb(j):   k   =   k   +   1
            next
            redim   tempb(0)
      next
      unistrtoutf8   =   barray   '   return   the   resultant   utf-8   byte   array

end   function
function   utf8tounistr(barray)   as   string
'   convert   a   byte   stream   of   utf-8   to   unicode   string
dim   i   as   long
dim   topindex   as   long
dim   twobytes(1)   as   byte
dim   threebytes(2)   as   byte
dim   abyte   as   byte
dim   tstr   as   string
      topindex   =   ubound(barray)     '   number   of   bytes   equal   topindex+1
      if   topindex   =   0   then   exit   function   '   get   out   if   there's   nothing   to   convert
      i   =   0   '   initialise   pointer
      '   iterate   through   the   byte   array
      do   while   i   <=   topindex
          abyte   =   barray(i)   '   fetch   a   byte
          if   abyte   =   &he1   then
                '   start   of   3   byte   utf-8   group   for   a   character
                '   copy   3   byte   to   threebytes
                threebytes(0)   =   barray(i):   i   =   i   +   1
                threebytes(1)   =   barray(i):   i   =   i   +   1
                threebytes(2)   =   barray(i):   i   =   i   +   1
                '   convert   byte   array   to   utf-16   then   unicode
                tstr   =   tstr   &   chrw(toutf16(threebytes))
          elseif   (abyte   > =   &hc3)   and   (abyte   <=   &hc6)   then
                '   start   of   2   byte   utf-8   group   for   a   character
                twobytes(0)   =   barray(i):   i   =   i   +   1
                twobytes(1)   =   barray(i):   i   =   i   +   1
                '   convert   byte   array   to   utf-16   then   unicode
                tstr   =   tstr   &   chrw(toutf16(twobytes))
          else
                '   normal   ansi   character   -   use   it   as   is
                tstr   =   tstr   &   chr(abyte):   i   =   i   +   1   '   increment   byte   array   index
          end   if
      loop
      utf8tounistr   =   tstr     '   return   the   resultant   string
end   function
function   hexdisplayoffile(tfilename)   as   string
'   display   the   content   of   a   text   file   in   hex   format   like:
'                                 ff   fe   54   00   b0   01   db   1e   63   00
      dim   text1,   mychar,   filenum
      filenum   =   freefile   '   obtain   a   file   handle   from   the   os
      open   tfilename   for   binary   as   #filenum   '   open   given   text   file   as   binary
      '   read   all   characters   in   the   file.
      do   while   not   eof(filenum)
            mychar   =   input(1,   #filenum)       '   read   a   character   as   raw   binary
            if   mychar   <>   ""   then
          '   convert   byte   to   hex   like   0a,   6b   etc..
                  text1   =   text1   &   hexof(asc(mychar))   &   "   "
            end   if
      loop
      close   #filenum     '   close   file
      hexdisplayoffile   =   text1     '   return   the   hex   display   string
end   function
function   getfileencoding(tfilename)   as   coencoding
'   return   the   type   of   text   file   :   utf16le,   utf-8   or   ansi
dim   b1,   filenum
on   error   resume   next     '   ignore   error
filenum   =   freefile   '   obtain   a   file   handle   from   the   os
open   tfilename   for   binary   as   #filenum     '   open   given   textfile     as   binary
'   read   all   characters   in   the   file.
      b1   =   input(1,   #filenum)       '   read   the   first   character.
      if   asc(b1)   =   &hff   then
              getfileencoding   =   counicode     '   utf-16le
      elseif   asc(b1)   =   &hef   then
              getfileencoding   =   coutf8         '   utf-8
      else
              getfileencoding   =   coansi         '   normal   ansi
      end   if
close   #filenum     '   close   the   file
end   function
function   tounidecimal(unistring   as   string)   as   string
'   return   the   html   equivalent   string   of   a   unicode   string
dim   i   as   integer     '   must   declare   as   integer   for   copymemory   to   work
dim   tlen,   tstr
dim   b1   as   byte
dim   b2   as   byte
dim   utf16   as   long
    tlen   =   len(unistring)     '   get   length   of   input   unicode   string
    if   tlen   =   0   then   exit   function   '   get   out   if   null   string
    '   iterate   through   each   character   in   the   string
    for   i   =   1   to   tlen
        if   isunichar(mid(unistring,   i,   1))   then
        '   cast   the   string   character   to   2   bytes
            copymemory   b1,   byval   strptr(unistring)   +   ((i   -   1)   *   2),   1
            copymemory   b2,   byval   strptr(unistring)   +   ((i   -   1)   *   2)   +   1,   1
        '   combine   the   2   bytes   into   the   unicode   utf-16
            utf16   =   b2   '   assign   b2   to   utf16   before   multiplying   by   256   to   avoid   overflow
            utf16   =   utf16   *   256   +   b1
            '   convert   utf-16   to   format   &#99999;     for   html
            tstr   =   tstr   &   "&#"   &   trim(cstr(utf16))   &   ";"
        else
            '   get   here   if   it;s   an   ansi   character
            tstr   =   tstr   &   mid(unistring,   i,   1)
        end   if
    next
    tounidecimal   =   tstr     '   return   the   html   string
end   function
function   hexof(byval   ascnum   as   integer)   as   string
    '   return   the   2   character   hex   string   of     ascnum,   prefix   extra   "0"   if   necessary
    dim   tstr
    if   ascnum   >   255   then   ascnum   =   ascnum   mod   256
    tstr   =   hex(ascnum)     '   convert   to   hex
    if   len(tstr)   =   1   then
        '   attach   "0"   on   the   left
        tstr   =   "0"   &   tstr
    end   if
    hexof   =   tstr   '   return   the   2   character   hex   string
end   function


sub   saveutf8(tstr)
'   save   given   text   string   in   utf-8   format
      dim   a(2)   as   byte
      dim   barray()   as   byte
      dim   filenum
      '   place   bom   of   utf-8   in   first   3   bytes
      a(0)   =   &hef
      a(1)   =   &hbb
      a(2)   =   &hbf
      '   delete   output   file   if   it   exists
      'if   dir(commondialog1.filename)   <>   ""   then
      '     kill   commondialog1.filename
      'end   if
      filenum   =   freefile   '   obtain   a   file   handle   from   the   os
      open   app.path   &   "\xml\c.xml"   for   binary   as   #filenum
      put   #filenum,   ,   a     '   write   bom   bytes
      '   convert   the   unicode   string   to   utf-8   byte   array
      barray   =   unistrtoutf8(tstr)
      put   #filenum,   ,   barray     '   write   byte   array   to   file
      close   #filenum     '   close   the   file
end   sub
sub   saveutf82(tstr)
'   save   given   text   string   in   utf-8   format
      dim   a(2)   as   byte
      dim   barray()   as   byte
      dim   filenum
      '   place   bom   of   utf-8   in   first   3   bytes
      a(0)   =   &hef
      a(1)   =   &hbb
      a(2)   =   &hbf
      filenum   =   freefile   '   obtain   a   file   handle   from   the   os
      open   app.path   &   "\xml\userquery.temp"   for   binary   as   #filenum
      put   #filenum,   ,   a     '   write   bom   bytes
      '   convert   the   unicode   string   to   utf-8   byte   array
      barray   =   unistrtoutf8(tstr)
      put   #filenum,   ,   barray     '   write   byte   array   to   file
      close   #filenum     '   close   the   file
end   sub
sub   saveutf16(tstr)
'   save   given   text   string   in   utf-16le   format
      dim   i   as   long,   ab()   as   byte
      dim   tlen,   filenum
      '   work   out   number   of   bytes   required
      tlen   =   len(tstr)   *   2
      redim   ab(tlen   +   1)     '   prepare   dimension   of   byte   array
      '   place   bom   of   utf-16le   in   first   2   bytes
      ab(0)   =   &hff
      ab(1)   =   &hfe
      '   copy   unicode   string   to   byte   array,   1   byte   at   a   time
      for   i   =   0   to   tlen   -   1
            copymemory   ab(i   +   2),   byval   strptr(tstr)   +   i,   1
      next
      '   delete   output   file   if   it   exists
      filenum   =   freefile   '   obtain   a   file   handle   from   the   os
      open   app.path   &   "\xml\userquery.temp"   for   binary   as   #filenum     '   open   output   file   in   binary
      put   #filenum,   ,   ab     '   write   byte   array   to   file
      close   #filenum     '   close   the   file
end   sub
发表于:2008-01-21 13:42:454楼 得分:0
gb/big5/utf-8   文件编码批量转换程序源码

http://www.pc-soft.cn/blogview.asp?logid=44&cateid=14

有部分注释。
发表于:2008-01-23 13:48:555楼 得分:0
http://www.mndsoft.com/blog/article.asp?id=896


快速检索

最新资讯
热门点击