| 发表于:2007-03-08 11:11:04 8楼 得分:0 |
/*
 * Forum post text (translated): "I have used C#, but this is urgent and I do not have
 * time to go and learn Java. Below is the code of one of the classes."
 */
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.HashMap;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.ArrayList;

/**
 * Static helpers that read sentences from a text file and build count tables from
 * whitespace-separated tokens of the form {@code first/second}.
 *
 * <p>All tokens are lower-cased before use. Judging by the original comments the token
 * format is presumably {@code tag/word} (the part before the slash is what gets counted);
 * NOTE(review): confirm against the actual corpus.
 *
 * <p>The class, method and parameter names are kept exactly as posted, and the raw
 * collection types are kept so the signatures match the original pre-generics code.
 *
 * <p>Author: xingrui ji. Date: Oct 7 2006.
 */
public class support {

    /** Separator between the two parts of a token; also a valid literal regex for split. */
    private static final String PART_SEPARATOR = "/";

    /**
     * Reads a text file line by line.
     *
     * @param filename path of the file to read
     * @return a list of {@code String}, one element per line, in file order
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading or closing the file fails
     */
    public static LinkedList readsentences(String filename)
            throws FileNotFoundException, IOException {
        BufferedReader reader = new BufferedReader(new FileReader(filename));
        try {
            LinkedList sentenceList = new LinkedList();
            String line = reader.readLine();
            while (line != null) {
                sentenceList.add(line);
                line = reader.readLine();
            }
            return sentenceList;
        } finally {
            // Always release the file handle, even when reading fails.
            reader.close();
        }
    }

    /**
     * Records one {@code first/second} token in a map keyed by the second part.
     *
     * <p>The token is lower-cased and split on {@code "/"}. The map value for the second
     * part is an {@code ArrayList} of the distinct first parts seen with it. Tokens with
     * fewer than two parts are ignored.
     *
     * @param wordmap map from second part ({@code String}) to {@code ArrayList} of first
     *     parts; modified in place
     * @param keyword the token to record
     * @return the same {@code wordmap} instance
     */
    static HashMap addwordtag(HashMap wordmap, String keyword) {
        String[] parts = keyword.toLowerCase().split(PART_SEPARATOR);
        if (parts.length < 2) {
            return wordmap;
        }
        ArrayList firstParts = (ArrayList) wordmap.get(parts[1]);
        if (firstParts == null) {
            firstParts = new ArrayList();
            wordmap.put(parts[1], firstParts);
        }
        if (!firstParts.contains(parts[0])) {
            firstParts.add(parts[0]);
        }
        return wordmap;
    }

    /**
     * Increments the counter stored under the lower-cased key, starting at 1 for a new key.
     *
     * @param wordmap map from key ({@code String}) to count ({@code Integer}); modified in
     *     place
     * @param keyword the key to count; lower-cased before lookup
     * @return the same {@code wordmap} instance
     */
    static HashMap addwordinc(HashMap wordmap, String keyword) {
        String key = keyword.toLowerCase();
        Integer current = (Integer) wordmap.get(key);
        int next = (current == null) ? 1 : current.intValue() + 1;
        wordmap.put(key, Integer.valueOf(next));
        return wordmap;
    }

    /**
     * Builds the second-part to first-parts map for every token of every sentence.
     *
     * @param sentencelist list of sentences ({@code String}), tokens separated by whitespace
     * @return map from second part to {@code ArrayList} of distinct first parts, as filled
     *     by {@link #addwordtag(HashMap, String)}
     */
    static HashMap getwordtag(LinkedList sentencelist) {
        HashMap wordMap = new HashMap();
        for (Iterator it = sentencelist.iterator(); it.hasNext();) {
            StringTokenizer tokens = new StringTokenizer((String) it.next());
            while (tokens.hasMoreTokens()) {
                addwordtag(wordMap, tokens.nextToken());
            }
        }
        return wordMap;
    }

    /**
     * Counts how often each first part (the text before the slash) occurs.
     *
     * <p>When a token without two parts is met, the rest of that sentence is ignored.
     *
     * @param sentencelist list of sentences ({@code String}), tokens separated by whitespace
     * @return map from first part ({@code String}) to occurrence count ({@code Integer})
     */
    static HashMap counttagsingle(LinkedList sentencelist) {
        HashMap counts = new HashMap();
        for (Iterator it = sentencelist.iterator(); it.hasNext();) {
            StringTokenizer tokens = new StringTokenizer((String) it.next());
            while (tokens.hasMoreTokens()) {
                String first = firstPart(tokens.nextToken());
                if (first == null) {
                    break; // malformed token: drop the remainder of this sentence
                }
                addwordinc(counts, first);
            }
        }
        return counts;
    }

    /**
     * Counts how often each pair of adjacent first parts occurs within a sentence.
     *
     * <p>The key for a pair is {@code previous + " " + current}. A token without two parts
     * ends the processing of its own sentence only; later sentences are still counted.
     *
     * @param sentencelist list of sentences ({@code String}), tokens separated by whitespace
     * @return map from pair key ({@code String}) to occurrence count ({@code Integer})
     */
    static HashMap counttagpair(LinkedList sentencelist) {
        HashMap counts = new HashMap();
        for (Iterator it = sentencelist.iterator(); it.hasNext();) {
            StringTokenizer tokens = new StringTokenizer((String) it.next());
            if (!tokens.hasMoreTokens()) {
                continue;
            }
            String previous = firstPart(tokens.nextToken());
            // A malformed first token (previous == null) skips just this sentence.
            while (previous != null && tokens.hasMoreTokens()) {
                String current = firstPart(tokens.nextToken());
                if (current == null) {
                    break;
                }
                addwordinc(counts, previous + " " + current);
                previous = current;
            }
        }
        return counts;
    }

    /**
     * Divides each pair count by the count of the pair's first element.
     *
     * <p>For a pair key {@code "x y"} the stored value is
     * {@code count("x y") / count("x")} as a {@code Float}. Pairs whose key contains no
     * token, or whose first element has no entry in {@code tagtimesmap}, are left out of
     * the result.
     *
     * @param tagtimesmap map from single element ({@code String}) to count ({@code Integer})
     * @param tagpairtimesmap map from pair key ({@code String}) to count ({@code Integer})
     * @return map from pair key ({@code String}) to ratio ({@code Float})
     */
    public static HashMap calcposterior(HashMap tagtimesmap,
            HashMap tagpairtimesmap) {
        HashMap probMap = new HashMap();
        for (Iterator it = tagpairtimesmap.keySet().iterator(); it.hasNext();) {
            String pairKey = (String) it.next();
            StringTokenizer parts = new StringTokenizer(pairKey);
            if (!parts.hasMoreTokens()) {
                continue;
            }
            String first = parts.nextToken().toLowerCase();
            Integer firstCount = (Integer) tagtimesmap.get(first);
            if (firstCount == null) {
                continue; // no denominator available for this pair
            }
            Integer pairCount = (Integer) tagpairtimesmap.get(pairKey);
            double ratio = pairCount.doubleValue() / firstCount.doubleValue();
            probMap.put(pairKey, Float.valueOf((float) ratio));
        }
        return probMap;
    }

    /** Returns the lower-cased text before the first slash, or null if the token lacks two parts. */
    private static String firstPart(String token) {
        String[] parts = token.toLowerCase().split(PART_SEPARATOR);
        return (parts.length < 2) ? null : parts[0];
    }
}
|