import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Pattern;
/**
 * Word-frequency utility: reads a text file, splits it into words, counts how
 * often each word occurs and reports the most frequent ones.
 *
 * <p>The steps are exposed as separate public methods so they can be used
 * individually; {@link #main(String[])} chains them together.
 */
public class Test {

    /** Number of entries kept by {@link #findMax5(Hashtable)}. */
    private static final int TOP_COUNT = 5;

    /** File analysed by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "c:/test3.txt";

    /**
     * Word separator: any single character outside {@code [a-zA-Z_0-9]}.
     * Compiled once instead of on every {@link #mySplit(String)} call.
     */
    private static final Pattern SEPARATOR = Pattern.compile("\\W");

    /** Orders word counts from the highest count to the lowest. */
    private static final Comparator<WordCount> BY_COUNT_DESCENDING = new Comparator<WordCount>() {
        @Override
        public int compare(WordCount left, WordCount right) {
            return Integer.compare(right.count, left.count);
        }
    };

    /** Immutable (word, count) pair used while ranking table entries. */
    private static final class WordCount {
        final String word;
        final int count;

        WordCount(String word, int count) {
            this.word = word;
            this.count = count;
        }
    }

    /**
     * Reads a text file and returns its content in lower case.
     *
     * <p>Every line is terminated with {@code '\n'} in the returned text. The
     * file is decoded with the platform default charset. The reader is always
     * closed, including when reading fails part-way through.
     *
     * @param path path of the file to read
     * @return the lower-cased content; if the file cannot be opened or read,
     *         whatever was read before the failure (possibly the empty string)
     */
    public String getFile(String path) {
        StringBuilder context = new StringBuilder();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                context.append(line).append('\n');
            }
        } catch (IOException e) {
            // Best effort: report the problem and fall through with what was read so far.
            // FileNotFoundException is an IOException, so a missing file ends up here too.
            System.err.println("Unable to read " + path + ": " + e);
        }
        // Locale.ROOT keeps the result independent of the JVM's default locale
        // (e.g. a Turkish locale would turn 'I' into a dotless 'ı').
        return context.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Splits text into words, using every non-word character as a separator.
     *
     * @param context text to split; {@code null} is treated as empty text
     * @return the words in order of appearance, without empty strings; never {@code null}
     */
    public List<String> mySplit(String context) {
        List<String> all = new ArrayList<String>();
        if (context == null) {
            return all;
        }
        for (String word : SEPARATOR.split(context)) {
            // Consecutive separators produce empty tokens; drop them.
            if (!word.isEmpty()) {
                all.add(word);
            }
        }
        return all;
    }

    /**
     * Counts how often each word occurs, ignoring case.
     *
     * <p>Words are compared case-insensitively and stored under their
     * lower-case form, so each word appears exactly once in the result.
     * Runs in a single pass over the list.
     *
     * @param all words to count (each element is converted with {@code toString()});
     *            {@code null} elements are skipped and a {@code null} list is
     *            treated as empty
     * @return table mapping each lower-cased word to its number of occurrences
     */
    public Hashtable<String, Integer> contWords(List<?> all) {
        Hashtable<String, Integer> allTable = new Hashtable<String, Integer>();
        if (all == null) {
            return allTable;
        }
        for (Object item : all) {
            if (item == null) {
                continue; // Hashtable cannot hold null keys
            }
            String word = item.toString().toLowerCase(Locale.ROOT);
            Integer seen = allTable.get(word);
            allTable.put(word, seen == null ? 1 : seen + 1);
        }
        return allTable;
    }

    /**
     * Finds the five entries with the highest counts.
     *
     * @param allTable table mapping words to counts, as produced by {@link #contWords(List)}
     * @return a new table holding at most five entries of {@code allTable}
     * @throws NumberFormatException if a value's string form is not an integer
     * @see #findMax(Hashtable, int)
     */
    public Hashtable<String, Integer> findMax5(Hashtable<?, ?> allTable) {
        return findMax(allTable, TOP_COUNT);
    }

    /**
     * Finds the {@code limit} entries with the highest counts.
     *
     * <p>Keys and values are read from the entries themselves, so keys may
     * contain any character (including {@code '='}). When several entries tie
     * for the last places, the ones met first while iterating the table win.
     *
     * @param allTable table mapping words to counts; {@code null} is treated as empty
     * @param limit maximum number of entries to return; values below one yield an empty table
     * @return a new table holding at most {@code limit} entries; being a
     *         {@code Hashtable}, it does not preserve the ranking order
     * @throws NumberFormatException if a value's string form is not an integer
     */
    public Hashtable<String, Integer> findMax(Hashtable<?, ?> allTable, int limit) {
        Hashtable<String, Integer> result = new Hashtable<String, Integer>();
        List<WordCount> ranked = rank(allTable);
        for (int i = 0; i < ranked.size() && i < limit; i++) {
            WordCount entry = ranked.get(i);
            result.put(entry.word, entry.count);
        }
        return result;
    }

    /**
     * Returns the smallest value of an array without modifying the array.
     *
     * @param v values to inspect; must contain at least one element
     * @return the minimum of {@code v}
     * @throws ArrayIndexOutOfBoundsException if {@code v} is empty
     */
    public int findMin(int[] v) {
        int min = v[0];
        for (int i = 1; i < v.length; i++) {
            if (v[i] < min) {
                min = v[i];
            }
        }
        return min;
    }

    /**
     * Prints a header line followed by one line per entry, highest count first.
     *
     * @param result table mapping words to counts; {@code null} prints the header only
     * @throws NumberFormatException if a value's string form is not an integer
     */
    public static void printResult(Hashtable<?, ?> result) {
        System.out.println("排前五的單詞情況如下:");
        for (WordCount entry : rank(result)) {
            System.out.println(entry.word + " 的個數為: " + entry.count);
        }
    }

    /**
     * Copies the entries of a table into a list sorted by descending count.
     * The sort is stable, so ties keep the table's iteration order.
     */
    private static List<WordCount> rank(Hashtable<?, ?> table) {
        List<WordCount> ranked = new ArrayList<WordCount>();
        if (table == null) {
            return ranked;
        }
        for (Map.Entry<?, ?> entry : table.entrySet()) {
            ranked.add(new WordCount(
                    entry.getKey().toString(),
                    Integer.parseInt(entry.getValue().toString())));
        }
        Collections.sort(ranked, BY_COUNT_DESCENDING);
        return ranked;
    }

    /**
     * Prints the five most frequent words of a file.
     *
     * @param args optional: {@code args[0]} is the file to analyse; without it
     *             the default path {@code c:/test3.txt} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        Test test = new Test();
        String context = test.getFile(path);
        List<String> all = test.mySplit(context);
        Hashtable<String, Integer> allTable = test.contWords(all);
        Hashtable<String, Integer> result = test.findMax5(allTable);
        Test.printResult(result);
    }
}