當前位置:編程學習大全網 - 源碼下載 - 求KNN文本分類算法java實現源代碼散分了!!!!

求KNN文本分類算法java實現源代碼散分了!!!!

#include <iostream>

#include <cmath>

#include <fstream>

using namespace std;

#define NATTRS 5 // number of attribute values stored per example (size of vector::attributes)

#define MAXSZ 1700 // capacity of the global training-set array trSet

#define MAXVALUE 10000.0 // placeholder distance given to every knn slot before the search; the original author assumes each attribute value is below 10000

#define K 5 // number of nearest neighbours kept in the global knn array

// One labelled example: NATTRS numeric attribute values plus the class label
// they belong to. Field order matters because main() brace-initialises it
// positionally.
struct vector
{
    double attributes[NATTRS]; // attribute values, in file order
    double classlabel;         // class label, stored as a double
};

// One entry of the nearest-neighbour set: how far a training example is from
// the query, and which class label that example carries.
struct item
{
    double distance;   // distance between the training example and the query
    double classlabel; // class label copied from the training example
};

struct vector trSet[MAXSZ];// global training set with a fixed capacity of MAXSZ examples

struct item knn[K];// global k-nearest-neighbour set; Classify() overwrites it and main() prints it

int curTSize = 0; // number of examples currently stored in trSet (incremented by AddtoTSet)

// Appends one example to the global training set.
//
// v: the example to store (copied into trSet).
// Returns 1 when the example was stored, or 0 when trSet already holds MAXSZ
// examples; in the latter case a notice is printed and nothing is stored.
int AddtoTSet(struct vector v)
{
    const bool hasRoom = curTSize < MAXSZ;

    if (hasRoom) {
        // Store at the current end, then advance the element count.
        trSet[curTSize++] = v;
        return 1;
    }

    cout << endl << "The training set has " << MAXSZ << " examples!" << endl << endl;
    return 0;
}

// Euclidean distance between two examples, computed over all NATTRS
// attributes. The class labels do not take part in the computation.
//
// v1, v2: the two examples to compare.
// Returns the square root of the summed squared attribute differences.
double Distance(struct vector v1, struct vector v2)
{
    double sumOfSquares = 0.0;

    for (int a = 0; a < NATTRS; ++a) {
        const double diff = v1.attributes[a] - v2.attributes[a];
        sumOfSquares += diff * diff;
    }

    return sqrt(sumOfSquares);
}

// Returns the index of the entry with the largest distance among the K
// entries of knn[], i.e. the neighbour that should be evicted next.
// When several entries share the largest distance the lowest index wins.
//
// knn: array of exactly K entries (this parameter shadows the global knn).
int max(struct item knn[])
{
    int maxNo = 0;

    for (int i = 1; i < K; i++)
        if (knn[i].distance > knn[maxNo].distance)
            maxNo = i;

    return maxNo;
}

// Decides which class label to assign to the input vector v using the
// k-nearest-neighbour rule over the global training set trSet.
//
// Side effect: rewrites the global knn[] array. Slots 0 .. min(K, curTSize)-1
// hold the nearest neighbours found; any remaining slots keep the MAXVALUE
// placeholder distance and must not be interpreted as neighbours.
//
// v: the example to classify (its own classlabel field is ignored).
// Returns the class label occurring most often among the neighbours found;
// on a tie the label of the lowest-indexed neighbour wins. Returns 0.0 when
// the training set is empty.
double Classify(struct vector v)
{
    int found = 0; // number of knn slots holding a real neighbour
    int i;

    // Mark every slot as empty so callers can tell unused slots apart.
    for (i = 0; i < K; i++)
        knn[i].distance = MAXVALUE;

    for (i = 0; i < curTSize; i++) {
        const double dd = Distance(trSet[i], v);
        int slot;

        if (found < K) {
            // Still filling up: take the next free slot unconditionally, so
            // examples farther away than MAXVALUE are not silently dropped.
            slot = found++;
        } else {
            // Set is full: replace the current farthest neighbour, but only
            // if this example is strictly closer. `::max` pins the call to
            // the function above rather than anything named max in std.
            slot = ::max(knn);
            if (!(dd < knn[slot].distance))
                continue;
        }

        knn[slot].distance = dd;
        knn[slot].classlabel = trSet[i].classlabel;
    }

    if (found == 0)
        return 0.0; // nothing to vote with

    // Majority vote restricted to the slots that really hold a neighbour;
    // placeholder slots (fewer than K training examples) must not vote.
    double mfreqC = knn[0].classlabel; // most frequent class label so far
    int mfreq = 0;                     // how often mfreqC occurs

    for (i = 0; i < found; i++) {
        int freq = 0; // occurrences of knn[i].classlabel, itself included
        for (int j = 0; j < found; j++)
            if (knn[i].classlabel == knn[j].classlabel)
                freq++;

        // Strict '>' keeps the earliest slot on ties.
        if (freq > mfreq) {
            mfreq = freq;
            mfreqC = knn[i].classlabel;
        }
    }

    return mfreqC;
}

// Program entry point.
//
// Loads training examples from a hard-coded data file (one record = class
// label followed by NATTRS attribute values, whitespace separated), echoes
// each record, classifies one built-in test vector and prints the predicted
// label followed by the contents of the global knn[] array.
//
// Returns 0 on success, 1 when the data file cannot be opened.
int main()
{
    ifstream filein("G:\\data\\for knn\\data.txt");

    if (filein.fail()) {
        cout << "Can't open data.txt" << endl;
        return 1;
    }

    struct vector trExmp;

    // Drive the loop with the extraction itself: testing eof() before reading
    // would process one extra, bogus record when the file ends with trailing
    // whitespace or a truncated line.
    while (filein >> trExmp.classlabel) {
        bool complete = true;

        for (int a = 0; a < NATTRS; a++) {
            if (!(filein >> trExmp.attributes[a])) {
                complete = false;
                break;
            }
        }

        if (!complete)
            break; // truncated or malformed record: stop without storing it

        cout << trExmp.classlabel << " ";
        for (int a = 0; a < NATTRS; a++)
            cout << trExmp.attributes[a] << " ";
        cout << endl;

        if (!AddtoTSet(trExmp))
            break; // training set is full
    }

    filein.close();

    struct vector testv = {{142, 188, 11, 1159, 0.5513196}, 17};

    const double classlabel = Classify(testv);

    cout << "The classlable of the testv is: ";
    cout << classlabel << endl;

    // Dump the neighbour set that Classify() left behind.
    for (int i = 0; i < K; i++)
        cout << knn[i].distance << "\t" << knn[i].classlabel << endl;

    //cout<<max(knn);

    return 0;
}

  • 上一篇:洛陽師範學院的網絡工程(物聯網方向)專業怎麽樣
  • 下一篇:Microsoft(微軟) 的介紹,歷史
  • copyright 2024編程學習大全網