當前位置:編程學習大全網 - 源碼下載 - 數據挖掘源代碼

數據挖掘源代碼

基本Kmeans算法實現 C++代碼

#include?<iostream>

#include?<sstream>

#include?<fstream>

#include?<vector>

#include?<math.h>

#include?<stdlib.h>

#define?k?3//簇的數目

using?namespace?std;

//存放元組的屬性信息

typedef?vector<double>?Tuple;//存儲每條數據記錄

int?dataNum;//數據集中數據記錄數目

int?dimNum;//每條記錄的維數

//計算兩個元組間的歐幾裏距離

double?getDistXY(const?Tuple&?t1,?const?Tuple&?t2)?

{

double?sum?=?0;

for(int?i=1;?i<=dimNum;?++i)

{

sum?+=?(t1[i]-t2[i])?*?(t1[i]-t2[i]);

}

return?sqrt(sum);

}

//根據質心,決定當前元組屬於哪個簇

int?clusterOfTuple(Tuple?means[],const?Tuple&?tuple){

double?dist=getDistXY(means[0],tuple);

double?tmp;

int?label=0;//標示屬於哪壹個簇

for(int?i=1;i<k;i++){

tmp=getDistXY(means[i],tuple);

if(tmp<dist)?{dist=tmp;label=i;}

}

return?label;

}

//獲得給定簇集的平方誤差

double?getVar(vector<Tuple>?clusters[],Tuple?means[]){

double?var?=?0;

for?(int?i?=?0;?i?<?k;?i++)

{

vector<Tuple>?t?=?clusters[i];

for?(int?j?=?0;?j<?t.size();?j++)

{

var?+=?getDistXY(t[j],means[i]);

}

}

//cout<<"sum:"<<sum<<endl;

return?var;

}

//獲得當前簇的均值(質心)

Tuple?getMeans(const?vector<Tuple>&?cluster){

int?num?=?cluster.size();

Tuple?t(dimNum+1,?0);

for?(int?i?=?0;?i?<?num;?i++)

{

for(int?j=1;?j<=dimNum;?++j)

{

t[j]?+=?cluster[i][j];

}

}

for(int?j=1;?j<=dimNum;?++j)

t[j]?/=?num;

return?t;

//cout<<"sum:"<<sum<<endl;

}

void?print(const?vector<Tuple>?clusters[])

{

for(int?lable=0;?lable<k;?lable++)

{

cout<<"第"<<lable+1<<"個簇:"<<endl;

vector<Tuple>?t?=?clusters[lable];

for(int?i=0;?i<t.size();?i++)

{

cout<<i+1<<".(";

for(int?j=0;?j<=dimNum;?++j)

{

cout<<t[i][j]<<",?";

}

cout<<")\n";

}

}

}

void?KMeans(vector<Tuple>&?tuples){

vector<Tuple>?clusters[k];//k個簇

Tuple?means[k];//k個中心點

int?i=0;

//壹開始隨機選取k條記錄的值作為k個簇的質心(均值)

srand((unsigned?int)time(NULL));

for(i=0;i<k;){

int?iToSelect?=?rand()%tuples.size();

if(means[iToSelect].size()?==?0)

{

for(int?j=0;?j<=dimNum;?++j)

{

means[i].push_back(tuples[iToSelect][j]);

}

++i;

}

}

int?lable=0;

//根據默認的質心給簇賦值

for(i=0;i!=tuples.size();++i){

lable=clusterOfTuple(means,tuples[i]);

clusters[lable].push_back(tuples[i]);

}

double?oldVar=-1;

double?newVar=getVar(clusters,means);

cout<<"初始的的整體誤差平方和為:"<<newVar<<endl;?

int?t?=?0;

while(abs(newVar?-?oldVar)?>=?1)?//當新舊函數值相差不到1即準則函數值不發生明顯變化時,算法終止

{

cout<<"第?"<<++t<<"?次叠代開始:"<<endl;

for?(i?=?0;?i?<?k;?i++)?//更新每個簇的中心點

{

means[i]?=?getMeans(clusters[i]);

}

oldVar?=?newVar;

newVar?=?getVar(clusters,means);?//計算新的準則函數值

for?(i?=?0;?i?<?k;?i++)?//清空每個簇

{

clusters[i].clear();

}

//根據新的質心獲得新的簇

for(i=0;?i!=tuples.size();?++i){

lable=clusterOfTuple(means,tuples[i]);

clusters[lable].push_back(tuples[i]);

}

cout<<"此次叠代之後的整體誤差平方和為:"<<newVar<<endl;?

}

cout<<"The?result?is:\n";

print(clusters);

}

int?main(){

char?fname[256];

cout<<"請輸入存放數據的文件名:?";

cin>>fname;

cout<<endl<<"?請依次輸入:?維數?樣本數目"<<endl;

cout<<endl<<"?維數dimNum:?";

cin>>dimNum;

cout<<endl<<"?樣本數目dataNum:?";

cin>>dataNum;

ifstream?infile(fname);

if(!infile){

cout<<"不能打開輸入的文件"<<fname<<endl;

return?0;

}

vector<Tuple>?tuples;

//從文件流中讀入數據

for(int?i=0;?i<dataNum?&&?!infile.eof();?++i)

{

string?str;

getline(infile,?str);

istringstream?istr(str);

Tuple?tuple(dimNum+1,?0);//第壹個位置存放記錄編號,第2到dimNum+1個位置存放實際元素

tuple[0]?=?i+1;

for(int?j=1;?j<=dimNum;?++j)

{

istr>>tuple[j];

}

tuples.push_back(tuple);

}

cout<<endl<<"開始聚類"<<endl;

KMeans(tuples);

return?0;

}

  • 上一篇:我的友善之臂micro2440開發板,燒寫uboot和Linux內核後我想設置nfs網絡根文件系統,但是壹直有問題。
  • 下一篇:android 源碼裏有u-boot嗎
  • copyright 2024編程學習大全網