有沒有人自己編寫的adaboost算法的r code

《機器學習實戰》這本書上有 AdaBoost 的源碼，這裏直接摘抄給你，順帶安利一下這本書。

'''
Created on Nov 28, 2010

Adaboost is short for Adaptive Boosting

@author: Peter
'''

import numpy as np
from numpy import *

def loadSimpData():
    """Return a tiny hard-coded data set for experimenting with the stumps.

    Returns:
        tuple: ``(datMat, classLabels)`` where ``datMat`` is a 5x2
        ``numpy.matrix`` of feature values and ``classLabels`` is a list of
        five labels, each ``1.0`` or ``-1.0``, one per row of ``datMat``.
    """
    datMat = np.matrix([
        [1.0, 2.1],
        [2.0, 1.1],
        [1.3, 1.0],
        [1.0, 1.0],
        [2.0, 1.0],
    ])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return datMat, classLabels

def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into features and labels.

    The number of fields is taken from the first non-blank line. On every
    data line the first ``numFeat - 1`` fields are read as features and the
    last field is read as the label.

    Args:
        fileName: path of the tab-delimited file to read.

    Returns:
        tuple: ``(dataMat, labelMat)`` - a list of per-row feature lists and
        a list of float labels, in file order.

    Raises:
        ValueError: if a field cannot be converted to ``float``.
        IndexError: if a line has fewer fields than the first data line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single ``with`` block replaces the two unclosed ``open`` calls.
    with open(fileName) as fr:
        for line in fr:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            if numFeat is None:
                numFeat = len(line.split('\t'))  # number of fields per row
            curLine = line.strip().split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Classify every row by thresholding a single feature column.

    Args:
        dataMatrix: 2-D samples-by-features data (``numpy.matrix``, ndarray
            or nested list).
        dimen: index of the feature column to compare.
        threshVal: threshold the column is compared against.
        threshIneq: ``'lt'`` labels rows with ``value <= threshVal`` as -1;
            any other value labels rows with ``value > threshVal`` as -1.

    Returns:
        numpy.ndarray: an ``(m, 1)`` float array holding ``1.0`` or ``-1.0``
        for each of the ``m`` rows.
    """
    # Work on a plain 1-D column so matrix, ndarray and list inputs all
    # produce the same row mask.
    column = np.asarray(dataMatrix)[:, dimen]
    retArray = np.ones((column.shape[0], 1))
    if threshIneq == 'lt':
        mask = column <= threshVal
    else:
        mask = column > threshVal
    retArray[mask] = -1.0
    return retArray

def buildStump(dataArr, classLabels, D):
    """Find the decision stump with the lowest weighted error.

    For every feature column, thresholds are stepped from one step below the
    column minimum up to the column maximum (``numSteps`` = 10 steps), and
    both inequality directions are tried via ``stumpClassify``.

    Args:
        dataArr: 2-D samples-by-features data.
        classLabels: sequence of labels, one per sample.
        D: ``(m, 1)`` matrix of sample weights.

    Returns:
        tuple: ``(bestStump, minError, bestClasEst)`` where ``bestStump`` is
        a dict with keys ``'dim'``, ``'thresh'`` and ``'ineq'``,
        ``minError`` is the 1x1 matrix ``D.T * errors`` of the best stump,
        and ``bestClasEst`` is that stump's ``(m, 1)`` predictions.
    """
    # ``asmatrix`` replaces ``mat``, which NumPy 2.0 removed.
    dataMatrix = np.asmatrix(dataArr)
    labelMat = np.asmatrix(classLabels).T
    m, n = np.shape(dataMatrix)
    numSteps = 10.0
    bestStump = {}
    bestClasEst = np.asmatrix(np.zeros((m, 1)))
    minError = np.inf  # init error sum to +infinity
    for i in range(n):  # loop over all dimensions
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps) + 1):  # loop over the value range
            # The threshold does not depend on the inequality, so compute it
            # once per step rather than once per direction.
            threshVal = rangeMin + float(j) * stepSize
            for inequal in ('lt', 'gt'):  # less-than and greater-than
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                # 1 where the prediction is wrong, 0 where it is right.
                errArr = np.asmatrix(np.ones((m, 1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T * errArr  # total error weighted by D
                if weightedError < minError:
                    minError = weightedError
                    bestClasEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClasEst

def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost ensemble of decision stumps.

    Each round builds the best stump for the current sample weights with
    ``buildStump``, computes its vote weight ``alpha``, re-weights the
    samples, and stops early once the aggregate training error reaches 0.

    Args:
        dataArr: 2-D samples-by-features training data.
        classLabels: sequence of labels, one per sample.
        numIt: maximum number of boosting rounds.

    Returns:
        tuple: ``(weakClassArr, aggClassEst)`` - the list of stump dicts
        (each with an added ``'alpha'`` key) and the ``(m, 1)`` matrix of
        alpha-weighted summed predictions on the training data.
    """
    weakClassArr = []
    m = np.shape(dataArr)[0]
    # The label column is loop-invariant, so build it once.
    labelMat = np.asmatrix(classLabels).T
    D = np.asmatrix(np.ones((m, 1)) / m)  # init D to all equal
    aggClassEst = np.asmatrix(np.zeros((m, 1)))
    eps = 1e-16  # floor for the error so error == 0 cannot divide by zero
    for _ in range(numIt):
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        # ``error`` arrives as a 1x1 matrix; pull out the scalar explicitly
        # and avoid ``max``, which ``from numpy import *`` may shadow.
        err = float(np.asarray(error).item())
        alpha = float(0.5 * np.log((1.0 - err) / (err if err > eps else eps)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)  # store stump params
        # Exponent is -alpha where the stump was right, +alpha where wrong.
        expon = np.multiply(-1 * alpha * labelMat, classEst)
        D = np.multiply(D, np.exp(expon))  # new D for the next iteration
        D = D / D.sum()
        # Training error of the whole ensemble so far.
        aggClassEst += alpha * classEst
        aggErrors = np.multiply(np.sign(aggClassEst) != labelMat, np.ones((m, 1)))
        errorRate = aggErrors.sum() / m
        print("total error: ", errorRate)
        if errorRate == 0.0:
            break
    return weakClassArr, aggClassEst

def adaClassify(datToClass, classifierArr):
    """Classify samples with a trained list of weighted decision stumps.

    Args:
        datToClass: 2-D samples-by-features data to classify.
        classifierArr: sequence of stump dicts with keys ``'dim'``,
            ``'thresh'``, ``'ineq'`` and ``'alpha'``.

    Returns:
        numpy.matrix: ``(m, 1)`` matrix with the sign of the alpha-weighted
        sum of the stump predictions for each of the ``m`` samples.
    """
    dataMatrix = np.asmatrix(datToClass)
    m = np.shape(dataMatrix)[0]
    aggClassEst = np.asmatrix(np.zeros((m, 1)))
    for stump in classifierArr:
        classEst = stumpClassify(
            dataMatrix, stump['dim'], stump['thresh'], stump['ineq']
        )
        aggClassEst += stump['alpha'] * classEst
        # NOTE(review): indentation was lost in the source; the running
        # estimate is assumed to be printed once per stump - confirm.
        print(aggClassEst)
    return np.sign(aggClassEst)

def plotROC(predStrengths, classLabels):
    """Plot a ROC curve and print the area under it.

    The cursor starts at (1.0, 1.0) and walks the samples in ascending order
    of prediction strength, stepping down for each sample labelled 1.0 and
    left for every other sample.

    Args:
        predStrengths: prediction strength per sample; a 1 x m matrix, an
            m x 1 matrix or a 1-D array are all accepted.
        classLabels: sequence of labels, one per sample; ``1.0`` marks the
            positive class.

    Raises:
        ZeroDivisionError: if no label equals 1.0, or if all of them do.
    """
    import matplotlib.pyplot as plt

    cur = (1.0, 1.0)  # cursor
    ySum = 0.0  # accumulates curve heights for the AUC
    # count_nonzero avoids ``sum``, which ``from numpy import *`` may shadow.
    numPosClas = int(np.count_nonzero(np.asarray(classLabels) == 1.0))
    yStep = 1 / float(numPosClas)
    xStep = 1 / float(len(classLabels) - numPosClas)
    # Flatten first so row-matrix, column-matrix and 1-D inputs all work.
    sortedIndicies = np.asarray(predStrengths).ravel().argsort()
    fig = plt.figure()
    fig.clf()
    ax = plt.subplot(111)
    # Loop through all the values, drawing a line segment at each point.
    for index in sortedIndicies.tolist():
        if classLabels[index] == 1.0:
            delX = 0
            delY = yStep
        else:
            delX = xStep
            delY = 0
            # NOTE(review): indentation was lost in the source; the height
            # is assumed to be added only on horizontal steps - confirm.
            ySum += cur[1]
        # Draw line from cur to (cur[0] - delX, cur[1] - delY).
        ax.plot([cur[0], cur[0] - delX], [cur[1], cur[1] - delY], c='b')
        cur = (cur[0] - delX, cur[1] - delY)
    ax.plot([0, 1], [0, 1], 'b--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve for AdaBoost horse colic detection system')
    ax.axis([0, 1, 0, 1])
    plt.show()
    print("the Area Under the Curve is: ", ySum * xStep)

上一篇:IP地址219.25.23.56的缺省子網掩碼有幾位，什麼叫子網掩碼？

下一篇:如何搭建自己的crm客戶管理系統

北京紫禁城的外觀

國內知名內部源代碼

IE瀏覽器如何更新flash插件在IE瀏覽器中更新flash插件的操作方法