您好,登錄后才能下訂單哦!
KNN算法實現:
提取文本:
import numpy as np


# Load the samples from a text file.
def loadDataSet(fileName):
    """Read a comma-separated sample file into a feature matrix and labels.

    The number of columns is taken from the first non-blank line. On every
    line the first ``columns - 1`` fields are parsed as float features and
    the last field as the float label. Blank lines are skipped.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(xMat, labelMat)`` where ``xMat`` is a ``numpy.matrix``
        with one row per sample and ``labelMat`` is a list of floats.

    Raises:
        IndexError: If a line has fewer fields than the first line.
        ValueError: If a field cannot be parsed as a float.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single pass inside ``with`` guarantees the file handle is closed.
    with open(fileName) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                continue
            curLine = line.split(',')
            if numFeat is None:
                numFeat = len(curLine)
            dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
            labelMat.append(float(curLine[-1]))
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    return np.asmatrix(dataMat), labelMat


# Normalise the training samples.
def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Args:
        dataSet: 2-D ``numpy.ndarray`` or ``numpy.matrix``, one sample per row.

    Returns:
        An array (or matrix, matching the input type) of the same shape in
        which each column has been shifted by its minimum and divided by its
        value range. Columns whose values are all equal become all zeros.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has a zero range; divide by 1 instead so the result
    # is 0 rather than NaN.
    safeRanges = np.where(ranges == 0, 1, ranges)
    return (dataSet - minVals) / safeRanges


# Split the full sample set into training samples and test samples.
def classifyDataSet(normDataSet, labelMat, testStep=50):
    """Hold out every ``testStep``-th sample as test data.

    Samples whose row index is a multiple of ``testStep`` (starting with
    row 0) go to the test set; all other samples go to the training set.

    Args:
        normDataSet: 2-D array-like of samples, one sample per row.
        labelMat: Sequence of labels, indexable by row number.
        testStep: Interval between held-out samples (default 50).

    Returns:
        A tuple ``(classfiDataSet, classfiResultSet, testDataSet,
        testResultSet)`` of four lists: training rows, training labels,
        test rows and test labels.

    Raises:
        ValueError: If ``testStep`` is smaller than 1.
    """
    if testStep < 1:
        raise ValueError("testStep must be at least 1")
    # np.array copies and turns a matrix into a plain ndarray, so each row
    # yielded below is a 1-D array.
    labDataSet = np.array(normDataSet)
    testDataSet = []
    testResultSet = []
    classfiDataSet = []
    classfiResultSet = []
    for j, row in enumerate(labDataSet):
        if j % testStep == 0:
            testDataSet.append(row)
            testResultSet.append(labelMat[j])
        else:
            classfiDataSet.append(row)
            classfiResultSet.append(labelMat[j])
    return classfiDataSet, classfiResultSet, testDataSet, testResultSet
KNN
這裡傳入的 inX 和 dataSet 是兩個數組(ndarray),不是矩陣(matrix)
import numpy as np
import operator as op


def classify(inX, dataSet, labels, k=7):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. If several labels receive the same number of
    votes, the label of the nearest neighbour among the tied labels wins.

    Args:
        inX: 1-D array-like feature vector of the sample to classify.
        dataSet: 2-D array-like of training samples, one sample per row.
        labels: Sequence of training labels, indexable by row number.
        k: Number of neighbours that vote (default 7). Values larger than
            the number of training samples are clamped to that number.

    Returns:
        The label with the most votes among the k nearest neighbours.

    Raises:
        ValueError: If the training set is empty or ``k`` is smaller than 1.
    """
    samples = np.asarray(dataSet)
    dataSetSize = samples.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(k, dataSetSize)

    # Broadcasting subtracts inX from every training row.
    diffMat = samples - np.asarray(inX)
    distances = np.sqrt((diffMat ** 2).sum(axis=1))
    # A stable sort keeps equally distant samples in their original order.
    sortedDistIndices = distances.argsort(kind="stable")

    classCount = {}
    for idx in sortedDistIndices[:k]:
        voteLabel = labels[idx]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # dict.items() works on Python 2 and 3 (iteritems() is Python 2 only);
    # max() returns the first entry with the highest count.
    return max(classCount.items(), key=op.itemgetter(1))[0]
main函數
# Driver script: load the samples, normalise them, classify the held-out
# test samples with KNN, print predictions next to the true labels, and
# show a scatter plot of two of the normalised features.
import KNN
import fileOp
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)

dataMat, labelsMat = fileOp.loadDataSet('donate.txt')
normDataSet = fileOp.autoNorm(dataMat)

classfiDataSet, classfiResultSet, testDataSet, testResultSet = fileOp.classifyDataSet(normDataSet, labelsMat)
testDataSet = np.array(testDataSet)
classfiDataSet = np.array(classfiDataSet)

# Classify every test sample using its 5 nearest training samples.
result = [
    KNN.classify(sample, classfiDataSet, classfiResultSet, 5)
    for sample in testDataSet
]

# The parenthesised single-argument form prints the same on Python 2 and 3.
print(result)
print(testResultSet)

# Convert to a plain ndarray so that column slices are 1-D.
plotData = np.asarray(normDataSet)
# The same label-derived values are passed as both marker size and colour.
markerValues = 15.0 * (np.array(labelsMat) + 1)
ax.scatter(plotData[:, 2], plotData[:, 3], markerValues, markerValues)
plt.show()
注意:
列表(list)可以增加或刪除元素,但沒有 shape 屬性
數組有 shape、轉置等操作,這些操作是基於某個軸進行的。數組有切片功能,一般用數組操作即可,矩陣用於運算。
np.dot(arr.T,arr)可以用于計算內積
numpy
array和matrix之間的區別:參考http://www.aichengxu.com/view/12902
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。