From bf9e2b0400698ba7f623f14fe1ed00f58e3b2e31 Mon Sep 17 00:00:00 2001
From: Parth Paradkar <parthparadkar3@gmail.com>
Date: Sat, 5 Oct 2019 12:25:32 +0530
Subject: [PATCH 1/3] Pure implementation of KNN added

---
 machine_learning/k_nearest_neighbours.py | 41 ++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 machine_learning/k_nearest_neighbours.py

diff --git a/machine_learning/k_nearest_neighbours.py b/machine_learning/k_nearest_neighbours.py
new file mode 100644
index 000000000000..307afdedbab6
--- /dev/null
+++ b/machine_learning/k_nearest_neighbours.py
@@ -0,0 +1,41 @@
+import numpy as np
+from collections import Counter
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+
+data = datasets.load_iris()
+
+# print(data)
+
+X = np.array(data['data'])
+y = np.array(data['target'])
+classes = data['target_names']
+
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+def euclidean_distance(a, b):
+    """
+    Gives the Euclidean distance between two points
+    >>> euclidean_distance([0, 0], [3, 4])
+    5.0
+    >>> euclidean_distance([1, 2, 3], [1, 8, 11])
+    10.0
+    """
+    return np.linalg.norm(np.array(a) - np.array(b))
+
+def classifier(train_data, train_target, classes, point, k=5):
+    """
+    Classifies the point using the KNN algorithm
+    k closest points are found (ranked in ascending order of euclidean distance)
+    """
+    data = zip(train_data, train_target)
+    distances = []
+    for data_point in data:
+        distance = euclidean_distance(data_point[0], point)
+        distances.append((distance, data_point[1]))
+    votes = [i[1] for i in sorted(distances)[:k]]
+    result = Counter(votes).most_common(1)[0][0]
+    return classes[result]
+
+if __name__ == "__main__":
+    print(classifier(X_train, y_train, classes, [4.4, 3.1, 1.3, 1.4]))

From 0ae6ccd9a8dc9932cc438017ce510ad118e1097a Mon Sep 17 00:00:00 2001
From: Parth Paradkar <parthparadkar3@gmail.com>
Date: Sat, 5 Oct 2019 18:42:54 +0530
Subject: [PATCH 2/3] Comments and test case added

---
 machine_learning/k_nearest_neighbours.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/machine_learning/k_nearest_neighbours.py b/machine_learning/k_nearest_neighbours.py
index 307afdedbab6..7c80d07e7bc2 100644
--- a/machine_learning/k_nearest_neighbours.py
+++ b/machine_learning/k_nearest_neighbours.py
@@ -29,13 +29,17 @@ def classifier(train_data, train_target, classes, point, k=5):
     k closest points are found (ranked in ascending order of euclidean distance)
     """
     data = zip(train_data, train_target)
+    # List of distances of all points from the point to be classified
     distances = []
     for data_point in data:
         distance = euclidean_distance(data_point[0], point)
         distances.append((distance, data_point[1]))
+    # Choosing 'k' points with the least distances. 
     votes = [i[1] for i in sorted(distances)[:k]]
+    # Most common class occuring among them is chosen to be the class into which the point is classified
     result = Counter(votes).most_common(1)[0][0]
     return classes[result]
 
+
 if __name__ == "__main__":
-    print(classifier(X_train, y_train, classes, [4.4, 3.1, 1.3, 1.4]))
+    print(classifier([[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]], [0, 0, 0, 0, 1, 1, 1], ['A', 'B'], [1.2, 1.2]))

From 42b37adac88d0814b6823797540f32deff95a4ae Mon Sep 17 00:00:00 2001
From: Parth Paradkar <parthparadkar3@gmail.com>
Date: Sat, 5 Oct 2019 19:49:59 +0530
Subject: [PATCH 3/3] doctest added

---
 machine_learning/k_nearest_neighbours.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/machine_learning/k_nearest_neighbours.py b/machine_learning/k_nearest_neighbours.py
index 7c80d07e7bc2..83d8399fe9b6 100644
--- a/machine_learning/k_nearest_neighbours.py
+++ b/machine_learning/k_nearest_neighbours.py
@@ -5,8 +5,6 @@
 
 data = datasets.load_iris()
 
-# print(data)
-
 X = np.array(data['data'])
 y = np.array(data['target'])
 classes = data['target_names']
@@ -27,6 +25,17 @@ def classifier(train_data, train_target, classes, point, k=5):
     """
     Classifies the point using the KNN algorithm
     k closest points are found (ranked in ascending order of euclidean distance)
+    Params:
+    :train_data: Set of points that are classified into two or more classes
+    :train_target: List of classes in the order of train_data points
+    :classes: Labels of the classes
+    :point: The data point that needs to be classified
+
+    >>> X_train = [[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]]
+    >>> y_train = [0, 0, 0, 0, 1, 1, 1]
+    >>> classes = ['A','B']; point = [1.2,1.2]
+    >>> classifier(X_train, y_train, classes,point)
+    'A'
     """
     data = zip(train_data, train_target)
     # List of distances of all points from the point to be classified
@@ -36,10 +45,11 @@ def classifier(train_data, train_target, classes, point, k=5):
         distances.append((distance, data_point[1]))
     # Choosing 'k' points with the least distances. 
     votes = [i[1] for i in sorted(distances)[:k]]
-    # Most common class occuring among them is chosen to be the class into which the point is classified
+    # Most commonly occurring class among them
+    # is the class into which the point is classified
     result = Counter(votes).most_common(1)[0][0]
     return classes[result]
 
 
 if __name__ == "__main__":
-    print(classifier([[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]], [0, 0, 0, 0, 1, 1, 1], ['A', 'B'], [1.2, 1.2]))
+    print(classifier(X_train, y_train, classes, [4.4, 3.1, 1.3, 1.4]))
\ No newline at end of file