mirror of
https://hub.njuu.cf/TheAlgorithms/Python.git
synced 2023-10-11 13:06:12 +08:00
Merge branch 'master' into master
This commit is contained in:
commit 6ee6f1236f
@@ -6,8 +6,7 @@ python:
- "3.5"
- "3.6"
- "3.6-dev"
- "3.7-dev"
- "nightly"

install:
- if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
- if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi
@@ -1,12 +1,10 @@
class Graph:
class GRAPH:
    """docstring for GRAPH"""
    def __init__(self, nodes):
        self.nodes=nodes
        self.graph=[[0]*nodes for i in range (nodes)]
        self.visited=[0]*nodes

    def __init__(self, vertex):
        self.vertex = vertex
        self.graph = [[0] * vertex for i in range(vertex)]

    def add_edge(self, u, v):
        self.graph[u - 1][v - 1] = 1
        self.graph[v - 1][u - 1] = 1

    def show(self):

@@ -43,3 +41,30 @@ g.add_edge(4,8)
g.add_edge(5,9)
g.add_edge(6,10)
g.bfs(4)
=======
        print self.graph

    def add_edge(self, i, j):
        self.graph[i][j]=1
        self.graph[j][i]=1

    def bfs(self,s):
        queue=[s]
        self.visited[s]=1
        while len(queue)!=0:
            x=queue.pop(0)
            print(x)
            for i in range(0,self.nodes):
                if self.graph[x][i]==1 and self.visited[i]==0:
                    queue.append(i)
                    self.visited[i]=1

n=int(input("Enter the number of Nodes : "))
g=GRAPH(n)
e=int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for i in range(0,e):
    u,v=map(int, raw_input().split())
    g.add_edge(u,v)
s=int(input("Enter the source node :"))
g.bfs(s)
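The hunk above shows why this merge needed a follow-up: both sides of the conflict survived, complete with a stray `=======` marker, duplicate `__init__`/`add_edge` definitions, and a mix of Python 2 (`print self.graph`, `raw_input`) and Python 3. A minimal sketch of the nodes-based variant, cleaned up for Python 3 (an editor's reconstruction, not the committed code):

class GRAPH:
    """Undirected graph on an adjacency matrix, nodes numbered 0..nodes-1."""
    def __init__(self, nodes):
        self.nodes = nodes
        self.graph = [[0] * nodes for _ in range(nodes)]
        self.visited = [0] * nodes

    def add_edge(self, i, j):
        self.graph[i][j] = 1
        self.graph[j][i] = 1

    def bfs(self, s):
        queue = [s]
        self.visited[s] = 1
        while queue:
            x = queue.pop(0)
            print(x)
            for i in range(self.nodes):
                if self.graph[x][i] == 1 and self.visited[i] == 0:
                    queue.append(i)
                    self.visited[i] = 1

n = int(input("Enter the number of Nodes : "))
g = GRAPH(n)
e = int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for _ in range(e):
    u, v = map(int, input().split())   # input(), not Python 2's raw_input()
    g.add_edge(u, v)
s = int(input("Enter the source node :"))
g.bfs(s)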
@@ -1,33 +1,32 @@
class Graph:
class GRAPH:
    """docstring for GRAPH"""
    def __init__(self, nodes):
        self.nodes=nodes
        self.graph=[[0]*nodes for i in range (nodes)]
        self.visited=[0]*nodes

    def __init__(self, vertex):
        self.vertex = vertex
        self.graph = [[0] * vertex for i in range(vertex)]
        self.visited = [False] * vertex

    def add_edge(self, u, v):
        self.graph[u - 1][v - 1] = 1
        self.graph[v - 1][u - 1] = 1
    def show(self):
        print self.graph

        for i in self.graph:
            for j in i:
                print(j, end=' ')
            print(' ')
    def add_edge(self, i, j):
        self.graph[i][j]=1
        self.graph[j][i]=1


    def dfs(self, u):
        self.visited[u - 1] = True
        print('%d visited' % u)
        for i in range(1, self.vertex + 1):
            if self.graph[u - 1][i - 1] == 1 and self.visited[i - 1] == False:
    def dfs(self,s):
        self.visited[s]=1
        print(s)
        for i in range(0,self.nodes):
            if self.visited[i]==0 and self.graph[s][i]==1:
                self.dfs(i)



g = Graph(5)
g.add_edge(1,4)
g.add_edge(4,2)
g.add_edge(4,5)
g.add_edge(2,5)
g.add_edge(5,3)
g.dfs(1)
n=int(input("Enter the number of Nodes : "))
g=GRAPH(n)
e=int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for i in range(0,e):
    u,v=map(int, raw_input().split())
    g.add_edge(u,v)
s=int(input("Enter the source node :"))
g.dfs(s)
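The same conflict pattern repeats here: a 1-indexed `dfs(self, u)` and a 0-indexed `dfs(self, s)` both survive the merge. For reference, a cleaned-up sketch of the 0-indexed recursive variant (again an editor's reconstruction, assuming Python 3):

class GRAPH:
    """Undirected graph on an adjacency matrix, nodes numbered 0..nodes-1."""
    def __init__(self, nodes):
        self.nodes = nodes
        self.graph = [[0] * nodes for _ in range(nodes)]
        self.visited = [0] * nodes

    def add_edge(self, i, j):
        self.graph[i][j] = 1
        self.graph[j][i] = 1

    def dfs(self, s):
        # mark and print the current node, then recurse into unvisited neighbours
        self.visited[s] = 1
        print(s)
        for i in range(self.nodes):
            if self.visited[i] == 0 and self.graph[s][i] == 1:
                self.dfs(i)

g = GRAPH(5)
for u, v in [(0, 3), (3, 1), (3, 4), (1, 4), (4, 2)]:
    g.add_edge(u, v)
g.dfs(0)  # prints 0, 3, 1, 4, 2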
@@ -74,7 +74,7 @@ __Properties__
### Shell
![alt text][shell-image]

From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywherem considereing every nth element gives a sorted list. Such a list is said to be h-sorted. Equivanelty, it can be thought of as h intterleaved lists, each individually sorted.
From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywhere, considering every nth element gives a sorted list. Such a list is said to be h-sorted. Equivalently, it can be thought of as h interleaved lists, each individually sorted.

__Properties__
* Worst case performance O(nlog2 2n)

@@ -83,7 +83,7 @@ __Properties__

###### View the algorithm in [action][shell-toptal]

###Time-Compexity Graphs
### Time-Compexity Graphs

Comparing the complexity of sorting algorithms (Bubble Sort, Insertion Sort, Selection Sort)
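Since this README hunk only describes Shellsort in prose, here is a minimal sketch of the h-sorting idea it explains, assuming the classic n/2, n/4, ... gap sequence (an illustration, not taken from the repository's own sort implementations):

def shell_sort(collection):
    """Sort in place by insertion-sorting gap-separated sublists with shrinking gap."""
    gap = len(collection) // 2
    while gap > 0:
        for i in range(gap, len(collection)):
            current = collection[i]
            j = i
            # insertion sort within the sublist ..., j-2*gap, j-gap, j
            while j >= gap and collection[j - gap] > current:
                collection[j] = collection[j - gap]
                j -= gap
            collection[j] = current
        gap //= 2
    return collection

print(shell_sort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95]))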
131 data_structures/AVL/AVL.py Normal file
@@ -0,0 +1,131 @@
'''
An AVL tree
'''


class Node:

    def __init__(self, label):
        self.label = label
        self.left = None
        self.rigt = None
        self.parent = None
        self.height = 0

    def getLabel(self):
        return self.label

    def setLabel(self, label):
        self.label = label

    def getLeft(self):
        return self.left

    def setLeft(self, left):
        self.left = left

    def getRight(self):
        return self.rigt

    def setRight(self, right):
        self.rigt = right

    def getParent(self):
        return self.parent

    def setParent(self, parent):
        self.parent = parent

    def setHeight(self, height):
        self.height = height

    def getHeight(self, height):
        return self.height


class AVL:

    def __init__(self):
        self.root = None
        self.size = 0

    def insert(self, value):
        node = Node(value)
        if self.root is None:
            self.root = node
            self.size = 1
        else:
            # Same as Binary Tree
            dad_node = None
            curr_node = self.root

            while True:
                if curr_node is not None:

                    dad_node = curr_node

                    if node.getLabel() < curr_node.getLabel():
                        curr_node = curr_node.getLeft()
                    else:
                        curr_node = curr_node.getRight()
                else:
                    if node.getLabel() < dad_node.getLabel():
                        dad_node.setLeft(node)
                        dad_node.setHeight(dad_node.getHeight() + 1)

                        if (dad_node.getRight().getHeight() -
                                dad_node.getLeft.getHeight() > 1):
                            self.rebalance(dad_node)

                    else:
                        dad_node.setRight(node)
                        dad_node.setHeight(dad_node.getHeight() + 1)

                        if (dad_node.getRight().getHeight() -
                                dad_node.getLeft.getHeight() > 1):
                            self.rebalance(dad_node)
                    break

    def rebalance(self, node):
        if (node.getRight().getHeight() -
                node.getLeft.getHeight() > 1):
            if (node.getRight().getHeight() >
                    node.getLeft.getHeight()):
                pass
            else:
                pass
            pass
        elif (node.getRight().getHeight() -
                node.getLeft.getHeight() > 2):
            if (node.getRight().getHeight() >
                    node.getLeft.getHeight()):
                pass
            else:
                pass
            pass
        pass

    def rotate_left(self, node):
        # TODO: is this pythonic enough?
        aux = node.getLabel()
        node = aux.getRight()
        node.setHeight(node.getHeight() - 1)
        node.setLeft(Node(aux))
        node.getLeft().setHeight(node.getHeight() + 1)
        node.getRight().setHeight(node.getRight().getHeight() - 1)

    def rotate_right(self, node):
        aux = node.getLabel()
        node = aux.getLeft()
        node.setHeight(node.getHeight() - 1)
        node.setRight(Node(aux))
        node.getLeft().setHeight(node.getHeight() + 1)
        node.getLeft().setHeight(node.getLeft().getHeight() - 1)

    def double_rotate_left(self, node):
        self.rotate_right(node.getRight().getRight())
        self.rotate_left(node)

    def double_rotate_right(self, node):
        self.rotate_left(node.getLeft().getLeft())
        self.rotate_right(node)
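As committed, this AVL class is only a skeleton: every branch of `rebalance` is `pass`, `getHeight` demands an argument it never uses, `dad_node.getLeft.getHeight()` is missing the call parentheses, and the rotations shuffle labels rather than relinking subtrees. A minimal sketch of the usual height bookkeeping and a left rotation, offered as an assumption about the intended design rather than the author's code:

class Node:
    def __init__(self, label):
        self.label = label
        self.left = None
        self.right = None
        self.height = 1

def height(node):
    # an empty subtree has height 0
    return node.height if node else 0

def update_height(node):
    node.height = 1 + max(height(node.left), height(node.right))

def balance_factor(node):
    # positive means left-heavy, negative means right-heavy
    return height(node.left) - height(node.right)

def rotate_left(node):
    """Right child becomes the new subtree root; returns it."""
    pivot = node.right
    node.right = pivot.left
    pivot.left = node
    update_height(node)
    update_height(pivot)
    return pivot

A right rotation mirrors this, and the double rotations compose the two in opposite orders.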
@@ -1,6 +1,8 @@
'''
A binary search Tree
'''


class Node:

    def __init__(self, label):
@@ -12,7 +14,7 @@ class Node:
        return self.label

    def setLabel(self, label):
        self.label = label
        self.label = label

    def getLeft(self):
        return self.left
@@ -34,7 +36,7 @@ class BinarySearchTree:

    def insert(self, label):

        #Create a new Node
        # Create a new Node

        node = Node(label)

@@ -45,7 +47,7 @@ class BinarySearchTree:
        curr_node = self.root

        while True:
            if curr_node != None:
            if curr_node is not None:

                dad_node = curr_node

@@ -61,12 +63,12 @@ class BinarySearchTree:
            break

    def empty(self):
        if self.root == None:
        if self.root is None:
            return True
        return False

    def preShow(self, curr_node):
        if curr_node != None:
        if curr_node is None:
            print(curr_node.getLabel(), end=" ")

            self.preShow(curr_node.getLeft())
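Note the last change: `preShow`'s guard flips from `!= None` to `is None`, which inverts it, so the traversal now recurses only into missing nodes and never prints anything. The intended Pythonic rewrite is presumably the following (the `getRight` recursion is cut off by the hunk, so it is assumed here):

def preShow(self, curr_node):
    # preorder: visit the node, then its left and right subtrees
    if curr_node is not None:
        print(curr_node.getLabel(), end=" ")
        self.preShow(curr_node.getLeft())
        self.preShow(curr_node.getRight())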
40 data_structures/Graph/Graph.py Normal file
@@ -0,0 +1,40 @@
# Author: OMKAR PATHAK

# We can use Python's dictionary for constructing the graph

class AdjacencyList(object):
    def __init__(self):
        self.List = {}

    def addEdge(self, fromVertex, toVertex):
        # check if vertex is already present
        if fromVertex in self.List.keys():
            self.List[fromVertex].append(toVertex)
        else:
            self.List[fromVertex] = [toVertex]

    def printList(self):
        for i in self.List:
            print(i,'->',' -> '.join([str(j) for j in self.List[i]]))

if __name__ == '__main__':
    al = AdjacencyList()
    al.addEdge(0, 1)
    al.addEdge(0, 4)
    al.addEdge(4, 1)
    al.addEdge(4, 3)
    al.addEdge(1, 0)
    al.addEdge(1, 4)
    al.addEdge(1, 3)
    al.addEdge(1, 2)
    al.addEdge(2, 3)
    al.addEdge(3, 4)

    al.printList()

# OUTPUT:
# 0 -> 1 -> 4
# 1 -> 0 -> 4 -> 3 -> 2
# 2 -> 3
# 3 -> 4
# 4 -> 1 -> 3
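The membership test in `addEdge` is exactly what `collections.defaultdict` provides for free; an equivalent sketch:

from collections import defaultdict

class AdjacencyList(object):
    def __init__(self):
        # missing keys start out as empty edge lists
        self.List = defaultdict(list)

    def addEdge(self, fromVertex, toVertex):
        self.List[fromVertex].append(toVertex)

    def printList(self):
        for vertex, neighbours in self.List.items():
            print(vertex, '->', ' -> '.join(str(n) for n in neighbours))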
61 data_structures/Graph/P01_BreadthFirstSearch.py Normal file
@@ -0,0 +1,61 @@
# Author: OMKAR PATHAK

class Graph():
    def __init__(self):
        self.vertex = {}

    # for printing the Graph vertexes
    def printGraph(self):
        for i in self.vertex.keys():
            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))

    # for adding the edge between two vertexes
    def addEdge(self, fromVertex, toVertex):
        # check if vertex is already present,
        if fromVertex in self.vertex.keys():
            self.vertex[fromVertex].append(toVertex)
        else:
            # else make a new vertex
            self.vertex[fromVertex] = [toVertex]

    def BFS(self, startVertex):
        # Take a list for storing already visited vertexes
        visited = [False] * len(self.vertex)

        # create a list to store all the vertexes for BFS
        queue = []

        # mark the source node as visited and enqueue it
        visited[startVertex] = True
        queue.append(startVertex)

        while queue:
            startVertex = queue.pop(0)
            print(startVertex, end = ' ')

            # mark all adjacent nodes as visited and print them
            for i in self.vertex[startVertex]:
                if visited[i] == False:
                    queue.append(i)
                    visited[i] = True

if __name__ == '__main__':
    g = Graph()
    g.addEdge(0, 1)
    g.addEdge(0, 2)
    g.addEdge(1, 2)
    g.addEdge(2, 0)
    g.addEdge(2, 3)
    g.addEdge(3, 3)

    g.printGraph()
    print('BFS:')
    g.BFS(2)

# OUTPUT:
# 0 -> 1 -> 2
# 1 -> 2
# 2 -> 0 -> 3
# 3 -> 3
# BFS:
# 2 0 3 1
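Two caveats about the code above: `queue.pop(0)` is O(n) on a Python list, and `visited = [False] * len(self.vertex)` assumes the vertex keys are exactly 0..n-1. A sketch of the same traversal with `collections.deque` and a set, which drops both constraints:

from collections import deque

def bfs(graph, start):
    """graph: dict mapping vertex -> list of neighbours."""
    visited = {start}
    queue = deque([start])
    while queue:
        vertex = queue.popleft()   # O(1), unlike list.pop(0)
        print(vertex, end=' ')
        for neighbour in graph.get(vertex, []):
            if neighbour not in visited:
                visited.add(neighbour)
                queue.append(neighbour)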
61 data_structures/Graph/P02_DepthFirstSearch.py Normal file
@@ -0,0 +1,61 @@
# Author: OMKAR PATHAK

class Graph():
    def __init__(self):
        self.vertex = {}

    # for printing the Graph vertexes
    def printGraph(self):
        print(self.vertex)
        for i in self.vertex.keys():
            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))

    # for adding the edge between two vertexes
    def addEdge(self, fromVertex, toVertex):
        # check if vertex is already present,
        if fromVertex in self.vertex.keys():
            self.vertex[fromVertex].append(toVertex)
        else:
            # else make a new vertex
            self.vertex[fromVertex] = [toVertex]

    def DFS(self):
        # visited array for storing already visited nodes
        visited = [False] * len(self.vertex)

        # call the recursive helper function
        for i in range(len(self.vertex)):
            if visited[i] == False:
                self.DFSRec(i, visited)

    def DFSRec(self, startVertex, visited):
        # mark start vertex as visited
        visited[startVertex] = True

        print(startVertex, end = ' ')

        # Recur for all the vertexes that are adjacent to this node
        for i in self.vertex.keys():
            if visited[i] == False:
                self.DFSRec(i, visited)

if __name__ == '__main__':
    g = Graph()
    g.addEdge(0, 1)
    g.addEdge(0, 2)
    g.addEdge(1, 2)
    g.addEdge(2, 0)
    g.addEdge(2, 3)
    g.addEdge(3, 3)

    g.printGraph()
    print('DFS:')
    g.DFS()

# OUTPUT:
# 0 -> 1 -> 2
# 1 -> 2
# 2 -> 0 -> 3
# 3 -> 3
# DFS:
# 0 1 2 3
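Despite its comment, `DFSRec` loops over every vertex key instead of the current vertex's adjacency list, so the "depth-first" order is really just key order. A sketch of the loop the comment actually describes:

def DFSRec(self, startVertex, visited):
    visited[startVertex] = True
    print(startVertex, end=' ')
    # recurse only into vertices adjacent to startVertex
    for neighbour in self.vertex.get(startVertex, []):
        if not visited[neighbour]:
            self.DFSRec(neighbour, visited)

With the sample edges both versions happen to print 0 1 2 3, which hides the bug; a graph whose key order differs from its reachability order would expose it.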
27 data_structures/Stacks/Balanced_Parentheses.py Normal file
@@ -0,0 +1,27 @@
# Author: OMKAR PATHAK

import Stack

def parseParenthesis(string):
    balanced = 1
    index = 0
    myStack = Stack.Stack(len(string))
    while (index < len(string)) and (balanced == 1):
        check = string[index]
        if check == '(':
            myStack.push(check)
        else:
            if myStack.isEmpty():
                balanced = 0
            else:
                myStack.pop()
        index += 1

    if balanced == 1 and myStack.isEmpty():
        return True
    else:
        return False

if __name__ == '__main__':
    print(parseParenthesis('((()))')) # True
    print(parseParenthesis('((())')) # False
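`import Stack` refers to the sibling `Stack.py` shown further down, so this script only runs from inside data_structures/Stacks. A self-contained sketch of the same check using a plain list as the stack:

def is_balanced(text):
    """Return True iff every '(' has a matching ')'."""
    stack = []
    for char in text:
        if char == '(':
            stack.append(char)
        elif char == ')':
            if not stack:
                return False   # closing with nothing open
            stack.pop()
    return not stack           # nothing may be left open

print(is_balanced('((()))'))  # True
print(is_balanced('((())'))   # False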
48 data_structures/Stacks/Infix_To_Postfix_Conversion.py Normal file
@@ -0,0 +1,48 @@
# Author: OMKAR PATHAK

import Stack

def isOperand(char):
    return (ord(char) >= ord('a') and ord(char) <= ord('z')) or (ord(char) >= ord('A') and ord(char) <= ord('Z'))

def precedence(char):
    if char == '+' or char == '-':
        return 1
    elif char == '*' or char == '/':
        return 2
    elif char == '^':
        return 3
    else:
        return -1

def infixToPostfix(myExp, myStack):
    postFix = []
    for i in range(len(myExp)):
        if (isOperand(myExp[i])):
            postFix.append(myExp[i])
        elif(myExp[i] == '('):
            myStack.push(myExp[i])
        elif(myExp[i] == ')'):
            topOperator = myStack.pop()
            while(not myStack.isEmpty() and topOperator != '('):
                postFix.append(topOperator)
                topOperator = myStack.pop()
        else:
            while (not myStack.isEmpty()) and (precedence(myExp[i]) <= precedence(myStack.peek())):
                postFix.append(myStack.pop())
            myStack.push(myExp[i])

    while(not myStack.isEmpty()):
        postFix.append(myStack.pop())
    return ' '.join(postFix)

if __name__ == '__main__':
    myExp = 'a+b*(c^d-e)^(f+g*h)-i'
    myExp = [i for i in myExp]
    print('Infix:',' '.join(myExp))
    myStack = Stack.Stack(len(myExp))
    print('Postfix:',infixToPostfix(myExp, myStack))

# OUTPUT:
# Infix: a + b * ( c ^ d - e ) ^ ( f + g * h ) - i
# Postfix: a b c d ^ e - f g h * + ^ * + i -
50 data_structures/Stacks/Stack.py Normal file
@@ -0,0 +1,50 @@
# Author: OMKAR PATHAK

class Stack(object):
    def __init__(self, limit = 10):
        self.stack = []
        self.limit = limit

    # for printing the stack contents
    def __str__(self):
        return ' '.join([str(i) for i in self.stack])

    # for pushing an element on to the stack
    def push(self, data):
        if len(self.stack) >= self.limit:
            print('Stack Overflow')
        else:
            self.stack.append(data)

    # for popping the uppermost element
    def pop(self):
        if len(self.stack) <= 0:
            return -1
        else:
            return self.stack.pop()

    # for peeking the top-most element of the stack
    def peek(self):
        if len(self.stack) <= 0:
            return -1
        else:
            return self.stack[len(self.stack) - 1]

    # to check if stack is empty
    def isEmpty(self):
        return self.stack == []

    # for checking the size of stack
    def size(self):
        return len(self.stack)

if __name__ == '__main__':
    myStack = Stack()
    for i in range(10):
        myStack.push(i)
    print(myStack)
    myStack.pop() # popping the top element
    print(myStack)
    myStack.peek() # peeking the top element
    myStack.isEmpty()
    myStack.size()
141 dynamic_programming/k_means_clustering_tensorflow.py Normal file
@@ -0,0 +1,141 @@
import tensorflow as tf
from random import choice, shuffle
from numpy import array


def TFKMeansCluster(vectors, noofclusters):
    """
    K-Means Clustering using TensorFlow.
    'vectors' should be a n*k 2-D NumPy array, where n is the number
    of vectors of dimensionality k.
    'noofclusters' should be an integer.
    """

    noofclusters = int(noofclusters)
    assert noofclusters < len(vectors)

    #Find out the dimensionality
    dim = len(vectors[0])

    #Will help select random centroids from among the available vectors
    vector_indices = list(range(len(vectors)))
    shuffle(vector_indices)

    #GRAPH OF COMPUTATION
    #We initialize a new graph and set it as the default during each run
    #of this algorithm. This ensures that as this function is called
    #multiple times, the default graph doesn't keep getting crowded with
    #unused ops and Variables from previous function calls.

    graph = tf.Graph()

    with graph.as_default():

        #SESSION OF COMPUTATION

        sess = tf.Session()

        ##CONSTRUCTING THE ELEMENTS OF COMPUTATION

        ##First lets ensure we have a Variable vector for each centroid,
        ##initialized to one of the vectors from the available data points
        centroids = [tf.Variable((vectors[vector_indices[i]]))
                     for i in range(noofclusters)]
        ##These nodes will assign the centroid Variables the appropriate
        ##values
        centroid_value = tf.placeholder("float64", [dim])
        cent_assigns = []
        for centroid in centroids:
            cent_assigns.append(tf.assign(centroid, centroid_value))

        ##Variables for cluster assignments of individual vectors(initialized
        ##to 0 at first)
        assignments = [tf.Variable(0) for i in range(len(vectors))]
        ##These nodes will assign an assignment Variable the appropriate
        ##value
        assignment_value = tf.placeholder("int32")
        cluster_assigns = []
        for assignment in assignments:
            cluster_assigns.append(tf.assign(assignment,
                                             assignment_value))

        ##Now lets construct the node that will compute the mean
        #The placeholder for the input
        mean_input = tf.placeholder("float", [None, dim])
        #The Node/op takes the input and computes a mean along the 0th
        #dimension, i.e. the list of input vectors
        mean_op = tf.reduce_mean(mean_input, 0)

        ##Node for computing Euclidean distances
        #Placeholders for input
        v1 = tf.placeholder("float", [dim])
        v2 = tf.placeholder("float", [dim])
        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(
            v1, v2), 2)))

        ##This node will figure out which cluster to assign a vector to,
        ##based on Euclidean distances of the vector from the centroids.
        #Placeholder for input
        centroid_distances = tf.placeholder("float", [noofclusters])
        cluster_assignment = tf.argmin(centroid_distances, 0)

        ##INITIALIZING STATE VARIABLES

        ##This will help initialization of all Variables defined with respect
        ##to the graph. The Variable-initializer should be defined after
        ##all the Variables have been constructed, so that each of them
        ##will be included in the initialization.
        init_op = tf.initialize_all_variables()

        #Initialize all variables
        sess.run(init_op)

        ##CLUSTERING ITERATIONS

        #Now perform the Expectation-Maximization steps of K-Means clustering
        #iterations. To keep things simple, we will only do a set number of
        #iterations, instead of using a Stopping Criterion.
        noofiterations = 100
        for iteration_n in range(noofiterations):

            ##EXPECTATION STEP
            ##Based on the centroid locations till last iteration, compute
            ##the _expected_ centroid assignments.
            #Iterate over each vector
            for vector_n in range(len(vectors)):
                vect = vectors[vector_n]
                #Compute Euclidean distance between this vector and each
                #centroid. Remember that this list cannot be named
                #'centroid_distances', since that is the input to the
                #cluster assignment node.
                distances = [sess.run(euclid_dist, feed_dict={
                    v1: vect, v2: sess.run(centroid)})
                    for centroid in centroids]
                #Now use the cluster assignment node, with the distances
                #as the input
                assignment = sess.run(cluster_assignment, feed_dict = {
                    centroid_distances: distances})
                #Now assign the value to the appropriate state variable
                sess.run(cluster_assigns[vector_n], feed_dict={
                    assignment_value: assignment})

            ##MAXIMIZATION STEP
            #Based on the expected state computed from the Expectation Step,
            #compute the locations of the centroids so as to maximize the
            #overall objective of minimizing within-cluster Sum-of-Squares
            for cluster_n in range(noofclusters):
                #Collect all the vectors assigned to this cluster
                assigned_vects = [vectors[i] for i in range(len(vectors))
                                  if sess.run(assignments[i]) == cluster_n]
                #Compute new centroid location
                new_location = sess.run(mean_op, feed_dict={
                    mean_input: array(assigned_vects)})
                #Assign value to appropriate variable
                sess.run(cent_assigns[cluster_n], feed_dict={
                    centroid_value: new_location})

        #Return centroids and assignments
        centroids = sess.run(centroids)
        assignments = sess.run(assignments)
        return centroids, assignments
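This file targets the TensorFlow 1.x graph API: `tf.sub` and `tf.initialize_all_variables` were later renamed (`tf.subtract`, `tf.global_variables_initializer`), and `tf.Session`/`tf.placeholder` are gone from TensorFlow 2's default mode. It also sits under dynamic_programming even though k-means is clustering, not dynamic programming. The same expectation-maximization loop fits in a few lines of plain NumPy; a minimal sketch:

import numpy as np

def kmeans(vectors, noofclusters, noofiterations=100, seed=0):
    """vectors: (n, dim) NumPy array. Returns (centroids, assignments)."""
    rng = np.random.default_rng(seed)
    # initialize centroids from randomly chosen data points
    picks = rng.choice(len(vectors), size=noofclusters, replace=False)
    centroids = vectors[picks].astype(float)
    for _ in range(noofiterations):
        # Expectation: assign every vector to its nearest centroid
        distances = np.linalg.norm(vectors[:, None, :] - centroids[None, :, :], axis=2)
        assignments = distances.argmin(axis=1)
        # Maximization: move every centroid to the mean of its members
        for cluster in range(noofclusters):
            members = vectors[assignments == cluster]
            if len(members):
                centroids[cluster] = members.mean(axis=0)
    return centroids, assignments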
121 machine_learning/gradient_descent.py Normal file
@@ -0,0 +1,121 @@
"""
Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
"""
import numpy

# List of input, output pairs
train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
parameter_vector = [2, 4, 1, 5]
m = len(train_data)
LEARNING_RATE = 0.009


def _error(example_no, data_set='train'):
    """
    :param data_set: train data or test data
    :param example_no: example number whose error has to be checked
    :return: error in example pointed by example number.
    """
    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)


def _hypothesis_value(data_input_tuple):
    """
    Calculates hypothesis function value for a given input
    :param data_input_tuple: Input tuple of a particular example
    :return: Value of hypothesis function at that point.
    Note that there is a 'biased input' whose value is fixed as 1.
    It is not explicitly mentioned in the input data, but ML hypothesis functions use it.
    So, we have to take care of it separately; the final `hyp_val += parameter_vector[0]` does that.
    """
    hyp_val = 0
    for i in range(len(parameter_vector) - 1):
        hyp_val += data_input_tuple[i]*parameter_vector[i+1]
    hyp_val += parameter_vector[0]
    return hyp_val


def output(example_no, data_set):
    """
    :param data_set: test data or train data
    :param example_no: example whose output is to be fetched
    :return: output for that example
    """
    if data_set == 'train':
        return train_data[example_no][1]
    elif data_set == 'test':
        return test_data[example_no][1]


def calculate_hypothesis_value(example_no, data_set):
    """
    Calculates hypothesis value for a given example
    :param data_set: test data or train_data
    :param example_no: example whose hypothesis value is to be calculated
    :return: hypothesis value for that example
    """
    if data_set == "train":
        return _hypothesis_value(train_data[example_no][0])
    elif data_set == "test":
        return _hypothesis_value(test_data[example_no][0])


def summation_of_cost_derivative(index, end=m):
    """
    Calculates the sum of cost function derivative
    :param index: index with respect to which the derivative is calculated
    :param end: value where summation ends, default is m, number of examples
    :return: Returns the summation of cost derivative
    Note: If index is -1, this means we are calculating summation with respect to the bias parameter.
    """
    summation_value = 0
    for i in range(end):
        if index == -1:
            summation_value += _error(i)
        else:
            summation_value += _error(i)*train_data[i][0][index]
    return summation_value


def get_cost_derivative(index):
    """
    :param index: index of the parameter vector with respect to which the derivative is to be calculated
    :return: derivative with respect to that index
    Note: If index is -1, this means we are calculating summation with respect to the bias parameter.
    """
    cost_derivative_value = summation_of_cost_derivative(index, m)/m
    return cost_derivative_value


def run_gradient_descent():
    global parameter_vector
    # Tune these values to set a tolerance value for predicted output
    absolute_error_limit = 0.000002
    relative_error_limit = 0
    j = 0
    while True:
        j += 1
        temp_parameter_vector = [0, 0, 0, 0]
        for i in range(0, len(parameter_vector)):
            cost_derivative = get_cost_derivative(i-1)
            temp_parameter_vector[i] = parameter_vector[i] - \
                LEARNING_RATE*cost_derivative
        if numpy.allclose(parameter_vector, temp_parameter_vector,
                          atol=absolute_error_limit, rtol=relative_error_limit):
            break
        parameter_vector = temp_parameter_vector
    print("Number of iterations:", j)


def test_gradient_descent():
    for i in range(len(test_data)):
        print("Actual output value:", output(i, 'test'))
        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))


if __name__ == '__main__':
    run_gradient_descent()
    print("\nTesting gradient descent for a linear hypothesis function.\n")
    test_gradient_descent()
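The per-parameter loop above recomputes every hypothesis value once per parameter per iteration. The whole batch update is the single matrix expression theta <- theta - (alpha / m) * X^T (X theta - y); a vectorized NumPy sketch of the same rule, with the same learning rate and tolerance:

import numpy as np

def gradient_descent(X, y, alpha=0.009, atol=2e-6, max_iter=100000):
    """X: (m, d) inputs without the bias column; y: (m,) targets."""
    Xb = np.c_[np.ones(len(X)), X]          # prepend the fixed bias input of 1
    theta = np.zeros(Xb.shape[1])
    for _ in range(max_iter):
        gradient = Xb.T @ (Xb @ theta - y) / len(y)
        new_theta = theta - alpha * gradient
        if np.allclose(theta, new_theta, atol=atol, rtol=0):
            return new_theta
        theta = new_theta
    return theta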
108 machine_learning/linear_regression.py Normal file
@@ -0,0 +1,108 @@
"""
Linear regression is the most basic type of regression commonly used for
predictive analysis. The idea is pretty simple: we have a dataset and we have
features associated with it. The features should be chosen very cautiously,
as they determine how well our model will be able to make future predictions.
We try to set these feature weights, over many iterations, so that they best
fit our dataset. In this particular code, I used a CSGO dataset (ADR vs
Rating). We try to best fit a line through dataset and estimate the parameters.
"""

import requests
import numpy as np


def collect_dataset():
    """ Collect dataset of CSGO
    The dataset contains ADR vs Rating of a Player
    :return : dataset obtained from the link, as matrix
    """
    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
                            'Rating.csv')
    lines = response.text.splitlines()
    data = []
    for item in lines:
        item = item.split(',')
        data.append(item)
    data.pop(0)  # This is for removing the labels from the list
    dataset = np.matrix(data)
    return dataset


def run_steep_gradient_descent(data_x, data_y,
                               len_data, alpha, theta):
    """ Run steep gradient descent and update the feature vector accordingly
    :param data_x : contains the dataset
    :param data_y : contains the output associated with each data-entry
    :param len_data : length of the data
    :param alpha : learning rate of the model
    :param theta : feature vector (weights for our model)
    :return : updated features, using
        curr_features - alpha * gradient (w.r.t. feature)
    """
    n = len_data

    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_grad = np.dot(prod, data_x)
    theta = theta - (alpha / n) * sum_grad
    return theta


def sum_of_square_error(data_x, data_y, len_data, theta):
    """ Return sum of square error for error calculation
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :param len_data : len of the dataset
    :param theta : contains the feature vector
    :return : sum of square error computed from the given features
    """
    error = 0.0
    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_elem = np.sum(np.square(prod))
    error = sum_elem / (2 * len_data)
    return error


def run_linear_regression(data_x, data_y):
    """ Implement Linear regression over the dataset
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :return : feature for line of best fit (feature vector)
    """
    iterations = 100000
    alpha = 0.0001550

    no_features = data_x.shape[1]
    len_data = data_x.shape[0] - 1

    theta = np.zeros((1, no_features))

    for i in range(0, iterations):
        theta = run_steep_gradient_descent(data_x, data_y,
                                           len_data, alpha, theta)
        error = sum_of_square_error(data_x, data_y, len_data, theta)
        print('At Iteration %d - Error is %.5f ' % (i + 1, error))

    return theta


def main():
    """ Driver function """
    data = collect_dataset()

    len_data = data.shape[0]
    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
    data_y = data[:, -1].astype(float)

    theta = run_linear_regression(data_x, data_y)
    len_result = theta.shape[1]
    print('Resultant Feature vector : ')
    for i in range(0, len_result):
        print('%.5f' % (theta[0, i]))


if __name__ == '__main__':
    main()
@@ -80,6 +80,39 @@ def binary_search_std_lib(sorted_collection, item):
        return index
    return None

def binary_search_by_recursion(sorted_collection, item, left, right):

    """Pure implementation of binary search algorithm in Python by recursion

    Be careful collection must be sorted, otherwise result will be
    unpredictable
    First recursion should be started with left=0 and right=(len(sorted_collection)-1)

    :param sorted_collection: some sorted collection with comparable items
    :param item: item value to search
    :return: index of found item or None if item is not found

    Examples:
    >>> binary_search_std_lib([0, 5, 7, 10, 15], 0)
    0

    >>> binary_search_std_lib([0, 5, 7, 10, 15], 15)
    4

    >>> binary_search_std_lib([0, 5, 7, 10, 15], 5)
    1

    >>> binary_search_std_lib([0, 5, 7, 10, 15], 6)

    """
    midpoint = left + (right - left) // 2

    if sorted_collection[midpoint] == item:
        return midpoint
    elif sorted_collection[midpoint] > item:
        return binary_search_by_recursion(sorted_collection, item, left, right-1)
    else:
        return binary_search_by_recursion(sorted_collection, item, left+1, right)

def __assert_sorted(collection):
    """Check if collection is sorted, if not - raises :py:class:`ValueError`
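Three issues in the added function are worth flagging: the doctests exercise `binary_search_std_lib` instead of the new function, there is no base case for an exhausted range, and the recursive calls shrink the range by only one element (`right-1` / `left+1`) instead of jumping past the midpoint, which degrades to linear time and can recurse forever on a missing item. A corrected sketch:

def binary_search_by_recursion(sorted_collection, item, left, right):
    if left > right:
        return None                      # range exhausted: item not found
    midpoint = left + (right - left) // 2
    if sorted_collection[midpoint] == item:
        return midpoint
    elif sorted_collection[midpoint] > item:
        # discard the midpoint and everything to its right
        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
    else:
        # discard the midpoint and everything to its left
        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)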
56 sorts/bucket_sort.py Normal file
@@ -0,0 +1,56 @@
#!/usr/bin/env python
# Author: OMKAR PATHAK
# This program will illustrate how to implement bucket sort algorithm

# Wikipedia says: Bucket sort, or bin sort, is a sorting algorithm that works by distributing the
# elements of an array into a number of buckets. Each bucket is then sorted individually, either using
# a different sorting algorithm, or by recursively applying the bucket sorting algorithm. It is a
# distribution sort, and is a cousin of radix sort in the most to least significant digit flavour.
# Bucket sort is a generalization of pigeonhole sort. Bucket sort can be implemented with comparisons
# and therefore can also be considered a comparison sort algorithm. The computational complexity estimates
# involve the number of buckets.

# Time Complexity of Solution:
# Best Case O(n); Average Case O(n); Worst Case O(n)

from P26_InsertionSort import insertionSort
import math

DEFAULT_BUCKET_SIZE = 5

def bucketSort(myList, bucketSize=DEFAULT_BUCKET_SIZE):
    if(len(myList) == 0):
        print('You don\'t have any elements in array!')

    minValue = myList[0]
    maxValue = myList[0]

    # For finding minimum and maximum values
    for i in range(0, len(myList)):
        if myList[i] < minValue:
            minValue = myList[i]
        elif myList[i] > maxValue:
            maxValue = myList[i]

    # Initialize buckets
    bucketCount = math.floor((maxValue - minValue) / bucketSize) + 1
    buckets = []
    for i in range(0, bucketCount):
        buckets.append([])

    # For putting values in buckets
    for i in range(0, len(myList)):
        buckets[math.floor((myList[i] - minValue) / bucketSize)].append(myList[i])

    # Sort buckets and place back into input array
    sortedArray = []
    for i in range(0, len(buckets)):
        insertionSort(buckets[i])
        for j in range(0, len(buckets[i])):
            sortedArray.append(buckets[i][j])

    return sortedArray

if __name__ == '__main__':
    sortedArray = bucketSort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95])
    print(sortedArray)
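Two caveats: the stated worst case is optimistic (bucket sort degrades to O(n^2) when everything lands in one bucket), and `from P26_InsertionSort import insertionSort` only resolves in the author's own repository, so the file does not run standalone. A self-contained sketch of the same bucketing scheme, assuming integer inputs and using sorted() in place of the insertion sort:

def bucket_sort(values, bucket_size=5):
    if not values:
        return []
    min_value, max_value = min(values), max(values)
    bucket_count = (max_value - min_value) // bucket_size + 1
    buckets = [[] for _ in range(bucket_count)]
    for value in values:
        # distribute each value into its bucket by range
        buckets[(value - min_value) // bucket_size].append(value)
    result = []
    for bucket in buckets:
        result.extend(sorted(bucket))   # each bucket sorted individually
    return result

print(bucket_sort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95]))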
32 sorts/topological_sort.py Normal file
@@ -0,0 +1,32 @@
#      a
#     / \
#    b   c
#   / \
#  d   e
edges = {'a': ['c', 'b'], 'b': ['d', 'e'], 'c': [], 'd': [], 'e': []}
vertices = ['a', 'b', 'c', 'd', 'e']


def topological_sort(start, visited, sort):
    """Perform topological sort on a directed acyclic graph."""
    current = start
    # add current to visited
    visited.append(current)
    neighbors = edges[current]
    for neighbor in neighbors:
        # if neighbor not in visited, visit
        if neighbor not in visited:
            sort = topological_sort(neighbor, visited, sort)
    # if all neighbors visited add current to sort
    sort.append(current)
    # if all vertices haven't been visited select a new one to visit
    if len(visited) != len(vertices):
        for vertice in vertices:
            if vertice not in visited:
                sort = topological_sort(vertice, visited, sort)
    # return sort
    return sort


sort = topological_sort('a', [], [])
print(sort)