#
Longest Increasing Subsequence

#======================================================================= # Author: Isai Damier # Title: Longest Increasing Subsequence # Project: geekviewpoint # Package: algorithms # # Statement: # Given a sequence of numbers, find a longest increasing subsequence. # # Time Complexity: O(n^2) # # Sample Input: [8,1,2,3,0,5] # Sample Output: [1,2,3,5] # # DEFINITION OF SUBSEQUENCE: # A sequence is a particular order in which related objects follow # each other (e.g. DNA, Fibonacci). A sub-sequence is a sequence # obtained by omitting some of the elements of a larger sequence. # # SEQUENCE SUBSEQUENCE OMISSION # [3,1,2,5,4] [1,2] 3,5,4 # [3,1,2,5,4] [3,1,4] 2,5 # # SEQUENCE NOT SUBSEQUENCE REASON # [3,1,2,5,4] [4,2,5] 4 should follow 5 # # STRATEGY: # Illustrating by finding a longest increasing subsequence # of [8,1,2,3,0,5]: # # - Start by finding all subsequences of size 1: [8],[1],[2],[3],[0],[5]; # each element is its own increasing subsequence. # # - Since we already have the solutions for the size 1 subsequences, # we can use them in solving for the size two subsequences. For # instance, we already know that 0 is the smallest element of an # increasing subsequence of size 1, i.e. the subsequence [0]. # Therefore, all we need to get a subsequence of size 2 is add an # element greater than 0 to [0]: [0,5]. The other size 2 # subsequences are: [1,2], [1,3], [1,5], [2,3], [2,5], [3,5]. # # - Now we use the size 2 subsequences to get the size 3 subsequences: # [1,2,3], [1,2,5], [1,3,5], [2,3,5] # # - Then we use the size 3 subsequences to get the size 4 subsequences: # [1,2,3,5]. Since there are no size 5 solutions, we are done. # # SUMMARY: # Instead of directly solving the big problem, we solved a smaller # version and then 'copied and pasted' the solution of the subproblem # to find the solution to the big problem. To make the 'copy and paste' # part easy, we use a table (i.e. array) to track the subproblems # and their solutions. This strategy as a whole is called Dynamic # Programming. The tabling part is known as memoization, which means # writing memo. # # To recognize whether you can use dynamic programming on a problem, # look for the following two traits: optimal substructures and # overlapping subproblems. # # Optimal Substructures: the ability to 'copy and paste' the solution # of a subproblem plus an additional trivial amount of work so to # solve a larger problem. For example, we were able to use [1,2] # itself an optimal solution to the problem [8,1,2] to get [1,2,3] # as an optimal solution to the problem [8,1,2,3]. # # Overlapping Subproblems: Okay. So in our approach the solution grew # from left to right: [1] to [1,2] to [1,2,3] etc. But in reality # we could have solved the problem using recursion trees so that # for example [1,2] could be reached either from [1] or from [2]. # That wouldn't really be a problem except we would be solving for # [1,2] more than once. Anytime a recursive solution would lead to # such overlaps, the bet is dynamic programming is the way to go. # # [1] [2] # / | \ / | \ # / | \ / | \ # / | \ / | \ # [1,2] [1,3] [1,5] [1,2] [2,3] [2,5] # # NOTE: # Dynamic Programming = Optimal Substructures + Overlapping Subproblems # Divide and Conquer = Optimal Substructures - Overlapping Subproblems # see merge sort: http://www.geekviewpoint.com/python/sorting/mergesort # # Alternate coding: Not really much difference here, just another code # that some readers will find more intuitive: # # m = [1]*len(A) # # for x in range(len(A)): # for y in range(x): # if m[y] >= m[x] and A[y] < A[x]: # m[x]+=1 # # max_value = max(m) # # result = [] # for i in range(m-1,-1,-1): # if max == m[i]: # result.append(A[i]) # max-=1 # # result.reverse() # return result #======================================================================= def LIS( A ): m = [0] * len( A ) # m = [1]*len(A) not important here for x in range( len( A ) - 2, -1, -1 ): for y in range( len( A ) - 1, x, -1 ): if A[x] < A[y] and m[x] <= m[y]: m[x] += 1 # or use m[x] = m[y] + 1 #=================================================================== # Use the following snippet or the one line below to get max_value # max_value=m[0] # for i in range(m): # if max_value < m[i]: # max_value = m[i] #=================================================================== max_value = max( m ) result = [] for i in range( len( m ) ): if max_value == m[i]: result.append( A[i] ) max_value -= 1 return result

import unittest from dynamic_programming import LongestIncreasingSubsequence as subsequence class Test( unittest.TestCase ): def testLIS( self ): A = [1, 2, 3, 4, 5, 6, 7, 8, 9] B = [9, 8, 7, 6, 5, 4, 3, 2, 1] C = [50, 2, 45, 3, 5, 44, 1, 8, 40, 12, 37, 15] D = [2, 3, 5, 8, 12, 37] actual = subsequence.LIS( A ) self.assertEquals( A, actual ) self.assertEquals( 1, len( subsequence.LIS( B ) ) ) actual = subsequence.LIS( C ) self.assertEquals( D, actual )