Algorithms_in_C 1.0.0
Set of algorithms implemented in C.
Loading...
Searching...
No Matches
lcs.c File Reference

Longest Common Subsequence algorithm More...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
Include dependency graph for lcs.c:

Enumerations

enum  { LEFT , UP , DIAG }
 

Functions

void lcslen (const char *s1, const char *s2, int l1, int l2, int **L, int **B)
 Computes the LCS between s1 and s2 using a dynamic-programming approach.
 
char * lcsbuild (const char *s1, int l1, int l2, int **L, int **B)
 Builds the LCS according to B using a traceback approach.
 
static void test ()
 Self-test implementations.
 
int main (int argc, char *argv[])
 Main function.
 

Detailed Description

Longest Common Subsequence algorithm

From Wikipedia: The longest common subsequence (LCS) problem is the problem of finding the longest subsequence common to all sequences in a set of sequences (often just two sequences).

Author
Kurtz

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
15{LEFT, UP, DIAG};

Function Documentation

◆ lcsbuild()

char * lcsbuild ( const char *  s1,
int  l1,
int  l2,
int **  L,
int **  B 
)

Builds the LCS according to the direction matrix B using a traceback approach.

Parameters
s1 first null-terminated string
l1 length of s1
l2 length of s2
L LCS-length matrix of size (l1+1) x (l2+1)
B direction matrix of size (l1+1) x (l2+1)
Returns
the longest common subsequence as a newly allocated null-terminated string, or NULL if the allocation fails
59 {
60 int i, j, lcsl;
61 char *lcs;
62 lcsl = L[l1][l2];
63
64 /* my lcs is at least the empty symbol */
65 lcs = (char *)calloc(lcsl+1, sizeof(char)); /* null-terminated \0 */
66 if (!lcs) {
67 perror("calloc: ");
68 return NULL;
69 }
70
71 i = l1, j = l2;
72 while (i > 0 && j > 0) {
73 /* walk the matrix backwards */
74 if (B[i][j] == DIAG) {
75 lcs[--lcsl] = s1[i-1];
76 i = i - 1;
77 j = j - 1;
78 }
79 else if (B[i][j] == LEFT)
80 j = j - 1;
81 else
82 i = i - 1;
83 }
84 return lcs;
85}
#define calloc(elemCount, elemSize)
This macro replaces the standard calloc function with calloc_dbg.
Definition: malloc_dbg.h:22
Definition: list.h:8

◆ lcslen()

void lcslen ( const char *  s1,
const char *  s2,
int  l1,
int  l2,
int **  L,
int **  B 
)

Computes the LCS between s1 and s2 using a dynamic-programming approach.

Parameters
s1 first null-terminated string
s2 second null-terminated string
l1 length of s1
l2 length of s2
L LCS-length matrix of size (l1+1) x (l2+1)
B direction matrix of size (l1+1) x (l2+1)
Returns
void
27 {
28 /* B is the directions matrix
29 L is the LCS matrix */
30 int i, j;
31
32 /* loop over the symbols in my sequences
33 save the directions according to the LCS */
34 for (i = 1; i <= l1; ++i)
35 for (j = 1; j <= l2; ++j)
36 if (s1[i-1] == s2[j-1]) {
37 L[i][j] = 1 + L[i-1][j-1];
38 B[i][j] = DIAG;
39 }
40 else if (L[i-1][j] < L[i][j-1]) {
41 L[i][j] = L[i][j-1];
42 B[i][j] = LEFT;
43 }
44 else {
45 L[i][j] = L[i-1][j];
46 B[i][j] = UP;
47 }
48}

◆ main()

int main ( int  argc,
char *  argv[] 
)

Main function.

Parameters
argc command-line argument count (ignored)
argv command-line array of arguments (ignored)
Returns
0 on exit
150 {
151 test(); // run self-test implementations
152 return 0;
153}
static void test()
Self-test implementations.
Definition: lcs.c:90
Here is the call graph for this function:

◆ test()

static void test ( void  )
static

Self-test implementations.

Returns
void
90 {
91 /* https://en.wikipedia.org/wiki/Subsequence#Applications */
92 int **L, **B, j, l1, l2;
93
94 char *s1 = "ACGGTGTCGTGCTATGCTGATGCTGACTTATATGCTA";
95 char *s2 = "CGTTCGGCTATCGTACGTTCTATTCTATGATTTCTAA";
96 char *lcs;
97
98 l1 = strlen(s1);
99 l2 = strlen(s2);
100
101 L = (int **)calloc(l1+1, sizeof(int *));
102 B = (int **)calloc(l1+1, sizeof(int *));
103
104 if (!L) {
105 perror("calloc: ");
106 exit(1);
107 }
108 if (!B) {
109 perror("calloc: ");
110 exit(1);
111 }
112 for (j = 0; j <= l1; j++) {
113 L[j] = (int *)calloc(l2+1, sizeof(int));
114 if (!L[j]) {
115 perror("calloc: ");
116 exit(1);
117 }
118 B[j] = (int *)calloc(l2+1, sizeof(int));
119 if (!L[j]) {
120 perror("calloc: ");
121 exit(1);
122 }
123 }
124
125 lcslen(s1, s2, l1, l2, L, B);
126 lcs = lcsbuild(s1, l1, l2, L, B);
127
128 assert(L[l1][l2] == 27);
129 assert(strcmp(lcs, "CGTTCGGCTATGCTTCTACTTATTCTA") == 0);
130
131 printf("S1: %s\tS2: %s\n", s1, s2);
132 printf("LCS len:%3d\n", L[l1][l2]);
133 printf("LCS: %s\n", lcs);
134
135 free(lcs);
136 for (j = 0; j <= l1; j++)
137 free(L[j]), free(B[j]);
138 free(L);
139 free(B);
140
141 printf("All tests have successfully passed!\n");
142}
char * lcsbuild(const char *s1, int l1, int l2, int **L, int **B)
Builds the LCS according to B using a traceback approach.
Definition: lcs.c:59
void lcslen(const char *s1, const char *s2, int l1, int l2, int **L, int **B)
Computes the LCS between s1 and s2 using a dynamic-programming approach.
Definition: lcs.c:27
#define free(ptr)
This macro replaces the standard free function with free_dbg.
Definition: malloc_dbg.h:26
Here is the call graph for this function: