Exercise 7.
authorEugen Sawin <sawine@me73.com>
Wed, 06 Jul 2011 17:00:17 +0200
changeset 10a3feee84f858
parent 9 15594952470f
child 11 df83621781e1
Exercise 7.
src/kmp.py
tex/theory1_ex7.tex
     1.1 --- a/src/kmp.py	Mon Jul 04 16:41:40 2011 +0200
     1.2 +++ b/src/kmp.py	Wed Jul 06 17:00:17 2011 +0200
     1.3 @@ -5,21 +5,42 @@
     1.4  def main():   
     1.5      args = parse_arguments()
     1.6      print "next ", next(args.pattern)
     1.7 +   
     1.8 +    for x in range(len(args.text)): 
     1.9 +        pad = ""
    1.10 +        for p in range(2 - len(str(x))):
    1.11 +            pad += " "
    1.12 +        print str(x) + pad,
    1.13 +    print
    1.14 +    #print " ".join([str(i) for i in range(len(args.text))]) 
    1.15 +    for x in range(len(args.text)): 
    1.16 +        pad = " "
    1.17 +        print args.text[x] + pad,
    1.18 +    #for i in range(len(args.text)):
    1.19 +    #    pad = ""
    1.20 +    #    for x in range(1, len(str(i))):
    1.21 +    #        pad += " "
    1.22 +    #    print args.text[i] + pad,
    1.23 +    print
    1.24      print kmp(args.text, args.pattern)
    1.25  
    1.26 -def kmp(text, pattern):
    1.27 +def kmp(text, pattern):    
    1.28      l = []
    1.29      n = next(pattern)
    1.30      j = 0
    1.31      for i in range(len(text)):
    1.32 -        while j >= 0 and text[i] != pattern[j]:
    1.33 -            #print "mismatch at text %i and pattern %i" % (i, j)
    1.34 -            #print "new pattern %i" % (n[j],)
    1.35 +        pad = ""
    1.36 +        for x in range(1, i-j+1):
    1.37 +            pad += "   "
    1.38 +        print pad + "  ".join(pattern[:j+1]),
    1.39 +        while j >= 0 and text[i] != pattern[j]:           
    1.40              j = n[j]
    1.41          j += 1
    1.42          if j == len(pattern):
    1.43 +            print " matched",
    1.44              l.append(i - len(pattern) + 1)
    1.45              j = n[j]
    1.46 +        print
    1.47      return l
    1.48  
    1.49  def next(pattern):
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tex/theory1_ex7.tex	Wed Jul 06 17:00:17 2011 +0200
     2.3 @@ -0,0 +1,108 @@
     2.4 +\documentclass[a4paper, 10pt, pagesize, smallheadings]{article}  
     2.5 +\usepackage{graphicx}
     2.6 +%\usepackage[latin1]{inputenc}
     2.7 +\usepackage{amsmath, amsthm, amssymb}
     2.8 +\usepackage{typearea}
     2.9 +\usepackage{algorithm}
    2.10 +\usepackage{algorithmic}
    2.11 +\usepackage{fullpage}
    2.12 +\usepackage{mathtools}
    2.13 +\usepackage[all]{xy}
    2.14 +\title{Theory I, Sheet 7 Solution}
    2.15 +\author{Eugen Sawin}
    2.16 +\renewcommand{\familydefault}{\sfdefault}
    2.17 +\include{pythonlisting}
    2.18 +
    2.19 +\pagestyle{empty}
    2.20 +\begin{document}
    2.21 +\maketitle
    2.22 +
    2.23 +\section*{Exercise 7.1}
    2.24 +For this exercise I've used a slightly different implementation of the KMP algorithm, which is semantically equal to the one presented in the lecture. 
    2.25 +
    2.26 +In this version the $next$ array is prepended with a $-1$, which I omit from the solutions to correspond to the lecture's algorithm's output.
    2.27 +\begin{python}
    2.28 +def kmp(text, pattern):    
    2.29 +    l = []
    2.30 +    n = next(pattern)
    2.31 +    j = 0
    2.32 +    for i in range(len(text)):        
    2.33 +        while j >= 0 and text[i] != pattern[j]:           
    2.34 +            j = n[j]
    2.35 +        j += 1
    2.36 +        if j == len(pattern):            
    2.37 +            l.append(i - len(pattern) + 1)
    2.38 +            j = n[j]        
    2.39 +    return l
    2.40 +
    2.41 +def next(pattern):
    2.42 +    n = [-1]
    2.43 +    j = -1
    2.44 +    for i in range(len(pattern)):        
    2.45 +        while j >= 0 and pattern[i] != pattern[j]:
    2.46 +            j = n[j]        
    2.47 +        j += 1
    2.48 +        n.append(j)        
    2.49 +    return n
    2.50 +\end{python}
    2.51 +
    2.52 +\subsection*{1}
    2.53 +\begin{align*}
    2.54 +P &= rlrrlr\\
    2.55 +next &= [0, 0, 1, 1, 2, 3]\\
    2.56 +\end{align*}
    2.57 +
    2.58 +\subsection*{2}
    2.59 +\begin{align*}
    2.60 +P' &= rrllrrll\\
    2.61 +next &= [0, 1, 0, 0, 1, 2, 3, 4]\\
    2.62 +\end{align*}
    2.63 +
    2.64 +\subsection*{3}
    2.65 +\begin{align*}
    2.66 +P &= rlrrlr\\
    2.67 +T &= lrrrlrrlllrrrlrrlrrllrlrrlrrlr\\
    2.68 +next &= [0, 0, 1, 1, 2, 3]\\
    2.69 +matched &= [12, 21, 24]\\
    2.70 +\end{align*}
    2.71 +You can find the full KMP-execution table in the attached text file \emph{kmp-output.txt}.
    2.72 +
    2.73 +\section*{Exercise 7.2}
    2.74 +\begin{figure}[h]
    2.75 +\entrymodifiers={+[F-]}
    2.76 +\xymatrix
    2.77 +{
    2.78 +  &   & C & H & U & P & A & C & A & B & R & A  & S\\
    2.79 +  & 0\ar@{-->}[dr] \ar[d] \ar[r] & \ar[r]1\ar[dr] & 2\ar[dr]\ar[r] & 3\ar[dr]\ar[r] & 4\ar[dr]\ar[r] & \ar[r]5 & 6\ar[r]\ar[dr] & 7\ar[r] & 8\ar[r] & 9\ar[dr]\ar[r] & 10\ar[r] & 11\\
    2.80 +A & 1\ar[dr] \ar[d] & 1\ar[d]\ar[dr] \ar@{-->}[r] & 2\ar[r]\ar@{-->}[dr] & 3\ar[r]\ar[dr] & 4\ar[dr] & 4\ar[d]\ar[r]\ar[dr] & 5\ar[dr]\ar[r] & 6\ar[r]\ar[dr] & 7\ar[r] & \ar[r]8 & 9\ar[r]  & 10\\
    2.81 +B & 2\ar[dr] \ar[d] & 2\ar[d]\ar[dr] & 2\ar[d]\ar[r]\ar[dr] & 3\ar[r]\ar@{-->}[dr] & 4\ar[r]\ar[dr] & 5\ar[dr] & 5\ar[d]\ar[r]\ar[dr] & 6\ar[dr] & 6\ar[d]\ar[r]\ar[dr] & 7\ar[r] & 8\ar[r]  & 9\\
    2.82 +R & 3\ar[dr] \ar[d] & 3\ar[d]\ar[dr] & 3\ar[d]\ar[dr] & 3\ar[d]\ar[r]\ar[dr] & 4\ar[r]\ar@{-->}[dr] & 5\ar[r] & 6\ar[dr] & 6\ar[r]\ar[dr] & 7 & 6\ar[d]\ar[r]\ar[dr] & 7\ar[r]  & 8\\
    2.83 +A & 4\ar[dr] \ar[d] & 4\ar[dr] & 4\ar[d]\ar[dr] & 4\ar[d]\ar[dr] & 4\ar[d]\ar[dr] & 4\ar[d]\ar[r]\ar@{-->}[dr] & 5\ar[r] & 6\ar[r] & 7 & 7 & 6\ar[d]\ar[r]\ar[dr]  & 7\\
    2.84 +C & 5 \ar[d] & 4\ar[d]\ar[r]\ar[dr] & 5\ar[dr] & 5\ar[d]\ar[dr] & 5\ar[d]\ar[dr] & 5 & 4\ar[d]\ar[r]\ar@{-->}[dr] & 5\ar[r] & 6\ar[r] & 7\ar[dr] & 7\ar[dr]  & 7\ar[d]\\
    2.85 +A & 6 \ar[d] & 5\ar[d]\ar[dr] & 5\ar[d]\ar[r]\ar[dr] & 6\ar[dr] & 6\ar[d] & 5\ar[d]\ar[dr] & 5\ar[d] & 4\ar[r]\ar[dr] \ar@{-->}[d] & 5\ar[r]\ar[dr] & 6\ar[r]\ar[dr] & 7\ar[r]\ar[dr]  & 8\\
    2.86 +D & 7 \ar[d] & 6\ar[d]\ar[dr] & 6\ar[d]\ar[dr] & 6\ar[d]\ar[r]\ar[dr] & 7\ar[dr] & 6\ar[d]\ar[dr] & 6\ar[d]\ar[dr] & 5\ar[dr] \ar@{-->}[d] & 5\ar[d]\ar[dr]\ar[r] & 6\ar[r]\ar[dr] & 7\ar[r]  & 8\\
    2.87 +A & 8 \ar[d] & 7\ar[d]\ar[dr] & 7\ar[d]\ar[dr] & 7\ar[d]\ar[dr] & 7\ar[d]\ar[dr] & 7\ar[d]\ar[dr] & 7\ar[d] & 6\ar[d] \ar@{-->}[dr] & 6\ar[dr] & 6\ar[d]\ar[dr] & 6\ar[d]\ar[dr]\ar[r]  & 7\\
    2.88 +B & 9 \ar[d] & 8\ar[d]\ar[dr] & 8\ar[d]\ar[dr] & 8\ar[d]\ar[dr] & \ar[dr]8\ar[d] & 8\ar[d]\ar[dr] & 8\ar[d] & 7\ar[d] & 6\ar[d]\ar[r] \ar@{-->}[dr] & 7 & 7\ar[dr]  & 7\ar[d]\\
    2.89 +R & 10 \ar[d]& 9\ar[d]\ar[dr] & 9\ar[d]\ar[dr] & 9\ar[d]\ar[dr] & 9\ar[d]\ar[dr] & 9\ar[dr] & 9\ar[d]\ar[dr] & 8\ar[d] & 7\ar[d] & 6\ar[d]\ar[r] \ar@{-->}[dr] & \ar[r]7  & 8\\
    2.90 +A & 11 & 10& 10& 10& 10& 9\ar[r] & 10& 9 & 8 & 7 & 6 \ar@{-->}[r]  & 7\\
    2.91 +}
    2.92 +\end{figure}
    2.93 +\newpage
    2.94 +\noindent We specify the edit operations:
    2.95 +\begin{enumerate}
    2.96 +\item replace A with C
    2.97 +\item insert H
    2.98 +\item replace B with U
    2.99 +\item replace R with P
   2.100 +\item keep A
   2.101 +\item keep C
   2.102 +\item keep A
   2.103 +\item delete D
   2.104 +\item delete A
   2.105 +\item keep B
   2.106 +\item keep R
   2.107 +\item keep A
   2.108 +\item insert S
   2.109 +\end{enumerate}
   2.110 +And we get $D(A, B) = 7$.
   2.111 +\end{document}