@article {IOPORT.05694935, author = {Arslan, Abdullah N.}, title = {An algorithm with linear expected running time for string editing with substitutions and substring reversals.}, year = {2008}, journal = {Information Processing Letters}, volume = {106}, number = {5}, issn = {0020-0190}, pages = {213-218}, publisher = {Elsevier Sciences Publishers (North-Holland), Amsterdam}, doi = {10.1016/j.ipl.2007.11.017}, abstract = {Summary: The edit distance between given two strings $X$ and $Y$ is the minimum number of edit operations that transform $X$ into $Y$ without performing multiple operations that involve the same position. Ordinarily, string editing is based on character insert, delete, and substitute operations. Motivated from the facts that substring reversals are observed in genomic sequences, and it is not always possible to transform a given sequence $X$ into a given sequence $Y$ by reversals alone (e.g., $X$ is all 0's, and $Y$ is all 1's), {\it S. Muthukrishnan} and {\it S. C. Sahinalp} [``Approximate nearest neighbors and sequence comparison with block operations", in: Proceedings of the Thirty-Second Annual ACM Symposium on Theory of Computing. New York: ACM. 416--424 (2000); ``An efficient algorithm for sequence comparison with block reversals", Theor. Comput. Sci. 321, No.~1, 95--101 (2004; Zbl 1068.68115)] considered a ``simple'' well-defined edit distance model in which the edit operations are: replace a character, and reverse and replace a substring. A substring of $X$ can only be reversed if the reversal results in a match in the same position in $Y$. The cost of each character replacement and substring reversal is 1. The distance in this case is defined only when $|X|=|Y|=n$. There is an algorithm for computing the distance in this model with worst-case time complexity $O(n\log ^{2}n)$ [loc.~cit.]. We present a dynamic programming algorithm with worst-case time complexity $O(n^{2})$ but its expected running-time is $O(n)$. In our dynamic programming solution the weights of edit operations can vary for different substitutions, and the costs of reversals can be a function of the reversal-length.}, identifier = {05694935}, }