@inbook {IOPORT.05222912, author = {Cortes, Corinna and Kontorovich, Leonid and Mohri, Mehryar}, title = {Learning languages with rational kernels.}, year = {2007}, booktitle = {Learning theory. 20th annual conference on learning theory, COLT 2007, San Diego, CA, USA, June 13--15, 2007. Proceedings.}, isbn = {978-3-540-72925-9}, pages = {349-364}, publisher = {Berlin: Springer}, doi = {10.1007/978-3-540-72927-3_26}, abstract = {Summary: We present a general study of learning and linear separability with rational kernels, the sequence kernels commonly used in computational biology and natural language processing. We give a characterization of the class of all languages linearly separable with rational kernels and prove several properties of the class of languages linearly separable with a fixed rational kernel. In particular, we show that for kernels with transducer values in a finite set, these languages are necessarily finite Boolean combinations of preimages by a transducer of a single sequence. We also analyze the margin properties of linear separation with rational kernels and show that kernels with transducer values in a finite set guarantee a positive margin and lead to better learning guarantees. Creating a rational kernel with values in a finite set is often non-trivial even for relatively simple cases. However, we present a novel and general algorithm, double-tape disambiguation, that takes as input a transducer mapping sequences to sequence features, and yields an associated transducer that defines a finite range rational kernel. We describe the algorithm in detail and show its application to several cases of interest.}, identifier = {05222912}, }