% Conference paper in the IEA/AIE 2011 proceedings (Part I), published by Springer.
% Typed as inproceedings so that booktitle is actually rendered by standard styles.
@inproceedings{IOPORT.05965482,
  author     = {Hong, Tzung-Pei and Lin, Chun-Wei and Yang, Kuo-Tung and Wang, Shyue-Liang},
  title      = {A heuristic data-sanitization approach based on {TF-IDF}},
  booktitle  = {Modern approaches in applied intelligence. 24th international conference on industrial engineering and other applications of applied intelligent systems, IEA/AIE 2011, Syracuse, NY, USA, June 28 -- July 1, 2011. Proceedings, Part I},
  year       = {2011},
  pages      = {156--164},
  publisher  = {Springer},
  address    = {Berlin},
  isbn       = {978-3-642-21821-7},
  doi        = {10.1007/978-3-642-21822-4_17},
  abstract   = {Summary: Data mining technology can help extract useful knowledge from large data sets. The process of data collection and data dissemination may, however, result in an inherent risk of privacy threats. In this paper, the SIF-IDF algorithm is proposed to modify original databases in order to hide sensitive itemsets. It is a greedy approach based on the concept of the Term Frequency and Inverse Document Frequency (TF-IDF) borrowed from text mining. Experimental results also show the performance of the proposed approach.},
  identifier = {05965482},
}