% Journal article: Oishi, Sei, Tahara, Ohsuga -- "Semantic diversity", Computers and Security, vol. 94 (2020).
% The abstract is kept in the `abstract` field rather than `note`, so that styles
% which print `note` do not dump the whole abstract into the reference list.
% NOTE(review): the exported value of `pages` was the malformed range 101823--10182
% (end smaller than start, apparently a truncated copy of the same number); it is
% reduced here to the article number 101823 -- confirm against the publisher record.
@article{oai:uec.repo.nii.ac.jp:00009664,
  author   = {Oishi, Keiichiro and Sei, Yuichi and Tahara, Yasuyuki and Ohsuga, Akihiko},
  title    = {Semantic diversity: Privacy considering distance between values of sensitive attribute},
  journal  = {Computers \& Security},
  volume   = {94},
  pages    = {101823},
  month    = jul,
  year     = {2020},
  abstract = {A database that contains personal information and is collected by crowdsensing can be used for various purposes. Therefore, database holders may want to share their databases with other organizations. However, since a database contains information about individuals, database recipients must take privacy concerns into consideration. One of the mainstream privacy protection indicators, l-diversity, guarantees that the probability of identifying a sensitive attribute value of an individual in a database is less than 1/l. However, when there are several semantically similar values in the sensitive attribute, there is a possibility that actual diversity is not satisfied, even if anonymization is performed to satisfy l-diversity. For example, an attacker may know that candidates of Alice's disease are a set of HIV-1(M), HIV-1(N), and HIV-2 if the anonymized database satisfies 3-diversity. In this case, the attacker can conclude that Alice has HIV, although the detailed type remains unknown. In this research, to solve how actual diversity cannot be taken into consideration with existing l-diversity, we proposed a novel privacy indicator, (l, d)-semantic diversity, and an algorithm that anonymizes a database to satisfy (l, d)-semantic diversity. We also proposed an analysis algorithm that is suitable for the proposed anonymizing algorithm because the output of the anonymizing algorithm is difficult to understand. Our proposed algorithms were experimentally evaluated using synthetic and real datasets.},
}