@misc{oai:uec.repo.nii.ac.jp:00001171, author = {太田, 隆博 and Ota, Takahiro}, month = {2016-09-16}, note = {2007, Data compression is particularly useful in systems where resources arescarce, e.g., limited bandwidth in communication systems and the capacityof storage systems. A wide variety of data compression algorithms have beenproposed for inherently digital data such as text and digitized audio, imageand video.To compress an input string efficiently, data compression algorithms typicallyuse a dictionary to construct statistical models and replace substringswith indices in the dictionary. The dictionary is the set of all substrings ofan input string, and it is represented by a tree representation such as a suffixtree in many applications.In 2000, Crochemore et al. [CMRS00] proposed an off-line lossless datacompression algorithm using an antidictionary of an input binary string.The antidictionary is the set of all minimal strings that never appear inthe string. They showed that their method, called Data Compression usingAntidictionaries (DCA), achieves compression ratios that are as goodas the Lempel-Ziv (LZ) algorithms [ZL77, ZL78]. In 2005, to improve thecompression ratios, Ohkawa, Harada and Yamamoto [OHY05] applied anarithmetic coding to an off-line DCA method for binary strings. In otherwords, the authors used antidictionaries as statistical models for an arithmeticcoding. It was shown by simulation that their method, called Ohkawa-Harada-Yamamoto (OHY) method, achieves better compression ratios thanthe DCA method.Both a dictionary and an antidictionary are useful for data compression.The combination of the antidictionary and the dictionary are expected toprovide efficient statistical models for arithmetic coding.The main goal of this thesis is to achieve: Construction of an antidictionary using a suffix tree in linear time andspace. An on-line DCA method using dynamic suffix trees in linear time andspace. An on-line arithmetic coding based on antidictionaries using dynamicsuffix trees in linear time and space. One-pass lossless data compression for ElectroCardioGrams (ECGs)using antidictionaries.Traditional construction algorithm of an antidictionary using suffix treesrequires quadratic computational time with respect to a given string length.To reduce this computational time, we introduce a new kind of pointer calledan MF-link in the suffix tree. By using MF-links, the proposed constructionalgorithm operates in linear time and space. We prove that the proposed algorithmworks in linear time and space with respect to the string length, andexperimental results show that the proposed algorithm is fast and memoryefficient.On-line DCA methods can achieve better compression ratios than off-lineDCA methods. However, on-line traditional DCA methods need quadraticcomputational time with respect to the string length, since the method requiresupdating the antidictionary and its encoder when a new symbol isread. To reduce this computational time, we show useful conditions to implementthe on-line DCA using only suffix trees without constructing the antidictionary.By using these conditions, we propose an on-line DCA methodusing dynamic suffix trees without updating antidictionaries. Moreover, weapply arithmetic coding to the proposed algorithm. We prove that the proposedalgorithms work in linear time and space with respect to the stringlength. Experimental results show that the proposed method applied with anarithmetic coding achieves better compression ratios than traditional on-lineDCA for almost all files on Calgary Corpus [Cal], and this approach givesa 3% decrease in compressed file size relative the on-line DCA method anda 1% decrease in the size relative to the OHY by simulation results for theCalgary Corpus.Finally, we propose a one-pass ECG lossless compression method usingantidictionaries. The proposed algorithm constructs an encoder of the DCAfrom the substring of ECG, called the learning string, by means of the propertythat each ECG signal is an almost periodic waveform. We show thatthe length of learning string needed to construct the antidictionary whosesize is almost the same as that of the entire string of ECG using the resultsof coupon collector’s problem. Experimental results show that the proposedalgorithm gives 15% decrease in compressed file size relative to the LZ algorithmsfor ECG files of the MIT-BIH Arrhythmia Database [MIT]. Moreover,it is shown that the proposed algorithm can implement to compress the ECGfiles for real-time by simulation results.}, title = {Antidictionary Data Compression Using Dynamic Suffix Trees}, year = {}, yomi = {オオタ, タカヒロ} }