In the time-decay model for data streams, elements of an underlying data set arrive sequentially, with recently arrived elements being more important. A common approach for handling large data sets is to maintain a coreset, a succinct summary of the processed data that allows approximate recovery of a predetermined query. We provide a general framework that takes any offline coreset construction and gives a time-decay coreset for polynomial time-decay functions. We also consider the exponential time-decay model for k-median clustering, where we provide a constant-factor approximation algorithm that utilizes the online facility location algorithm. Our algorithm stores O(k log(h Δ) + h) points, where h is the half-life of the decay function and Δ is the aspect ratio of the dataset. Our techniques extend to k-means clustering and M-estimators as well.
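To make the ingredients of the exponential-decay result concrete, the following is a minimal sketch that combines exponential time-decay weights (an element that arrived `age` steps ago counts with weight 2^(-age/h), so h is its half-life) with a Meyerson-style online facility location rule. This is only an illustration, not the algorithm from the paper: the function names, the `facility_cost` parameter, and the simplification of evaluating all decay weights at a single reference time (the end of the stream) are assumptions made for the sketch, and it does not attempt to match the O(k log(h Δ) + h) space bound.

```python
import math
import random


def exp_decay_weight(age, half_life):
    """Exponential time-decay weight: an element that arrived `age` steps
    ago counts with weight 2^(-age / half_life)."""
    return 2.0 ** (-age / half_life)


def online_facility_location(stream, facility_cost, half_life):
    """Toy Meyerson-style online facility location on a time-decayed stream.

    `stream` is a list of points (tuples of floats), oldest first.  Each
    point is weighted by its decay factor at the end of the stream.  A point
    opens a new facility with probability min(1, weight * distance /
    facility_cost); otherwise it is served by its nearest open facility.
    """
    facilities = []
    n = len(stream)
    for t, p in enumerate(stream):
        w = exp_decay_weight(n - 1 - t, half_life)   # age measured from the last arrival
        if not facilities:
            facilities.append(p)
            continue
        d = min(math.dist(p, f) for f in facilities)  # distance to nearest facility
        if random.random() < min(1.0, w * d / facility_cost):
            facilities.append(p)
    return facilities


# Example usage on a tiny 2D stream (values chosen arbitrarily):
# online_facility_location([(0, 0), (0.1, 0.2), (5, 5), (5.1, 4.9)],
#                          facility_cost=1.0, half_life=2.0)
```

In a genuine streaming setting the decay weights keep changing as time advances, which is one of the difficulties a time-decay algorithm must handle; the sketch sidesteps this by fixing a single reference time.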
@InProceedings{braverman_et_al:LIPIcs.APPROX-RANDOM.2019.27,
  author    = {Braverman, Vladimir and Lang, Harry and Ullah, Enayat and Zhou, Samson},
  title     = {{Improved Algorithms for Time Decay Streams}},
  booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques (APPROX/RANDOM 2019)},
  pages     = {27:1--27:17},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  ISBN      = {978-3-95977-125-2},
  ISSN      = {1868-8969},
  year      = {2019},
  volume    = {145},
  editor    = {Achlioptas, Dimitris and V\'{e}gh, L\'{a}szl\'{o} A.},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  URL       = {http://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX-RANDOM.2019.27},
  URN       = {urn:nbn:de:0030-drops-112429},
  doi       = {10.4230/LIPIcs.APPROX-RANDOM.2019.27},
  annote    = {Keywords: Streaming algorithms, approximation algorithms, facility location and clustering}
}