% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0 - See more at: http://opendatacommons.org/licenses/by/1-0/

% Journal article: OJIOT, volume 5, number 1 (2019), pages 47--57.
% The entry starts on its own line so that parsers which read a top-level
% percent sign as a comment-to-end-of-line do not swallow it.
@Article{OJIOT_2019v5i1n04_Semmler,
  author    = {Semmler, Niklas and Smaragdakis, Georgios and Feldmann, Anja},
  title     = {Online Replication Strategies for Distributed Data Stores},
  journal   = {Open Journal of Internet Of Things (OJIOT)},
  issn      = {2364-7108},
  year      = {2019},
  volume    = {5},
  number    = {1},
  pages     = {47--57},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-2019092919335387371884},
  urn       = {urn:nbn:de:101:1-2019092919335387371884},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {The rate at which data is produced at the network edge, e.g., collected from sensors and Internet of Things (IoT) devices, will soon exceed the storage and processing capabilities of a single system and the capacity of the network. Thus, data will need to be collected and preprocessed in distributed data stores - as part of a distributed database - at the network edge. Yet, even in this setup, the transfer of query results will incur prohibitive costs. To further reduce the data transfers, patterns in the workloads must be exploited. Particularly in IoT scenarios, we expect data access to be highly skewed. Most data will be store-only, while a fraction will be popular. Here, the replication of popular, raw data, as opposed to the shipment of partially redundant query results, can reduce the volume of data transfers over the network. In this paper, we design online strategies to decide between replicating data from data stores or forwarding the queries and retrieving their results. Our insight is that by profiling access patterns of the data we can lower the data transfer cost and the corresponding response times. We evaluate the benefit of our strategies using two real-world datasets.},
}