% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0
% - See more at: http://opendatacommons.org/licenses/by/1-0/
%
% Layout note: every entry starts on its own line, separate from these header
% lines, so that tools which treat a percent sign as a comment running to the
% end of the line cannot swallow an entry. Field values are unchanged; long
% abstracts are wrapped across lines.

% Volume 4, Issue 1, 2018 (In Progress)

% Pages 1--13: stream characterization model and stream similarity method.
@Article{OJBD_2018v4i1n01_Jesus,
  title     = {Modelling Patterns in Continuous Streams of Data},
  author    = {Ricardo Jesus and Mario Antunes and Diogo Gomes and Rui L. Aguiar},
  journal   = {Open Journal of Big Data (OJBD)},
  issn      = {2365-029X},
  year      = {2018},
  volume    = {4},
  number    = {1},
  pages     = {1--13},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201801234777},
  urn       = {urn:nbn:de:101:1-201801234777},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {The untapped source of information, extracted from the increasing number of sensors,
               can be explored to improve and optimize several systems. Yet, hand in hand with this
               growth goes the increasing difficulty to manage and organize all this new information.
               The lack of a standard context representation scheme is one of the main struggles in
               this research area. Conventional methods for extracting knowledge from data rely on a
               standard representation or a priori relation, which may not be feasible for IoT and
               M2M scenarios. With this in mind we propose a stream characterization model in order
               to provide the foundations for a novel stream similarity metric. Complementing
               previous work on context organization, we aim to provide an automatic stream
               organizational model without enforcing specific representations. In this paper we
               extend our work on stream characterization and devise a novel similarity method.}
}

% Pages 14--29: resource consumption and security of a smart grid middleware.
@Article{OJBD_2018v4i1n02_Cejka,
  title     = {Operation of Modular Smart Grid Applications Interacting through a Distributed Middleware},
  author    = {Stephan Cejka and Albin Frischenschlager and Mario Faschang and Mark Stefan and Konrad Diwold},
  journal   = {Open Journal of Big Data (OJBD)},
  issn      = {2365-029X},
  year      = {2018},
  volume    = {4},
  number    = {1},
  pages     = {14--29},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201801212419},
  urn       = {urn:nbn:de:101:1-201801212419},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {IoT-functionality can broaden the scope of distribution system automation in terms of
               functionality and communication. However, it also poses risks regarding resource
               consumption and security. This article presents a field approved IoT-enabled smart
               grid middleware, which allows for flexible deployment and management of applications
               within smart grid operation. In the first part of the work, the resource consumption
               of the middleware is analyzed and current memory bottlenecks are identified. The
               bottlenecks can be resolved by introducing a new entity that allows to dynamically
               load multiple applications within one JVM. The performance was experimentally tested
               and the results suggest that its application can significantly reduce the
               applications' memory footprint on the physical device. The second part of the study
               identifies and discusses potential security threats, with a focus on attacks stemming
               from malicious software applications within the framework. In order to prevent such
               attacks a proxy based prevention mechanism is developed and demonstrated.}
}

% Pages 30--51: survey and classification of parallel entity resolution approaches.
@Article{OJBD_2018v4i1n03_Chen,
  title     = {Cloud-Scale Entity Resolution: Current State and Open Challenges},
  author    = {Xiao Chen and Eike Schallehn and Gunter Saake},
  journal   = {Open Journal of Big Data (OJBD)},
  issn      = {2365-029X},
  year      = {2018},
  volume    = {4},
  number    = {1},
  pages     = {30--51},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201804155766},
  urn       = {urn:nbn:de:101:1-201804155766},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Entity resolution (ER) is a process to identify records in information systems, which
               refer to the same real-world entity. Because in the two recent decades the data
               volume has grown so large, parallel techniques are called upon to satisfy the ER
               requirements of high performance and scalability. The development of parallel ER has
               reached a relatively prosperous stage, and has found its way into several
               applications. In this work, we first comprehensively survey the state of the art of
               parallel ER approaches. From the comprehensive overview, we then extract the
               classification criteria of parallel ER, classify and compare these approaches based
               on these criteria. Finally, we identify open research questions and challenges and
               discuss potential solutions and further research potentials in this field.}
}