% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0 - See more at: http://opendatacommons.org/licenses/by/1-0/

% Volume 4, Issue 1, 2017
%
% Conventions used in the entries below:
%   - Author names are stored as "Last, First" so the surname split is unambiguous.
%   - Braced words inside titles (acronyms, proper nouns) are protected from the
%     sentence-casing that many bibliography styles apply to the title field.
%   - Line breaks inside the abstract fields are plain whitespace.

% Volume 4, Issue 1, article 1 (pages 1--21).
@Article{OJDB_2017v4i1n01_Lindstroem,
  title     = {An {NVM} Aware {MariaDB} Database System and Associated {IO} Workload on File Systems},
  author    = {Lindstr{\"o}m, Jan and
               Das, Dhananjoy and
               Piggin, Nick and
               Konundinya, Santhosh and
               Mathiasen, Torben and
               Talagala, Nisha and
               Arteaga, Dulcardo},
  journal   = {Open Journal of Databases (OJDB)},
  issn      = {2199-3459},
  year      = {2017},
  volume    = {4},
  number    = {1},
  pages     = {1--21},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194662},
  urn       = {urn:nbn:de:101:1-201705194662},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {MariaDB is a community-developed fork of the MySQL relational database management system and originally designed and implemented in order to use the traditional spinning disk architecture. With Non-Volatile memory (NVM) technology now in the forefront and main stream for server storage (Data centers), MariaDB addresses the need by adding support for NVM devices and introduces NVM Compression method. NVM Compression is a novel hybrid technique that combines application level compression with flash awareness for optimal performance and storage efficiency. Utilizing new interface primitives exported by Flash Translation Layers (FTLs), we leverage the garbage collection available in flash devices to optimize the capacity management required by compression systems. We implement NVM Compression in the popular MariaDB database and use variants of commonly available POSIX file system interfaces to provide the extended FTL capabilities to the user space application.
The experimental results show that the hybrid approach of NVM Compression can improve compression performance by 2-7x, deliver compression performance for flash devices that is within 5\% of uncompressed performance, improve storage efficiency by 19\% over legacy Row-Compression, reduce data writes by up to 4x when combined with other flash aware techniques such as Atomic Writes, and deliver further advantages in power efficiency and CPU utilization. Various micro benchmark measurement and findings on sparse files call for required improvement in file systems for handling of punch hole operations on files.}
}

% Volume 4, Issue 1, article 2 (pages 22--42).
@Article{OJDB_2017v4i1n02_Marten,
  title     = {Machine Learning on Large Databases: Transforming Hidden {Markov} Models to {SQL} Statements},
  author    = {Marten, Dennis and
               Heuer, Andreas},
  journal   = {Open Journal of Databases (OJDB)},
  issn      = {2199-3459},
  year      = {2017},
  volume    = {4},
  number    = {1},
  pages     = {22--42},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-2017100112181},
  urn       = {urn:nbn:de:101:1-2017100112181},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Machine Learning is a research field with substantial relevance for many applications in different areas. Because of technical improvements in sensor technology, its value for real life applications has even increased within the last years. Nowadays, it is possible to gather massive amounts of data at any time with comparatively little costs. While this availability of data could be used to develop complex models, its implementation is often narrowed because of limitations in computing power. In order to overcome performance problems, developers have several options, such as improving their hardware, optimizing their code, or use parallelization techniques like the MapReduce framework. Anyhow, these options might be too cost intensive, not suitable, or even too time expensive to learn and realize.
Following the premise that developers usually are not SQL experts we would like to discuss another approach in this paper: using transparent database support for Big Data Analytics. Our aim is to automatically transform Machine Learning algorithms to parallel SQL database systems. In this paper, we especially show how a Hidden Markov Model, given in the analytics language R, can be transformed to a sequence of SQL statements. These SQL statements will be the basis for a (inter-operator and intra-operator) parallel execution on parallel DBMS as a second step of our research, not being part of this paper.}
}