% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0 - See more at: http://opendatacommons.org/licenses/by/1-0/

% Journal article: Open Journal of Semantic Web (OJSW), volume 3, number 1 (2016), pages 1--15.
% The accented author name uses the brace-wrapped form {\'e} so that BibTeX treats it as a
% single letter for sorting and label generation.
@Article{OJSW_2016v3i1n01_Peixoto,
  title     = {Hierarchical Multi-Label Classification Using Web Reasoning for Large Datasets},
  author    = {Peixoto, Rafael and Hassan, Thomas and Cruz, Christophe and Bertaux, Aur{\'e}lie and Silva, Nuno},
  journal   = {Open Journal of Semantic Web (OJSW)},
  issn      = {2199-336X},
  year      = {2016},
  volume    = {3},
  number    = {1},
  pages     = {1--15},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194907},
  urn       = {urn:nbn:de:101:1-201705194907},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {Extracting valuable data among large volumes of data is one of the main challenges in Big Data. In this paper, a Hierarchical Multi-Label Classification process called Semantic HMC is presented. This process aims to extract valuable data from very large data sources, by automatically learning a label hierarchy and classifying data items. The Semantic HMC process is composed of five scalable steps, namely Indexation, Vectorization, Hierarchization, Resolution and Realization. The first three steps construct automatically a label hierarchy from statistical analysis of data. This paper focuses on the last two steps which perform item classification according to the label hierarchy. The process is implemented as a scalable and distributed application, and deployed on a Big Data platform. A quality evaluation is described, which compares the approach with multi-label classification algorithms from the state of the art dedicated to the same goal. The Semantic HMC approach outperforms state of the art approaches in some areas.},
}