% This data is distributed under the terms of the Open Data Commons Attribution License (ODC-By) v1.0 - See more at: http://opendatacommons.org/licenses/by/1-0/

% Journal article record: Open Journal of Big Data (OJBD), volume 1, number 1, pages 16--28 (2015).
% Authors are stored in "Last, First" form; the proper noun in the title is
% brace-protected so sentence-casing bibliography styles keep its capital.
@Article{OJBD_2015v1i1n03_YR,
  title     = {Cognitive Spam Recognition Using {Hadoop} and Multicast-Update},
  author    = {YR, Mukund and Nayak, Sunil Sandeep and Chandrasekaran, K.},
  journal   = {Open Journal of Big Data (OJBD)},
  issn      = {2365-029X},
  year      = {2015},
  volume    = {1},
  number    = {1},
  pages     = {16--28},
  url       = {http://nbn-resolving.de/urn:nbn:de:101:1-201705194340},
  urn       = {urn:nbn:de:101:1-201705194340},
  publisher = {RonPub},
  bibsource = {RonPub},
  abstract  = {In today's world of exponentially growing technology, spam is a very common issue faced by users on the internet. Spam not only hinders the performance of a network, but it also wastes space and time, and causes general irritation and presents a multitude of dangers - of viruses, malware, spyware and consequent system failure, identity theft, and other cyber criminal activity. In this context, cognition provides us with a method to help improve the performance of the distributed system. It enables the system to learn what it is supposed to do for different input types as different classifications are made over time and this learning helps it increase its accuracy as time passes. Each system on its own can only do so much learning, because of the limited sample set of inputs that it gets to process. However, in a network, we can make sure that every system knows the different kinds of inputs available and learns what it is supposed to do with a better success rate. Thus, distribution and combination of this cognition across different components of the network leads to an overall improvement in the performance of the system. In this paper, we describe a method to make machines cognitively label spam using Machine Learning and the Naive Bayesian approach. We also present two possible methods of implementation - using a MapReduce Framework (hadoop), and also using messages coupled with a multicast-send based network - with their own subtypes, and the pros and cons of each. We finally present a comparative analysis of the two main methods and provide a basic idea about the usefulness of the two in various different scenarios.},
}